From 220b0f5755f86745e4e16d001fe6f46b448565fa Mon Sep 17 00:00:00 2001
From: "Artem B. Bityutskiy" <dedekind@infradead.org>
Date: Wed, 5 Jul 2006 11:04:02 +0400
Subject: [PATCH 0001/1063] [PATCH] [MTD] NAND: fix dead URL in Kconfig

Signed-off-by: Artem B. Bityutskiy <dedekind@infradead.org>
---
 drivers/mtd/nand/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 3db77eec0ed25..c99302ed38232 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -11,7 +11,7 @@ config MTD_NAND
 	help
 	  This enables support for accessing all type of NAND flash
 	  devices. For further information see
-	  <http://www.linux-mtd.infradead.org/tech/nand.html>.
+	  <http://www.linux-mtd.infradead.org/doc/nand.html>.
 
 config MTD_NAND_VERIFY_WRITE
 	bool "Verify NAND page writes"
-- 
GitLab


From 90a18fab4ae07b77bf053b75a4d1285cd94faa79 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 6 Jul 2006 22:37:43 +0200
Subject: [PATCH 0002/1063] make fs/jffs2/nodelist.c:jffs2_obsolete_node_frag()
 static

This patch makes the needlessly global jffs2_obsolete_node_frag()
static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 fs/jffs2/nodelist.c | 6 +++++-
 fs/jffs2/nodelist.h | 1 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 7675b33396c7d..5a6b4d64206c2 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -21,6 +21,9 @@
 #include <linux/pagemap.h>
 #include "nodelist.h"
 
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+				     struct jffs2_node_frag *this);
+
 void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list)
 {
 	struct jffs2_full_dirent **prev = list;
@@ -87,7 +90,8 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint
 	}
 }
 
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this)
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+				     struct jffs2_node_frag *this)
 {
 	if (this->node) {
 		this->node->frags--;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index cae92c14116dc..0ddfd70307fb3 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -334,7 +334,6 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c_delete);
 struct rb_node *rb_next(struct rb_node *);
 struct rb_node *rb_prev(struct rb_node *);
 void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root);
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this);
 int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
 void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
 int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
-- 
GitLab


From fda7ffd25fc5bbe1b4209dfafb854c7ad7308c93 Mon Sep 17 00:00:00 2001
From: Niels Kristian Bech Jensen <nkbj@mail.tele.dk>
Date: Sun, 2 Jul 2006 13:02:27 +0200
Subject: [PATCH 0003/1063] [POWERPC] Add -fno-stack-protector to BOOTCFLAGS in
 arch/powerpc/boot/Makefile.

I got some undefined references to __stack_chk_fail in
arch/powerpc/boot/stdio.o and arch/powerpc/boot/prom.o when I was trying
to build a kernel on Ubuntu Edgy Eft - which includes Stack Smashing
Protection.

This patch adds -fno-stack-protector to BOOTCFLAGS in
arch/powerpc/boot/Makefile (why does BOOTCFLAGS depend on HOSTCFLAGS and
not CFLAGS?).

Regards,
Niels Kristian Bech Jensen

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/boot/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index d961bfeed05fc..afc776f821e5e 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -41,6 +41,10 @@ src-boot += $(zlib)
 src-boot := $(addprefix $(obj)/, $(src-boot))
 obj-boot := $(addsuffix .o, $(basename $(src-boot)))
 
+ifeq ($(call cc-option-yn, -fstack-protector),y)
+BOOTCFLAGS	+= -fno-stack-protector
+endif
+
 BOOTCFLAGS	+= -I$(obj) -I$(srctree)/$(obj)
 
 quiet_cmd_copy_zlib = COPY    $@
-- 
GitLab


From 3a09aa4730f021ad917a66a0c6d2ff6d616a7e4f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 3 Jul 2006 14:28:14 +0200
Subject: [PATCH 0004/1063] [POWERPC] fix up front-LED Kconfig

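Replace the deprecated BLK_DEV_IDE_PMAC_BLINK option in
drivers/ide/Kconfig with a new ADB_PMU_LED_IDE option in
drivers/macintosh/Kconfig, and make via-pmu-led.c select its default
"ide-disk" trigger based on the new option.

Rather long patch, because apparently no one has updated the
pmac32_defconfig in a while.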

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/configs/pmac32_defconfig | 112 ++++++++++++++++++++++----
 drivers/ide/Kconfig                   |  14 ----
 drivers/macintosh/Kconfig             |   9 +++
 drivers/macintosh/via-pmu-led.c       |   2 +-
 4 files changed, 106 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index addc79381c3b3..3545af9896af0 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -1,16 +1,18 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.17-rc5
-# Mon May 29 14:47:49 2006
+# Linux kernel version: 2.6.17
+# Mon Jul  3 14:20:49 2006
 #
 # CONFIG_PPC64 is not set
 CONFIG_PPC32=y
 CONFIG_PPC_MERGE=y
 CONFIG_MMU=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_IRQ_PER_CPU=y
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_PPC=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_GENERIC_NVRAM=y
@@ -29,6 +31,7 @@ CONFIG_CLASSIC32=y
 # CONFIG_PPC_82xx is not set
 # CONFIG_PPC_83xx is not set
 # CONFIG_PPC_85xx is not set
+# CONFIG_PPC_86xx is not set
 # CONFIG_40x is not set
 # CONFIG_44x is not set
 # CONFIG_8xx is not set
@@ -39,6 +42,7 @@ CONFIG_ALTIVEC=y
 CONFIG_PPC_STD_MMU=y
 CONFIG_PPC_STD_MMU_32=y
 # CONFIG_SMP is not set
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
 # Code maturity level options
@@ -72,10 +76,12 @@ CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
+CONFIG_RT_MUTEXES=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
 CONFIG_SLAB=y
+CONFIG_VM_EVENT_COUNTERS=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
@@ -119,6 +125,9 @@ CONFIG_PPC_MULTIPLATFORM=y
 # CONFIG_APUS is not set
 # CONFIG_PPC_CHRP is not set
 CONFIG_PPC_PMAC=y
+# CONFIG_PPC_CELL is not set
+# CONFIG_PPC_CELL_NATIVE is not set
+# CONFIG_UDBG_RTAS_CONSOLE is not set
 CONFIG_MPIC=y
 # CONFIG_PPC_RTAS is not set
 # CONFIG_MMIO_NVRAM is not set
@@ -154,6 +163,7 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT is not set
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 # CONFIG_KEXEC is not set
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -164,6 +174,7 @@ CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
 CONFIG_PROC_DEVICETREE=y
 # CONFIG_CMDLINE_BOOL is not set
 CONFIG_PM=y
@@ -182,6 +193,7 @@ CONFIG_GENERIC_ISA_DMA=y
 CONFIG_PPC_INDIRECT_PCI=y
 CONFIG_PCI=y
 CONFIG_PCI_DOMAINS=y
+# CONFIG_PCIEPORTBUS is not set
 # CONFIG_PCI_DEBUG is not set
 
 #
@@ -256,6 +268,8 @@ CONFIG_INET_ESP=y
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
@@ -268,6 +282,7 @@ CONFIG_TCP_CONG_BIC=y
 # CONFIG_IPV6 is not set
 # CONFIG_INET6_XFRM_TUNNEL is not set
 # CONFIG_INET6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 
@@ -292,9 +307,11 @@ CONFIG_NETFILTER_XT_MATCH_MARK=m
 CONFIG_NETFILTER_XT_MATCH_POLICY=m
 CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
 CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_SCTP=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
+# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set
 CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 
@@ -313,6 +330,7 @@ CONFIG_IP_NF_TFTP=m
 CONFIG_IP_NF_AMANDA=m
 CONFIG_IP_NF_PPTP=m
 CONFIG_IP_NF_H323=m
+# CONFIG_IP_NF_SIP is not set
 # CONFIG_IP_NF_QUEUE is not set
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_IPRANGE=m
@@ -457,6 +475,7 @@ CONFIG_IRTTY_SIR=m
 # CONFIG_ALI_FIR is not set
 # CONFIG_VLSI_FIR is not set
 # CONFIG_VIA_FIR is not set
+# CONFIG_MCS_FIR is not set
 CONFIG_BT=m
 CONFIG_BT_L2CAP=m
 CONFIG_BT_SCO=m
@@ -500,6 +519,7 @@ CONFIG_WIRELESS_EXT=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
 # CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
 
 #
 # Connector - unified userspace <-> kernelspace linker
@@ -600,7 +620,6 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y
 CONFIG_BLK_DEV_IDE_PMAC=y
 CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
 CONFIG_BLK_DEV_IDEDMA_PMAC=y
-CONFIG_BLK_DEV_IDE_PMAC_BLINK=y
 # CONFIG_IDE_ARM is not set
 CONFIG_BLK_DEV_IDEDMA=y
 # CONFIG_IDEDMA_IVB is not set
@@ -661,6 +680,7 @@ CONFIG_SCSI_AIC7XXX_OLD=m
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 # CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_HPTIOP is not set
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
@@ -705,9 +725,7 @@ CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID10=m
-CONFIG_MD_RAID5=m
-CONFIG_MD_RAID5_RESHAPE=y
-CONFIG_MD_RAID6=m
+# CONFIG_MD_RAID456 is not set
 CONFIG_MD_MULTIPATH=m
 CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
@@ -750,7 +768,6 @@ CONFIG_IEEE1394_OHCI1394=m
 #
 CONFIG_IEEE1394_VIDEO1394=m
 CONFIG_IEEE1394_SBP2=m
-# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set
 # CONFIG_IEEE1394_ETH1394 is not set
 CONFIG_IEEE1394_DV1394=m
 CONFIG_IEEE1394_RAWIO=m
@@ -766,9 +783,12 @@ CONFIG_IEEE1394_RAWIO=m
 CONFIG_ADB=y
 CONFIG_ADB_CUDA=y
 CONFIG_ADB_PMU=y
+CONFIG_ADB_PMU_LED=y
+CONFIG_ADB_PMU_LED_IDE=y
 CONFIG_PMAC_APM_EMU=m
 CONFIG_PMAC_MEDIABAY=y
 CONFIG_PMAC_BACKLIGHT=y
+CONFIG_PMAC_BACKLIGHT_LEGACY=y
 CONFIG_INPUT_ADBHID=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_THERM_WINDTUNNEL=m
@@ -858,6 +878,7 @@ CONFIG_PCNET32=y
 # CONFIG_CHELSIO_T1 is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_MYRI10GE is not set
 
 #
 # Token Ring devices
@@ -908,6 +929,7 @@ CONFIG_APPLE_AIRPORT=m
 # Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support
 #
 CONFIG_PRISM54=m
+# CONFIG_USB_ZD1201 is not set
 # CONFIG_HOSTAP is not set
 CONFIG_NET_WIRELESS=y
 
@@ -998,6 +1020,7 @@ CONFIG_SERIO=y
 CONFIG_VT=y
 CONFIG_VT_CONSOLE=y
 CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
 # CONFIG_SERIAL_NONSTANDARD is not set
 
 #
@@ -1029,6 +1052,7 @@ CONFIG_LEGACY_PTY_COUNT=256
 # Watchdog Cards
 #
 # CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
 CONFIG_NVRAM=y
 CONFIG_GEN_RTC=y
 # CONFIG_GEN_RTC_X is not set
@@ -1040,6 +1064,7 @@ CONFIG_GEN_RTC=y
 # Ftape, the floppy tape device driver
 #
 CONFIG_AGP=m
+# CONFIG_AGP_SIS is not set
 # CONFIG_AGP_VIA is not set
 CONFIG_AGP_UNINORTH=m
 CONFIG_DRM=m
@@ -1092,6 +1117,7 @@ CONFIG_I2C_ALGOBIT=y
 CONFIG_I2C_POWERMAC=y
 # CONFIG_I2C_MPC is not set
 # CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_OCORES is not set
 # CONFIG_I2C_PARPORT_LIGHT is not set
 # CONFIG_I2C_PROSAVAGE is not set
 # CONFIG_I2C_SAVAGE4 is not set
@@ -1156,12 +1182,13 @@ CONFIG_VIDEO_V4L2=y
 #
 # Graphics support
 #
+# CONFIG_FIRMWARE_EDID is not set
 CONFIG_FB=y
 CONFIG_FB_CFB_FILLRECT=y
 CONFIG_FB_CFB_COPYAREA=y
 CONFIG_FB_CFB_IMAGEBLIT=y
 CONFIG_FB_MACMODES=y
-CONFIG_FB_FIRMWARE_EDID=y
+CONFIG_FB_BACKLIGHT=y
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_TILEBLITTING=y
 # CONFIG_FB_CIRRUS is not set
@@ -1178,6 +1205,7 @@ CONFIG_FB_IMSTT=y
 # CONFIG_FB_S1D13XXX is not set
 CONFIG_FB_NVIDIA=y
 CONFIG_FB_NVIDIA_I2C=y
+CONFIG_FB_NVIDIA_BACKLIGHT=y
 # CONFIG_FB_RIVA is not set
 CONFIG_FB_MATROX=y
 CONFIG_FB_MATROX_MILLENIUM=y
@@ -1187,12 +1215,15 @@ CONFIG_FB_MATROX_MYSTIQUE=y
 # CONFIG_FB_MATROX_MULTIHEAD is not set
 CONFIG_FB_RADEON=y
 CONFIG_FB_RADEON_I2C=y
+CONFIG_FB_RADEON_BACKLIGHT=y
 # CONFIG_FB_RADEON_DEBUG is not set
 CONFIG_FB_ATY128=y
+CONFIG_FB_ATY128_BACKLIGHT=y
 CONFIG_FB_ATY=y
 CONFIG_FB_ATY_CT=y
 # CONFIG_FB_ATY_GENERIC_LCD is not set
 CONFIG_FB_ATY_GX=y
+CONFIG_FB_ATY_BACKLIGHT=y
 # CONFIG_FB_SAVAGE is not set
 # CONFIG_FB_SIS is not set
 # CONFIG_FB_NEOMAGIC is not set
@@ -1221,7 +1252,11 @@ CONFIG_LOGO=y
 CONFIG_LOGO_LINUX_MONO=y
 CONFIG_LOGO_LINUX_VGA16=y
 CONFIG_LOGO_LINUX_CLUT224=y
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_DEVICE=y
+CONFIG_LCD_CLASS_DEVICE=m
+CONFIG_LCD_DEVICE=y
 
 #
 # Sound
@@ -1278,6 +1313,18 @@ CONFIG_SND_DUMMY=m
 # CONFIG_SND_CMIPCI is not set
 # CONFIG_SND_CS4281 is not set
 # CONFIG_SND_CS46XX is not set
+# CONFIG_SND_DARLA20 is not set
+# CONFIG_SND_GINA20 is not set
+# CONFIG_SND_LAYLA20 is not set
+# CONFIG_SND_DARLA24 is not set
+# CONFIG_SND_GINA24 is not set
+# CONFIG_SND_LAYLA24 is not set
+# CONFIG_SND_MONA is not set
+# CONFIG_SND_MIA is not set
+# CONFIG_SND_ECHO3G is not set
+# CONFIG_SND_INDIGO is not set
+# CONFIG_SND_INDIGOIO is not set
+# CONFIG_SND_INDIGODJ is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1314,6 +1361,17 @@ CONFIG_SND_DUMMY=m
 CONFIG_SND_POWERMAC=m
 CONFIG_SND_POWERMAC_AUTO_DRC=y
 
+#
+# Apple Onboard Audio driver
+#
+CONFIG_SND_AOA=m
+CONFIG_SND_AOA_FABRIC_LAYOUT=m
+CONFIG_SND_AOA_ONYX=m
+CONFIG_SND_AOA_TAS=m
+CONFIG_SND_AOA_TOONIE=m
+CONFIG_SND_AOA_SOUNDBUS=m
+CONFIG_SND_AOA_SOUNDBUS_I2S=m
+
 #
 # USB devices
 #
@@ -1355,6 +1413,7 @@ CONFIG_USB_DYNAMIC_MINORS=y
 CONFIG_USB_EHCI_HCD=m
 CONFIG_USB_EHCI_SPLIT_ISO=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
 # CONFIG_USB_ISP116X_HCD is not set
 CONFIG_USB_OHCI_HCD=y
 # CONFIG_USB_OHCI_BIG_ENDIAN is not set
@@ -1431,7 +1490,6 @@ CONFIG_USB_NET_NET1080=m
 # CONFIG_USB_NET_RNDIS_HOST is not set
 # CONFIG_USB_NET_CDC_SUBSET is not set
 CONFIG_USB_NET_ZAURUS=m
-# CONFIG_USB_ZD1201 is not set
 CONFIG_USB_MON=y
 
 #
@@ -1499,10 +1557,12 @@ CONFIG_USB_EZUSB=y
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LCD is not set
 # CONFIG_USB_LED is not set
+# CONFIG_USB_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
 # CONFIG_USB_PHIDGETKIT is not set
 # CONFIG_USB_PHIDGETSERVO is not set
 # CONFIG_USB_IDMOUSE is not set
+CONFIG_USB_APPLEDISPLAY=m
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TEST is not set
@@ -1524,7 +1584,8 @@ CONFIG_USB_EZUSB=y
 #
 # LED devices
 #
-# CONFIG_NEW_LEDS is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
 
 #
 # LED drivers
@@ -1533,6 +1594,10 @@ CONFIG_USB_EZUSB=y
 #
 # LED Triggers
 #
+CONFIG_LEDS_TRIGGERS=y
+# CONFIG_LEDS_TRIGGER_TIMER is not set
+CONFIG_LEDS_TRIGGER_IDE_DISK=y
+# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 
 #
 # InfiniBand support
@@ -1548,6 +1613,19 @@ CONFIG_USB_EZUSB=y
 #
 # CONFIG_RTC_CLASS is not set
 
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+
 #
 # File systems
 #
@@ -1569,6 +1647,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
 CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 # CONFIG_AUTOFS_FS is not set
@@ -1649,6 +1728,7 @@ CONFIG_RPCSEC_GSS_KRB5=y
 CONFIG_SMB_FS=m
 # CONFIG_SMB_NLS_DEFAULT is not set
 # CONFIG_CIFS is not set
+# CONFIG_CIFS_DEBUG2 is not set
 # CONFIG_NCP_FS is not set
 # CONFIG_CODA_FS is not set
 # CONFIG_AFS_FS is not set
@@ -1732,6 +1812,7 @@ CONFIG_TEXTSEARCH=y
 CONFIG_TEXTSEARCH_KMP=m
 CONFIG_TEXTSEARCH_BM=m
 CONFIG_TEXTSEARCH_FSM=m
+CONFIG_PLIST=y
 
 #
 # Instrumentation Support
@@ -1744,12 +1825,15 @@ CONFIG_OPROFILE=y
 #
 # CONFIG_PRINTK_TIME is not set
 # CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_KOBJECT is not set
@@ -1763,11 +1847,7 @@ CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
 # CONFIG_BDI_SWITCH is not set
 CONFIG_BOOTX_TEXT=y
-# CONFIG_PPC_EARLY_DEBUG_LPAR is not set
-# CONFIG_PPC_EARLY_DEBUG_G5 is not set
-# CONFIG_PPC_EARLY_DEBUG_RTAS is not set
-# CONFIG_PPC_EARLY_DEBUG_MAPLE is not set
-# CONFIG_PPC_EARLY_DEBUG_ISERIES is not set
+# CONFIG_PPC_EARLY_DEBUG is not set
 
 #
 # Security options
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index d1266fe2d1abc..53bba41f29bcc 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -773,20 +773,6 @@ config BLK_DEV_IDEDMA_PMAC
 	  to transfer data to and from memory.  Saying Y is safe and improves
 	  performance.
 
-config BLK_DEV_IDE_PMAC_BLINK
-	bool "Blink laptop LED on drive activity (DEPRECATED)"
-	depends on BLK_DEV_IDE_PMAC && ADB_PMU
-	select ADB_PMU_LED
-	select LEDS_TRIGGERS
-	select LEDS_TRIGGER_IDE_DISK
-	help
-	  This option enables the use of the sleep LED as a hard drive
-	  activity LED.
-	  This option is deprecated, it only selects ADB_PMU_LED and
-	  LEDS_TRIGGER_IDE_DISK and changes the code in the new led class
-	  device to default to the ide-disk trigger (which should be set
-	  from userspace via sysfs).
-
 config BLK_DEV_IDE_SWARM
 	tristate "IDE for Sibyte evaluation boards"
 	depends on SIBYTE_SB1xxx_SOC
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 54f3f6b94efc1..dc6003859e540 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -90,6 +90,15 @@ config ADB_PMU_LED
 	  and the ide-disk LED trigger and configure appropriately through
 	  sysfs.
 
+config ADB_PMU_LED_IDE
+	bool "Use front LED as IDE LED by default"
+	depends on ADB_PMU_LED
+	select LEDS_TRIGGERS
+	select LEDS_TRIGGER_IDE_DISK
+	help
+	  This option makes the front LED default to the IDE trigger
+	  so that it blinks on IDE activity.
+
 config PMAC_SMU
 	bool "Support for SMU  based PowerMacs"
 	depends on PPC_PMAC64
diff --git a/drivers/macintosh/via-pmu-led.c b/drivers/macintosh/via-pmu-led.c
index af8375ed0f5eb..5189d5454b1f2 100644
--- a/drivers/macintosh/via-pmu-led.c
+++ b/drivers/macintosh/via-pmu-led.c
@@ -74,7 +74,7 @@ static void pmu_led_set(struct led_classdev *led_cdev,
 
 static struct led_classdev pmu_led = {
 	.name = "pmu-front-led",
-#ifdef CONFIG_BLK_DEV_IDE_PMAC_BLINK
+#ifdef CONFIG_ADB_PMU_LED_IDE
 	.default_trigger = "ide-disk",
 #endif
 	.brightness_set = pmu_led_set,
-- 
GitLab


From e8c0acf9a4fe3b2b6847541bf5cc3c86c18272ec Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 14:06:29 +1000
Subject: [PATCH 0005/1063] [POWERPC] Workaround Pegasos incorrect ISA "ranges"

The Pegasos firmware doesn't create a valid "ranges" property for the
ISA bridge, which causes translation of ISA addresses and IO ports to
fail. This adds the missing property during device-tree fixup, so that
the early serial console works properly on Pegasos again.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/prom_init.c | 34 +++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ebd501a59abda..b6c3ac20c14cb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2030,6 +2030,39 @@ static void __init fixup_device_tree_maple(void)
 #define fixup_device_tree_maple()
 #endif
 
+#ifdef CONFIG_PPC_CHRP
+/* Pegasos lacks the "ranges" property in the isa node */
+static void __init fixup_device_tree_chrp(void)
+{
+	phandle isa;
+	u32 isa_ranges[6];
+	char *name;
+	int rc;
+
+	name = "/pci@80000000/isa@c";
+	isa = call_prom("finddevice", 1, 1, ADDR(name));
+	if (!PHANDLE_VALID(isa))
+		return;
+
+	rc = prom_getproplen(isa, "ranges");
+	if (rc != 0 && rc != PROM_ERROR)
+		return;
+
+	prom_printf("Fixing up missing ISA range on Pegasos...\n");
+
+	isa_ranges[0] = 0x1;
+	isa_ranges[1] = 0x0;
+	isa_ranges[2] = 0x01006000;
+	isa_ranges[3] = 0x0;
+	isa_ranges[4] = 0x0;
+	isa_ranges[5] = 0x00010000;
+	prom_setprop(isa, name, "ranges",
+			isa_ranges, sizeof(isa_ranges));
+}
+#else
+#define fixup_device_tree_chrp()
+#endif
+
 #if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC)
 static void __init fixup_device_tree_pmac(void)
 {
@@ -2077,6 +2110,7 @@ static void __init fixup_device_tree_pmac(void)
 static void __init fixup_device_tree(void)
 {
 	fixup_device_tree_maple();
+	fixup_device_tree_chrp();
 	fixup_device_tree_pmac();
 }
 
-- 
GitLab


From 470407a88e549135dce5fba7d86fb9910f500e56 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 14:07:42 +1000
Subject: [PATCH 0006/1063] [POWERPC] Fix 32 bits warning in prom_init.c

A warning is hurting my eyes when building 32-bit kernels. Only declare
"opt" in early_cmdline_parse() when CONFIG_PPC64 is set.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/prom_init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index b6c3ac20c14cb..462bced40c128 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -557,7 +557,9 @@ unsigned long prom_memparse(const char *ptr, const char **retptr)
 static void __init early_cmdline_parse(void)
 {
 	struct prom_t *_prom = &RELOC(prom);
+#ifdef CONFIG_PPC64
 	const char *opt;
+#endif
 	char *p;
 	int l = 0;
 
-- 
GitLab


From 1e031d65b0cb5f882b20ebc356ea0345ff18dbf0 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 14:09:36 +1000
Subject: [PATCH 0007/1063] [POWERPC] Fix non-MPIC CHRPs with CONFIG_SMP set

Pseudo-CHRP machines like Pegasos without an MPIC would crash at boot if
CONFIG_SMP was set because the "smp_ops" pointer was set to MPIC related
ops unconditionally. This patch makes it NULL on machines that don't
support SMP and provides proper default behaviour in the callers when
smp_ops is NULL.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/smp.c           | 24 +++++++++++++++++-------
 arch/powerpc/platforms/chrp/setup.c | 12 ++++++++----
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 46c56cfd1b2f9..6a9bc9ce54e0a 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -144,13 +144,15 @@ void smp_message_recv(int msg, struct pt_regs *regs)
 
 void smp_send_reschedule(int cpu)
 {
-	smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
+	if (likely(smp_ops))
+		smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
 }
 
 #ifdef CONFIG_DEBUGGER
 void smp_send_debugger_break(int cpu)
 {
-	smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+	if (likely(smp_ops))
+		smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
 }
 #endif
 
@@ -158,7 +160,7 @@ void smp_send_debugger_break(int cpu)
 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 {
 	crash_ipi_function_ptr = crash_ipi_callback;
-	if (crash_ipi_callback) {
+	if (crash_ipi_callback && smp_ops) {
 		mb();
 		smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK);
 	}
@@ -220,6 +222,9 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
+	if (unlikely(smp_ops == NULL))
+		return -1;
+
 	data.func = func;
 	data.info = info;
 	atomic_set(&data.started, 0);
@@ -357,7 +362,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	smp_store_cpu_info(boot_cpuid);
 	cpu_callin_map[boot_cpuid] = 1;
 
-	max_cpus = smp_ops->probe();
+	if (smp_ops)
+		max_cpus = smp_ops->probe();
+	else
+		max_cpus = 1;
  
 	smp_space_timers(max_cpus);
 
@@ -453,7 +461,7 @@ void generic_mach_cpu_die(void)
 
 static int __devinit cpu_enable(unsigned int cpu)
 {
-	if (smp_ops->cpu_enable)
+	if (smp_ops && smp_ops->cpu_enable)
 		return smp_ops->cpu_enable(cpu);
 
 	return -ENOSYS;
@@ -467,7 +475,8 @@ int __devinit __cpu_up(unsigned int cpu)
 	if (!cpu_enable(cpu))
 		return 0;
 
-	if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))
+	if (smp_ops == NULL ||
+	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
 		return -EINVAL;
 
 	/* Make sure callin-map entry is 0 (can be leftover a CPU
@@ -568,7 +577,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	old_mask = current->cpus_allowed;
 	set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
 	
-	smp_ops->setup_cpu(boot_cpuid);
+	if (smp_ops)
+		smp_ops->setup_cpu(boot_cpuid);
 
 	set_cpus_allowed(current, old_mask);
 
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 538e337d63e24..9c08ff3222900 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -291,10 +291,6 @@ void __init chrp_setup_arch(void)
 
 	pci_create_OF_bus_map();
 
-#ifdef CONFIG_SMP
-	smp_ops = &chrp_smp_ops;
-#endif /* CONFIG_SMP */
-
 	/*
 	 * Print the banner, then scroll down so boot progress
 	 * can be printed.  -- Cort
@@ -479,6 +475,14 @@ void __init chrp_init_IRQ(void)
 	chrp_find_openpic();
 	chrp_find_8259();
 
+#ifdef CONFIG_SMP
+	/* Pegasos has no MPIC, those ops would make it crash. It might be an
+	 * option to move setting them to after we probe the PIC though
+	 */
+	if (chrp_mpic != NULL)
+		smp_ops = &chrp_smp_ops;
+#endif /* CONFIG_SMP */
+
 	if (_chrp_type == _CHRP_Pegasos)
 		ppc_md.get_irq        = i8259_irq;
 
-- 
GitLab


From e70e943847bdae13175bf3a8bca6328e369de90a Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 14:11:23 +1000
Subject: [PATCH 0008/1063] [POWERPC] Fix default clock for udbg_16550

This patch makes it possible to provide 0 as the clock value for
udbg_16550 (udbg_init_uart()), in which case it defaults to the
standard 1.8432 MHz clock.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/udbg_16550.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 0835b4841dea4..2d17f2b8eda73 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -81,10 +81,14 @@ static int udbg_550_getc(void)
 void udbg_init_uart(void __iomem *comport, unsigned int speed,
 		    unsigned int clock)
 {
-	unsigned int dll, base_bauds = clock / 16;
+	unsigned int dll, base_bauds;
 
+	if (clock == 0)
+		clock = 1843200;
 	if (speed == 0)
 		speed = 9600;
+
+	base_bauds = clock / 16;
 	dll = base_bauds / speed;
 
 	if (comport) {
-- 
GitLab


From f704b8d1f080ee71b7a9a88bcf585e7dd4272f4b Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 14:14:07 +1000
Subject: [PATCH 0009/1063] [POWERPC] Fix legacy_serial.c error handling on 32
 bits

The code in legacy_serial.c wouldn't properly compare OF translation
results against OF_BAD_ADDR as it's using a phys_addr_t which is 32
bits on some 32-bit powerpc platforms. This fixes it by always using
a u64 which is what is returned by the OF parsing routines. It also
makes translation failure harmless for ISA serial ports.  If they
can't translate, we can't use the UART early, but we can still let the
8250 driver use it later on by using IO port accessors.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/legacy_serial.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 7e98e778b52ff..359ab89748e05 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -112,7 +112,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 static int __init add_legacy_soc_port(struct device_node *np,
 				      struct device_node *soc_dev)
 {
-	phys_addr_t addr;
+	u64 addr;
 	u32 *addrp;
 	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
 
@@ -143,7 +143,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
 	u32 *reg;
 	char *typep;
 	int index = -1;
-	phys_addr_t taddr;
+	u64 taddr;
 
 	DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
 
@@ -165,10 +165,13 @@ static int __init add_legacy_isa_port(struct device_node *np,
 	if (typep && *typep == 'S')
 		index = simple_strtol(typep+1, NULL, 0) - 1;
 
-	/* Translate ISA address */
+	/* Translate ISA address. If it fails, we still register the port
+	 * with no translated address so that it can be picked up as an IO
+	 * port later by the serial driver
+	 */
 	taddr = of_translate_address(np, reg);
 	if (taddr == OF_BAD_ADDR)
-		return -1;
+		taddr = 0;
 
 	/* Add port, irq will be dealt with later */
 	return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr,
@@ -180,7 +183,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
 static int __init add_legacy_pci_port(struct device_node *np,
 				      struct device_node *pci_dev)
 {
-	phys_addr_t addr, base;
+	u64 addr, base;
 	u32 *addrp;
 	unsigned int flags;
 	int iotype, index = -1, lindex = 0;
-- 
GitLab


From 26c5032eaa64090b2a01973b0c6ea9e7f6a80fa7 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 14:16:28 +1000
Subject: [PATCH 0010/1063] [POWERPC] Add briq support to CHRP

The support for Briq machines has been floating around as patches for
ages. This cleans it up and adds it once and for all.

Some of this is based on initial code provided by Karsten Jeppesen
<karsten@jeppesens.com> and mostly rewritten from scratch by me.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/prom_init.c     | 10 +++++--
 arch/powerpc/platforms/chrp/pci.c   | 42 ++++++++++++++++++++++++++---
 arch/powerpc/platforms/chrp/setup.c | 27 ++++++++++++++++++-
 include/asm-powerpc/processor.h     |  1 +
 4 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 462bced40c128..90972ef6c471f 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2033,16 +2033,22 @@ static void __init fixup_device_tree_maple(void)
 #endif
 
 #ifdef CONFIG_PPC_CHRP
-/* Pegasos lacks the "ranges" property in the isa node */
+/* Pegasos and BriQ lacks the "ranges" property in the isa node */
 static void __init fixup_device_tree_chrp(void)
 {
 	phandle isa;
 	u32 isa_ranges[6];
+	u32 rloc = 0x01006000; /* IO space; PCI device = 12 */
 	char *name;
 	int rc;
 
 	name = "/pci@80000000/isa@c";
 	isa = call_prom("finddevice", 1, 1, ADDR(name));
+	if (!PHANDLE_VALID(isa)) {
+		name = "/pci@ff500000/isa@6";
+		isa = call_prom("finddevice", 1, 1, ADDR(name));
+		rloc = 0x01003000; /* IO space; PCI device = 6 */
+	}
 	if (!PHANDLE_VALID(isa))
 		return;
 
@@ -2054,7 +2060,7 @@ static void __init fixup_device_tree_chrp(void)
 
 	isa_ranges[0] = 0x1;
 	isa_ranges[1] = 0x0;
-	isa_ranges[2] = 0x01006000;
+	isa_ranges[2] = rloc;
 	isa_ranges[3] = 0x0;
 	isa_ranges[4] = 0x0;
 	isa_ranges[5] = 0x00010000;
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 6802cdc3168a1..6d7ac649b45e8 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -257,7 +257,7 @@ chrp_find_bridges(void)
 		else
 			printk(KERN_INFO "PCI buses %d..%d",
 			       bus_range[0], bus_range[1]);
-		printk(" controlled by %s", dev->type);
+		printk(" controlled by %s", dev->full_name);
 		if (!is_longtrail)
 			printk(" at %llx", (unsigned long long)r.start);
 		printk("\n");
@@ -289,6 +289,19 @@ chrp_find_bridges(void)
 			setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc);
 		} else if (is_pegasos == 2) {
 			setup_peg2(hose, dev);
+		} else if (!strncmp(model, "IBM,CPC710", 10)) {
+			setup_indirect_pci(hose,
+					   r.start + 0x000f8000,
+					   r.start + 0x000f8010);
+			if (index == 0) {
+				dma = get_property(dev, "system-dma-base",&len);
+				if (dma && len >= sizeof(*dma)) {
+					dma = (unsigned int *)
+						(((unsigned long)dma) +
+						len - sizeof(*dma));
+						pci_dram_offset = *dma;
+				}
+			}
 		} else {
 			printk("No methods for %s (model %s), using RTAS\n",
 			       dev->full_name, model);
@@ -306,8 +319,29 @@ chrp_find_bridges(void)
 			printk("pci_dram_offset = %lx\n", pci_dram_offset);
 		}
 	}
+}
+
+/* SL82C105 IDE Control/Status Register */
+#define SL82C105_IDECSR                0x40
+
+/* Fixup for Winbond ATA quirk, required for briq */
+void chrp_pci_fixup_winbond_ata(struct pci_dev *sl82c105)
+{
+	u8 progif;
 
-	/* Do not fixup interrupts from OF tree on pegasos */
-	if (is_pegasos)
-		ppc_md.pcibios_fixup = NULL;
+	/* If non-briq machines need that fixup too, please speak up */
+	if (!machine_is(chrp) || _chrp_type != _CHRP_briq)
+		return;
+
+	if ((sl82c105->class & 5) != 5) {
+		printk("W83C553: Switching SL82C105 IDE to PCI native mode\n");
+		/* Enable SL82C105 PCI native IDE mode */
+		pci_read_config_byte(sl82c105, PCI_CLASS_PROG, &progif);
+		pci_write_config_byte(sl82c105, PCI_CLASS_PROG, progif | 0x05);
+		sl82c105->class |= 0x05;
+		/* Disable SL82C105 second port */
+		pci_write_config_word(sl82c105, SL82C105_IDECSR, 0x0003);
+	}
 }
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
+		chrp_pci_fixup_winbond_ata);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 9c08ff3222900..be39742db809b 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -74,6 +74,9 @@ extern irqreturn_t xmon_irq(int, void *, struct pt_regs *);
 
 extern unsigned long loops_per_jiffy;
 
+/* To be replaced by RTAS when available */
+static unsigned int *briq_SPOR;
+
 #ifdef CONFIG_SMP
 extern struct smp_ops_t chrp_smp_ops;
 #endif
@@ -92,6 +95,15 @@ static const char *gg2_cachemodes[4] = {
 	"Disabled", "Write-Through", "Copy-Back", "Transparent Mode"
 };
 
+static const char *chrp_names[] = {
+	"Unknown",
+	"","","",
+	"Motorola",
+	"IBM or Longtrail",
+	"Genesi Pegasos",
+	"Total Impact Briq"
+};
+
 void chrp_show_cpuinfo(struct seq_file *m)
 {
 	int i, sdramen;
@@ -229,6 +241,14 @@ static void __init pegasos_set_l2cr(void)
 	}
 }
 
+static void briq_restart(char *cmd)
+{
+	local_irq_disable();
+	if (briq_SPOR)
+		out_be32(briq_SPOR, 0);
+	for(;;);
+}
+
 void __init chrp_setup_arch(void)
 {
 	struct device_node *root = find_path_device ("/");
@@ -245,11 +265,16 @@ void __init chrp_setup_arch(void)
 		_chrp_type = _CHRP_IBM;
 	} else if (machine && strncmp(machine, "MOT", 3) == 0) {
 		_chrp_type = _CHRP_Motorola;
+	} else if (machine && strncmp(machine, "TotalImpact,BRIQ-1", 18) == 0) {
+		_chrp_type = _CHRP_briq;
+		/* Map the SPOR register on briq and change the restart hook */
+		briq_SPOR = (unsigned int *)ioremap(0xff0000e8, 4);
+		ppc_md.restart = briq_restart;
 	} else {
 		/* Let's assume it is an IBM chrp if all else fails */
 		_chrp_type = _CHRP_IBM;
 	}
-	printk("chrp type = %x\n", _chrp_type);
+	printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]);
 
 	rtas_initialize();
 	if (rtas_token("display-character") >= 0)
diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h
index 22e54a2a6604b..6cb6fb19e57f2 100644
--- a/include/asm-powerpc/processor.h
+++ b/include/asm-powerpc/processor.h
@@ -32,6 +32,7 @@
 #define _CHRP_Motorola	0x04	/* motorola chrp, the cobra */
 #define _CHRP_IBM	0x05	/* IBM chrp, the longtrail and longtrail 2 */
 #define _CHRP_Pegasos	0x06	/* Genesi/bplan's Pegasos and Pegasos2 */
+#define _CHRP_briq	0x07	/* TotalImpact's briQ */
 
 #if defined(__KERNEL__) && defined(CONFIG_PPC32)
 
-- 
GitLab


From a45b83957deabbdac9a3d908c6ca4c25f05ce1ad Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 15:06:20 +1000
Subject: [PATCH 0011/1063] [POWERPC] Add support for briq front panel

This adds the driver for the Briq front panel. This is a cleaned up
version of a driver that has been floating around for some time now,
initially written by Karsten Jeppesen <karsten@jeppesens.com> and
cleaned up by jk and myself.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/char/Kconfig      |  14 ++
 drivers/char/Makefile     |   1 +
 drivers/char/briq_panel.c | 268 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 283 insertions(+)
 create mode 100644 drivers/char/briq_panel.c

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index c40e487d9f5cd..11de59ff4229d 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -495,6 +495,20 @@ config LEGACY_PTY_COUNT
 	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
 	  architectures and 24 bytes on 64-bit architectures.
 
+config BRIQ_PANEL
+	tristate 'Total Impact briQ front panel driver'
+	---help---
+	  The briQ is a small footprint CHRP computer with a frontpanel VFD, a
+	  tristate led and two switches. It is the size of a CDROM drive.
+
+	  If you have one and want anything shown on the VFD, then you
+	  must answer Y here.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called briq_panel.
+
+	  It's safe to say N here.
+
 config PRINTER
 	tristate "Parallel printer support"
 	depends on PARPORT
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 6e0f4469d8bbd..7a7ee57212797 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_VIOCONS)		+= viocons.o
 obj-$(CONFIG_VIOTAPE)		+= viotape.o
 obj-$(CONFIG_HVCS)		+= hvcs.o
 obj-$(CONFIG_SGI_MBCS)		+= mbcs.o
+obj-$(CONFIG_BRIQ_PANEL)	+= briq_panel.o
 
 obj-$(CONFIG_PRINTER)		+= lp.o
 obj-$(CONFIG_TIPAR)		+= tipar.o
diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c
new file mode 100644
index 0000000000000..a0e5eac5f33aa
--- /dev/null
+++ b/drivers/char/briq_panel.c
@@ -0,0 +1,268 @@
+/*
+ * Drivers for the Total Impact PPC based computer "BRIQ"
+ * by Dr. Karsten Jeppesen
+ *
+ */
+
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/timer.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+
+#define		BRIQ_PANEL_MINOR	156
+#define		BRIQ_PANEL_VFD_IOPORT	0x0390
+#define		BRIQ_PANEL_LED_IOPORT	0x0398
+#define		BRIQ_PANEL_VER		"1.1 (04/20/2002)"
+#define		BRIQ_PANEL_MSG0		"Loading Linux"
+
+static int		vfd_is_open;
+static unsigned char	vfd[40];
+static int		vfd_cursor;
+static unsigned char	ledpb, led;
+
+static void update_vfd(void)
+{
+	int	i;
+
+	/* cursor home */
+	outb(0x02, BRIQ_PANEL_VFD_IOPORT);
+	for (i=0; i<20; i++)
+		outb(vfd[i], BRIQ_PANEL_VFD_IOPORT + 1);
+
+	/* cursor to next line */
+	outb(0xc0, BRIQ_PANEL_VFD_IOPORT);
+	for (i=20; i<40; i++)
+		outb(vfd[i], BRIQ_PANEL_VFD_IOPORT + 1);
+
+}
+
+static void set_led(char state)
+{
+	if (state == 'R')
+		led = 0x01;
+	else if (state == 'G')
+		led = 0x02;
+	else if (state == 'Y')
+		led = 0x03;
+	else if (state == 'X')
+		led = 0x00;
+	outb(led, BRIQ_PANEL_LED_IOPORT);
+}
+
+static int briq_panel_open(struct inode *ino, struct file *filep)
+{
+	/* enforce single access */
+	if (vfd_is_open)
+		return -EBUSY;
+	vfd_is_open = 1;
+
+	return 0;
+}
+
+static int briq_panel_release(struct inode *ino, struct file *filep)
+{
+	if (!vfd_is_open)
+		return -ENODEV;
+
+	vfd_is_open = 0;
+
+	return 0;
+}
+
+/* Poll the buttons: yields ' ' (upper) or '\r' (lower) when a button bit
+ * goes from 0 to 1, otherwise returns 0 without copying anything. */
+static ssize_t briq_panel_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	unsigned short c;
+	unsigned char cp;
+
+	if (!vfd_is_open)
+		return -ENODEV;
+	/* never store a byte into a zero-length user buffer */
+	if (!count)
+		return 0;
+
+	c = (inb(BRIQ_PANEL_LED_IOPORT) & 0x000c) | (ledpb & 0x0003);
+	set_led(' ');
+	/* upper button released */
+	if ((!(ledpb & 0x0004)) && (c & 0x0004)) {
+		cp = ' ';
+		ledpb = c;
+		if (copy_to_user(buf, &cp, 1))
+			return -EFAULT;
+		return 1;
+	}
+	/* lower button released */
+	else if ((!(ledpb & 0x0008)) && (c & 0x0008)) {
+		cp = '\r';
+		ledpb = c;
+		if (copy_to_user(buf, &cp, 1))
+			return -EFAULT;
+		return 1;
+	} else {
+		ledpb = c;
+		return 0;
+	}
+}
+
+static void scroll_vfd( void )
+{
+	int	i;
+
+	for (i=0; i<20; i++) {
+		vfd[i] = vfd[i+20];
+		vfd[i+20] = ' ';
+	}
+	vfd_cursor = 20;
+}
+
+/* Feed bytes to the VFD buffer: ESC + byte sets the LED, form feed clears
+ * the display, newline advances a line (scrolling when past the second). */
+static ssize_t briq_panel_write(struct file *file, const char __user *buf,
+				size_t len, loff_t *ppos)
+{
+	size_t indx = len;
+	int i, esc = 0;
+
+	if (!vfd_is_open)
+		return -EBUSY;
+
+	while (indx) {
+		char c;
+
+		/* buf points into user space: fetch each byte with get_user() */
+		if (get_user(c, buf))
+			return -EFAULT;
+		if (esc) {
+			set_led(c);
+			esc = 0;
+		} else if (c == 27) {
+			esc = 1;
+		} else if (c == 12) {
+			/* do a form feed */
+			for (i=0; i<40; i++)
+				vfd[i] = ' ';
+			vfd_cursor = 0;
+		} else if (c == 10) {
+			if (vfd_cursor < 20)
+				vfd_cursor = 20;
+			else if (vfd_cursor < 40)
+				vfd_cursor = 40;
+			else if (vfd_cursor < 60)
+				vfd_cursor = 60;
+			if (vfd_cursor > 59)
+				scroll_vfd();
+		} else {
+			/* just a character */
+			if (vfd_cursor > 39)
+				scroll_vfd();
+			vfd[vfd_cursor++] = c;
+		}
+		indx--;
+		buf++;
+	}
+	update_vfd();
+
+	return len;
+}
+
+static struct file_operations briq_panel_fops = {
+	.owner		= THIS_MODULE,
+	.read		= briq_panel_read,
+	.write		= briq_panel_write,
+	.open		= briq_panel_open,
+	.release	= briq_panel_release,
+};
+
+static struct miscdevice briq_panel_miscdev = {
+	BRIQ_PANEL_MINOR,
+	"briq_panel",
+	&briq_panel_fops
+};
+
+static int __init briq_panel_init(void)
+{
+	struct device_node *root = find_path_device("/");
+	char *machine;
+	int i;
+
+	machine = get_property(root, "model", NULL);
+	if (!machine || strncmp(machine, "TotalImpact,BRIQ-1", 18) != 0)
+		return -ENODEV;
+
+	printk(KERN_INFO
+		"briq_panel: v%s Dr. Karsten Jeppesen (kj@totalimpact.com)\n",
+		BRIQ_PANEL_VER);
+
+	if (!request_region(BRIQ_PANEL_VFD_IOPORT, 4, "BRIQ Front Panel"))
+		return -EBUSY;
+
+	if (!request_region(BRIQ_PANEL_LED_IOPORT, 2, "BRIQ Front Panel")) {
+		release_region(BRIQ_PANEL_VFD_IOPORT, 4);
+		return -EBUSY;
+	}
+	ledpb = inb(BRIQ_PANEL_LED_IOPORT) & 0x000c;
+
+	if (misc_register(&briq_panel_miscdev) < 0) {
+		release_region(BRIQ_PANEL_VFD_IOPORT, 4);
+		release_region(BRIQ_PANEL_LED_IOPORT, 2);
+		return -EBUSY;
+	}
+
+	outb(0x38, BRIQ_PANEL_VFD_IOPORT);	/* Function set */
+	outb(0x01, BRIQ_PANEL_VFD_IOPORT);	/* Clear display */
+	outb(0x0c, BRIQ_PANEL_VFD_IOPORT);	/* Display on */
+	outb(0x06, BRIQ_PANEL_VFD_IOPORT);	/* Entry normal */
+	for (i=0; i<40; i++)
+		vfd[i]=' ';
+#ifndef MODULE
+	vfd[0] = 'L';
+	vfd[1] = 'o';
+	vfd[2] = 'a';
+	vfd[3] = 'd';
+	vfd[4] = 'i';
+	vfd[5] = 'n';
+	vfd[6] = 'g';
+	vfd[7] = ' ';
+	vfd[8] = '.';
+	vfd[9] = '.';
+	vfd[10] = '.';
+#endif /* !MODULE */
+
+	update_vfd();
+
+	return 0;
+}
+
+static void __exit briq_panel_exit(void)
+{
+	misc_deregister(&briq_panel_miscdev);
+	release_region(BRIQ_PANEL_VFD_IOPORT, 4);
+	release_region(BRIQ_PANEL_LED_IOPORT, 2);
+}
+
+module_init(briq_panel_init);
+module_exit(briq_panel_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Karsten Jeppesen <karsten@jeppesens.com>");
+MODULE_DESCRIPTION("Driver for the Total Impact briQ front panel");
-- 
GitLab


From 73ea6959b11821ba5ade77fb1d3d4aed52be3b67 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 17:07:18 +1000
Subject: [PATCH 0012/1063] [POWERPC] More offb/bootx fixes

There were still some issues with offb when BootX doesn't provide a
proper display node; this fixes them.  This also re-instates the
palette hacks that were disabled a couple of kernel versions ago when
I converted to the new OF parsing, and shuffles some functions around
to avoid prototypes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/powermac/bootx_init.c |  35 ++-
 drivers/video/offb.c                         | 307 ++++++++++---------
 2 files changed, 185 insertions(+), 157 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 871b002c9f90d..6a026c733f6a0 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -181,13 +181,18 @@ static void __init bootx_add_chosen_props(unsigned long base,
 }
 
 static void __init bootx_add_display_props(unsigned long base,
-					   unsigned long *mem_end)
+					   unsigned long *mem_end,
+					   int has_real_node)
 {
 	boot_infos_t *bi = bootx_info;
 	u32 tmp;
 
-	bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end);
-	bootx_dt_add_prop("linux,opened", NULL, 0, mem_end);
+	if (has_real_node) {
+		bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end);
+		bootx_dt_add_prop("linux,opened", NULL, 0, mem_end);
+	} else
+		bootx_dt_add_prop("linux,bootx-noscreen", NULL, 0, mem_end);
+
 	tmp = bi->dispDeviceDepth;
 	bootx_dt_add_prop("linux,bootx-depth", &tmp, 4, mem_end);
 	tmp = bi->dispDeviceRect[2] - bi->dispDeviceRect[0];
@@ -241,11 +246,6 @@ static void __init bootx_scan_dt_build_strings(unsigned long base,
 		DBG(" detected display ! adding properties names !\n");
 		bootx_dt_add_string("linux,boot-display", mem_end);
 		bootx_dt_add_string("linux,opened", mem_end);
-		bootx_dt_add_string("linux,bootx-depth", mem_end);
-		bootx_dt_add_string("linux,bootx-width", mem_end);
-		bootx_dt_add_string("linux,bootx-height", mem_end);
-		bootx_dt_add_string("linux,bootx-linebytes", mem_end);
-		bootx_dt_add_string("linux,bootx-addr", mem_end);
 		strncpy(bootx_disp_path, namep, 255);
 	}
 
@@ -329,10 +329,13 @@ static void __init bootx_scan_dt_build_struct(unsigned long base,
 		ppp = &pp->next;
 	}
 
-	if (node == bootx_node_chosen)
+	if (node == bootx_node_chosen) {
 		bootx_add_chosen_props(base, mem_end);
-	if (node == bootx_info->dispDeviceRegEntryOffset)
-		bootx_add_display_props(base, mem_end);
+		if (bootx_info->dispDeviceRegEntryOffset == 0)
+			bootx_add_display_props(base, mem_end, 0);
+	}
+	else if (node == bootx_info->dispDeviceRegEntryOffset)
+		bootx_add_display_props(base, mem_end, 1);
 
 	/* do all our children */
 	cpp = &np->child;
@@ -374,6 +377,14 @@ static unsigned long __init bootx_flatten_dt(unsigned long start)
 	mem_end += 4;
 	bootx_dt_strend = mem_end;
 	bootx_scan_dt_build_strings(base, 4, &mem_end);
+	/* Add some strings */
+	bootx_dt_add_string("linux,bootx-noscreen", &mem_end);
+	bootx_dt_add_string("linux,bootx-depth", &mem_end);
+	bootx_dt_add_string("linux,bootx-width", &mem_end);
+	bootx_dt_add_string("linux,bootx-height", &mem_end);
+	bootx_dt_add_string("linux,bootx-linebytes", &mem_end);
+	bootx_dt_add_string("linux,bootx-addr", &mem_end);
+	/* Wrap up strings */
 	hdr->off_dt_strings = bootx_dt_strbase - mem_start;
 	hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase;
 
@@ -471,6 +482,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	if (bi->dispDeviceDepth == 16)
 		bi->dispDeviceDepth = 15;
 
+
 #ifdef CONFIG_BOOTX_TEXT
 	ptr = (unsigned long)bi->logicalDisplayBase;
 	ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
@@ -508,6 +520,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 #ifdef CONFIG_BOOTX_TEXT
 	btext_welcome(bi);
 #endif
+
 	/* New BootX enters kernel with MMU off, i/os are not allowed
 	 * here. This hack will have been done by the boostrap anyway.
 	 */
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index 71ce1fa45cf4d..faba672285260 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -63,8 +63,6 @@ struct offb_par default_par;
      *  Interface used by the world
      */
 
-int offb_init(void);
-
 static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 			  u_int transp, struct fb_info *info);
 static int offb_blank(int blank, struct fb_info *info);
@@ -73,11 +71,6 @@ static int offb_blank(int blank, struct fb_info *info);
 extern boot_infos_t *boot_infos;
 #endif
 
-static void offb_init_nodriver(struct device_node *);
-static void offb_init_fb(const char *name, const char *full_name,
-			 int width, int height, int depth, int pitch,
-			 unsigned long address, struct device_node *dp);
-
 static struct fb_ops offb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_setcolreg	= offb_setcolreg,
@@ -230,123 +223,17 @@ static int offb_blank(int blank, struct fb_info *info)
 	return 0;
 }
 
-    /*
-     *  Initialisation
-     */
 
-int __init offb_init(void)
+static void __iomem *offb_map_reg(struct device_node *np, int index,
+				  unsigned long offset, unsigned long size)
 {
-	struct device_node *dp = NULL, *boot_disp = NULL;
-
-	if (fb_get_options("offb", NULL))
-		return -ENODEV;
+	struct resource r;
 
-	for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
-		if (get_property(dp, "linux,opened", NULL) &&
-		    get_property(dp, "linux,boot-display", NULL)) {
-			boot_disp = dp;
-			offb_init_nodriver(dp);
-		}
-	}
-	for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
-		if (get_property(dp, "linux,opened", NULL) &&
-		    dp != boot_disp)
-			offb_init_nodriver(dp);
-	}
-
-	return 0;
-}
-
-
-static void __init offb_init_nodriver(struct device_node *dp)
-{
-	unsigned int len;
-	int i, width = 640, height = 480, depth = 8, pitch = 640;
-	unsigned int flags, rsize, addr_prop = 0;
-	unsigned long max_size = 0;
-	u64 rstart, address = OF_BAD_ADDR;
-	u32 *pp, *addrp, *up;
-	u64 asize;
-
-	pp = (u32 *)get_property(dp, "linux,bootx-depth", &len);
-	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "depth", &len);
-	if (pp && len == sizeof(u32))
-		depth = *pp;
-
-	pp = (u32 *)get_property(dp, "linux,bootx-width", &len);
-	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "width", &len);
-	if (pp && len == sizeof(u32))
-		width = *pp;
-
-	pp = (u32 *)get_property(dp, "linux,bootx-height", &len);
-	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "height", &len);
-	if (pp && len == sizeof(u32))
-		height = *pp;
-
-	pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len);
-	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "linebytes", &len);
-	if (pp && len == sizeof(u32))
-		pitch = *pp;
-	else
-		pitch = width * ((depth + 7) / 8);
-
-	rsize = (unsigned long)pitch * (unsigned long)height;
-
-	/* Ok, now we try to figure out the address of the framebuffer.
-	 *
-	 * Unfortunately, Open Firmware doesn't provide a standard way to do
-	 * so. All we can do is a dodgy heuristic that happens to work in
-	 * practice. On most machines, the "address" property contains what
-	 * we need, though not on Matrox cards found in IBM machines. What I've
-	 * found that appears to give good results is to go through the PCI
-	 * ranges and pick one that is both big enough and if possible encloses
-	 * the "address" property. If none match, we pick the biggest
-	 */
-	up = (u32 *)get_property(dp, "linux,bootx-addr", &len);
-	if (up == NULL)
-		up = (u32 *)get_property(dp, "address", &len);
-	if (up && len == sizeof(u32))
-		addr_prop = *up;
-
-	for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags))
-		     != NULL; i++) {
-		int match_addrp = 0;
-
-		if (!(flags & IORESOURCE_MEM))
-			continue;
-		if (asize < rsize)
-			continue;
-		rstart = of_translate_address(dp, addrp);
-		if (rstart == OF_BAD_ADDR)
-			continue;
-		if (addr_prop && (rstart <= addr_prop) &&
-		    ((rstart + asize) >= (addr_prop + rsize)))
-			match_addrp = 1;
-		if (match_addrp) {
-			address = addr_prop;
-			break;
-		}
-		if (rsize > max_size) {
-			max_size = rsize;
-			address = OF_BAD_ADDR;
- 		}
-
-		if (address == OF_BAD_ADDR)
-			address = rstart;
-	}
-	if (address == OF_BAD_ADDR && addr_prop)
-		address = (u64)addr_prop;
-	if (address != OF_BAD_ADDR) {
-		/* kludge for valkyrie */
-		if (strcmp(dp->name, "valkyrie") == 0)
-			address += 0x1000;
-		offb_init_fb(dp->name, dp->full_name, width, height, depth,
-			     pitch, address, dp);
-	}
+	if (of_address_to_resource(np, index, &r))
+		return 0;
+	if ((r.start + offset + size) > r.end)
+		return 0;
+	return ioremap(r.start + offset, size);
 }
 
 static void __init offb_init_fb(const char *name, const char *full_name,
@@ -403,45 +290,39 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 
 	par->cmap_type = cmap_unknown;
 	if (depth == 8) {
-
 		/* Palette hacks disabled for now */
-#if 0
 		if (dp && !strncmp(name, "ATY,Rage128", 11)) {
-			unsigned long regbase = dp->addrs[2].address;
-			par->cmap_adr = ioremap(regbase, 0x1FFF);
-			par->cmap_type = cmap_r128;
+			par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff);
+			if (par->cmap_adr)
+				par->cmap_type = cmap_r128;
 		} else if (dp && (!strncmp(name, "ATY,RageM3pA", 12)
 				  || !strncmp(name, "ATY,RageM3p12A", 14))) {
-			unsigned long regbase =
-			    dp->parent->addrs[2].address;
-			par->cmap_adr = ioremap(regbase, 0x1FFF);
-			par->cmap_type = cmap_M3A;
+			par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff);
+			if (par->cmap_adr)
+				par->cmap_type = cmap_M3A;
 		} else if (dp && !strncmp(name, "ATY,RageM3pB", 12)) {
-			unsigned long regbase =
-			    dp->parent->addrs[2].address;
-			par->cmap_adr = ioremap(regbase, 0x1FFF);
-			par->cmap_type = cmap_M3B;
+			par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff);
+			if (par->cmap_adr)
+				par->cmap_type = cmap_M3B;
 		} else if (dp && !strncmp(name, "ATY,Rage6", 9)) {
-			unsigned long regbase = dp->addrs[1].address;
-			par->cmap_adr = ioremap(regbase, 0x1FFF);
-			par->cmap_type = cmap_radeon;
+			par->cmap_adr = offb_map_reg(dp, 1, 0, 0x1fff);
+			if (par->cmap_adr)
+				par->cmap_type = cmap_radeon;
 		} else if (!strncmp(name, "ATY,", 4)) {
 			unsigned long base = address & 0xff000000UL;
 			par->cmap_adr =
 			    ioremap(base + 0x7ff000, 0x1000) + 0xcc0;
 			par->cmap_data = par->cmap_adr + 1;
 			par->cmap_type = cmap_m64;
-		} else if (device_is_compatible(dp, "pci1014,b7")) {
-			unsigned long regbase = dp->addrs[0].address;
-			par->cmap_adr = ioremap(regbase + 0x6000, 0x1000);
-			par->cmap_type = cmap_gxt2000;
+		} else if (dp && device_is_compatible(dp, "pci1014,b7")) {
+			par->cmap_adr = offb_map_reg(dp, 0, 0x6000, 0x1000);
+			if (par->cmap_adr)
+				par->cmap_type = cmap_gxt2000;
 		}
-#endif
-		fix->visual = par->cmap_adr ? FB_VISUAL_PSEUDOCOLOR
-		    : FB_VISUAL_STATIC_PSEUDOCOLOR;
+		fix->visual = (par->cmap_type != cmap_unknown) ?
+			FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_STATIC_PSEUDOCOLOR;
 	} else
-		fix->visual =	/* par->cmap_adr ? FB_VISUAL_DIRECTCOLOR
-				   : */ FB_VISUAL_TRUECOLOR;
+		fix->visual = FB_VISUAL_TRUECOLOR;
 
 	var->xoffset = var->yoffset = 0;
 	switch (depth) {
@@ -521,5 +402,139 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 	       info->node, full_name);
 }
 
+
+/* Work out mode and framebuffer address for dp, then call offb_init_fb() */
+static void __init offb_init_nodriver(struct device_node *dp, int no_real_node)
+{
+	unsigned int len, flags, rsize, addr_prop = 0;
+	int i, width = 640, height = 480, depth = 8, pitch = 640;
+	unsigned long max_size = 0;
+	u64 rstart, asize, address = OF_BAD_ADDR;
+	u32 *pp, *addrp, *up;
+
+	pp = (u32 *)get_property(dp, "linux,bootx-depth", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "depth", &len);
+	if (pp && len == sizeof(u32))
+		depth = *pp;
+
+	pp = (u32 *)get_property(dp, "linux,bootx-width", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "width", &len);
+	if (pp && len == sizeof(u32))
+		width = *pp;
+
+	pp = (u32 *)get_property(dp, "linux,bootx-height", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "height", &len);
+	if (pp && len == sizeof(u32))
+		height = *pp;
+
+	pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "linebytes", &len);
+	if (pp && len == sizeof(u32))
+		pitch = *pp;
+	else
+		pitch = width * ((depth + 7) / 8);
+
+	rsize = (unsigned long)pitch * (unsigned long)height;
+
+	/* Ok, now we try to figure out the address of the framebuffer.
+	 *
+	 * Unfortunately, Open Firmware doesn't provide a standard way to do
+	 * so. All we can do is a dodgy heuristic that happens to work in
+	 * practice. On most machines, the "address" property contains what
+	 * we need, though not on Matrox cards found in IBM machines. What I've
+	 * found that appears to give good results is to go through the PCI
+	 * ranges and pick one that is both big enough and if possible encloses
+	 * the "address" property. If none match, we pick the biggest
+	 */
+	up = (u32 *)get_property(dp, "linux,bootx-addr", &len);
+	if (up == NULL)
+		up = (u32 *)get_property(dp, "address", &len);
+	if (up && len == sizeof(u32))
+		addr_prop = *up;
+
+	/* Hack for when BootX is passing us no real display node */
+	if (no_real_node)
+		goto skip_addr;
+
+	for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags))
+		     != NULL; i++) {
+		int match_addrp = 0;
+
+		if (!(flags & IORESOURCE_MEM))
+			continue;
+		if (asize < rsize)
+			continue;
+		rstart = of_translate_address(dp, addrp);
+		if (rstart == OF_BAD_ADDR)
+			continue;
+		if (addr_prop && (rstart <= addr_prop) &&
+		    ((rstart + asize) >= (addr_prop + rsize)))
+			match_addrp = 1;
+		if (match_addrp) {
+			address = addr_prop;
+			break;
+		}
+		if (rsize > max_size) {
+			max_size = rsize;
+			address = OF_BAD_ADDR;
+		}
+
+		if (address == OF_BAD_ADDR)
+			address = rstart;
+	}
+ skip_addr:
+	if (address == OF_BAD_ADDR && addr_prop)
+		address = (u64)addr_prop;
+	if (address != OF_BAD_ADDR) {
+		/* kludge for valkyrie */
+		if (strcmp(dp->name, "valkyrie") == 0)
+			address += 0x1000;
+		/* /chosen is no display device: pass no node, so no palette hacks */
+		offb_init_fb(no_real_node ? "bootx" : dp->name,
+			     no_real_node ? "display" : dp->full_name,
+			     width, height, depth, pitch, address,
+			     no_real_node ? NULL : dp);
+	}
+}
+
+static int __init offb_init(void)
+{
+	struct device_node *dp = NULL, *boot_disp = NULL;
+
+	if (fb_get_options("offb", NULL))
+		return -ENODEV;
+
+	/* Check if we have a MacOS display without a node spec */
+	if (get_property(of_chosen, "linux,bootx-noscreen", NULL) != NULL) {
+		/* The old code tried to work out which node was the MacOS
+		 * display based on the address. I'm dropping that since the
+		 * lack of a node spec only happens with old BootX versions
+		 * (users can update) and with this code, they'll still get
+		 * a display (just not the palette hacks).
+		 */
+		offb_init_nodriver(of_chosen, 1);
+	}
+
+	for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
+		if (get_property(dp, "linux,opened", NULL) &&
+		    get_property(dp, "linux,boot-display", NULL)) {
+			boot_disp = dp;
+			offb_init_nodriver(dp, 0);
+		}
+	}
+	for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
+		if (get_property(dp, "linux,opened", NULL) &&
+		    dp != boot_disp)
+			offb_init_nodriver(dp, 0);
+	}
+
+	return 0;
+}
+
+
 module_init(offb_init);
 MODULE_LICENSE("GPL");
-- 
GitLab


From e7c1f69d4fa4da47dc995b5de64b6cb76ae32081 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 4 Jul 2006 17:13:23 +1000
Subject: [PATCH 0013/1063] [POWERPC] Fix mem= handling when the memory limit
 is > RMO size

There's a bug in my cleaned up mem= handling, if the memory limit is
larger than the RMO size we'll erroneously enlarge the RMO size.

Fix is to only change the RMO size if the memory limit is less than
the current RMO value.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/mm/lmb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c
index 4b17a7359924e..716a2906a24da 100644
--- a/arch/powerpc/mm/lmb.c
+++ b/arch/powerpc/mm/lmb.c
@@ -320,7 +320,8 @@ void __init lmb_enforce_memory_limit(unsigned long memory_limit)
 		break;
 	}
 
-	lmb.rmo_size = lmb.memory.region[0].size;
+	if (lmb.memory.region[0].size < lmb.rmo_size)
+		lmb.rmo_size = lmb.memory.region[0].size;
 
 	/* And truncate any reserves above the limit also. */
 	for (i = 0; i < lmb.reserved.cnt; i++) {
-- 
GitLab


From 861fa7737db889ae1701ba58c083d4a7bd8705d3 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 6 Jul 2006 18:03:06 +1000
Subject: [PATCH 0014/1063] [POWERPC] Xserve G5 thermal control fixes

The thermal control for the Xserve G5s had a few issues. For one, the
way to program the RPM fan speeds into the FCU is different between it
and the desktop models, which I didn't figure out until recently, and it
was missing a control loop for the slots fan, running it too fast.  Both
of those problems were causing the machine to be much more noisy than
necessary.  This patch also changes the fixed value of the slots fan for
desktop G5s to 40% instead of 50%.  It seems to still have a pretty good
airflow that way and is much less noisy.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/macintosh/therm_pm72.c | 218 ++++++++++++++++++++++++++++++---
 drivers/macintosh/therm_pm72.h |  33 ++++-
 2 files changed, 234 insertions(+), 17 deletions(-)

diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index c1fe0b368f762..20bf67244e2c1 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -95,6 +95,17 @@
  *	- Use min/max macros here or there
  *	- Latest darwin updated U3H min fan speed to 20% PWM
  *
+ *  July. 06, 2006 : 1.3
+ *	- Fix setting of RPM fans on Xserve G5 (they were going too fast)
+ *      - Add missing slots fan control loop for Xserve G5
+ *	- Lower fixed slots fan speed from 50% to 40% on desktop G5s. We
+ *        still can't properly implement the control loop for these, so let's
+ *        reduce the noise a little bit, it appears that 40% still gives us
+ *        a pretty good air flow
+ *	- Add code to "tickle" the FCU regularly so it doesn't think that
+ *        we are gone while in fact, the machine just didn't need any fan
+ *        speed change lately
+ *
  */
 
 #include <linux/types.h>
@@ -121,7 +132,7 @@
 
 #include "therm_pm72.h"
 
-#define VERSION "1.2b2"
+#define VERSION "1.3"
 
 #undef DEBUG
 
@@ -146,6 +157,7 @@ static struct basckside_pid_params	backside_params;
 static struct backside_pid_state	backside_state;
 static struct drives_pid_state		drives_state;
 static struct dimm_pid_state		dimms_state;
+static struct slots_pid_state		slots_state;
 static int				state;
 static int				cpu_count;
 static int				cpu_pid_type;
@@ -154,7 +166,8 @@ static struct completion		ctrl_complete;
 static int				critical_state;
 static int				rackmac;
 static s32				dimm_output_clamp;
-
+static int 				fcu_rpm_shift;
+static int				fcu_tickle_ticks;
 static DECLARE_MUTEX(driver_lock);
 
 /*
@@ -495,13 +508,20 @@ static int start_fcu(void)
 	rc = fan_write_reg(0x2e, &buf, 1);
 	if (rc < 0)
 		return -EIO;
+	rc = fan_read_reg(0, &buf, 1);
+	if (rc < 0)
+		return -EIO;
+	fcu_rpm_shift = (buf == 1) ? 2 : 3;
+	printk(KERN_DEBUG "FCU Initialized, RPM fan shift is %d\n",
+	       fcu_rpm_shift);
+
 	return 0;
 }
 
 static int set_rpm_fan(int fan_index, int rpm)
 {
 	unsigned char buf[2];
-	int rc, id;
+	int rc, id, min, max;
 
 	if (fcu_fans[fan_index].type != FCU_FAN_RPM)
 		return -EINVAL;
@@ -509,12 +529,15 @@ static int set_rpm_fan(int fan_index, int rpm)
 	if (id == FCU_FAN_ABSENT_ID)
 		return -EINVAL;
 
-	if (rpm < 300)
-		rpm = 300;
-	else if (rpm > 8191)
-		rpm = 8191;
-	buf[0] = rpm >> 5;
-	buf[1] = rpm << 3;
+	min = 2400 >> fcu_rpm_shift;
+	max = 56000 >> fcu_rpm_shift;
+
+	if (rpm < min)
+		rpm = min;
+	else if (rpm > max)
+		rpm = max;
+	buf[0] = rpm >> (8 - fcu_rpm_shift);
+	buf[1] = rpm << fcu_rpm_shift;
 	rc = fan_write_reg(0x10 + (id * 2), buf, 2);
 	if (rc < 0)
 		return -EIO;
@@ -551,7 +574,7 @@ static int get_rpm_fan(int fan_index, int programmed)
 	if (rc != 2)
 		return -EIO;
 
-	return (buf[0] << 5) | buf[1] >> 3;
+	return (buf[0] << (8 - fcu_rpm_shift)) | buf[1] >> fcu_rpm_shift;
 }
 
 static int set_pwm_fan(int fan_index, int pwm)
@@ -609,6 +632,26 @@ static int get_pwm_fan(int fan_index)
 	return (buf[0] * 1000) / 2559;
 }
 
+static void tickle_fcu(void)
+{
+	int pwm;
+
+	pwm = get_pwm_fan(SLOTS_FAN_PWM_INDEX);
+
+	DBG("FCU Tickle, slots fan is: %d\n", pwm);
+	if (pwm < 0)
+		pwm = 100;
+
+	if (!rackmac) {
+		pwm = SLOTS_FAN_DEFAULT_PWM;
+	} else if (pwm < SLOTS_PID_OUTPUT_MIN)
+		pwm = SLOTS_PID_OUTPUT_MIN;
+
+	/* That is hopefully enough to make the FCU happy */
+	set_pwm_fan(SLOTS_FAN_PWM_INDEX, pwm);
+}
+
+
 /*
  * Utility routine to read the CPU calibration EEPROM data
  * from the device-tree
@@ -715,6 +758,9 @@ BUILD_SHOW_FUNC_INT(backside_fan_pwm, backside_state.pwm)
 BUILD_SHOW_FUNC_FIX(drives_temperature, drives_state.last_temp)
 BUILD_SHOW_FUNC_INT(drives_fan_rpm, drives_state.rpm)
 
+BUILD_SHOW_FUNC_FIX(slots_temperature, slots_state.last_temp)
+BUILD_SHOW_FUNC_INT(slots_fan_pwm, slots_state.pwm)
+
 BUILD_SHOW_FUNC_FIX(dimms_temperature, dimms_state.last_temp)
 
 static DEVICE_ATTR(cpu0_temperature,S_IRUGO,show_cpu0_temperature,NULL);
@@ -735,6 +781,9 @@ static DEVICE_ATTR(backside_fan_pwm,S_IRUGO,show_backside_fan_pwm,NULL);
 static DEVICE_ATTR(drives_temperature,S_IRUGO,show_drives_temperature,NULL);
 static DEVICE_ATTR(drives_fan_rpm,S_IRUGO,show_drives_fan_rpm,NULL);
 
+static DEVICE_ATTR(slots_temperature,S_IRUGO,show_slots_temperature,NULL);
+static DEVICE_ATTR(slots_fan_pwm,S_IRUGO,show_slots_fan_pwm,NULL);
+
 static DEVICE_ATTR(dimms_temperature,S_IRUGO,show_dimms_temperature,NULL);
 
 /*
@@ -1076,6 +1125,9 @@ static void do_monitor_cpu_rack(struct cpu_pid_state *state)
 	fan_min = dimm_output_clamp;
 	fan_min = max(fan_min, (int)state->mpu.rminn_intake_fan);
 
+	DBG(" CPU min mpu = %d, min dimm = %d\n",
+	    state->mpu.rminn_intake_fan, dimm_output_clamp);
+
 	state->rpm = max(state->rpm, (int)fan_min);
 	state->rpm = min(state->rpm, (int)state->mpu.rmaxn_intake_fan);
 	state->intake_rpm = state->rpm;
@@ -1374,7 +1426,8 @@ static void do_monitor_drives(struct drives_pid_state *state)
 	DBG("  current rpm: %d\n", state->rpm);
 
 	/* Get some sensor readings */
-	temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor, DS1775_TEMP)) << 8;
+	temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor,
+						    DS1775_TEMP)) << 8;
 	state->last_temp = temp;
 	DBG("  temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp),
 	    FIX32TOPRINT(DRIVES_PID_INPUT_TARGET));
@@ -1575,7 +1628,7 @@ static int init_dimms_state(struct dimm_pid_state *state)
 }
 
 /*
- * Dispose of the state data for the drives control loop
+ * Dispose of the state data for the DIMM control loop
  */
 static void dispose_dimms_state(struct dimm_pid_state *state)
 {
@@ -1588,6 +1641,127 @@ static void dispose_dimms_state(struct dimm_pid_state *state)
 	state->monitor = NULL;
 }
 
+/*
+ * Slots fan control loop
+ */
+static void do_monitor_slots(struct slots_pid_state *state)
+{
+	s32 temp, integral, derivative;
+	s64 integ_p, deriv_p, prop_p, sum;
+	int i, rc;
+
+	if (--state->ticks != 0)
+		return;
+	state->ticks = SLOTS_PID_INTERVAL;
+
+	DBG("slots:\n");
+
+	/* Check fan status */
+	rc = get_pwm_fan(SLOTS_FAN_PWM_INDEX);
+	if (rc < 0) {
+		printk(KERN_WARNING "Error %d reading slots fan !\n", rc);
+		/* XXX What do we do now ? */
+	} else
+		state->pwm = rc;
+	DBG("  current pwm: %d\n", state->pwm);
+
+	/* Get some sensor readings */
+	temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor,
+						    DS1775_TEMP)) << 8;
+	state->last_temp = temp;
+	DBG("  temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp),
+	    FIX32TOPRINT(SLOTS_PID_INPUT_TARGET));
+
+	/* Store temperature and error in history array */
+	state->cur_sample = (state->cur_sample + 1) % SLOTS_PID_HISTORY_SIZE;
+	state->sample_history[state->cur_sample] = temp;
+	state->error_history[state->cur_sample] = temp - SLOTS_PID_INPUT_TARGET;
+
+	/* If first loop, fill the history table */
+	if (state->first) {
+		for (i = 0; i < (SLOTS_PID_HISTORY_SIZE - 1); i++) {
+			state->cur_sample = (state->cur_sample + 1) %
+				SLOTS_PID_HISTORY_SIZE;
+			state->sample_history[state->cur_sample] = temp;
+			state->error_history[state->cur_sample] =
+				temp - SLOTS_PID_INPUT_TARGET;
+		}
+		state->first = 0;
+	}
+
+	/* Calculate the integral term */
+	sum = 0;
+	integral = 0;
+	for (i = 0; i < SLOTS_PID_HISTORY_SIZE; i++)
+		integral += state->error_history[i];
+	integral *= SLOTS_PID_INTERVAL;
+	DBG("  integral: %08x\n", integral);
+	integ_p = ((s64)SLOTS_PID_G_r) * (s64)integral;
+	DBG("   integ_p: %d\n", (int)(integ_p >> 36));
+	sum += integ_p;
+
+	/* Calculate the derivative term */
+	derivative = state->error_history[state->cur_sample] -
+		state->error_history[(state->cur_sample + SLOTS_PID_HISTORY_SIZE - 1)
+				    % SLOTS_PID_HISTORY_SIZE];
+	derivative /= SLOTS_PID_INTERVAL;
+	deriv_p = ((s64)SLOTS_PID_G_d) * (s64)derivative;
+	DBG("   deriv_p: %d\n", (int)(deriv_p >> 36));
+	sum += deriv_p;
+
+	/* Calculate the proportional term */
+	prop_p = ((s64)SLOTS_PID_G_p) * (s64)(state->error_history[state->cur_sample]);
+	DBG("   prop_p: %d\n", (int)(prop_p >> 36));
+	sum += prop_p;
+
+	/* Scale sum */
+	sum >>= 36;
+
+	DBG("   sum: %d\n", (int)sum);
+	state->pwm = (s32)sum;
+
+	state->pwm = max(state->pwm, SLOTS_PID_OUTPUT_MIN);
+	state->pwm = min(state->pwm, SLOTS_PID_OUTPUT_MAX);
+
+	DBG("** DRIVES PWM: %d\n", (int)state->pwm);
+	set_pwm_fan(SLOTS_FAN_PWM_INDEX, state->pwm);
+}
+
+/*
+ * Initialize the state structure for the slots bay fan control loop
+ */
+static int init_slots_state(struct slots_pid_state *state)
+{
+	state->ticks = 1;
+	state->first = 1;
+	state->pwm = 50;
+
+	state->monitor = attach_i2c_chip(XSERVE_SLOTS_LM75, "slots_temp");
+	if (state->monitor == NULL)
+		return -ENODEV;
+
+	device_create_file(&of_dev->dev, &dev_attr_slots_temperature);
+	device_create_file(&of_dev->dev, &dev_attr_slots_fan_pwm);
+
+	return 0;
+}
+
+/*
+ * Dispose of the state data for the slots control loop
+ */
+static void dispose_slots_state(struct slots_pid_state *state)
+{
+	if (state->monitor == NULL)
+		return;
+
+	device_remove_file(&of_dev->dev, &dev_attr_slots_temperature);
+	device_remove_file(&of_dev->dev, &dev_attr_slots_fan_pwm);
+
+	detach_i2c_chip(state->monitor);
+	state->monitor = NULL;
+}
+
+
 static int call_critical_overtemp(void)
 {
 	char *argv[] = { critical_overtemp_path, NULL };
@@ -1617,14 +1791,17 @@ static int main_control_loop(void *x)
 		goto out;
 	}
 
-	/* Set the PCI fan once for now */
-	set_pwm_fan(SLOTS_FAN_PWM_INDEX, SLOTS_FAN_DEFAULT_PWM);
+	/* Set the PCI fan once for now on non-RackMac */
+	if (!rackmac)
+		set_pwm_fan(SLOTS_FAN_PWM_INDEX, SLOTS_FAN_DEFAULT_PWM);
 
 	/* Initialize ADCs */
 	initialize_adc(&cpu_state[0]);
 	if (cpu_state[1].monitor != NULL)
 		initialize_adc(&cpu_state[1]);
 
+	fcu_tickle_ticks = FCU_TICKLE_TICKS;
+
 	up(&driver_lock);
 
 	while (state == state_attached) {
@@ -1634,6 +1811,12 @@ static int main_control_loop(void *x)
 
 		down(&driver_lock);
 
+		/* Tickle the FCU just in case */
+		if (--fcu_tickle_ticks < 0) {
+			fcu_tickle_ticks = FCU_TICKLE_TICKS;
+			tickle_fcu();
+		}
+
 		/* First, we always calculate the new DIMMs state on an Xserve */
 		if (rackmac)
 			do_monitor_dimms(&dimms_state);
@@ -1654,7 +1837,9 @@ static int main_control_loop(void *x)
 		}
 		/* Then, the rest */
 		do_monitor_backside(&backside_state);
-		if (!rackmac)
+		if (rackmac)
+			do_monitor_slots(&slots_state);
+		else
 			do_monitor_drives(&drives_state);
 		up(&driver_lock);
 
@@ -1696,6 +1881,7 @@ static void dispose_control_loops(void)
 	dispose_cpu_state(&cpu_state[1]);
 	dispose_backside_state(&backside_state);
 	dispose_drives_state(&drives_state);
+	dispose_slots_state(&slots_state);
 	dispose_dimms_state(&dimms_state);
 }
 
@@ -1745,6 +1931,8 @@ static int create_control_loops(void)
 		goto fail;
 	if (rackmac && init_dimms_state(&dimms_state))
 		goto fail;
+	if (rackmac && init_slots_state(&slots_state))
+		goto fail;
 	if (!rackmac && init_drives_state(&drives_state))
 		goto fail;
 
diff --git a/drivers/macintosh/therm_pm72.h b/drivers/macintosh/therm_pm72.h
index fc7e9b7ecaf2d..393cc9df94e16 100644
--- a/drivers/macintosh/therm_pm72.h
+++ b/drivers/macintosh/therm_pm72.h
@@ -105,6 +105,7 @@ static char * critical_overtemp_path = "/sbin/critical_overtemp";
 #define DRIVES_DALLAS_ID	0x94
 #define BACKSIDE_MAX_ID		0x98
 #define XSERVE_DIMMS_LM87	0x25a
+#define XSERVE_SLOTS_LM75	0x290
 
 /*
  * Some MAX6690, DS1775, LM87 register definitions
@@ -198,7 +199,7 @@ struct drives_pid_state
 
 #define SLOTS_FAN_PWM_DEFAULT_ID	2
 #define SLOTS_FAN_PWM_INDEX		2
-#define	SLOTS_FAN_DEFAULT_PWM		50 /* Do better here ! */
+#define	SLOTS_FAN_DEFAULT_PWM		40 /* Do better here ! */
 
 
 /*
@@ -206,7 +207,7 @@ struct drives_pid_state
  */
 #define DIMM_PID_G_d			0
 #define DIMM_PID_G_p			0
-#define DIMM_PID_G_r			0x6553600
+#define DIMM_PID_G_r			0x06553600
 #define DIMM_PID_INPUT_TARGET		3276800
 #define DIMM_PID_INTERVAL    		1
 #define DIMM_PID_OUTPUT_MAX		14000
@@ -226,6 +227,31 @@ struct dimm_pid_state
 };
 
 
+/*
+ * PID factors for the Xserve Slots control loop
+ */
+#define SLOTS_PID_G_d			0
+#define SLOTS_PID_G_p			0
+#define SLOTS_PID_G_r			0x00100000
+#define SLOTS_PID_INPUT_TARGET		3200000
+#define SLOTS_PID_INTERVAL    		1
+#define SLOTS_PID_OUTPUT_MAX		100
+#define SLOTS_PID_OUTPUT_MIN		20
+#define SLOTS_PID_HISTORY_SIZE		20
+
+struct slots_pid_state
+{
+	int			ticks;
+	struct i2c_client *	monitor;
+	s32	       		sample_history[SLOTS_PID_HISTORY_SIZE];
+	s32			error_history[SLOTS_PID_HISTORY_SIZE];
+	int			cur_sample;
+	s32			last_temp;
+	int			first;
+	int			pwm;
+};
+
+
 
 /* Desktops */
 
@@ -283,6 +309,9 @@ struct cpu_pid_state
 	s32			pump_max;
 };
 
+/* Tickle FCU every 10 seconds */
+#define FCU_TICKLE_TICKS	10
+
 /*
  * Driver state
  */
-- 
GitLab


From 7ed14c2177694ce086180eb9ca9ca4c6cd72c7ef Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 6 Jul 2006 15:09:19 +1000
Subject: [PATCH 0015/1063] [POWERPC] Add cpufreq support for Xserve G5

The Xserve G5 is capable of frequency switching like other desktop G5s.
This enables it. It also fixes a Kconfig issue which prevented
building the G5 cpufreq support if CONFIG_PMAC_SMU was not set (the
first version of that driver only worked with SMU based macs, but this
isn't the case anymore).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig                         |  2 +-
 arch/powerpc/platforms/powermac/cpufreq_64.c | 78 ++++++++++++--------
 2 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2643dbc3f2894..13e583f16ede5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -504,7 +504,7 @@ config CPU_FREQ_PMAC
 
 config CPU_FREQ_PMAC64
 	bool "Support for some Apple G5s"
-	depends on CPU_FREQ && PMAC_SMU && PPC64
+	depends on CPU_FREQ && PPC64
 	select CPU_FREQ_TABLE
 	help
 	  This adds support for frequency switching on Apple iMac G5,
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
index f08a14516139f..a6a84ac5433e8 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -10,6 +10,8 @@
  * that is iMac G5 and latest single CPU desktop.
  */
 
+#undef DEBUG
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -30,13 +32,7 @@
 #include <asm/smu.h>
 #include <asm/pmac_pfunc.h>
 
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
+#define DBG(fmt...) pr_debug(fmt)
 
 /* see 970FX user manual */
 
@@ -82,8 +78,6 @@ static struct freq_attr* g5_cpu_freqs_attr[] = {
 /* Power mode data is an array of the 32 bits PCR values to use for
  * the various frequencies, retrieved from the device-tree
  */
-static u32 *g5_pmode_data;
-static int g5_pmode_max;
 static int g5_pmode_cur;
 
 static void (*g5_switch_volt)(int speed_mode);
@@ -93,6 +87,11 @@ static int (*g5_query_freq)(void);
 static DEFINE_MUTEX(g5_switch_mutex);
 
 
+#ifdef CONFIG_PPC_SMU
+
+static u32 *g5_pmode_data;
+static int g5_pmode_max;
+
 static struct smu_sdbp_fvt *g5_fvt_table;	/* table of op. points */
 static int g5_fvt_count;			/* number of op. points */
 static int g5_fvt_cur;				/* current op. point */
@@ -209,6 +208,16 @@ static int g5_scom_query_freq(void)
 	return i;
 }
 
+/*
+ * Fake voltage switching for platforms with missing support
+ */
+
+static void g5_dummy_switch_volt(int speed_mode)
+{
+}
+
+#endif /* CONFIG_PPC_SMU */
+
 /*
  * Platform function based voltage switching for PowerMac7,2 & 7,3
  */
@@ -248,6 +257,9 @@ static int g5_pfunc_switch_freq(int speed_mode)
 	struct pmf_args args;
 	u32 done = 0;
 	unsigned long timeout;
+	int rc;
+
+	DBG("g5_pfunc_switch_freq(%d)\n", speed_mode);
 
 	/* If frequency is going up, first ramp up the voltage */
 	if (speed_mode < g5_pmode_cur)
@@ -255,9 +267,12 @@ static int g5_pfunc_switch_freq(int speed_mode)
 
 	/* Do it */
 	if (speed_mode == CPUFREQ_HIGH)
-		pmf_call_one(pfunc_cpu_setfreq_high, NULL);
+		rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL);
 	else
-		pmf_call_one(pfunc_cpu_setfreq_low, NULL);
+		rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
+
+	if (rc)
+		printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc);
 
 	/* It's an irq GPIO so we should be able to just block here,
 	 * I'll do that later after I've properly tested the IRQ code for
@@ -296,13 +311,6 @@ static int g5_pfunc_query_freq(void)
 	return val ? CPUFREQ_HIGH : CPUFREQ_LOW;
 }
 
-/*
- * Fake voltage switching for platforms with missing support
- */
-
-static void g5_dummy_switch_volt(int speed_mode)
-{
-}
 
 /*
  * Common interface to the cpufreq core
@@ -375,6 +383,8 @@ static struct cpufreq_driver g5_cpufreq_driver = {
 };
 
 
+#ifdef CONFIG_PPC_SMU
+
 static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 {
 	struct device_node *cpunode;
@@ -525,6 +535,9 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	return rc;
 }
 
+#endif /* CONFIG_PPC_SMU */
+
+
 static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 {
 	struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL;
@@ -533,6 +546,9 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 	u64 max_freq, min_freq, ih, il;
 	int has_volt = 1, rc = 0;
 
+	DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and"
+	    " RackMac3,1...\n");
+
 	/* Get first CPU node */
 	for (cpunode = NULL;
 	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
@@ -636,6 +652,15 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 	 */
 	ih = *((u32 *)(eeprom + 0x10));
 	il = *((u32 *)(eeprom + 0x20));
+
+	/* Check for machines with no useful settings */
+	if (il == ih) {
+		printk(KERN_WARNING "cpufreq: No low frequency mode available"
+		       " on this model !\n");
+		rc = -ENODEV;
+		goto bail;
+	}
+
 	min_freq = 0;
 	if (ih != 0 && il != 0)
 		min_freq = (max_freq * il) / ih;
@@ -643,7 +668,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 	/* Sanity check */
 	if (min_freq >= max_freq || min_freq < 1000) {
 		printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n");
-		rc = -ENODEV;
+		rc = -ENXIO;
 		goto bail;
 	}
 	g5_cpu_freqs[0].frequency = max_freq;
@@ -690,16 +715,10 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 	return rc;
 }
 
-static int __init g5_rm31_cpufreq_init(struct device_node *cpus)
-{
-	/* NYI */
-	return 0;
-}
-
 static int __init g5_cpufreq_init(void)
 {
 	struct device_node *cpus;
-	int rc;
+	int rc = 0;
 
 	cpus = of_find_node_by_path("/cpus");
 	if (cpus == NULL) {
@@ -708,12 +727,13 @@ static int __init g5_cpufreq_init(void)
 	}
 
 	if (machine_is_compatible("PowerMac7,2") ||
-	    machine_is_compatible("PowerMac7,3"))
+	    machine_is_compatible("PowerMac7,3") ||
+	    machine_is_compatible("RackMac3,1"))
 		rc = g5_pm72_cpufreq_init(cpus);
-	else if (machine_is_compatible("RackMac3,1"))
-		rc = g5_rm31_cpufreq_init(cpus);
+#ifdef CONFIG_PPC_SMU
 	else
 		rc = g5_neo2_cpufreq_init(cpus);
+#endif /* CONFIG_PPC_SMU */
 
 	of_node_put(cpus);
 	return rc;
-- 
GitLab


From 980ffd3258dbcdb011e929de5d658ec81febba8d Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Tue, 4 Jul 2006 16:44:46 +1000
Subject: [PATCH 0016/1063] [POWERPC] Remove linux,device properties

The linux,device property isn't used anywhere within the kernel, and
since it's a kernel pointer, it's a little useless for userspace.

This change removes the code to create this property in
of_device_register.

Built for pmac32.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/of_device.c | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
index 3262b73a3a689..397c83eda20ee 100644
--- a/arch/powerpc/kernel/of_device.c
+++ b/arch/powerpc/kernel/of_device.c
@@ -189,27 +189,9 @@ void of_release_dev(struct device *dev)
 int of_device_register(struct of_device *ofdev)
 {
 	int rc;
-	struct of_device **odprop;
 
 	BUG_ON(ofdev->node == NULL);
 
-	odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL);
-	if (!odprop) {
-		struct property *new_prop;
-	
-		new_prop = kmalloc(sizeof(struct property) + sizeof(struct of_device *),
-			GFP_KERNEL);
-		if (new_prop == NULL)
-			return -ENOMEM;
-		new_prop->name = "linux,device";
-		new_prop->length = sizeof(sizeof(struct of_device *));
-		new_prop->value = (unsigned char *)&new_prop[1];
-		odprop = (struct of_device **)new_prop->value;
-		*odprop = NULL;
-		prom_add_property(ofdev->node, new_prop);
-	}
-	*odprop = ofdev;
-
 	rc = device_register(&ofdev->dev);
 	if (rc)
 		return rc;
@@ -221,14 +203,8 @@ int of_device_register(struct of_device *ofdev)
 
 void of_device_unregister(struct of_device *ofdev)
 {
-	struct of_device **odprop;
-
 	device_remove_file(&ofdev->dev, &dev_attr_devspec);
 
-	odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL);
-	if (odprop)
-		*odprop = NULL;
-
 	device_unregister(&ofdev->dev);
 }
 
-- 
GitLab


From b5a1a9abe1a54ba40a9612001920f98bbdd0c56f Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Tue, 4 Jul 2006 16:46:44 +1000
Subject: [PATCH 0017/1063] [POWERPC] Use const qualifiers for prom parsing
 utilities

The of_bus callbacks map and get_flags can be constified, as they don't
alter the range or addr arguments. of_dump_addr and of_read_addr can
also be constified.

Built for 32- and 64-bit powerpc

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/prom_parse.c | 22 ++++++++++++----------
 include/asm-powerpc/prom.h       |  2 +-
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 21009b1f78698..e9960170667b2 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -27,7 +27,7 @@
 
 /* Debug utility */
 #ifdef DEBUG
-static void of_dump_addr(const char *s, u32 *addr, int na)
+static void of_dump_addr(const char *s, const u32 *addr, int na)
 {
 	printk("%s", s);
 	while(na--)
@@ -35,7 +35,7 @@ static void of_dump_addr(const char *s, u32 *addr, int na)
 	printk("\n");
 }
 #else
-static void of_dump_addr(const char *s, u32 *addr, int na) { }
+static void of_dump_addr(const char *s, const u32 *addr, int na) { }
 #endif
 
 
@@ -46,9 +46,10 @@ struct of_bus {
 	int		(*match)(struct device_node *parent);
 	void		(*count_cells)(struct device_node *child,
 				       int *addrc, int *sizec);
-	u64		(*map)(u32 *addr, u32 *range, int na, int ns, int pna);
+	u64		(*map)(u32 *addr, const u32 *range,
+				int na, int ns, int pna);
 	int		(*translate)(u32 *addr, u64 offset, int na);
-	unsigned int	(*get_flags)(u32 *addr);
+	unsigned int	(*get_flags)(const u32 *addr);
 };
 
 
@@ -65,7 +66,8 @@ static void of_bus_default_count_cells(struct device_node *dev,
 		*sizec = prom_n_size_cells(dev);
 }
 
-static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna)
+static u64 of_bus_default_map(u32 *addr, const u32 *range,
+		int na, int ns, int pna)
 {
 	u64 cp, s, da;
 
@@ -93,7 +95,7 @@ static int of_bus_default_translate(u32 *addr, u64 offset, int na)
 	return 0;
 }
 
-static unsigned int of_bus_default_get_flags(u32 *addr)
+static unsigned int of_bus_default_get_flags(const u32 *addr)
 {
 	return IORESOURCE_MEM;
 }
@@ -118,7 +120,7 @@ static void of_bus_pci_count_cells(struct device_node *np,
 		*sizec = 2;
 }
 
-static u64 of_bus_pci_map(u32 *addr, u32 *range, int na, int ns, int pna)
+static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
 {
 	u64 cp, s, da;
 
@@ -143,7 +145,7 @@ static int of_bus_pci_translate(u32 *addr, u64 offset, int na)
 	return of_bus_default_translate(addr + 1, offset, na - 1);
 }
 
-static unsigned int of_bus_pci_get_flags(u32 *addr)
+static unsigned int of_bus_pci_get_flags(const u32 *addr)
 {
 	unsigned int flags = 0;
 	u32 w = addr[0];
@@ -178,7 +180,7 @@ static void of_bus_isa_count_cells(struct device_node *child,
 		*sizec = 1;
 }
 
-static u64 of_bus_isa_map(u32 *addr, u32 *range, int na, int ns, int pna)
+static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna)
 {
 	u64 cp, s, da;
 
@@ -203,7 +205,7 @@ static int of_bus_isa_translate(u32 *addr, u64 offset, int na)
 	return of_bus_default_translate(addr + 1, offset, na - 1);
 }
 
-static unsigned int of_bus_isa_get_flags(u32 *addr)
+static unsigned int of_bus_isa_get_flags(const u32 *addr)
 {
 	unsigned int flags = 0;
 	u32 w = addr[0];
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index b095a285c84b5..56f6ea0c76de9 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -198,7 +198,7 @@ extern int release_OF_resource(struct device_node* node, int index);
 
 
 /* Helper to read a big number */
-static inline u64 of_read_number(u32 *cell, int size)
+static inline u64 of_read_number(const u32 *cell, int size)
 {
 	u64 r = 0;
 	while (size--)
-- 
GitLab


From 3da27289a8ecc688fc62c0961dfe89d392370480 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Tue, 4 Jul 2006 16:47:18 +1000
Subject: [PATCH 0018/1063] [POWERPC] Remove linux,pci-domain properties

The linux,pci-domain property is no longer used by DLPAR/PCI Hotplug
utilities, or LSVPD. This change removes it.

Built for ppc64_defconfig.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/pci_64.c | 39 +-----------------------------------
 1 file changed, 1 insertion(+), 38 deletions(-)

diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index efc0b5559ee0e..1d85fcba51e4d 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -185,34 +185,6 @@ static void __devinit pci_setup_pci_controller(struct pci_controller *hose)
 	spin_unlock(&hose_spinlock);
 }
 
-static void add_linux_pci_domain(struct device_node *dev,
-				 struct pci_controller *phb)
-{
-	struct property *of_prop;
-	unsigned int size;
-
-	of_prop = (struct property *)
-		get_property(dev, "linux,pci-domain", &size);
-	if (of_prop != NULL)
-		return;
-	WARN_ON(of_prop && size < sizeof(int));
-	if (of_prop && size < sizeof(int))
-		of_prop = NULL;
-	size = sizeof(struct property) + sizeof(int);
-	if (of_prop == NULL) {
-		if (mem_init_done)
-			of_prop = kmalloc(size, GFP_KERNEL);
-		else
-			of_prop = alloc_bootmem(size);
-	}
-	memset(of_prop, 0, sizeof(struct property));
-	of_prop->name = "linux,pci-domain";
-	of_prop->length = sizeof(int);
-	of_prop->value = (unsigned char *)&of_prop[1];
-	*((int *)of_prop->value) = phb->global_number;
-	prom_add_property(dev, of_prop);
-}
-
 struct pci_controller * pcibios_alloc_controller(struct device_node *dev)
 {
 	struct pci_controller *phb;
@@ -226,22 +198,13 @@ struct pci_controller * pcibios_alloc_controller(struct device_node *dev)
 	pci_setup_pci_controller(phb);
 	phb->arch_data = dev;
 	phb->is_dynamic = mem_init_done;
-	if (dev) {
+	if (dev)
 		PHB_SET_NODE(phb, of_node_to_nid(dev));
-		add_linux_pci_domain(dev, phb);
-	}
 	return phb;
 }
 
 void pcibios_free_controller(struct pci_controller *phb)
 {
-	if (phb->arch_data) {
-		struct device_node *np = phb->arch_data;
-		int *domain = (int *)get_property(np,
-						  "linux,pci-domain", NULL);
-		if (domain)
-			*domain = -1;
-	}
 	if (phb->is_dynamic)
 		kfree(phb);
 }
-- 
GitLab


From 8b0036eefd7a96f23244b969417684c8627f5ad6 Mon Sep 17 00:00:00 2001
From: Vitaly Wool <vwool@ru.mvista.com>
Date: Tue, 11 Jul 2006 09:11:25 +0200
Subject: [PATCH 0019/1063] [MTD] NAND: OOB buffer offset fixups

In the case of data-pad-ecc-pad-data... layout the oob start position
has to be sizeof(data) in nand_write_oob_syndrome().

In nand_fill_oob() we need to copy to buf + buffer offset instead of
buf + write offset.

From: Vitaly Wool <vwool@ru.mvista.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/mtd/nand/nand_base.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 62b861304e03e..cffd66309ffa0 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1203,7 +1203,7 @@ static int nand_write_oob_syndrome(struct mtd_info *mtd,
 		pos = steps * (eccsize + chunk);
 		steps = 0;
 	} else
-		pos = eccsize + chunk;
+		pos = eccsize;
 
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page);
 	for (i = 0; i < steps; i++) {
@@ -1566,7 +1566,7 @@ static uint8_t *nand_fill_oob(struct nand_chip *chip, uint8_t *oob,
 				bytes = min_t(size_t, len, free->length);
 				boffs = free->offset;
 			}
-			memcpy(chip->oob_poi + woffs, oob, bytes);
+			memcpy(chip->oob_poi + boffs, oob, bytes);
 			oob += bytes;
 		}
 		return oob;
-- 
GitLab


From a749690ecf7ab55aa46df1698bcee3ec110612df Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:52:01 +1000
Subject: [PATCH 0020/1063] [POWERPC] iseries: Use device tree /system-id in
 /proc/iSeries/config

We export a bunch of info in /proc/iSeries/config. Currently we pull it
directly out of some iSeries specific structs, but we could use the device
tree instead, this saves decoding it twice and is a little neater.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/iseries/viopath.c | 27 +++++++++++++++---------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index 622a30149b48b..efeb6ae9df64c 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -41,8 +41,8 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/prom.h>
 #include <asm/iseries/hv_types.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_lp_config.h>
 #include <asm/iseries/mf.h>
@@ -116,6 +116,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
 	dma_addr_t handle;
 	HvLpEvent_Rc hvrc;
 	DECLARE_MUTEX_LOCKED(Semaphore);
+	struct device_node *node;
 
 	buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -143,20 +144,26 @@ static int proc_viopath_show(struct seq_file *m, void *v)
 
 	buf[HW_PAGE_SIZE-1] = '\0';
 	seq_printf(m, "%s", buf);
-	seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
-	seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
-		   e2a(xItExtVpdPanel.mfgID[2]),
-		   e2a(xItExtVpdPanel.mfgID[3]),
-		   e2a(xItExtVpdPanel.systemSerial[1]),
-		   e2a(xItExtVpdPanel.systemSerial[2]),
-		   e2a(xItExtVpdPanel.systemSerial[3]),
-		   e2a(xItExtVpdPanel.systemSerial[4]),
-		   e2a(xItExtVpdPanel.systemSerial[5]));
 
 	dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
 			 DMA_FROM_DEVICE);
 	kfree(buf);
 
+	seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
+
+	node = of_find_node_by_path("/");
+	buf = NULL;
+	if (node != NULL)
+		buf = get_property(node, "system-id", NULL);
+
+	if (buf == NULL)
+		seq_printf(m, "SRLNBR=<UNKNOWN>\n");
+	else
+		/* Skip "IBM," on front of serial number, see dt.c */
+		seq_printf(m, "SRLNBR=%s\n", buf + 4);
+
+	of_node_put(node);
+
 	return 0;
 }
 
-- 
GitLab


From dac411e7aa92d23dadbcb8721845ab88577294c7 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:52:04 +1000
Subject: [PATCH 0021/1063] [POWERPC] iseries: Move e2a()/strne2a() into their
 only caller

The EBCDIC -> ASCII functions, e2a() and strne2a() are now only used in
dt.c, so move them in there.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile           |   1 -
 arch/powerpc/lib/e2a.c              | 116 ----------------------------
 arch/powerpc/platforms/iseries/dt.c |  98 ++++++++++++++++++++++-
 include/asm-powerpc/system.h        |   5 --
 4 files changed, 97 insertions(+), 123 deletions(-)
 delete mode 100644 arch/powerpc/lib/e2a.c

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index ff70964582495..336dd191f768c 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -14,7 +14,6 @@ endif
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o \
 			   strcase.o
-obj-$(CONFIG_PPC_ISERIES) += e2a.o
 obj-$(CONFIG_XMON)	+= sstep.o
 
 ifeq ($(CONFIG_PPC64),y)
diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c
deleted file mode 100644
index 4b72ed8fd50e1..0000000000000
--- a/arch/powerpc/lib/e2a.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- *  EBCDIC to ASCII conversion
- *
- * This function moved here from arch/powerpc/platforms/iseries/viopath.c 
- *
- * (C) Copyright 2000-2004 IBM Corporation
- *
- * This program is free software;  you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) anyu later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include <linux/module.h>
-
-unsigned char e2a(unsigned char x)
-{
-	switch (x) {
-	case 0xF0:
-		return '0';
-	case 0xF1:
-		return '1';
-	case 0xF2:
-		return '2';
-	case 0xF3:
-		return '3';
-	case 0xF4:
-		return '4';
-	case 0xF5:
-		return '5';
-	case 0xF6:
-		return '6';
-	case 0xF7:
-		return '7';
-	case 0xF8:
-		return '8';
-	case 0xF9:
-		return '9';
-	case 0xC1:
-		return 'A';
-	case 0xC2:
-		return 'B';
-	case 0xC3:
-		return 'C';
-	case 0xC4:
-		return 'D';
-	case 0xC5:
-		return 'E';
-	case 0xC6:
-		return 'F';
-	case 0xC7:
-		return 'G';
-	case 0xC8:
-		return 'H';
-	case 0xC9:
-		return 'I';
-	case 0xD1:
-		return 'J';
-	case 0xD2:
-		return 'K';
-	case 0xD3:
-		return 'L';
-	case 0xD4:
-		return 'M';
-	case 0xD5:
-		return 'N';
-	case 0xD6:
-		return 'O';
-	case 0xD7:
-		return 'P';
-	case 0xD8:
-		return 'Q';
-	case 0xD9:
-		return 'R';
-	case 0xE2:
-		return 'S';
-	case 0xE3:
-		return 'T';
-	case 0xE4:
-		return 'U';
-	case 0xE5:
-		return 'V';
-	case 0xE6:
-		return 'W';
-	case 0xE7:
-		return 'X';
-	case 0xE8:
-		return 'Y';
-	case 0xE9:
-		return 'Z';
-	}
-	return ' ';
-}
-EXPORT_SYMBOL(e2a);
-
-unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n)
-{
-	int i;
-
-	n = strnlen(src, n);
-
-	for (i = 0; i < n; i++)
-		dest[i] = e2a(src[i]);
-
-	return dest;
-}
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index d194140c1ebf0..39c676ab9d6df 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -1,5 +1,6 @@
 /*
- *    Copyright (c) 2005-2006 Michael Ellerman, IBM Corporation
+ *    Copyright (C) 2005-2006 Michael Ellerman, IBM Corporation
+ *    Copyright (C) 2000-2004, IBM Corporation
  *
  *    Description:
  *      This file contains all the routines to build a flattened device
@@ -76,6 +77,101 @@ static char __initdata device_type_pci[] = "pci";
 static char __initdata device_type_vdevice[] = "vdevice";
 static char __initdata device_type_vscsi[] = "vscsi";
 
+
+/* EBCDIC to ASCII conversion routines */
+
+unsigned char e2a(unsigned char x)
+{
+	switch (x) {
+	case 0xF0:
+		return '0';
+	case 0xF1:
+		return '1';
+	case 0xF2:
+		return '2';
+	case 0xF3:
+		return '3';
+	case 0xF4:
+		return '4';
+	case 0xF5:
+		return '5';
+	case 0xF6:
+		return '6';
+	case 0xF7:
+		return '7';
+	case 0xF8:
+		return '8';
+	case 0xF9:
+		return '9';
+	case 0xC1:
+		return 'A';
+	case 0xC2:
+		return 'B';
+	case 0xC3:
+		return 'C';
+	case 0xC4:
+		return 'D';
+	case 0xC5:
+		return 'E';
+	case 0xC6:
+		return 'F';
+	case 0xC7:
+		return 'G';
+	case 0xC8:
+		return 'H';
+	case 0xC9:
+		return 'I';
+	case 0xD1:
+		return 'J';
+	case 0xD2:
+		return 'K';
+	case 0xD3:
+		return 'L';
+	case 0xD4:
+		return 'M';
+	case 0xD5:
+		return 'N';
+	case 0xD6:
+		return 'O';
+	case 0xD7:
+		return 'P';
+	case 0xD8:
+		return 'Q';
+	case 0xD9:
+		return 'R';
+	case 0xE2:
+		return 'S';
+	case 0xE3:
+		return 'T';
+	case 0xE4:
+		return 'U';
+	case 0xE5:
+		return 'V';
+	case 0xE6:
+		return 'W';
+	case 0xE7:
+		return 'X';
+	case 0xE8:
+		return 'Y';
+	case 0xE9:
+		return 'Z';
+	}
+	return ' ';
+}
+EXPORT_SYMBOL(e2a);
+
+unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n)
+{
+	int i;
+
+	n = strnlen(src, n);
+
+	for (i = 0; i < n; i++)
+		dest[i] = e2a(src[i]);
+
+	return dest;
+}
+
 static struct iseries_flat_dt * __init dt_init(void)
 {
 	struct iseries_flat_dt *dt;
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index d075725bf444b..5deb7bc7bb1fe 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -169,11 +169,6 @@ extern u32 booke_wdt_enabled;
 extern u32 booke_wdt_period;
 #endif /* CONFIG_BOOKE_WDT */
 
-/* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */
-extern unsigned char e2a(unsigned char);
-extern unsigned char* strne2a(unsigned char *dest,
-		const unsigned char *src, size_t n);
-
 struct device_node;
 extern void note_scsi_host(struct device_node *, void *);
 
-- 
GitLab


From a892e5d7fa7fb893b5873f7150a83f6f1ee141b5 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:52:06 +1000
Subject: [PATCH 0022/1063] [POWERPC] iseries: Cleanup e2a() and strne2a()

e2a() was formerly used by lparcfg, and so had to be exported, but isn't
anymore, so stop exporting it.

e2a() and strne2a() can both be static, and __init.

And e2a() can be made much more concise if we use x ... y case labels. While
we're there, add support for lower case letters.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/iseries/dt.c | 92 ++++++-----------------------
 1 file changed, 17 insertions(+), 75 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index 39c676ab9d6df..c5f59a8b9ef3b 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -80,87 +80,29 @@ static char __initdata device_type_vscsi[] = "vscsi";
 
 /* EBCDIC to ASCII conversion routines */
 
-unsigned char e2a(unsigned char x)
+static unsigned char __init e2a(unsigned char x)
 {
 	switch (x) {
-	case 0xF0:
-		return '0';
-	case 0xF1:
-		return '1';
-	case 0xF2:
-		return '2';
-	case 0xF3:
-		return '3';
-	case 0xF4:
-		return '4';
-	case 0xF5:
-		return '5';
-	case 0xF6:
-		return '6';
-	case 0xF7:
-		return '7';
-	case 0xF8:
-		return '8';
-	case 0xF9:
-		return '9';
-	case 0xC1:
-		return 'A';
-	case 0xC2:
-		return 'B';
-	case 0xC3:
-		return 'C';
-	case 0xC4:
-		return 'D';
-	case 0xC5:
-		return 'E';
-	case 0xC6:
-		return 'F';
-	case 0xC7:
-		return 'G';
-	case 0xC8:
-		return 'H';
-	case 0xC9:
-		return 'I';
-	case 0xD1:
-		return 'J';
-	case 0xD2:
-		return 'K';
-	case 0xD3:
-		return 'L';
-	case 0xD4:
-		return 'M';
-	case 0xD5:
-		return 'N';
-	case 0xD6:
-		return 'O';
-	case 0xD7:
-		return 'P';
-	case 0xD8:
-		return 'Q';
-	case 0xD9:
-		return 'R';
-	case 0xE2:
-		return 'S';
-	case 0xE3:
-		return 'T';
-	case 0xE4:
-		return 'U';
-	case 0xE5:
-		return 'V';
-	case 0xE6:
-		return 'W';
-	case 0xE7:
-		return 'X';
-	case 0xE8:
-		return 'Y';
-	case 0xE9:
-		return 'Z';
+	case 0x81 ... 0x89:
+		return x - 0x81 + 'a';
+	case 0x91 ... 0x99:
+		return x - 0x91 + 'j';
+	case 0xA2 ... 0xA9:
+		return x - 0xA2 + 's';
+	case 0xC1 ... 0xC9:
+		return x - 0xC1 + 'A';
+	case 0xD1 ... 0xD9:
+		return x - 0xD1 + 'J';
+	case 0xE2 ... 0xE9:
+		return x - 0xE2 + 'S';
+	case 0xF0 ... 0xF9:
+		return x - 0xF0 + '0';
 	}
 	return ' ';
 }
-EXPORT_SYMBOL(e2a);
 
-unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n)
+static unsigned char * __init strne2a(unsigned char *dest,
+		const unsigned char *src, size_t n)
 {
 	int i;
 
-- 
GitLab


From c59acae85409fdf5d7574e90009c8410daf38938 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:52:09 +1000
Subject: [PATCH 0023/1063] [POWERPC] iseries: Make ItExtVpdPanel private to
 iSeries

No one outside platforms/iseries needs ItExtVpdPanel anymore, so move
it in there. It used to be needed by lparcfg, and so was exported, but
isn't needed anymore, so unexport it.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/lparcfg.c                               | 1 -
 arch/powerpc/platforms/iseries/dt.c                         | 2 +-
 .../powerpc/platforms}/iseries/it_exp_vpd_panel.h           | 6 +++---
 arch/powerpc/platforms/iseries/lpardata.c                   | 3 +--
 4 files changed, 5 insertions(+), 7 deletions(-)
 rename {include/asm-powerpc => arch/powerpc/platforms}/iseries/it_exp_vpd_panel.h (89%)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 23f34daa044a0..2d94b372d49b2 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -32,7 +32,6 @@
 #include <asm/rtas.h>
 #include <asm/system.h>
 #include <asm/time.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/prom.h>
 #include <asm/vdso_datapage.h>
 
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index c5f59a8b9ef3b..aa582262aefd2 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -34,13 +34,13 @@
 #include <asm/iseries/hv_types.h>
 #include <asm/iseries/hv_lp_config.h>
 #include <asm/iseries/hv_call_xm.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/udbg.h>
 
 #include "processor_vpd.h"
 #include "call_hpt.h"
 #include "call_pci.h"
 #include "pci.h"
+#include "it_exp_vpd_panel.h"
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
diff --git a/include/asm-powerpc/iseries/it_exp_vpd_panel.h b/arch/powerpc/platforms/iseries/it_exp_vpd_panel.h
similarity index 89%
rename from include/asm-powerpc/iseries/it_exp_vpd_panel.h
rename to arch/powerpc/platforms/iseries/it_exp_vpd_panel.h
index 304a609ae21a9..6de9097b7f57a 100644
--- a/include/asm-powerpc/iseries/it_exp_vpd_panel.h
+++ b/arch/powerpc/platforms/iseries/it_exp_vpd_panel.h
@@ -15,8 +15,8 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H
-#define _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H
+#ifndef _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H
+#define _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H
 
 /*
  *	This struct maps the panel information
@@ -48,4 +48,4 @@ struct ItExtVpdPanel {
 
 extern struct ItExtVpdPanel	xItExtVpdPanel;
 
-#endif /* _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H */
+#endif /* _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H */
diff --git a/arch/powerpc/platforms/iseries/lpardata.c b/arch/powerpc/platforms/iseries/lpardata.c
index a7769445d6c70..13e9bd1322541 100644
--- a/arch/powerpc/platforms/iseries/lpardata.c
+++ b/arch/powerpc/platforms/iseries/lpardata.c
@@ -18,7 +18,6 @@
 #include <asm/iseries/it_lp_reg_save.h>
 #include <asm/paca.h>
 #include <asm/iseries/lpar_map.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/iseries/it_lp_queue.h>
 
 #include "naca.h"
@@ -27,6 +26,7 @@
 #include "ipl_parms.h"
 #include "processor_vpd.h"
 #include "release_data.h"
+#include "it_exp_vpd_panel.h"
 
 /* The HvReleaseData is the root of the information shared between
  * the hypervisor and Linux.
@@ -134,7 +134,6 @@ struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data")));
 
 /* May be filled in by the hypervisor so cannot end up in the BSS */
 struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data")));
-EXPORT_SYMBOL(xItExtVpdPanel);
 
 #define maxPhysicalProcessors 32
 
-- 
GitLab


From a2ced11b6af59854cc2a2791dccd8b6c0da2f733 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:52:12 +1000
Subject: [PATCH 0024/1063] [POWERPC] iseries: Make
 HvLpConfig_get(Primary)LpIndex functions

HvLpConfig_get(Primary)LpIndex are currently static inlines that return
fields from the itLpNaca. If we make them real functions we can make the
itLpNaca private to iSeries.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/iseries/hvlpconfig.c | 13 +++++++++++++
 arch/powerpc/platforms/iseries/setup.c      |  1 +
 include/asm-powerpc/iseries/hv_lp_config.h  | 13 ++-----------
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c
index 663a1affb4bbc..cfcedaae6ea28 100644
--- a/arch/powerpc/platforms/iseries/hvlpconfig.c
+++ b/arch/powerpc/platforms/iseries/hvlpconfig.c
@@ -18,9 +18,22 @@
 
 #include <linux/module.h>
 #include <asm/iseries/hv_lp_config.h>
+#include <asm/iseries/it_lp_naca.h>
 
 HvLpIndex HvLpConfig_getLpIndex_outline(void)
 {
 	return HvLpConfig_getLpIndex();
 }
 EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline);
+
+HvLpIndex HvLpConfig_getLpIndex(void)
+{
+	return itLpNaca.xLpIndex;
+}
+EXPORT_SYMBOL(HvLpConfig_getLpIndex);
+
+HvLpIndex HvLpConfig_getPrimaryLpIndex(void)
+{
+	return itLpNaca.xPrimaryLpIndex;
+}
+EXPORT_SYMBOL_GPL(HvLpConfig_getPrimaryLpIndex);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index c9605d773a775..c34299b18d8b8 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -48,6 +48,7 @@
 #include <asm/iseries/hv_call_event.h>
 #include <asm/iseries/hv_call_xm.h>
 #include <asm/iseries/it_lp_queue.h>
+#include <asm/iseries/it_lp_naca.h>
 #include <asm/iseries/mf.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/lpar_map.h>
diff --git a/include/asm-powerpc/iseries/hv_lp_config.h b/include/asm-powerpc/iseries/hv_lp_config.h
index df8b20739719f..a006fd1e4a2ce 100644
--- a/include/asm-powerpc/iseries/hv_lp_config.h
+++ b/include/asm-powerpc/iseries/hv_lp_config.h
@@ -25,7 +25,6 @@
 
 #include <asm/iseries/hv_call_sc.h>
 #include <asm/iseries/hv_types.h>
-#include <asm/iseries/it_lp_naca.h>
 
 enum {
 	HvCallCfg_Cur	= 0,
@@ -44,16 +43,8 @@ enum {
 #define HvCallCfgGetHostingLpIndex			HvCallCfg + 32
 
 extern HvLpIndex HvLpConfig_getLpIndex_outline(void);
-
-static inline HvLpIndex	HvLpConfig_getLpIndex(void)
-{
-	return itLpNaca.xLpIndex;
-}
-
-static inline HvLpIndex	HvLpConfig_getPrimaryLpIndex(void)
-{
-	return itLpNaca.xPrimaryLpIndex;
-}
+extern HvLpIndex HvLpConfig_getLpIndex(void);
+extern HvLpIndex HvLpConfig_getPrimaryLpIndex(void);
 
 static inline u64 HvLpConfig_getMsChunks(void)
 {
-- 
GitLab


From 06a36db1d712242a00cb30aaebdd088b4be28082 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:52:17 +1000
Subject: [PATCH 0025/1063] [POWERPC] iseries: Move ItLpNaca into
 platforms/iseries

Move ItLpNaca into platforms/iseries now that it's not used elsewhere.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/setup_64.c                              | 1 -
 arch/powerpc/platforms/iseries/hvlpconfig.c                 | 2 +-
 .../powerpc/platforms}/iseries/it_lp_naca.h                 | 6 +++---
 arch/powerpc/platforms/iseries/lpardata.c                   | 3 +--
 arch/powerpc/platforms/iseries/lpevents.c                   | 2 +-
 arch/powerpc/platforms/iseries/setup.c                      | 2 +-
 6 files changed, 7 insertions(+), 9 deletions(-)
 rename {include/asm-powerpc => arch/powerpc/platforms}/iseries/it_lp_naca.h (96%)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index fd1785e4c9bbe..e2447aef3a8fb 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -56,7 +56,6 @@
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/lmb.h>
-#include <asm/iseries/it_lp_naca.h>
 #include <asm/firmware.h>
 #include <asm/xmon.h>
 #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c
index cfcedaae6ea28..f0475f0b18533 100644
--- a/arch/powerpc/platforms/iseries/hvlpconfig.c
+++ b/arch/powerpc/platforms/iseries/hvlpconfig.c
@@ -18,7 +18,7 @@
 
 #include <linux/module.h>
 #include <asm/iseries/hv_lp_config.h>
-#include <asm/iseries/it_lp_naca.h>
+#include "it_lp_naca.h"
 
 HvLpIndex HvLpConfig_getLpIndex_outline(void)
 {
diff --git a/include/asm-powerpc/iseries/it_lp_naca.h b/arch/powerpc/platforms/iseries/it_lp_naca.h
similarity index 96%
rename from include/asm-powerpc/iseries/it_lp_naca.h
rename to arch/powerpc/platforms/iseries/it_lp_naca.h
index 4fdcf052927ff..9bbf589868191 100644
--- a/include/asm-powerpc/iseries/it_lp_naca.h
+++ b/arch/powerpc/platforms/iseries/it_lp_naca.h
@@ -15,8 +15,8 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef _ASM_POWERPC_ISERIES_IT_LP_NACA_H
-#define _ASM_POWERPC_ISERIES_IT_LP_NACA_H
+#ifndef _PLATFORMS_ISERIES_IT_LP_NACA_H
+#define _PLATFORMS_ISERIES_IT_LP_NACA_H
 
 #include <linux/types.h>
 
@@ -77,4 +77,4 @@ extern struct ItLpNaca		itLpNaca;
 #define ITLPNACA_HWSYNCEDTBS	0x20	/* Hardware synced TBs */
 #define ITLPNACA_HMTINT		0x10	/* Utilize MHT for interrupts */
 
-#endif /* _ASM_POWERPC_ISERIES_IT_LP_NACA_H */
+#endif /* _PLATFORMS_ISERIES_IT_LP_NACA_H */
diff --git a/arch/powerpc/platforms/iseries/lpardata.c b/arch/powerpc/platforms/iseries/lpardata.c
index 13e9bd1322541..8162049bb04dc 100644
--- a/arch/powerpc/platforms/iseries/lpardata.c
+++ b/arch/powerpc/platforms/iseries/lpardata.c
@@ -13,7 +13,6 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/abs_addr.h>
-#include <asm/iseries/it_lp_naca.h>
 #include <asm/lppaca.h>
 #include <asm/iseries/it_lp_reg_save.h>
 #include <asm/paca.h>
@@ -27,6 +26,7 @@
 #include "processor_vpd.h"
 #include "release_data.h"
 #include "it_exp_vpd_panel.h"
+#include "it_lp_naca.h"
 
 /* The HvReleaseData is the root of the information shared between
  * the hypervisor and Linux.
@@ -127,7 +127,6 @@ struct ItLpNaca itLpNaca = {
 		(u64)instruction_access_slb_iSeries /* 0x480 I-SLB */
 	}
 };
-EXPORT_SYMBOL(itLpNaca);
 
 /* May be filled in by the hypervisor so cannot end up in the BSS */
 struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data")));
diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c
index 2a9f81ea27d6b..98c1c2440aad0 100644
--- a/arch/powerpc/platforms/iseries/lpevents.c
+++ b/arch/powerpc/platforms/iseries/lpevents.c
@@ -20,7 +20,7 @@
 #include <asm/iseries/it_lp_queue.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_call_event.h>
-#include <asm/iseries/it_lp_naca.h>
+#include "it_lp_naca.h"
 
 /*
  * The LpQueue is used to pass event data from the hypervisor to
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index c34299b18d8b8..7f1953066ff8f 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -48,7 +48,6 @@
 #include <asm/iseries/hv_call_event.h>
 #include <asm/iseries/hv_call_xm.h>
 #include <asm/iseries/it_lp_queue.h>
-#include <asm/iseries/it_lp_naca.h>
 #include <asm/iseries/mf.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/lpar_map.h>
@@ -60,6 +59,7 @@
 #include "irq.h"
 #include "vpd_areas.h"
 #include "processor_vpd.h"
+#include "it_lp_naca.h"
 #include "main_store.h"
 #include "call_sm.h"
 #include "call_hpt.h"
-- 
GitLab


From f357b4cc5826ae55a5f3893424502cb15c6b6eba Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:54:39 +1000
Subject: [PATCH 0026/1063] [POWERPC] iseries: Fix a compiler warning in
 platforms/iseries/vpdinfo.c

iSeries_Get_Location_Code() has error paths, but currently returns void, so
give it a return code and only print the output if it returns successfully.
Gcc isn't smart enough to be quiet though, so set frame to 0 to shut it up.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/iseries/vpdinfo.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/vpdinfo.c b/arch/powerpc/platforms/iseries/vpdinfo.c
index 23a6d1e5b4293..ba7f6a62c3bfd 100644
--- a/arch/powerpc/platforms/iseries/vpdinfo.c
+++ b/arch/powerpc/platforms/iseries/vpdinfo.c
@@ -205,15 +205,16 @@ static void __init iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen,
 	}
 }
 
-static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent,
+static int __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent,
 		u8 *frame, char card[4])
 {
+	int status = 0;
 	int BusVpdLen = 0;
 	u8 *BusVpdPtr = kmalloc(BUS_VPDSIZE, GFP_KERNEL);
 
 	if (BusVpdPtr == NULL) {
 		printk("PCI: Bus VPD Buffer allocation failure.\n");
-		return;
+		return 0;
 	}
 	BusVpdLen = HvCallPci_getBusVpd(bus, iseries_hv_addr(BusVpdPtr),
 					BUS_VPDSIZE);
@@ -228,8 +229,10 @@ static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent,
 		goto out_free;
 	}
 	iSeries_Parse_Vpd(BusVpdPtr, BusVpdLen, agent, frame, card);
+	status = 1;
 out_free:
 	kfree(BusVpdPtr);
+	return status;
 }
 
 /*
@@ -246,7 +249,7 @@ void __init iSeries_Device_Information(struct pci_dev *PciDev, int count)
 	struct device_node *DevNode = PciDev->sysdata;
 	struct pci_dn *pdn;
 	u16 bus;
-	u8 frame;
+	u8 frame = 0;
 	char card[4];
 	HvSubBusNumber subbus;
 	HvAgentId agent;
@@ -262,10 +265,11 @@ void __init iSeries_Device_Information(struct pci_dev *PciDev, int count)
 	subbus = pdn->bussubno;
 	agent = ISERIES_PCI_AGENTID(ISERIES_GET_DEVICE_FROM_SUBBUS(subbus),
 			ISERIES_GET_FUNCTION_FROM_SUBBUS(subbus));
-	iSeries_Get_Location_Code(bus, agent, &frame, card);
 
-	printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, Card %4s  ",
-			count, bus, PCI_SLOT(PciDev->devfn), PciDev->vendor,
-			frame, card);
-	printk("0x%04X\n", (int)(PciDev->class >> 8));
+	if (iSeries_Get_Location_Code(bus, agent, &frame, card)) {
+		printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, "
+			"Card %4s  0x%04X\n", count, bus,
+			PCI_SLOT(PciDev->devfn), PciDev->vendor, frame,
+			card, (int)(PciDev->class >> 8));
+	}
 }
-- 
GitLab


From 463c61928c453c2998d39b683c86385ee877c289 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:54:44 +1000
Subject: [PATCH 0027/1063] [POWERPC] iseries: Fix a compiler warning in
 platforms/iseries/vpdinfo.c

PhbId might be used uninitialised, so set it to 0xff (nothing) always.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/iseries/vpdinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/iseries/vpdinfo.c b/arch/powerpc/platforms/iseries/vpdinfo.c
index ba7f6a62c3bfd..9f83878a0c2e4 100644
--- a/arch/powerpc/platforms/iseries/vpdinfo.c
+++ b/arch/powerpc/platforms/iseries/vpdinfo.c
@@ -188,7 +188,7 @@ static void __init iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen,
 {
 	u8 *TagPtr = VpdData;
 	int DataLen = VpdDataLen - 3;
-	u8 PhbId;
+	u8 PhbId = 0xff;
 
 	while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) {
 		int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256);
-- 
GitLab


From 2d69ff32ebf3dff9e9b48bbbbafe2b9b6f188d48 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 13 Jul 2006 17:54:44 +1000
Subject: [PATCH 0028/1063] [POWERPC] Fix a compiler warning in mm/tlb_64.c

The compiler doesn't understand that BUG() never returns, so complains that
psize isn't set. Just set it to the normal value, which seems to produce nice
code and keeps gcc happy.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/mm/tlb_64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index f6eef78efd293..b58baa65c4a74 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -146,6 +146,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
 		psize = mmu_huge_psize;
 #else
 		BUG();
+		psize = pte_pagesize_index(pte); /* shutup gcc */
 #endif
 	} else
 		psize = pte_pagesize_index(pte);
-- 
GitLab


From 8bff05b052db7a4cfaaf0eee7f8145600548e9c9 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 13 Jul 2006 18:51:22 +1000
Subject: [PATCH 0029/1063] [POWERPC] iseries: A new iSeries console

This driver uses the hvc_console.c infrastructure that is used by the
pSeries virtual and RTAS consoles.  This will allow us to make viocons.c
obsolete and is another step along the way to a combined kernel (as
viocons could not coexist with CONFIG_VT).

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/iseries/Kconfig |   8 +-
 arch/powerpc/platforms/iseries/dt.c    |   3 +-
 drivers/char/Kconfig                   |   7 +
 drivers/char/Makefile                  |   1 +
 drivers/char/hvc_iseries.c             | 593 +++++++++++++++++++++++++
 drivers/char/viocons.c                 |  31 +-
 include/asm-powerpc/iseries/vio.h      |  28 ++
 7 files changed, 638 insertions(+), 33 deletions(-)
 create mode 100644 drivers/char/hvc_iseries.c

diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index 3d957a30c8c25..887b68804e6d6 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -3,13 +3,17 @@ menu "iSeries device drivers"
 	depends on PPC_ISERIES
 
 config VIOCONS
-	tristate "iSeries Virtual Console Support"
+	tristate "iSeries Virtual Console Support (Obsolete)"
+	help
+	  This is the old virtual console driver for legacy iSeries.
+	  You should use the iSeries Hypervisor Virtual Console
+	  support instead.
 
 config VIODASD
 	tristate "iSeries Virtual I/O disk support"
 	help
 	  If you are running on an iSeries system and you want to use
- 	  virtual disks created and managed by OS/400, say Y.
+	  virtual disks created and managed by OS/400, say Y.
 
 config VIOCD
 	tristate "iSeries Virtual I/O CD support"
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index d194140c1ebf0..894b534669d00 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -298,7 +298,8 @@ static void __init dt_vdevices(struct iseries_flat_dt *dt)
 	dt_prop_u32(dt, "#address-cells", 1);
 	dt_prop_u32(dt, "#size-cells", 0);
 
-	dt_do_vdevice(dt, "vty", reg, -1, device_type_serial, NULL, 1);
+	dt_do_vdevice(dt, "vty", reg, -1, device_type_serial,
+			"IBM,iSeries-vty", 1);
 	reg++;
 
 	dt_do_vdevice(dt, "v-scsi", reg, -1, device_type_vscsi,
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 11de59ff4229d..a7ef542afbc20 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -610,6 +610,13 @@ config HVC_CONSOLE
 	  console. This driver allows each pSeries partition to have a console
 	  which is accessed via the HMC.
 
+config HVC_ISERIES
+	bool "iSeries Hypervisor Virtual Console support"
+	depends on PPC_ISERIES && !VIOCONS
+	select HVC_DRIVER
+	help
+	  iSeries machines support a hypervisor virtual console.
+
 config HVC_RTAS
 	bool "IBM RTAS Console support"
 	depends on PPC_RTAS
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 7a7ee57212797..8c6dfc6215201 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
 obj-$(CONFIG_SX)		+= sx.o generic_serial.o
 obj-$(CONFIG_RIO)		+= rio/ generic_serial.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvc_vio.o hvsi.o
+obj-$(CONFIG_HVC_ISERIES)	+= hvc_iseries.o
 obj-$(CONFIG_HVC_RTAS)		+= hvc_rtas.o
 obj-$(CONFIG_HVC_DRIVER)	+= hvc_console.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
new file mode 100644
index 0000000000000..256afc8e58382
--- /dev/null
+++ b/drivers/char/hvc_iseries.c
@@ -0,0 +1,593 @@
+/*
+ * iSeries vio driver interface to hvc_console.c
+ *
+ * This code is based heavily on hvc_vio.c and viocons.c
+ *
+ * Copyright (C) 2006 Stephen Rothwell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/console.h>
+
+#include <asm/hvconsole.h>
+#include <asm/vio.h>
+#include <asm/prom.h>
+#include <asm/iseries/vio.h>
+#include <asm/iseries/hv_call.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/iseries/hv_lp_event.h>
+
+#include "hvc_console.h"
+
+#define VTTY_PORTS 10
+
+static DEFINE_SPINLOCK(consolelock);
+static DEFINE_SPINLOCK(consoleloglock);
+
+static const char hvc_driver_name[] = "hvc_console";
+
+#define IN_BUF_SIZE	200
+
+/*
+ * Our port information.
+ */
+static struct port_info {
+	HvLpIndex lp;
+	u64 seq;	/* sequence number of last HV send */
+	u64 ack;	/* last ack from HV */
+	struct hvc_struct *hp;
+	int in_start;
+	int in_end;
+	unsigned char in_buf[IN_BUF_SIZE];
+} port_info[VTTY_PORTS] = {
+	[ 0 ... VTTY_PORTS - 1 ] = {
+		.lp = HvLpIndexInvalid
+	}
+};
+
+#define viochar_is_console(pi)	((pi) == &port_info[0])
+
+static struct vio_device_id hvc_driver_table[] __devinitdata = {
+	{"serial", "IBM,iSeries-vty"},
+	{ "", "" }
+};
+MODULE_DEVICE_TABLE(vio, hvc_driver_table);
+
+static void hvlog(char *fmt, ...)
+{
+	int i;
+	unsigned long flags;
+	va_list args;
+	static char buf[256];
+
+	spin_lock_irqsave(&consoleloglock, flags);
+	va_start(args, fmt);
+	i = vscnprintf(buf, sizeof(buf) - 1, fmt, args);
+	va_end(args);
+	buf[i++] = '\r';
+	HvCall_writeLogBuffer(buf, i);
+	spin_unlock_irqrestore(&consoleloglock, flags);
+}
+
+/*
+ * Initialize the common fields in a charLpEvent
+ */
+static void init_data_event(struct viocharlpevent *viochar, HvLpIndex lp)
+{
+	struct HvLpEvent *hev = &viochar->event;
+
+	memset(viochar, 0, sizeof(struct viocharlpevent));
+
+	hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DEFERRED_ACK |
+		HV_LP_EVENT_INT;
+	hev->xType = HvLpEvent_Type_VirtualIo;
+	hev->xSubtype = viomajorsubtype_chario | viochardata;
+	hev->xSourceLp = HvLpConfig_getLpIndex();
+	hev->xTargetLp = lp;
+	hev->xSizeMinus1 = sizeof(struct viocharlpevent);
+	hev->xSourceInstanceId = viopath_sourceinst(lp);
+	hev->xTargetInstanceId = viopath_targetinst(lp);
+}
+
+static int get_chars(uint32_t vtermno, char *buf, int count)
+{
+	struct port_info *pi;
+	int n = 0;
+	unsigned long flags;
+
+	if (vtermno >= VTTY_PORTS)
+		return -EINVAL;
+	if (count == 0)
+		return 0;
+
+	pi = &port_info[vtermno];
+	spin_lock_irqsave(&consolelock, flags);
+
+	if (pi->in_end == 0)
+		goto done;
+
+	n = pi->in_end - pi->in_start;
+	if (n > count)
+		n = count;
+	memcpy(buf, &pi->in_buf[pi->in_start], n);
+	pi->in_start += n;
+	if (pi->in_start == pi->in_end) {
+		pi->in_start = 0;
+		pi->in_end = 0;
+	}
+done:
+	spin_unlock_irqrestore(&consolelock, flags);
+	return n;
+}
+
+/*
+ * Send up to count bytes from buf to the partition connected to vtermno.
+ * Returns the number of bytes sent, or -EINVAL for an invalid vtermno.
+ */
+static int put_chars(uint32_t vtermno, const char *buf, int count)
+{
+	struct viocharlpevent *viochar;
+	struct port_info *pi;
+	HvLpEvent_Rc hvrc;
+	unsigned long flags;
+	int sent = 0;
+
+	if (vtermno >= VTTY_PORTS)
+		return -EINVAL;
+
+	pi = &port_info[vtermno];
+	spin_lock_irqsave(&consolelock, flags);
+
+	/* irqs are off from here on: plain spin_lock() keeps flags intact */
+	if (viochar_is_console(pi) && !viopath_isactive(pi->lp)) {
+		spin_lock(&consoleloglock);
+		HvCall_writeLogBuffer(buf, count);
+		spin_unlock(&consoleloglock);
+		sent = count;
+		goto done;
+	}
+
+	viochar = vio_get_event_buffer(viomajorsubtype_chario);
+	if (viochar == NULL) {
+		hvlog("\n\rviocons: Can't get viochar buffer.");
+		goto done;
+	}
+
+	while ((count > 0) && ((pi->seq - pi->ack) < VIOCHAR_WINDOW)) {
+		int len;
+
+		len = (count > VIOCHAR_MAX_DATA) ? VIOCHAR_MAX_DATA : count;
+		if (viochar_is_console(pi)) {
+			spin_lock(&consoleloglock);
+			HvCall_writeLogBuffer(buf, len);
+			spin_unlock(&consoleloglock);
+		}
+
+		init_data_event(viochar, pi->lp);
+		viochar->len = len;
+		viochar->event.xCorrelationToken = pi->seq++;
+		viochar->event.xSizeMinus1 =
+			offsetof(struct viocharlpevent, data) + len;
+		memcpy(viochar->data, buf, len);
+		hvrc = HvCallEvent_signalLpEvent(&viochar->event);
+		if (hvrc)
+			hvlog("\n\rerror sending event! return code %d\n\r",
+				(int)hvrc);
+		sent += len;
+		count -= len;
+		buf += len;
+	}
+
+	vio_free_event_buffer(viomajorsubtype_chario, viochar);
+done:
+	spin_unlock_irqrestore(&consolelock, flags);
+	return sent;
+}
+
+static struct hv_ops hvc_get_put_ops = {
+	.get_chars = get_chars,
+	.put_chars = put_chars,
+};
+
+static int __devinit hvc_vio_probe(struct vio_dev *vdev,
+			const struct vio_device_id *id)
+{
+	struct hvc_struct *hp;
+	struct port_info *pi;
+
+	/* probed with invalid parameters. */
+	if (!vdev || !id)
+		return -EPERM;
+
+	if (vdev->unit_address >= VTTY_PORTS)
+		return -ENODEV;
+
+	pi = &port_info[vdev->unit_address];
+
+	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops);
+	if (IS_ERR(hp))
+		return PTR_ERR(hp);
+	pi->hp = hp;
+	dev_set_drvdata(&vdev->dev, pi);
+
+	return 0;
+}
+
+static int __devexit hvc_vio_remove(struct vio_dev *vdev)
+{
+	struct port_info *pi = dev_get_drvdata(&vdev->dev);
+	struct hvc_struct *hp = pi->hp;
+
+	return hvc_remove(hp);
+}
+
+static struct vio_driver hvc_vio_driver = {
+	.id_table	= hvc_driver_table,
+	.probe		= hvc_vio_probe,
+	.remove		= hvc_vio_remove,
+	.driver		= {
+		.name	= hvc_driver_name,
+		.owner	= THIS_MODULE,
+	}
+};
+
+static void hvc_open_event(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	u8 port = cevent->virtual_device;
+	struct port_info *pi;
+	int reject = 0;
+
+	if (hvlpevent_is_ack(event)) {
+		if (port >= VTTY_PORTS)
+			return;
+
+		spin_lock_irqsave(&consolelock, flags);
+
+		pi = &port_info[port];
+		if (event->xRc == HvLpEvent_Rc_Good) {
+			pi->seq = pi->ack = 0;
+			/*
+			 * This line allows connections from the primary
+			 * partition but once one is connected from the
+			 * primary partition nothing short of a reboot
+			 * of linux will allow access from the hosting
+			 * partition again without a required iSeries fix.
+			 */
+			pi->lp = event->xTargetLp;
+		}
+
+		spin_unlock_irqrestore(&consolelock, flags);
+		if (event->xRc != HvLpEvent_Rc_Good)
+			printk(KERN_WARNING
+			       "hvc: handle_open_event: event->xRc == (%d).\n",
+			       event->xRc);
+
+		if (event->xCorrelationToken != 0) {
+			atomic_t *aptr= (atomic_t *)event->xCorrelationToken;
+			atomic_set(aptr, 1);
+		} else
+			printk(KERN_WARNING
+			       "hvc: weird...got open ack without atomic\n");
+		return;
+	}
+
+	/* This had better require an ack, otherwise complain */
+	if (!hvlpevent_need_ack(event)) {
+		printk(KERN_WARNING "hvc: viocharopen without ack bit!\n");
+		return;
+	}
+
+	spin_lock_irqsave(&consolelock, flags);
+
+	/* Make sure this is a good virtual tty */
+	if (port >= VTTY_PORTS) {
+		event->xRc = HvLpEvent_Rc_SubtypeError;
+		cevent->subtype_result_code = viorc_openRejected;
+		/*
+		 * Flag state here since we can't printk while holding
+		 * the consolelock spinlock.
+		 */
+		reject = 1;
+	} else {
+		pi = &port_info[port];
+		if ((pi->lp != HvLpIndexInvalid) &&
+				(pi->lp != event->xSourceLp)) {
+			/*
+			 * If this tty is already connected to a different
+			 * partition, fail.
+			 */
+			event->xRc = HvLpEvent_Rc_SubtypeError;
+			cevent->subtype_result_code = viorc_openRejected;
+			reject = 2;
+		} else {
+			pi->lp = event->xSourceLp;
+			event->xRc = HvLpEvent_Rc_Good;
+			cevent->subtype_result_code = viorc_good;
+			pi->seq = pi->ack = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&consolelock, flags);
+
+	if (reject == 1)
+		printk(KERN_WARNING "hvc: open rejected: bad virtual tty.\n");
+	else if (reject == 2)
+		printk(KERN_WARNING "hvc: open rejected: console in exclusive "
+				"use by another partition.\n");
+
+	/* Return the acknowledgement */
+	HvCallEvent_ackLpEvent(event);
+}
+
+/*
+ * Handle a close charLpEvent.  This should ONLY be an Interrupt because the
+ * virtual console should never actually issue a close event to the hypervisor
+ * because the virtual console never goes away.  A close event coming from the
+ * hypervisor simply means that there are no client consoles connected to the
+ * virtual console.
+ */
+static void hvc_close_event(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	u8 port = cevent->virtual_device;
+
+	if (!hvlpevent_is_int(event)) {
+		printk(KERN_WARNING
+			"hvc: got unexpected close acknowlegement\n");
+		return;
+	}
+
+	if (port >= VTTY_PORTS) {
+		printk(KERN_WARNING
+			"hvc: close message from invalid virtual device.\n");
+		return;
+	}
+
+	/* For closes, just mark the console partition invalid */
+	spin_lock_irqsave(&consolelock, flags);
+
+	if (port_info[port].lp == event->xSourceLp)
+		port_info[port].lp = HvLpIndexInvalid;
+
+	spin_unlock_irqrestore(&consolelock, flags);
+}
+
+static void hvc_data_event(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	struct port_info *pi;
+	int n;
+	u8 port = cevent->virtual_device;
+
+	if (port >= VTTY_PORTS) {
+		printk(KERN_WARNING "hvc: data on invalid virtual device %d\n",
+				port);
+		return;
+	}
+	if (cevent->len == 0)
+		return;
+
+	/*
+	 * Change 05/01/2003 - Ryan Arnold: If a partition other than
+	 * the current exclusive partition tries to send us data
+	 * events then just drop them on the floor because we don't
+	 * want his stinking data.  He isn't authorized to receive
+	 * data because he wasn't the first one to get the console,
+	 * therefore he shouldn't be allowed to send data either.
+	 * This will work without an iSeries fix.
+	 */
+	pi = &port_info[port];
+	if (pi->lp != event->xSourceLp)
+		return;
+
+	spin_lock_irqsave(&consolelock, flags);
+
+	n = IN_BUF_SIZE - pi->in_end;
+	if (n > cevent->len)
+		n = cevent->len;
+	if (n > 0) {
+		memcpy(&pi->in_buf[pi->in_end], cevent->data, n);
+		pi->in_end += n;
+	}
+	spin_unlock_irqrestore(&consolelock, flags);
+	if (n == 0)
+		printk(KERN_WARNING "hvc: input buffer overflow\n");
+}
+
+static void hvc_ack_event(struct HvLpEvent *event)
+{
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	unsigned long flags;
+	u8 port = cevent->virtual_device;
+
+	if (port >= VTTY_PORTS) {
+		printk(KERN_WARNING "hvc: data on invalid virtual device\n");
+		return;
+	}
+
+	spin_lock_irqsave(&consolelock, flags);
+	port_info[port].ack = event->xCorrelationToken;
+	spin_unlock_irqrestore(&consolelock, flags);
+}
+
+static void hvc_config_event(struct HvLpEvent *event)
+{
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+
+	if (cevent->data[0] == 0x01)
+		printk(KERN_INFO "hvc: window resized to %d: %d: %d: %d\n",
+		       cevent->data[1], cevent->data[2],
+		       cevent->data[3], cevent->data[4]);
+	else
+		printk(KERN_WARNING "hvc: unknown config event\n");
+}
+
+static void hvc_handle_event(struct HvLpEvent *event)
+{
+	int charminor;
+
+	if (event == NULL)
+		return;
+
+	charminor = event->xSubtype & VIOMINOR_SUBTYPE_MASK;
+	switch (charminor) {
+	case viocharopen:
+		hvc_open_event(event);
+		break;
+	case viocharclose:
+		hvc_close_event(event);
+		break;
+	case viochardata:
+		hvc_data_event(event);
+		break;
+	case viocharack:
+		hvc_ack_event(event);
+		break;
+	case viocharconfig:
+		hvc_config_event(event);
+		break;
+	default:
+		if (hvlpevent_is_int(event) && hvlpevent_need_ack(event)) {
+			event->xRc = HvLpEvent_Rc_InvalidSubtype;
+			HvCallEvent_ackLpEvent(event);
+		}
+	}
+}
+
+static int send_open(HvLpIndex remoteLp, void *sem)
+{
+	return HvCallEvent_signalLpEventFast(remoteLp,
+			HvLpEvent_Type_VirtualIo,
+			viomajorsubtype_chario | viocharopen,
+			HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
+			viopath_sourceinst(remoteLp),
+			viopath_targetinst(remoteLp),
+			(u64)(unsigned long)sem, VIOVERSION << 16,
+			0, 0, 0, 0);
+}
+
+static int hvc_vio_init(void)
+{
+	atomic_t wait_flag;
+	int rc;
+
+	/* +2 for fudge */
+	rc = viopath_open(HvLpConfig_getPrimaryLpIndex(),
+			viomajorsubtype_chario, VIOCHAR_WINDOW + 2);
+	if (rc)
+		printk(KERN_WARNING "hvc: error opening to primary %d\n", rc);
+
+	if (viopath_hostLp == HvLpIndexInvalid)
+		vio_set_hostlp();
+
+	/*
+	 * And if the primary is not the same as the hosting LP, open to the
+	 * hosting lp
+	 */
+	if ((viopath_hostLp != HvLpIndexInvalid) &&
+	    (viopath_hostLp != HvLpConfig_getPrimaryLpIndex())) {
+		printk(KERN_INFO "hvc: open path to hosting (%d)\n",
+				viopath_hostLp);
+		rc = viopath_open(viopath_hostLp, viomajorsubtype_chario,
+				VIOCHAR_WINDOW + 2);	/* +2 for fudge */
+		if (rc)
+			printk(KERN_WARNING
+				"error opening to partition %d: %d\n",
+				viopath_hostLp, rc);
+	}
+
+	if (vio_setHandler(viomajorsubtype_chario, hvc_handle_event) < 0)
+		printk(KERN_WARNING
+			"hvc: error seting handler for console events!\n");
+
+	/*
+	 * First, try to open the console to the hosting lp.
+	 * Spin on wait_flag until the open ack handler sets it.
+	 */
+	atomic_set(&wait_flag, 0);
+	if ((viopath_isactive(viopath_hostLp)) &&
+	    (send_open(viopath_hostLp, &wait_flag) == 0)) {
+		printk(KERN_INFO "hvc: hosting partition %d\n", viopath_hostLp);
+		while (atomic_read(&wait_flag) == 0)
+			mb();
+		atomic_set(&wait_flag, 0);
+	}
+
+	/*
+	 * If we don't have an active console, try the primary
+	 */
+	if ((!viopath_isactive(port_info[0].lp)) &&
+	    (viopath_isactive(HvLpConfig_getPrimaryLpIndex())) &&
+	    (send_open(HvLpConfig_getPrimaryLpIndex(), &wait_flag) == 0)) {
+		printk(KERN_INFO "hvc: opening console to primary partition\n");
+		while (atomic_read(&wait_flag) == 0)
+			mb();
+	}
+
+	/* Register as a vio device to receive callbacks */
+	rc = vio_register_driver(&hvc_vio_driver);
+
+	return rc;
+}
+module_init(hvc_vio_init); /* after drivers/char/hvc_console.c */
+
+static void hvc_vio_exit(void)
+{
+	vio_unregister_driver(&hvc_vio_driver);
+}
+module_exit(hvc_vio_exit);
+
+/* the device tree order defines our numbering */
+static int hvc_find_vtys(void)
+{
+	struct device_node *vty;
+	int num_found = 0;
+
+	for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL;
+			vty = of_find_node_by_name(vty, "vty")) {
+		uint32_t *vtermno;
+
+		/* We have statically defined space for only a certain number
+		 * of console adapters.
+		 */
+		if ((num_found >= MAX_NR_HVC_CONSOLES) ||
+				(num_found >= VTTY_PORTS))
+			break;
+
+		vtermno = (uint32_t *)get_property(vty, "reg", NULL);
+		if (!vtermno)
+			continue;
+
+		if (!device_is_compatible(vty, "IBM,iSeries-vty"))
+			continue;
+
+		if (num_found == 0)
+			add_preferred_console("hvc", 0, NULL);
+		hvc_instantiate(*vtermno, num_found, &hvc_get_put_ops);
+		++num_found;
+	}
+
+	return num_found;
+}
+console_initcall(hvc_find_vtys);
diff --git a/drivers/char/viocons.c b/drivers/char/viocons.c
index 766f7864c6c6b..f3efeaf2826e9 100644
--- a/drivers/char/viocons.c
+++ b/drivers/char/viocons.c
@@ -43,7 +43,6 @@
 #include <linux/sysrq.h>
 
 #include <asm/iseries/vio.h>
-
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_call_event.h>
 #include <asm/iseries/hv_lp_config.h>
@@ -67,35 +66,6 @@ static int vio_sysrq_pressed;
 extern int sysrq_enabled;
 #endif
 
-/*
- * The structure of the events that flow between us and OS/400.  You can't
- * mess with this unless the OS/400 side changes too
- */
-struct viocharlpevent {
-	struct HvLpEvent event;
-	u32 reserved;
-	u16 version;
-	u16 subtype_result_code;
-	u8 virtual_device;
-	u8 len;
-	u8 data[VIOCHAR_MAX_DATA];
-};
-
-#define VIOCHAR_WINDOW		10
-#define VIOCHAR_HIGHWATERMARK	3
-
-enum viocharsubtype {
-	viocharopen = 0x0001,
-	viocharclose = 0x0002,
-	viochardata = 0x0003,
-	viocharack = 0x0004,
-	viocharconfig = 0x0005
-};
-
-enum viochar_rc {
-	viochar_rc_ebusy = 1
-};
-
 #define VIOCHAR_NUM_BUF		16
 
 /*
@@ -1183,6 +1153,7 @@ static int __init viocons_init(void)
 		port_info[i].magic = VIOTTY_MAGIC;
 	}
 	HvCall_setLogBufferFormatAndCodepage(HvCall_LogBuffer_ASCII, 437);
+	add_preferred_console("viocons", 0, NULL);
 	register_console(&viocons_early);
 	return 0;
 }
diff --git a/include/asm-powerpc/iseries/vio.h b/include/asm-powerpc/iseries/vio.h
index 72a97d37aac35..7a95d296abd12 100644
--- a/include/asm-powerpc/iseries/vio.h
+++ b/include/asm-powerpc/iseries/vio.h
@@ -122,6 +122,34 @@ enum viorc {
 	viorc_openRejected = 0x0301
 };
 
+/*
+ * The structure of the events that flow between us and OS/400 for chario
+ * events.  You can't mess with this unless the OS/400 side changes too.
+ */
+struct viocharlpevent {
+	struct HvLpEvent event;
+	u32 reserved;
+	u16 version;
+	u16 subtype_result_code;
+	u8 virtual_device;
+	u8 len;
+	u8 data[VIOCHAR_MAX_DATA];
+};
+
+#define VIOCHAR_WINDOW		10
+
+enum viocharsubtype {
+	viocharopen = 0x0001,
+	viocharclose = 0x0002,
+	viochardata = 0x0003,
+	viocharack = 0x0004,
+	viocharconfig = 0x0005
+};
+
+enum viochar_rc {
+	viochar_rc_ebusy = 1
+};
+
 struct device;
 
 extern struct device *iSeries_vio_dev;
-- 
GitLab


From 4e9e95a3554e98e7383a3591283ffcd850c9ef48 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 13 Jul 2006 18:53:32 +1000
Subject: [PATCH 0030/1063] [POWERPC] Make the hvc_console output buffer size
 settable

So the iSeries console will be faster since it can send up to 200 bytes at
a time to the Hypervisor.  This only affects the tty part of the console,
the console writes are still in 16 byte lots.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 drivers/char/hvc_console.c | 14 +++++++++-----
 drivers/char/hvc_console.h |  2 +-
 drivers/char/hvc_iseries.c |  3 ++-
 drivers/char/hvc_rtas.c    |  2 +-
 drivers/char/hvc_vio.c     |  3 ++-
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index ca2f538e549e3..dbee8bed05307 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -80,7 +80,8 @@ struct hvc_struct {
 	struct tty_struct *tty;
 	unsigned int count;
 	int do_wakeup;
-	char outbuf[N_OUTBUF] __ALIGNED__;
+	char *outbuf;
+	int outbuf_size;
 	int n_outbuf;
 	uint32_t vtermno;
 	struct hv_ops *ops;
@@ -505,7 +506,7 @@ static int hvc_write(struct tty_struct *tty, const unsigned char *buf, int count
 	if (hp->n_outbuf > 0)
 		hvc_push(hp);
 
-	while (count > 0 && (rsize = N_OUTBUF - hp->n_outbuf) > 0) {
+	while (count > 0 && (rsize = hp->outbuf_size - hp->n_outbuf) > 0) {
 		if (rsize > count)
 			rsize = count;
 		memcpy(hp->outbuf + hp->n_outbuf, buf, rsize);
@@ -538,7 +539,7 @@ static int hvc_write_room(struct tty_struct *tty)
 	if (!hp)
 		return -1;
 
-	return N_OUTBUF - hp->n_outbuf;
+	return hp->outbuf_size - hp->n_outbuf;
 }
 
 static int hvc_chars_in_buffer(struct tty_struct *tty)
@@ -728,12 +729,13 @@ static struct kobj_type hvc_kobj_type = {
 };
 
 struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
-					struct hv_ops *ops)
+					struct hv_ops *ops, int outbuf_size)
 {
 	struct hvc_struct *hp;
 	int i;
 
-	hp = kmalloc(sizeof(*hp), GFP_KERNEL);
+	hp = kmalloc(ALIGN(sizeof(*hp), sizeof(long)) + outbuf_size,
+			GFP_KERNEL);
 	if (!hp)
 		return ERR_PTR(-ENOMEM);
 
@@ -742,6 +744,8 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
 	hp->vtermno = vtermno;
 	hp->irq = irq;
 	hp->ops = ops;
+	hp->outbuf_size = outbuf_size;
+	hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];
 
 	kobject_init(&hp->kobj);
 	hp->kobj.ktype = &hvc_kobj_type;
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h
index 96b7401319c19..8c59818050e66 100644
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -56,7 +56,7 @@ extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
 
 /* register a vterm for hvc tty operation (module_init or hotplug add) */
 extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
-						 struct hv_ops *ops);
+				struct hv_ops *ops, int outbuf_size);
 /* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
 extern int __devexit hvc_remove(struct hvc_struct *hp);
 
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
index 256afc8e58382..4747729459c7a 100644
--- a/drivers/char/hvc_iseries.c
+++ b/drivers/char/hvc_iseries.c
@@ -221,7 +221,8 @@ static int __devinit hvc_vio_probe(struct vio_dev *vdev,
 
 	pi = &port_info[vdev->unit_address];
 
-	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops);
+	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops,
+			VIOCHAR_MAX_DATA);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 	pi->hp = hp;
diff --git a/drivers/char/hvc_rtas.c b/drivers/char/hvc_rtas.c
index 57106e02fd2e4..4b97eaf18602c 100644
--- a/drivers/char/hvc_rtas.c
+++ b/drivers/char/hvc_rtas.c
@@ -94,7 +94,7 @@ static int hvc_rtas_init(void)
 
 	/* Allocate an hvc_struct for the console device we instantiated
 	 * earlier.  Save off hp so that we can return it on exit */
-	hp = hvc_alloc(hvc_rtas_cookie, NO_IRQ, &hvc_rtas_get_put_ops);
+	hp = hvc_alloc(hvc_rtas_cookie, NO_IRQ, &hvc_rtas_get_put_ops, 16);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 9add81ceb4405..651e5d25f58b6 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -90,7 +90,8 @@ static int __devinit hvc_vio_probe(struct vio_dev *vdev,
 	if (!vdev || !id)
 		return -EPERM;
 
-	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops);
+	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops,
+			MAX_VIO_PUT_CHARS);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 	dev_set_drvdata(&vdev->dev, hp);
-- 
GitLab


From 380ed24b1b81a188c5b716286143157a27935aab Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 13 Jul 2006 18:56:00 +1000
Subject: [PATCH 0031/1063] [POWERPC] iseries: Small viotape cleanup allowed by
 devfs removal

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 drivers/char/viotape.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c
index b72b2049aaae1..73c78bf75d7f4 100644
--- a/drivers/char/viotape.c
+++ b/drivers/char/viotape.c
@@ -940,7 +940,6 @@ static void vioHandleTapeEvent(struct HvLpEvent *event)
 
 static int viotape_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 {
-	char tapename[32];
 	int i = vdev->unit_address;
 	int j;
 
@@ -956,10 +955,9 @@ static int viotape_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 			"iseries!vt%d", i);
 	class_device_create(tape_class, NULL, MKDEV(VIOTAPE_MAJOR, i | 0x80),
 			NULL, "iseries!nvt%d", i);
-	sprintf(tapename, "iseries/vt%d", i);
-	printk(VIOTAPE_KERN_INFO "tape %s is iSeries "
+	printk(VIOTAPE_KERN_INFO "tape iseries/vt%d is iSeries "
 			"resource %10.10s type %4.4s, model %3.3s\n",
-			tapename, viotape_unitinfo[i].rsrcname,
+			i, viotape_unitinfo[i].rsrcname,
 			viotape_unitinfo[i].type, viotape_unitinfo[i].model);
 	return 0;
 }
-- 
GitLab


From 54f5cd8afa1c9c9f8b152a946b0a7e0ecdef1631 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 13 Jul 2006 18:56:56 +1000
Subject: [PATCH 0032/1063] [POWERPC] iseries: Remove unnecessary include of
 iseries/hv_lp_event.h

Also remove unnecessary reference to struct HvLpEvent.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/asm-offsets.c         | 1 -
 include/asm-powerpc/iseries/it_lp_queue.h | 2 --
 2 files changed, 3 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7ee84968087b3..ac0631958b20d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -40,7 +40,6 @@
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #include <asm/lppaca.h>
-#include <asm/iseries/hv_lp_event.h>
 #include <asm/cache.h>
 #include <asm/compat.h>
 #endif
diff --git a/include/asm-powerpc/iseries/it_lp_queue.h b/include/asm-powerpc/iseries/it_lp_queue.h
index 284c5a7db3ac4..3f68147692959 100644
--- a/include/asm-powerpc/iseries/it_lp_queue.h
+++ b/include/asm-powerpc/iseries/it_lp_queue.h
@@ -27,8 +27,6 @@
 #include <asm/types.h>
 #include <asm/ptrace.h>
 
-struct HvLpEvent;
-
 #define IT_LP_MAX_QUEUES	8
 
 #define IT_LP_NOT_USED		0	/* Queue will not be used by PLIC */
-- 
GitLab


From ca652c9396fa052815518e2b2ce2ebee6d9fb861 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 14 Jul 2006 14:25:33 +1000
Subject: [PATCH 0033/1063] [POWERPC] iseries: Move iommu_table_cb into
 platforms/iseries

Although we pass the address of an iommu_table_cb to HvCallXm_getTceTableParms,
we don't actually need the structure definition anywhere except in the
iseries iommu code, so move the struct in there.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/iseries/iommu.c   | 17 +++++++++++++++++
 include/asm-powerpc/iseries/hv_call_xm.h | 17 -----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index e3bd2015f2c92..2c3dbcd4613cb 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -87,6 +87,23 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
 	}
 }
 
+/*
+ * Structure passed to HvCallXm_getTceTableParms
+ */
+struct iommu_table_cb {
+	unsigned long	itc_busno;	/* Bus number for this tce table */
+	unsigned long	itc_start;	/* Will be NULL for secondary */
+	unsigned long	itc_totalsize;	/* Size (in pages) of whole table */
+	unsigned long	itc_offset;	/* Index into real tce table of the
+					   start of our section */
+	unsigned long	itc_size;	/* Size (in pages) of our section */
+	unsigned long	itc_index;	/* Index of this tce table */
+	unsigned short	itc_maxtables;	/* Max num of tables for partition */
+	unsigned char	itc_virtbus;	/* Flag to indicate virtual bus */
+	unsigned char	itc_slotno;	/* IOA Tce Slot Index */
+	unsigned char	itc_rsvd[4];
+};
+
 /*
  * Call Hv with the architected data structure to get TCE table info.
  * info. Put the returned data into the Linux representation of the
diff --git a/include/asm-powerpc/iseries/hv_call_xm.h b/include/asm-powerpc/iseries/hv_call_xm.h
index ca9202cb01ed0..392ac3f54df08 100644
--- a/include/asm-powerpc/iseries/hv_call_xm.h
+++ b/include/asm-powerpc/iseries/hv_call_xm.h
@@ -16,23 +16,6 @@
 #define HvCallXmSetTce			HvCallXm + 11
 #define HvCallXmSetTces			HvCallXm + 13
 
-/*
- * Structure passed to HvCallXm_getTceTableParms
- */
-struct iommu_table_cb {
-	unsigned long	itc_busno;	/* Bus number for this tce table */
-	unsigned long	itc_start;	/* Will be NULL for secondary */
-	unsigned long	itc_totalsize;	/* Size (in pages) of whole table */
-	unsigned long	itc_offset;	/* Index into real tce table of the
-					   start of our section */
-	unsigned long	itc_size;	/* Size (in pages) of our section */
-	unsigned long	itc_index;	/* Index of this tce table */
-	unsigned short	itc_maxtables;	/* Max num of tables for partition */
-	unsigned char	itc_virtbus;	/* Flag to indicate virtual bus */
-	unsigned char	itc_slotno;	/* IOA Tce Slot Index */
-	unsigned char	itc_rsvd[4];
-};
-
 static inline void HvCallXm_getTceTableParms(u64 cb)
 {
 	HvCall1(HvCallXmGetTceTableParms, cb);
-- 
GitLab


From 6a5a297cf78e64ed68577f3e3480bc10abf0124b Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sat, 15 Jul 2006 13:05:24 +0100
Subject: [PATCH 0034/1063] MTD: [NAND] Fix the sharpsl driver after breakage
 from a core conversion

The CNE bits are inverted on the device and writeb function is missing a
NOT operation.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/nand/sharpsl.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c
index fbeedc3184e91..51c7288ab49a0 100644
--- a/drivers/mtd/nand/sharpsl.c
+++ b/drivers/mtd/nand/sharpsl.c
@@ -78,7 +78,7 @@ static struct mtd_partition sharpsl_nand_default_partition_info[] = {
 /*
  *	hardware specific access to control-lines
  *	ctrl:
- *	NAND_CNE: bit 0 -> bit 0 & 4
+ *	NAND_CNE: bit 0 -> ! bit 0 & 4
  *	NAND_CLE: bit 1 -> bit 1
  *	NAND_ALE: bit 2 -> bit 2
  *
@@ -92,7 +92,10 @@ static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd,
 		unsigned char bits = ctrl & 0x07;
 
 		bits |= (ctrl & 0x01) << 4;
-		writeb((readb(FLASHCTL) & 0x17) | bits, FLASHCTL);
+
+		bits ^= 0x11;
+
+		writeb((readb(FLASHCTL) & ~0x17) | bits, FLASHCTL);
 	}
 
 	if (cmd != NAND_CMD_NONE)
-- 
GitLab


From 9d05cd51780c3855976b26cbee265490a0a10be9 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Fri, 14 Jul 2006 14:39:06 +0200
Subject: [PATCH 0035/1063] remove #error on !PCI from pmc551.c

PMC551 depends on PCI in Kconfig so there is no need to #error in code if PCI
is not set.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/devices/pmc551.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index 6f9bbf6fee4d0..2c01497087398 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -99,10 +99,6 @@
 #include <asm/system.h>
 #include <linux/pci.h>
 
-#ifndef CONFIG_PCI
-#error Enable PCI in your kernel config
-#endif
-
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/pmc551.h>
 #include <linux/mtd/compatmac.h>
-- 
GitLab


From 9a909867d2eca7727d0d5884df96e791e3531f24 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sat, 15 Jul 2006 13:26:18 +0100
Subject: [PATCH 0036/1063] [MTD NAND] Fix lookup error in
 nand_get_flash_type()

Spotted by liyu <liyu@ccoss.com.cn>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/nand/nand_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index cffd66309ffa0..119d17cdb780c 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2222,7 +2222,7 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 	}
 
 	/* Try to identify manufacturer */
-	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_id++) {
+	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_idx++) {
 		if (nand_manuf_ids[maf_idx].id == *maf_id)
 			break;
 	}
-- 
GitLab


From c4e7fb313771ac03dfdca26d30e8b721731c562b Mon Sep 17 00:00:00 2001
From: Ville Herva <vherva@vianova.fi>
Date: Fri, 14 Jul 2006 00:31:16 +0300
Subject: [PATCH 0037/1063] block2mtd.c: Make kernel boot command line
 arguments work (try 4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trying to pass kernel command line arguments to block2mtd at boot-time does
not work currently. block2mtd_setup() is called so early that kmalloc()
fails nevermind being able to do open_bdev_excl() (which requires rootfs to
be mounted). This patch only saves the option string at the early boot stage,
and parses them later when block2mtd_init() is called. If open_bdev_excl()
fails, open_by_devnum(name_to_dev_t()) is tried instead, which makes it
possible to initialize the driver before rootfs has been mounted. Also gets
rid of the superfluous parse_name() that only checks if name is longer than
80 chars and copies it to a string that is not kfreed.

With this patch, I can boot statically compiled block2mtd, and mount jffs2
as rootfs (without modules or initrd), with lilo config like this:

   root=/dev/mtdblock0
   append="rootfstype=jffs2 block2mtd.block2mtd=/dev/hdc2,65536"

(Note that rootfstype=jffs2 is required, since the kernel only tries
filesystems without "nodev" attribute by default, and jffs2 is "nodev").

Compared to first version of this patch, this one does not copy the
parameters to the global buffer if init has already been called, and the
global array is marked as __initdata.

Compared to the second version of this patch, module build is fixed.

Compared to the third version of this patch, statically compiled block2mtd
driver with no boot-time parameter no longer gives spurious error 'cannot
open device ""'

Signed-off-by: Ville Herva <vherva@vianova.fi>
Acked-by: Jörn Engel <joern@wohnheim.fh-wedel.de>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/devices/block2mtd.c | 93 ++++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 30 deletions(-)

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index ede3561be8709..401c6a294baae 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -18,6 +18,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/buffer_head.h>
 #include <linux/mutex.h>
+#include <linux/mount.h>
 
 #define VERSION "$Revision: 1.30 $"
 
@@ -236,6 +237,8 @@ static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf,
 	}
 	return 0;
 }
+
+
 static int block2mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 		size_t *retlen, const u_char *buf)
 {
@@ -299,6 +302,19 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
 
 	/* Get a handle on the device */
 	bdev = open_bdev_excl(devname, O_RDWR, NULL);
+#ifndef MODULE
+	if (IS_ERR(bdev)) {
+
+		/* We might not have rootfs mounted at this point. Try
+		   to resolve the device name by other means. */
+
+		dev_t dev = name_to_dev_t(devname);
+		if (dev != 0) {
+			bdev = open_by_devnum(dev, FMODE_WRITE | FMODE_READ);
+		}
+	}
+#endif
+
 	if (IS_ERR(bdev)) {
 		ERROR("error: cannot open device %s", devname);
 		goto devinit_err;
@@ -393,26 +409,6 @@ static int parse_num(size_t *num, const char *token)
 }
 
 
-static int parse_name(char **pname, const char *token, size_t limit)
-{
-	size_t len;
-	char *name;
-
-	len = strlen(token) + 1;
-	if (len > limit)
-		return -ENOSPC;
-
-	name = kmalloc(len, GFP_KERNEL);
-	if (!name)
-		return -ENOMEM;
-
-	strcpy(name, token);
-
-	*pname = name;
-	return 0;
-}
-
-
 static inline void kill_final_newline(char *str)
 {
 	char *newline = strrchr(str, '\n');
@@ -426,9 +422,15 @@ static inline void kill_final_newline(char *str)
 	return 0;				\
 } while (0)
 
-static int block2mtd_setup(const char *val, struct kernel_param *kp)
+#ifndef MODULE
+static int block2mtd_init_called = 0;
+static __initdata char block2mtd_paramline[80 + 12]; /* 80 for device, 12 for erase size */
+#endif
+
+
+static int block2mtd_setup2(const char *val)
 {
-	char buf[80+12]; /* 80 for device, 12 for erase size */
+	char buf[80 + 12]; /* 80 for device, 12 for erase size */
 	char *str = buf;
 	char *token[2];
 	char *name;
@@ -450,13 +452,9 @@ static int block2mtd_setup(const char *val, struct kernel_param *kp)
 	if (!token[0])
 		parse_err("no argument");
 
-	ret = parse_name(&name, token[0], 80);
-	if (ret == -ENOMEM)
-		parse_err("out of memory");
-	if (ret == -ENOSPC)
-		parse_err("name too long");
-	if (ret)
-		return 0;
+	name = token[0];
+	if (strlen(name) + 1 > 80)
+		parse_err("device name too long");
 
 	if (token[1]) {
 		ret = parse_num(&erase_size, token[1]);
@@ -472,13 +470,48 @@ static int block2mtd_setup(const char *val, struct kernel_param *kp)
 }
 
 
+static int block2mtd_setup(const char *val, struct kernel_param *kp)
+{
+#ifdef MODULE
+	return block2mtd_setup2(val);
+#else
+	/* If more parameters are later passed in via
+	   /sys/module/block2mtd/parameters/block2mtd
+	   and block2mtd_init() has already been called,
+	   we can parse the argument now. */
+
+	if (block2mtd_init_called)
+		return block2mtd_setup2(val);
+
+	/* During early boot stage, we only save the parameters
+	   here. We must parse them later: if the param passed
+	   from kernel boot command line, block2mtd_setup() is
+	   called so early that it is not possible to resolve
+	   the device (even kmalloc() fails). Defer that work to
+	   block2mtd_setup2(). */
+
+	strlcpy(block2mtd_paramline, val, sizeof(block2mtd_paramline));
+
+	return 0;
+#endif
+}
+
+
 module_param_call(block2mtd, block2mtd_setup, NULL, NULL, 0200);
 MODULE_PARM_DESC(block2mtd, "Device to use. \"block2mtd=<dev>[,<erasesize>]\"");
 
 static int __init block2mtd_init(void)
 {
+	int ret = 0;
 	INFO("version " VERSION);
-	return 0;
+
+#ifndef MODULE
+	if (strlen(block2mtd_paramline))
+		ret = block2mtd_setup2(block2mtd_paramline);
+	block2mtd_init_called = 1;
+#endif
+
+	return ret;
 }
 
 
-- 
GitLab


From 46a1652c28fc4f4e9d46ea12b0c36b5b6b600f58 Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@pentafluge.infradead.org>
Date: Wed, 28 Jun 2006 19:22:07 +0100
Subject: [PATCH 0038/1063] [MTD] Fixes of performance and stability issues in
 CFI driver.

Fix of performance and stability issues on Intel NOR chips. It fixes:

1. Very low write performance on Sibley (perf tests demonstrated write
   performance less than 100Kb/sec when it should be over 400Kb/sec).

2. Low erase performance. (perf tests on Sibley demonstrated erase
   performance 246Kb/sec when it should be over 300Kb/sec).

3. Error on JFFS2 tests with CPU loading application when MTD returns
   "block erase error: (status timeout)" To fix the issue it does the
   following:
     1. Removes the timeout tuning from inval_cache_and_wait_for_operation.
     2. Waiting conditions in inval_cache_and_wait_for_operation now is
         based on timer resolution
        If timeout is lower than timer resolution then we do in cycle
	  "Checking the status"
	  udelay(1);
	  cond_resched();
        If timeout is greater than timer resolution (probably erase
        operation) We do the following
	  sleep for half of operation timeout and do in cycle the following
	    "Checking the status"
	    sleep for timer resolution

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/chips/cfi_cmdset_0001.c | 87 ++++++++++++++---------------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 39edb8250fbc1..7ea49a0d5ec32 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -908,7 +908,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
 
 static int __xipram xip_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
-		unsigned long adr, int *chip_op_time )
+		unsigned long adr, unsigned int chip_op_time )
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -917,7 +917,7 @@ static int __xipram xip_wait_for_operation(
 	flstate_t oldstate, newstate;
 
        	start = xip_currtime();
-	usec = *chip_op_time * 8;
+	usec = chip_op_time * 8;
 	if (usec == 0)
 		usec = 500000;
 	done = 0;
@@ -1027,8 +1027,8 @@ static int __xipram xip_wait_for_operation(
 #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
 	INVALIDATE_CACHED_RANGE(map, from, size)
 
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, p_usec) \
-	xip_wait_for_operation(map, chip, cmd_adr, p_usec)
+#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \
+	xip_wait_for_operation(map, chip, cmd_adr, usec)
 
 #else
 
@@ -1040,64 +1040,64 @@ static int __xipram xip_wait_for_operation(
 static int inval_cache_and_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
 		unsigned long cmd_adr, unsigned long inval_adr, int inval_len,
-		int *chip_op_time )
+		unsigned int chip_op_time)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	map_word status, status_OK = CMD(0x80);
-	int z, chip_state = chip->state;
-	unsigned long timeo;
+	int chip_state = chip->state;
+	unsigned int timeo, sleep_time;
 
 	spin_unlock(chip->mutex);
 	if (inval_len)
 		INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
-	if (*chip_op_time)
-		cfi_udelay(*chip_op_time);
 	spin_lock(chip->mutex);
 
-	timeo = *chip_op_time * 8 * HZ / 1000000;
-	if (timeo < HZ/2)
-		timeo = HZ/2;
-	timeo += jiffies;
+	/* set our timeout to 8 times the expected delay */
+	timeo = chip_op_time * 8;
+	if (!timeo)
+		timeo = 500000;
+	sleep_time = chip_op_time / 2;
 
-	z = 0;
 	for (;;) {
-		if (chip->state != chip_state) {
-			/* Someone's suspended the operation: sleep */
-			DECLARE_WAITQUEUE(wait, current);
-
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
-			schedule();
-			remove_wait_queue(&chip->wq, &wait);
-			timeo = jiffies + (HZ / 2); /* FIXME */
-			spin_lock(chip->mutex);
-			continue;
-		}
-
 		status = map_read(map, cmd_adr);
 		if (map_word_andequal(map, status, status_OK, status_OK))
 			break;
 
-		/* OK Still waiting */
-		if (time_after(jiffies, timeo)) {
+		if (!timeo) {
 			map_write(map, CMD(0x70), cmd_adr);
 			chip->state = FL_STATUS;
 			return -ETIME;
 		}
 
-		/* Latency issues. Drop the lock, wait a while and retry */
-		z++;
+		/* OK Still waiting. Drop the lock, wait a while and retry. */
 		spin_unlock(chip->mutex);
-		cfi_udelay(1);
+		if (sleep_time >= 1000000/HZ) {
+			/*
+			 * Half of the normal delay still remaining
+			 * can be performed with a sleeping delay instead
+			 * of busy waiting.
+			 */
+			msleep(sleep_time/1000);
+			timeo -= sleep_time;
+			sleep_time = 1000000/HZ;
+		} else {
+			udelay(1);
+			cond_resched();
+			timeo--;
+		}
 		spin_lock(chip->mutex);
-	}
 
-	if (!z) {
-		if (!--(*chip_op_time))
-			*chip_op_time = 1;
-	} else if (z > 1)
-		++(*chip_op_time);
+		if (chip->state != chip_state) {
+			/* Someone's suspended the operation: sleep */
+			DECLARE_WAITQUEUE(wait, current);
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			add_wait_queue(&chip->wq, &wait);
+			spin_unlock(chip->mutex);
+			schedule();
+			remove_wait_queue(&chip->wq, &wait);
+			spin_lock(chip->mutex);
+		}
+	}
 
 	/* Done and happy. */
  	chip->state = FL_STATUS;
@@ -1107,8 +1107,7 @@ static int inval_cache_and_wait_for_operation(
 #endif
 
 #define WAIT_TIMEOUT(map, chip, adr, udelay) \
-	({ int __udelay = (udelay); \
-	   INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, &__udelay); })
+	INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay);
 
 
 static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len)
@@ -1332,7 +1331,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, map_bankwidth(map),
-				   &chip->word_write_time);
+				   chip->word_write_time);
 	if (ret) {
 		xip_enable(map, chip, adr);
 		printk(KERN_ERR "%s: word write error (status timeout)\n", map->name);
@@ -1569,7 +1568,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr,
 				   adr, len,
-				   &chip->buffer_write_time);
+				   chip->buffer_write_time);
 	if (ret) {
 		map_write(map, CMD(0x70), cmd_adr);
 		chip->state = FL_STATUS;
@@ -1704,7 +1703,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, len,
-				   &chip->erase_time);
+				   chip->erase_time);
 	if (ret) {
 		map_write(map, CMD(0x70), adr);
 		chip->state = FL_STATUS;
-- 
GitLab


From 804af2cf6e7af31d2e664b54e657dddd9b531dbd Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 26 Jul 2006 21:39:49 +0100
Subject: [PATCH 0039/1063] [AGPGART] remove private page protection map

AGP keeps its own copy of the protection_map, upcoming DRM changes will
also require access to this map from modules.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Dave Airlie <airlied@linux.ie>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/frontend.c | 27 ++-------------------------
 include/linux/mm.h          |  1 +
 mm/mmap.c                   |  7 +++++++
 3 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index d9c5a9142ad1d..0f2ed2aa2d815 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -151,35 +151,12 @@ static void agp_add_seg_to_client(struct agp_client *client,
 	client->segments = seg;
 }
 
-/* Originally taken from linux/mm/mmap.c from the array
- * protection_map.
- * The original really should be exported to modules, or
- * some routine which does the conversion for you
- */
-
-static const pgprot_t my_protect_map[16] =
-{
-	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
-	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
-};
-
 static pgprot_t agp_convert_mmap_flags(int prot)
 {
-#define _trans(x,bit1,bit2) \
-((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
-
 	unsigned long prot_bits;
-	pgprot_t temp;
-
-	prot_bits = _trans(prot, PROT_READ, VM_READ) |
-	    _trans(prot, PROT_WRITE, VM_WRITE) |
-	    _trans(prot, PROT_EXEC, VM_EXEC);
-
-	prot_bits |= VM_SHARED;
 
-	temp = my_protect_map[prot_bits & 0x0000000f];
-
-	return temp;
+	prot_bits = calc_vm_prot_bits(prot) | VM_SHARED;
+	return vm_get_page_prot(prot_bits);
 }
 
 static int agp_create_segment(struct agp_client *client, struct agp_region *region)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 990957e0929f2..4fba4560699b1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1012,6 +1012,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+pgprot_t vm_get_page_prot(unsigned long vm_flags);
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 struct page *vmalloc_to_page(void *addr);
 unsigned long vmalloc_to_pfn(void *addr);
diff --git a/mm/mmap.c b/mm/mmap.c
index c1868ecdbc5fa..c7ed061f45075 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -60,6 +60,13 @@ pgprot_t protection_map[16] = {
 	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+	return protection_map[vm_flags &
+				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+}
+EXPORT_SYMBOL(vm_get_page_prot);
+
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-- 
GitLab


From f4c8aa1107969c26b1984eb2996a58f816dea71f Mon Sep 17 00:00:00 2001
From: "brking@charter.net" <brking@charter.net>
Date: Wed, 5 Jul 2006 17:00:01 -0500
Subject: [PATCH 0040/1063] [SCSI] megaraid: Add support for change_queue_depth

Adds support for change_queue_depth so that device
queue depth can be changed at runtime through sysfs.

Signed-off-by: <brking@charter.net>
Acked-by: Seokmann Ju <seokmann.ju@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/megaraid/megaraid_mbox.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 92715130ac099..7ae580f17e64d 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -330,6 +330,21 @@ static struct device_attribute *megaraid_sdev_attrs[] = {
 	NULL,
 };
 
+/**
+ * megaraid_change_queue_depth - Change the device's queue depth
+ * @sdev:	scsi device struct
+ * @qdepth:	depth to set
+ *
+ * Return value:
+ * 	actual depth set
+ **/
+static int megaraid_change_queue_depth(struct scsi_device *sdev, int qdepth)
+{
+	if (qdepth > MBOX_MAX_SCSI_CMDS)
+		qdepth = MBOX_MAX_SCSI_CMDS;
+	scsi_adjust_queue_depth(sdev, 0, qdepth);
+	return sdev->queue_depth;
+}
 
 /*
  * Scsi host template for megaraid unified driver
@@ -343,6 +358,7 @@ static struct scsi_host_template megaraid_template_g = {
 	.eh_device_reset_handler	= megaraid_reset_handler,
 	.eh_bus_reset_handler		= megaraid_reset_handler,
 	.eh_host_reset_handler		= megaraid_reset_handler,
+	.change_queue_depth		= megaraid_change_queue_depth,
 	.use_clustering			= ENABLE_CLUSTERING,
 	.sdev_attrs			= megaraid_sdev_attrs,
 	.shost_attrs			= megaraid_shost_attrs,
-- 
GitLab


From 0c269e6d3c615403a6e0acbe6e88f1c0da9c2396 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 12 Jul 2006 09:51:04 -0400
Subject: [PATCH 0041/1063] [SCSI] mptsas: add parent port backlink

This takes advantage of the sas class backlink function to show which
port on an expander is used to communicate with the parent.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/message/fusion/mptsas.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index f66f2203143a8..dfdd1e4457682 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1642,14 +1642,18 @@ static int mptsas_probe_one_phy(struct device *dev,
 
 			for (i = 0; i < port_info->num_phys; i++)
 				if (port_info->phy_info[i].identify.sas_address ==
-				    identify.sas_address)
+				    identify.sas_address) {
+					sas_port_mark_backlink(port);
 					goto out;
+				}
 
 		} else if (scsi_is_sas_rphy(parent)) {
 			struct sas_rphy *parent_rphy = dev_to_rphy(parent);
 			if (identify.sas_address ==
-			    parent_rphy->identify.sas_address)
+			    parent_rphy->identify.sas_address) {
+				sas_port_mark_backlink(port);
 				goto out;
+			}
 		}
 
 		switch (identify.device_type) {
-- 
GitLab


From 1c57e86d75cf162bdadb3a5fe0cd3f65aa1a9ca3 Mon Sep 17 00:00:00 2001
From: Erich Chen <erich@areca.com.tw>
Date: Wed, 12 Jul 2006 08:59:32 -0700
Subject: [PATCH 0042/1063] [SCSI] arcmsr: initial driver, version 1.20.00.13

arcmsr is a driver for the Areca Raid controller, a host based RAID
subsystem that speaks SCSI at the firmware level.

This patch is quite a clean up over the initial submission with
contributions from:

Randy Dunlap <rdunlap@xenotime.net>
Christoph Hellwig <hch@lst.de>
Matthew Wilcox <matthew@wil.cx>
Adrian Bunk <bunk@stusta.de>

Signed-off-by: Erich Chen <erich@areca.com.tw>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 Documentation/scsi/ChangeLog.arcmsr |   56 +
 Documentation/scsi/arcmsr_spec.txt  |  574 ++++++++++
 drivers/scsi/Kconfig                |   14 +
 drivers/scsi/Makefile               |    1 +
 drivers/scsi/arcmsr/Makefile        |    6 +
 drivers/scsi/arcmsr/arcmsr.h        |  472 +++++++++
 drivers/scsi/arcmsr/arcmsr_attr.c   |  392 +++++++
 drivers/scsi/arcmsr/arcmsr_hba.c    | 1496 +++++++++++++++++++++++++++
 include/linux/pci_ids.h             |   17 +
 9 files changed, 3028 insertions(+)
 create mode 100644 Documentation/scsi/ChangeLog.arcmsr
 create mode 100644 Documentation/scsi/arcmsr_spec.txt
 create mode 100644 drivers/scsi/arcmsr/Makefile
 create mode 100644 drivers/scsi/arcmsr/arcmsr.h
 create mode 100644 drivers/scsi/arcmsr/arcmsr_attr.c
 create mode 100644 drivers/scsi/arcmsr/arcmsr_hba.c

diff --git a/Documentation/scsi/ChangeLog.arcmsr b/Documentation/scsi/ChangeLog.arcmsr
new file mode 100644
index 0000000000000..162c47fdf45f4
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.arcmsr
@@ -0,0 +1,56 @@
+**************************************************************************
+** History
+**
+**   REV#         DATE             NAME         DESCRIPTION
+** 1.00.00.00    3/31/2004       Erich Chen     First release
+** 1.10.00.04    7/28/2004       Erich Chen     modify for ioctl
+** 1.10.00.06    8/28/2004       Erich Chen     modify for 2.6.x
+** 1.10.00.08    9/28/2004       Erich Chen     modify for x86_64
+** 1.10.00.10   10/10/2004       Erich Chen     bug fix for SMP & ioctl
+** 1.20.00.00   11/29/2004       Erich Chen     bug fix with arcmsr_bus_reset when PHY error
+** 1.20.00.02   12/09/2004       Erich Chen     bug fix with over 2T bytes RAID Volume
+** 1.20.00.04    1/09/2005       Erich Chen     fits for Debian linux kernel version 2.2.xx
+** 1.20.00.05    2/20/2005       Erich Chen     cleanly as look like a Linux driver at 2.6.x
+**                                              thanks for peoples kindness comment
+**						Kornel Wieliczek
+**						Christoph Hellwig
+**						Adrian Bunk
+**						Andrew Morton
+**						Christoph Hellwig
+**						James Bottomley
+**						Arjan van de Ven
+** 1.20.00.06    3/12/2005       Erich Chen     fix with arcmsr_pci_unmap_dma "unsigned long" cast,
+**						modify PCCB POOL allocated by "dma_alloc_coherent"
+**						(Kornel Wieliczek's comment)
+** 1.20.00.07    3/23/2005       Erich Chen     bug fix with arcmsr_scsi_host_template_init
+**						occur segmentation fault,
+**						if RAID adapter does not on PCI slot
+**						and modprobe/rmmod this driver twice.
+**						bug fix enormous stack usage (Adrian Bunk's comment)
+** 1.20.00.08    6/23/2005       Erich Chen     bug fix with abort command,
+**						in case of heavy loading when sata cable
+**						working on low quality connection
+** 1.20.00.09    9/12/2005       Erich Chen     bug fix with abort command handling, firmware version check
+**						and firmware update notify for hardware bug fix
+** 1.20.00.10    9/23/2005       Erich Chen     enhance sysfs function for change driver's max tag Q number.
+**						add DMA_64BIT_MASK for backward compatible with all 2.6.x
+**						add some useful message for abort command
+**						add ioctl code 'ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE'
+**						customer can send this command for sync raid volume data
+** 1.20.00.11    9/29/2005       Erich Chen     by comment of Arjan van de Ven fix incorrect msleep redefine
+**						cast off sizeof(dma_addr_t) condition for 64bit pci_set_dma_mask
+** 1.20.00.12    9/30/2005       Erich Chen     bug fix with 64bit platform's ccbs using if over 4G system memory
+**						change 64bit pci_set_consistent_dma_mask into 32bit
+**						incorrect adapter count if adapter initialize fail.
+**						miss edit at arcmsr_build_ccb....
+**						psge += sizeof(struct _SG64ENTRY *) =>
+**						psge += sizeof(struct _SG64ENTRY)
+**						64 bits sg entry would be incorrectly calculated
+**						thanks Kornel Wieliczek give me kindly notify
+**						and detail description
+** 1.20.00.13   11/15/2005       Erich Chen     scheduling pending ccb with FIFO
+**						change the architecture of arcmsr command queue list
+**						for linux standard list
+**						enable usage of pci message signal interrupt
+**						follow Randy Dunlap's kindness suggestion cleanup this code
+**************************************************************************
\ No newline at end of file
diff --git a/Documentation/scsi/arcmsr_spec.txt b/Documentation/scsi/arcmsr_spec.txt
new file mode 100644
index 0000000000000..5e0042340fd3e
--- /dev/null
+++ b/Documentation/scsi/arcmsr_spec.txt
@@ -0,0 +1,574 @@
+*******************************************************************************
+**                            ARECA FIRMWARE SPEC
+*******************************************************************************
+**	Usage of IOP331 adapter
+**	(All In/Out is in IOP331's view)
+**	1. Message 0 --> InitThread message and return code
+**	2. Doorbell is used for RS-232 emulation
+**		inDoorBell :    bit0 -- data in ready
+**			(DRIVER DATA WRITE OK)
+**				bit1 -- data out has been read
+**			(DRIVER DATA READ OK)
+**		outDoorBell:    bit0 -- data out ready
+**			(IOP331 DATA WRITE OK)
+**				bit1 -- data in has been read
+**			(IOP331 DATA READ OK)
+**	3. Index Memory Usage
+**	offset 0xf00 : for RS232 out (request buffer)
+**	offset 0xe00 : for RS232 in  (scratch buffer)
+**	offset 0xa00 : for inbound message code message_rwbuffer
+**			(driver send to IOP331)
+**	offset 0xa00 : for outbound message code message_rwbuffer
+**			(IOP331 send to driver)
+**	4. RS-232 emulation
+**		Currently 128 byte buffer is used
+**			1st uint32_t : Data length (1--124)
+**			Byte 4--127  : Max 124 bytes of data
+**	5. PostQ
+**	All SCSI Command must be sent through postQ:
+**	(inbound queue port)	Request frame must be 32 bytes aligned
+**	#bit27--bit31 => flag for post ccb
+**	#bit0--bit26  => real address (bit27--bit31) of post arcmsr_cdb
+**		bit31 :
+**			0 : 256 bytes frame
+**			1 : 512 bytes frame
+**		bit30 :
+**			0 : normal request
+**			1 : BIOS request
+**		bit29 : reserved
+**		bit28 : reserved
+**		bit27 : reserved
+**  ---------------------------------------------------------------------------
+**	(outbound queue port)	Request reply
+**	#bit27--bit31
+**		=> flag for reply
+**	#bit0--bit26
+**		=> real address (bit27--bit31) of reply arcmsr_cdb
+**			bit31 : must be 0 (for this type of reply)
+**			bit30 : reserved for BIOS handshake
+**			bit29 : reserved
+**			bit28 :
+**			0 : no error, ignore AdapStatus/DevStatus/SenseData
+**			1 : Error, error code in AdapStatus/DevStatus/SenseData
+**			bit27 : reserved
+**	6. BIOS request
+**		All BIOS requests are the same as requests from PostQ
+**		Except :
+**			Request frame is sent from configuration space
+**		offset: 0x78 : Request Frame (bit30 == 1)
+**		offset: 0x18 : writeonly to generate
+**					IRQ to IOP331
+**		Completion of request:
+**			(bit30 == 0, bit28==err flag)
+**	7. Definition of SGL entry (structure)
+**	8. Message1 Out - Diag Status Code (????)
+**	9. Message0 message code :
+**		0x00 : NOP
+**		0x01 : Get Config
+**		->offset 0xa00 :for outbound message code message_rwbuffer
+**		(IOP331 send to driver)
+**		Signature             0x87974060(4)
+**		Request len           0x00000200(4)
+**		numbers of queue      0x00000100(4)
+**		SDRAM Size            0x00000100(4)-->256 MB
+**		IDE Channels          0x00000008(4)
+**		vendor                40 bytes char
+**		model                  8 bytes char
+**		FirmVer               16 bytes char
+**		Device Map            16 bytes char
+**		FirmwareVersion DWORD <== Added for checking of
+**						new firmware capability
+**		0x02 : Set Config
+**		->offset 0xa00 :for inbound message code message_rwbuffer
+**		(driver send to IOP331)
+**		Signature             0x87974063(4)
+**		UPPER32 of Request Frame  (4)-->Driver Only
+**		0x03 : Reset (Abort all queued Command)
+**		0x04 : Stop Background Activity
+**		0x05 : Flush Cache
+**		0x06 : Start Background Activity
+**			(re-start if background is halted)
+**		0x07 : Check If Host Command Pending
+**			(Novell May Need This Function)
+**		0x08 : Set controller time
+**		->offset 0xa00 : for inbound message code message_rwbuffer
+**		(driver to IOP331)
+**		byte 0 : 0xaa <-- signature
+**		byte 1 : 0x55 <-- signature
+**		byte 2 : year (04)
+**		byte 3 : month (1..12)
+**		byte 4 : date (1..31)
+**		byte 5 : hour (0..23)
+**		byte 6 : minute (0..59)
+**		byte 7 : second (0..59)
+*******************************************************************************
+*******************************************************************************
+**      	RS-232 Interface for Areca Raid Controller
+**      The low level command interface is exclusive with VT100 terminal
+**  --------------------------------------------------------------------
+**      1. Sequence of command execution
+**  --------------------------------------------------------------------
+**    	(A) Header : 3 bytes sequence (0x5E, 0x01, 0x61)
+**    	(B) Command block : variable length of data including length,
+**    		command code, data and checksum byte
+**    	(C) Return data : variable length of data
+**  --------------------------------------------------------------------
+**    2. Command block
+**  --------------------------------------------------------------------
+**    	(A) 1st byte : command block length (low byte)
+**    	(B) 2nd byte : command block length (high byte)
+**                note: command block length must not exceed 2040 bytes,
+**    		length excludes these two bytes
+**    	(C) 3rd byte : command code
+**    	(D) 4th and following bytes : variable length data bytes
+**    		depends on command code
+**    	(E) last byte : checksum byte (sum of 1st byte until last data byte)
+**  --------------------------------------------------------------------
+**    3. Command code and associated data
+**  --------------------------------------------------------------------
+**    	The following are the command codes defined in the raid controller.
+**    	Command codes 0x10--0x1? are used for system level management;
+**    	no password checking is needed, and they should be implemented in a
+**    	separate, well controlled utility and not for end user access.
+**    	Command codes 0x20--0x?? always check the password;
+**    	the password must be entered to enable these commands.
+**    	enum
+**    	{
+**    		GUI_SET_SERIAL=0x10,
+**    		GUI_SET_VENDOR,
+**    		GUI_SET_MODEL,
+**    		GUI_IDENTIFY,
+**    		GUI_CHECK_PASSWORD,
+**    		GUI_LOGOUT,
+**    		GUI_HTTP,
+**    		GUI_SET_ETHERNET_ADDR,
+**    		GUI_SET_LOGO,
+**    		GUI_POLL_EVENT,
+**    		GUI_GET_EVENT,
+**    		GUI_GET_HW_MONITOR,
+**    		//    GUI_QUICK_CREATE=0x20, (function removed)
+**    		GUI_GET_INFO_R=0x20,
+**    		GUI_GET_INFO_V,
+**    		GUI_GET_INFO_P,
+**    		GUI_GET_INFO_S,
+**    		GUI_CLEAR_EVENT,
+**    		GUI_MUTE_BEEPER=0x30,
+**    		GUI_BEEPER_SETTING,
+**    		GUI_SET_PASSWORD,
+**    		GUI_HOST_INTERFACE_MODE,
+**    		GUI_REBUILD_PRIORITY,
+**    		GUI_MAX_ATA_MODE,
+**    		GUI_RESET_CONTROLLER,
+**    		GUI_COM_PORT_SETTING,
+**    		GUI_NO_OPERATION,
+**    		GUI_DHCP_IP,
+**    		GUI_CREATE_PASS_THROUGH=0x40,
+**    		GUI_MODIFY_PASS_THROUGH,
+**    		GUI_DELETE_PASS_THROUGH,
+**    		GUI_IDENTIFY_DEVICE,
+**    		GUI_CREATE_RAIDSET=0x50,
+**    		GUI_DELETE_RAIDSET,
+**    		GUI_EXPAND_RAIDSET,
+**    		GUI_ACTIVATE_RAIDSET,
+**    		GUI_CREATE_HOT_SPARE,
+**    		GUI_DELETE_HOT_SPARE,
+**    		GUI_CREATE_VOLUME=0x60,
+**    		GUI_MODIFY_VOLUME,
+**    		GUI_DELETE_VOLUME,
+**    		GUI_START_CHECK_VOLUME,
+**    		GUI_STOP_CHECK_VOLUME
+**    	};
+**    Command description :
+**    	GUI_SET_SERIAL : Set the controller serial#
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x10
+**    		byte 3          : password length (should be 0x0f)
+**    		byte 4-0x13     : should be "ArEcATecHnoLogY"
+**    		byte 0x14--0x23 : Serial number string (must be 16 bytes)
+**      GUI_SET_VENDOR : Set vendor string for the controller
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x11
+**    		byte 3          : password length (should be 0x08)
+**    		byte 4-0x13     : should be "ArEcAvAr"
+**    		byte 0x14--0x3B : vendor string (must be 40 bytes)
+**      GUI_SET_MODEL : Set the model name of the controller
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x12
+**    		byte 3          : password length (should be 0x08)
+**    		byte 4-0x13     : should be "ArEcAvAr"
+**    		byte 0x14--0x1B : model string (must be 8 bytes)
+**      GUI_IDENTIFY : Identify device
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x13
+**    		                  return "Areca RAID Subsystem "
+**      GUI_CHECK_PASSWORD : Verify password
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x14
+**    		byte 3          : password length
+**    		byte 4-0x??     : user password to be checked
+**      GUI_LOGOUT : Logout GUI (force password checking on next command)
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x15
+**      GUI_HTTP : HTTP interface (reserved for Http proxy service)(0x16)
+**
+**      GUI_SET_ETHERNET_ADDR : Set the ethernet MAC address
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x17
+**    		byte 3          : password length (should be 0x08)
+**    		byte 4-0x13     : should be "ArEcAvAr"
+**    		byte 0x14--0x19 : Ethernet MAC address (must be 6 bytes)
+**      GUI_SET_LOGO : Set logo in HTTP
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x18
+**    		byte 3          : Page# (0/1/2/3) (0xff --> clear OEM logo)
+**    		byte 4/5/6/7    : 0x55/0xaa/0xa5/0x5a
+**    		byte 8          : TITLE.JPG data (each page must be 2000 bytes)
+**    		                  note page0 1st 2 byte must be
+**    					actual length of the JPG file
+**      GUI_POLL_EVENT : Poll If Event Log Changed
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x19
+**      GUI_GET_EVENT : Read Event
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x1a
+**    		byte 3          : Event Page (0:1st page/1/2/3:last page)
+**      GUI_GET_HW_MONITOR : Get HW monitor data
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x1b
+**    		byte 3 			: # of FANs(example 2)
+**    		byte 4 			: # of Voltage sensor(example 3)
+**    		byte 5 			: # of temperature sensor(example 2)
+**    		byte 6 			: # of power
+**    		byte 7/8        : Fan#0 (RPM)
+**    		byte 9/10       : Fan#1
+**    		byte 11/12 		: Voltage#0 original value in *1000
+**    		byte 13/14 		: Voltage#0 value
+**    		byte 15/16 		: Voltage#1 org
+**    		byte 17/18 		: Voltage#1
+**    		byte 19/20 		: Voltage#2 org
+**    		byte 21/22 		: Voltage#2
+**    		byte 23 		: Temp#0
+**    		byte 24 		: Temp#1
+**    		byte 25 		: Power indicator (bit0 : power#0,
+**    						 bit1 : power#1)
+**    		byte 26 		: UPS indicator
+**      GUI_QUICK_CREATE : Quick create raid/volume set
+**    	    byte 0,1        : length
+**    	    byte 2          : command code 0x20
+**    	    byte 3/4/5/6    : raw capacity
+**    	    byte 7 			: raid level
+**    	    byte 8 			: stripe size
+**    	    byte 9 			: spare
+**    	    byte 10/11/12/13: device mask (the devices to create raid/volume)
+**    		This function has been removed; applications that
+**    		want to implement a quick create function need to
+**    		use GUI_CREATE_RAIDSET and GUI_CREATE_VOLUME instead.
+**      GUI_GET_INFO_R : Get Raid Set Information
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x20
+**    		byte 3          : raidset#
+**    	typedef struct sGUI_RAIDSET
+**    	{
+**    		BYTE grsRaidSetName[16];
+**    		DWORD grsCapacity;
+**    		DWORD grsCapacityX;
+**    		DWORD grsFailMask;
+**    		BYTE grsDevArray[32];
+**    		BYTE grsMemberDevices;
+**    		BYTE grsNewMemberDevices;
+**    		BYTE grsRaidState;
+**    		BYTE grsVolumes;
+**    		BYTE grsVolumeList[16];
+**    		BYTE grsRes1;
+**    		BYTE grsRes2;
+**    		BYTE grsRes3;
+**    		BYTE grsFreeSegments;
+**    		DWORD grsRawStripes[8];
+**    		DWORD grsRes4;
+**    		DWORD grsRes5;
+**    		DWORD grsRes6; //     Total to 128 bytes
+**    	} sGUI_RAIDSET, *pGUI_RAIDSET;
+**      GUI_GET_INFO_V : Get Volume Set Information
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x21
+**    		byte 3          : volumeset#
+**    	typedef struct sGUI_VOLUMESET
+**    	{
+**    		BYTE gvsVolumeName[16]; //     16
+**    		DWORD gvsCapacity;
+**    		DWORD gvsCapacityX;
+**    		DWORD gvsFailMask;
+**    		DWORD gvsStripeSize;
+**    		DWORD gvsNewFailMask;
+**    		DWORD gvsNewStripeSize;
+**    		DWORD gvsVolumeStatus;
+**    		DWORD gvsProgress; //     32
+**    		sSCSI_ATTR gvsScsi;
+**    		BYTE gvsMemberDisks;
+**    		BYTE gvsRaidLevel; //     8
+**    		BYTE gvsNewMemberDisks;
+**    		BYTE gvsNewRaidLevel;
+**    		BYTE gvsRaidSetNumber;
+**    		BYTE gvsRes0; //     4
+**    		BYTE gvsRes1[4]; //     64 bytes
+**    	} sGUI_VOLUMESET, *pGUI_VOLUMESET;
+**      GUI_GET_INFO_P : Get Physical Drive Information
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x22
+**    		byte 3          : drive # (from 0 to max-channels - 1)
+**    	typedef struct sGUI_PHY_DRV
+**    	{
+**    		BYTE gpdModelName[40];
+**    		BYTE gpdSerialNumber[20];
+**    		BYTE gpdFirmRev[8];
+**    		DWORD gpdCapacity;
+**    		DWORD gpdCapacityX; //     Reserved for expansion
+**    		BYTE gpdDeviceState;
+**    		BYTE gpdPioMode;
+**    		BYTE gpdCurrentUdmaMode;
+**    		BYTE gpdUdmaMode;
+**    		BYTE gpdDriveSelect;
+**    		BYTE gpdRaidNumber; //     0xff if it does not belong to a raid set
+**    		sSCSI_ATTR gpdScsi;
+**    		BYTE gpdReserved[40]; //     Total to 128 bytes
+**    	} sGUI_PHY_DRV, *pGUI_PHY_DRV;
+**    	GUI_GET_INFO_S : Get System Information
+**      	byte 0,1        : length
+**      	byte 2          : command code 0x23
+**    	typedef struct sCOM_ATTR
+**    	{
+**    		BYTE comBaudRate;
+**    		BYTE comDataBits;
+**    		BYTE comStopBits;
+**    		BYTE comParity;
+**    		BYTE comFlowControl;
+**    	} sCOM_ATTR, *pCOM_ATTR;
+**    	typedef struct sSYSTEM_INFO
+**    	{
+**    		BYTE gsiVendorName[40];
+**    		BYTE gsiSerialNumber[16];
+**    		BYTE gsiFirmVersion[16];
+**    		BYTE gsiBootVersion[16];
+**    		BYTE gsiMbVersion[16];
+**    		BYTE gsiModelName[8];
+**    		BYTE gsiLocalIp[4];
+**    		BYTE gsiCurrentIp[4];
+**    		DWORD gsiTimeTick;
+**    		DWORD gsiCpuSpeed;
+**    		DWORD gsiICache;
+**    		DWORD gsiDCache;
+**    		DWORD gsiScache;
+**    		DWORD gsiMemorySize;
+**    		DWORD gsiMemorySpeed;
+**    		DWORD gsiEvents;
+**    		BYTE gsiMacAddress[6];
+**    		BYTE gsiDhcp;
+**    		BYTE gsiBeeper;
+**    		BYTE gsiChannelUsage;
+**    		BYTE gsiMaxAtaMode;
+**    		BYTE gsiSdramEcc; //     1:if ECC enabled
+**    		BYTE gsiRebuildPriority;
+**    		sCOM_ATTR gsiComA; //     5 bytes
+**    		sCOM_ATTR gsiComB; //     5 bytes
+**    		BYTE gsiIdeChannels;
+**    		BYTE gsiScsiHostChannels;
+**    		BYTE gsiIdeHostChannels;
+**    		BYTE gsiMaxVolumeSet;
+**    		BYTE gsiMaxRaidSet;
+**    		BYTE gsiEtherPort; //     1:if ether net port supported
+**    		BYTE gsiRaid6Engine; //     1:Raid6 engine supported
+**    		BYTE gsiRes[75];
+**    	} sSYSTEM_INFO, *pSYSTEM_INFO;
+**    	GUI_CLEAR_EVENT : Clear System Event
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x24
+**      GUI_MUTE_BEEPER : Mute current beeper
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x30
+**      GUI_BEEPER_SETTING : Enable/disable beeper
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x31
+**    		byte 3          : 0->disable, 1->enable
+**      GUI_SET_PASSWORD : Change password
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x32
+**    		byte 3 			: password length (must be <= 15)
+**    		byte 4 			: password (must be alphanumeric)
+**    	GUI_HOST_INTERFACE_MODE : Set host interface mode
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x33
+**    		byte 3 			: 0->Independent, 1->cluster
+**      GUI_REBUILD_PRIORITY : Set rebuild priority
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x34
+**    		byte 3 			: 0/1/2/3 (low->high)
+**      GUI_MAX_ATA_MODE : Set maximum ATA mode to be used
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x35
+**    		byte 3 			: 0/1/2/3 (133/100/66/33)
+**      GUI_RESET_CONTROLLER : Reset Controller
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x36
+**                            *Response with VT100 screen (discard it)
+**      GUI_COM_PORT_SETTING : COM port setting
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x37
+**    		byte 3 			: 0->COMA (term port),
+**    					  1->COMB (debug port)
+**    		byte 4 			: 0/1/2/3/4/5/6/7
+**    			(1200/2400/4800/9600/19200/38400/57600/115200)
+**    		byte 5 			: data bit
+**    					(0:7 bit, 1:8 bit : must be 8 bit)
+**    		byte 6 			: stop bit (0:1, 1:2 stop bits)
+**    		byte 7 			: parity (0:none, 1:odd, 2:even)
+**    		byte 8 			: flow control
+**    			(0:none, 1:xon/xoff, 2:hardware => must use none)
+**      GUI_NO_OPERATION : No operation
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x38
+**      GUI_DHCP_IP : Set DHCP option and local IP address
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x39
+**    		byte 3          : 0:dhcp disabled, 1:dhcp enabled
+**    		byte 4/5/6/7    : IP address
+**      GUI_CREATE_PASS_THROUGH : Create pass through disk
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x40
+**    		byte 3 			: device #
+**    		byte 4 			: scsi channel (0/1)
+**    		byte 5 			: scsi id (0-->15)
+**    		byte 6 			: scsi lun (0-->7)
+**    		byte 7 			: tagged queue (1 : enabled)
+**    		byte 8 			: cache mode (1 : enabled)
+**    		byte 9 			: max speed (0/1/2/3/4,
+**    			async/20/40/80/160 for scsi)
+**    			(0/1/2/3/4, 33/66/100/133/150 for ide  )
+**      GUI_MODIFY_PASS_THROUGH : Modify pass through disk
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x41
+**    		byte 3 			: device #
+**    		byte 4 			: scsi channel (0/1)
+**    		byte 5 			: scsi id (0-->15)
+**    		byte 6 			: scsi lun (0-->7)
+**    		byte 7 			: tagged queue (1 : enabled)
+**    		byte 8 			: cache mode (1 : enabled)
+**    		byte 9 			: max speed (0/1/2/3/4,
+**    					async/20/40/80/160 for scsi)
+**    			(0/1/2/3/4, 33/66/100/133/150 for ide  )
+**      GUI_DELETE_PASS_THROUGH : Delete pass through disk
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x42
+**    		byte 3          : device# to be deleted
+**      GUI_IDENTIFY_DEVICE : Identify Device
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x43
+**    		byte 3          : Flash Method
+**    				(0:flash selected, 1:flash not selected)
+**    		byte 4/5/6/7    : IDE device mask to be flashed
+**                           note .... no response data available
+**    	GUI_CREATE_RAIDSET : Create Raid Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x50
+**    		byte 3/4/5/6    : device mask
+**    		byte 7-22       : raidset name (if byte 7 == 0:use default)
+**      GUI_DELETE_RAIDSET : Delete Raid Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x51
+**    		byte 3          : raidset#
+**    	GUI_EXPAND_RAIDSET : Expand Raid Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x52
+**    		byte 3          : raidset#
+**    		byte 4/5/6/7    : device mask for expansion
+**    		byte 8/9/10     : (8:0 no change, 1 change, 0xff:terminate,
+**    				9:new raid level,
+**    				10:new stripe size
+**    				0/1/2/3/4/5->4/8/16/32/64/128K )
+**    		byte 11/12/13   : repeat for each volume in the raidset
+**      GUI_ACTIVATE_RAIDSET : Activate incomplete raid set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x53
+**    		byte 3          : raidset#
+**      GUI_CREATE_HOT_SPARE : Create hot spare disk
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x54
+**    		byte 3/4/5/6    : device mask for hot spare creation
+**    	GUI_DELETE_HOT_SPARE : Delete hot spare disk
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x55
+**    		byte 3/4/5/6    : device mask for hot spare deletion
+**    	GUI_CREATE_VOLUME : Create volume set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x60
+**    		byte 3          : raidset#
+**    		byte 4-19       : volume set name
+**    				(if byte4 == 0, use default)
+**    		byte 20-27      : volume capacity (blocks)
+**    		byte 28 		: raid level
+**    		byte 29 		: stripe size
+**    				(0/1/2/3/4/5->4/8/16/32/64/128K)
+**    		byte 30 		: channel
+**    		byte 31 		: ID
+**    		byte 32 		: LUN
+**    		byte 33 		: 1 enable tag
+**    		byte 34 		: 1 enable cache
+**    		byte 35 		: speed
+**    		(0/1/2/3/4->async/20/40/80/160 for scsi)
+**    		(0/1/2/3/4->33/66/100/133/150 for IDE  )
+**    		byte 36 		: 1 to select quick init
+**
+**    	GUI_MODIFY_VOLUME : Modify volume Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x61
+**    		byte 3          : volumeset#
+**    		byte 4-19       : new volume set name
+**    		(if byte4 == 0, not change)
+**    		byte 20-27      : new volume capacity (reserved)
+**    		byte 28 		: new raid level
+**    		byte 29 		: new stripe size
+**    		(0/1/2/3/4/5->4/8/16/32/64/128K)
+**    		byte 30 		: new channel
+**    		byte 31 		: new ID
+**    		byte 32 		: new LUN
+**    		byte 33 		: 1 enable tag
+**    		byte 34 		: 1 enable cache
+**    		byte 35 		: speed
+**    		(0/1/2/3/4->async/20/40/80/160 for scsi)
+**    		(0/1/2/3/4->33/66/100/133/150 for IDE  )
+**    	GUI_DELETE_VOLUME : Delete volume set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x62
+**    		byte 3          : volumeset#
+**    	GUI_START_CHECK_VOLUME : Start volume consistency check
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x63
+**    		byte 3          : volumeset#
+**    	GUI_STOP_CHECK_VOLUME : Stop volume consistency check
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x64
+** ---------------------------------------------------------------------
+**    4. Returned data
+** ---------------------------------------------------------------------
+**    	(A) Header          : 3 bytes sequence (0x5E, 0x01, 0x61)
+**    	(B) Length          : 2 bytes
+**    			(low byte 1st, excludes length and checksum byte)
+**    	(C) status or data  :
+**           <1> If length == 1 ==> 1 byte status code
+**    		#define GUI_OK                    0x41
+**    		#define GUI_RAIDSET_NOT_NORMAL    0x42
+**    		#define GUI_VOLUMESET_NOT_NORMAL  0x43
+**    		#define GUI_NO_RAIDSET            0x44
+**    		#define GUI_NO_VOLUMESET          0x45
+**    		#define GUI_NO_PHYSICAL_DRIVE     0x46
+**    		#define GUI_PARAMETER_ERROR       0x47
+**    		#define GUI_UNSUPPORTED_COMMAND   0x48
+**    		#define GUI_DISK_CONFIG_CHANGED   0x49
+**    		#define GUI_INVALID_PASSWORD      0x4a
+**    		#define GUI_NO_DISK_SPACE         0x4b
+**    		#define GUI_CHECKSUM_ERROR        0x4c
+**    		#define GUI_PASSWORD_REQUIRED     0x4d
+**           <2> If length > 1 ==>
+**    		data block returned from controller
+**    		and the contents depends on the command code
+**    	(D) Checksum        : checksum of length and status or data byte
+**************************************************************************
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 96a81cd17617a..d61662c1a0ee9 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -469,6 +469,20 @@ config SCSI_IN2000
 	  To compile this driver as a module, choose M here: the
 	  module will be called in2000.
 
+config SCSI_ARCMSR
+	tristate "ARECA ARC11X0[PCI-X]/ARC12X0[PCI-EXPRESS] SATA-RAID support"
+	depends on PCI && SCSI
+	help
+	  This driver supports all of ARECA's SATA RAID controller cards.
+	  This is an ARECA-maintained driver by Erich Chen.
+	  If you have any problems, please mail to: < erich@areca.com.tw >
+	  Areca supports Linux RAID config tools.
+
+	  < http://www.areca.com.tw >
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called arcmsr (modprobe arcmsr).
+
 source "drivers/scsi/megaraid/Kconfig.megaraid"
 
 config SCSI_SATA
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index ebd0cf00bf3e6..b2de9bfdfdcdf 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_SCSI_PSI240I)	+= psi240i.o
 obj-$(CONFIG_SCSI_BUSLOGIC)	+= BusLogic.o
 obj-$(CONFIG_SCSI_DPT_I2O)	+= dpt_i2o.o
 obj-$(CONFIG_SCSI_U14_34F)	+= u14-34f.o
+obj-$(CONFIG_SCSI_ARCMSR)	+= arcmsr/
 obj-$(CONFIG_SCSI_ULTRASTOR)	+= ultrastor.o
 obj-$(CONFIG_SCSI_AHA152X)	+= aha152x.o
 obj-$(CONFIG_SCSI_AHA1542)	+= aha1542.o
diff --git a/drivers/scsi/arcmsr/Makefile b/drivers/scsi/arcmsr/Makefile
new file mode 100644
index 0000000000000..721aced39168f
--- /dev/null
+++ b/drivers/scsi/arcmsr/Makefile
@@ -0,0 +1,6 @@
+# File: drivers/scsi/arcmsr/Makefile
+# Makefile for the ARECA PCI-X PCI-EXPRESS SATA RAID controllers SCSI driver.
+
+arcmsr-objs := arcmsr_attr.o arcmsr_hba.o
+
+obj-$(CONFIG_SCSI_ARCMSR) := arcmsr.o
diff --git a/drivers/scsi/arcmsr/arcmsr.h b/drivers/scsi/arcmsr/arcmsr.h
new file mode 100644
index 0000000000000..aff96db9ccf6b
--- /dev/null
+++ b/drivers/scsi/arcmsr/arcmsr.h
@@ -0,0 +1,472 @@
+/*
+*******************************************************************************
+**        O.S   : Linux
+**   FILE NAME  : arcmsr.h
+**        BY    : Erich Chen
+**   Description: SCSI RAID Device Driver for
+**                ARECA RAID Host adapter
+*******************************************************************************
+** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved.
+**
+**     Web site: www.areca.com.tw
+**       E-mail: erich@areca.com.tw
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License version 2 as
+** published by the Free Software Foundation.
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*******************************************************************************
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions
+** are met:
+** 1. Redistributions of source code must retain the above copyright
+**    notice, this list of conditions and the following disclaimer.
+** 2. Redistributions in binary form must reproduce the above copyright
+**    notice, this list of conditions and the following disclaimer in the
+**    documentation and/or other materials provided with the distribution.
+** 3. The name of the author may not be used to endorse or promote products
+**    derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT
+** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+**(INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
+** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************
+*/
+#include <linux/interrupt.h>
+
+struct class_device_attribute;
+
+#define ARCMSR_MAX_OUTSTANDING_CMD 						256
+#define ARCMSR_MAX_FREECCB_NUM							288
+#define ARCMSR_DRIVER_VERSION				"Driver Version 1.20.00.13"
+#define ARCMSR_SCSI_INITIATOR_ID						255
+#define ARCMSR_MAX_XFER_SECTORS							512
+#define ARCMSR_MAX_TARGETID							 17
+#define ARCMSR_MAX_TARGETLUN							  8
+#define ARCMSR_MAX_CMD_PERLUN				 ARCMSR_MAX_OUTSTANDING_CMD
+#define ARCMSR_MAX_QBUFFER						       4096
+#define ARCMSR_MAX_SG_ENTRIES							 38
+
+/*
+*******************************************************************************
+** Split a DMA address into its high/low 32 bits. hi32 shifts by 16 twice so
+** the shift count stays below the type width when addr is only 32 bits wide.
+*/
+#define dma_addr_hi32(addr)               ((uint32_t) (((addr)>>16)>>16))
+#define dma_addr_lo32(addr)               ((uint32_t) ((addr) & 0xffffffff))
+/*
+*******************************************************************************
+**        MESSAGE CONTROL CODE
+*******************************************************************************
+*/
+struct CMD_MESSAGE
+{
+      uint32_t HeaderLength;
+      uint8_t  Signature[8];
+      uint32_t Timeout;
+      uint32_t ControlCode;
+      uint32_t ReturnCode;
+      uint32_t Length;
+};
+/*
+*******************************************************************************
+**        IOP Message Transfer Data for user space
+*******************************************************************************
+*/
+struct CMD_MESSAGE_FIELD
+{
+    struct CMD_MESSAGE			cmdmessage;
+    uint8_t				messagedatabuffer[1032];
+};
+/* IOP message transfer */
+#define ARCMSR_MESSAGE_FAIL             0x0001
+/* DeviceType */
+#define ARECA_SATA_RAID				0x90000000
+/* FunctionCode */
+#define FUNCTION_READ_RQBUFFER			0x0801
+#define FUNCTION_WRITE_WQBUFFER			0x0802
+#define FUNCTION_CLEAR_RQBUFFER			0x0803
+#define FUNCTION_CLEAR_WQBUFFER			0x0804
+#define FUNCTION_CLEAR_ALLQBUFFER		0x0805
+#define FUNCTION_RETURN_CODE_3F			0x0806
+#define FUNCTION_SAY_HELLO			0x0807
+#define FUNCTION_SAY_GOODBYE			0x0808
+#define FUNCTION_FLUSH_ADAPTER_CACHE		0x0809
+/* ARECA IO CONTROL CODE = (DeviceType | FunctionCode), safe inside expressions */
+#define ARCMSR_MESSAGE_READ_RQBUFFER       \
+	(ARECA_SATA_RAID | FUNCTION_READ_RQBUFFER)
+#define ARCMSR_MESSAGE_WRITE_WQBUFFER      \
+	(ARECA_SATA_RAID | FUNCTION_WRITE_WQBUFFER)
+#define ARCMSR_MESSAGE_CLEAR_RQBUFFER      \
+	(ARECA_SATA_RAID | FUNCTION_CLEAR_RQBUFFER)
+#define ARCMSR_MESSAGE_CLEAR_WQBUFFER      \
+	(ARECA_SATA_RAID | FUNCTION_CLEAR_WQBUFFER)
+#define ARCMSR_MESSAGE_CLEAR_ALLQBUFFER    \
+	(ARECA_SATA_RAID | FUNCTION_CLEAR_ALLQBUFFER)
+#define ARCMSR_MESSAGE_RETURN_CODE_3F      \
+	(ARECA_SATA_RAID | FUNCTION_RETURN_CODE_3F)
+#define ARCMSR_MESSAGE_SAY_HELLO           \
+	(ARECA_SATA_RAID | FUNCTION_SAY_HELLO)
+#define ARCMSR_MESSAGE_SAY_GOODBYE         \
+	(ARECA_SATA_RAID | FUNCTION_SAY_GOODBYE)
+#define ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE \
+	(ARECA_SATA_RAID | FUNCTION_FLUSH_ADAPTER_CACHE)
+/* ARECA IOCTL ReturnCode */
+#define ARCMSR_MESSAGE_RETURNCODE_OK              0x00000001
+#define ARCMSR_MESSAGE_RETURNCODE_ERROR           0x00000006
+#define ARCMSR_MESSAGE_RETURNCODE_3F              0x0000003F
+/*
+*************************************************************
+**   structure for holding DMA address data
+*************************************************************
+*/
+#define IS_SG64_ADDR                0x01000000 /* bit24 */
+struct  SG32ENTRY
+{
+	uint32_t					length;
+	uint32_t					address;
+};
+struct  SG64ENTRY
+{
+ 	uint32_t					length;
+ 	uint32_t					address;
+ 	uint32_t					addresshigh;
+};
+struct SGENTRY_UNION
+{
+	union
+	{
+		struct SG32ENTRY            sg32entry;
+		struct SG64ENTRY            sg64entry;
+	}u;
+};
+/*
+********************************************************************
+**      Q Buffer of IOP Message Transfer
+********************************************************************
+*/
+struct QBUFFER
+{
+	uint32_t      data_len;
+	uint8_t       data[124];
+};
+/*
+*******************************************************************************
+**      FIRMWARE INFO
+*******************************************************************************
+*/
+struct FIRMWARE_INFO
+{
+	uint32_t      signature;                /*0, 00-03*/
+	uint32_t      request_len;              /*1, 04-07*/
+	uint32_t      numbers_queue;            /*2, 08-11*/
+	uint32_t      sdram_size;               /*3, 12-15*/
+	uint32_t      ide_channels;             /*4, 16-19*/
+	char          vendor[40];               /*5, 20-59*/
+	char          model[8];                 /*15, 60-67*/
+	char          firmware_ver[16];         /*17, 68-83*/
+	char          device_map[16];           /*21, 84-99*/
+};
+/* signature of set and get firmware config */
+#define ARCMSR_SIGNATURE_GET_CONFIG                   0x87974060
+#define ARCMSR_SIGNATURE_SET_CONFIG                   0x87974063
+/* message code of inbound message register */
+#define ARCMSR_INBOUND_MESG0_NOP                      0x00000000
+#define ARCMSR_INBOUND_MESG0_GET_CONFIG               0x00000001
+#define ARCMSR_INBOUND_MESG0_SET_CONFIG               0x00000002
+#define ARCMSR_INBOUND_MESG0_ABORT_CMD                0x00000003
+#define ARCMSR_INBOUND_MESG0_STOP_BGRB                0x00000004
+#define ARCMSR_INBOUND_MESG0_FLUSH_CACHE              0x00000005
+#define ARCMSR_INBOUND_MESG0_START_BGRB               0x00000006
+#define ARCMSR_INBOUND_MESG0_CHK331PENDING            0x00000007
+#define ARCMSR_INBOUND_MESG0_SYNC_TIMER               0x00000008
+/* doorbell interrupt generator */
+#define ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK           0x00000001
+#define ARCMSR_INBOUND_DRIVER_DATA_READ_OK            0x00000002
+#define ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK          0x00000001
+#define ARCMSR_OUTBOUND_IOP331_DATA_READ_OK           0x00000002
+/* ccb areca cdb flag */
+#define ARCMSR_CCBPOST_FLAG_SGL_BSIZE                 0x80000000
+#define ARCMSR_CCBPOST_FLAG_IAM_BIOS                  0x40000000
+#define ARCMSR_CCBREPLY_FLAG_IAM_BIOS                 0x40000000
+#define ARCMSR_CCBREPLY_FLAG_ERROR                    0x10000000
+/* outbound firmware ok */
+#define ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK             0x80000000
+/*
+*******************************************************************************
+**    ARECA SCSI COMMAND DESCRIPTOR BLOCK size 0x1F8 (504)
+*******************************************************************************
+*/
+struct ARCMSR_CDB
+{
+	uint8_t							Bus;
+	uint8_t							TargetID;
+	uint8_t							LUN;
+	uint8_t							Function;
+
+	uint8_t							CdbLength;
+	uint8_t							sgcount;
+	uint8_t							Flags;
+#define ARCMSR_CDB_FLAG_SGL_BSIZE          0x01
+#define ARCMSR_CDB_FLAG_BIOS               0x02
+#define ARCMSR_CDB_FLAG_WRITE              0x04
+#define ARCMSR_CDB_FLAG_SIMPLEQ            0x00
+#define ARCMSR_CDB_FLAG_HEADQ              0x08
+#define ARCMSR_CDB_FLAG_ORDEREDQ           0x10
+	uint8_t							Reserved1;
+
+	uint32_t						Context;
+	uint32_t						DataLength;
+
+	uint8_t							Cdb[16];
+
+	uint8_t							DeviceStatus;
+#define ARCMSR_DEV_CHECK_CONDITION          0x02
+#define ARCMSR_DEV_SELECT_TIMEOUT			0xF0
+#define ARCMSR_DEV_ABORTED				0xF1
+#define ARCMSR_DEV_INIT_FAIL				0xF2
+	uint8_t							SenseData[15];
+
+	union
+	{
+		struct SG32ENTRY                sg32entry[ARCMSR_MAX_SG_ENTRIES];
+		struct SG64ENTRY                sg64entry[ARCMSR_MAX_SG_ENTRIES];
+	} u;
+};
+/*
+*******************************************************************************
+**     Messaging Unit (MU) of the Intel R 80331 I/O processor (80331)
+*******************************************************************************
+*/
+struct MessageUnit
+{
+	uint32_t	resrved0[4];			/*0000 000F*/
+	uint32_t	inbound_msgaddr0;		/*0010 0013*/
+	uint32_t	inbound_msgaddr1;		/*0014 0017*/
+	uint32_t	outbound_msgaddr0;		/*0018 001B*/
+	uint32_t	outbound_msgaddr1;		/*001C 001F*/
+	uint32_t	inbound_doorbell;		/*0020 0023*/
+	uint32_t	inbound_intstatus;		/*0024 0027*/
+	uint32_t	inbound_intmask;		/*0028 002B*/
+	uint32_t	outbound_doorbell;		/*002C 002F*/
+	uint32_t	outbound_intstatus;		/*0030 0033*/
+	uint32_t	outbound_intmask;		/*0034 0037*/
+	uint32_t	reserved1[2];			/*0038 003F*/
+	uint32_t	inbound_queueport;		/*0040 0043*/
+	uint32_t	outbound_queueport;     	/*0044 0047*/
+	uint32_t	reserved2[2];			/*0048 004F*/
+	uint32_t	reserved3[492];			/*0050 07FF 492*/
+	uint32_t	reserved4[128];			/*0800 09FF 128*/
+	uint32_t	message_rwbuffer[256];		/*0a00 0DFF 256*/
+	uint32_t	message_wbuffer[32];		/*0E00 0E7F  32*/
+	uint32_t	reserved5[32];			/*0E80 0EFF  32*/
+	uint32_t	message_rbuffer[32];		/*0F00 0F7F  32*/
+	uint32_t	reserved6[32];			/*0F80 0FFF  32*/
+};
+/*
+*******************************************************************************
+**                 Adapter Control Block
+*******************************************************************************
+*/
+struct AdapterControlBlock
+{
+	struct pci_dev *		pdev;
+	struct Scsi_Host *		host;
+	unsigned long			vir2phy_offset;
+	/* Offset is used in making arc cdb physical to virtual calculations */
+	uint32_t			outbound_int_enable;
+
+	struct MessageUnit __iomem *		pmu;
+	/* message unit ATU inbound base address0 */
+
+	uint32_t			acb_flags;
+#define ACB_F_SCSISTOPADAPTER         0x0001
+#define ACB_F_MSG_STOP_BGRB           0x0002
+	/* stop RAID background rebuild */
+#define ACB_F_MSG_START_BGRB          0x0004
+	/* start RAID background rebuild */
+#define ACB_F_IOPDATA_OVERFLOW        0x0008
+	/* iop message data rqbuffer overflow */
+#define ACB_F_MESSAGE_WQBUFFER_CLEARED  0x0010
+	/* message clear wqbuffer */
+#define ACB_F_MESSAGE_RQBUFFER_CLEARED  0x0020
+	/* message clear rqbuffer */
+#define ACB_F_MESSAGE_WQBUFFER_READED   0x0040
+#define ACB_F_BUS_RESET               0x0080
+#define ACB_F_IOP_INITED              0x0100
+	/* iop init */
+
+	struct CommandControlBlock *			pccb_pool[ARCMSR_MAX_FREECCB_NUM];
+	/* used for memory free */
+	struct list_head		ccb_free_list;
+	/* head of free ccb list */
+	atomic_t			ccboutstandingcount;
+
+	void *				dma_coherent;
+	/* dma_coherent used for memory free */
+	dma_addr_t			dma_coherent_handle;
+	/* dma_coherent_handle used for memory free */
+
+	uint8_t				rqbuffer[ARCMSR_MAX_QBUFFER];
+	/* data collection buffer for read from 80331 */
+	int32_t				rqbuf_firstindex;
+	/* first of read buffer  */
+	int32_t				rqbuf_lastindex;
+	/* last of read buffer   */
+	uint8_t				wqbuffer[ARCMSR_MAX_QBUFFER];
+	/* data collection buffer for write to 80331  */
+	int32_t				wqbuf_firstindex;
+	/* first of write buffer */
+	int32_t				wqbuf_lastindex;
+	/* last of write buffer  */
+	uint8_t				devstate[ARCMSR_MAX_TARGETID][ARCMSR_MAX_TARGETLUN];
+	/* id0 ..... id16, lun0...lun7 */
+#define ARECA_RAID_GONE               0x55
+#define ARECA_RAID_GOOD               0xaa
+	uint32_t			num_resets;
+	uint32_t			num_aborts;
+	uint32_t			firm_request_len;
+	uint32_t			firm_numbers_queue;
+	uint32_t			firm_sdram_size;
+	uint32_t			firm_hd_channels;
+	char				firm_model[12];
+	char				firm_version[20];
+};/* HW_DEVICE_EXTENSION */
+/*
+*******************************************************************************
+**                   Command Control Block
+**             this CCB length must be 32 bytes boundary
+*******************************************************************************
+*/
+struct CommandControlBlock
+{
+	struct ARCMSR_CDB		arcmsr_cdb;
+	/*
+	** 0-503 (size of CDB=504):
+	** arcmsr messenger scsi command descriptor size 504 bytes
+	*/
+	uint32_t			cdb_shifted_phyaddr;
+	/* 504-507 */
+	uint32_t			reserved1;
+	/* 508-511 */
+#if BITS_PER_LONG == 64
+	/*  ======================512+64 bytes========================  */
+	struct list_head		list;
+	/* 512-527 16 bytes next/prev ptrs for ccb lists */
+	struct scsi_cmnd *		pcmd;
+	/* 528-535 8 bytes pointer of linux scsi command */
+	struct AdapterControlBlock *	acb;
+	/* 536-543 8 bytes pointer of acb */
+
+	uint16_t			ccb_flags;
+	/* 544-545 */
+	#define		CCB_FLAG_READ			0x0000
+	#define		CCB_FLAG_WRITE			0x0001
+	#define		CCB_FLAG_ERROR			0x0002
+	#define		CCB_FLAG_FLUSHCACHE		0x0004
+	#define		CCB_FLAG_MASTER_ABORTED		0x0008
+	uint16_t			startdone;
+	/* 546-547 */
+	#define		ARCMSR_CCB_DONE			0x0000
+	#define		ARCMSR_CCB_START		0x55AA
+	#define		ARCMSR_CCB_ABORTED		0xAA55
+	#define		ARCMSR_CCB_ILLEGAL		0xFFFF
+	uint32_t			reserved2[7];
+	/* 548-551 552-555 556-559 560-563 564-567 568-571 572-575 */
+#else
+	/*  ======================512+32 bytes========================  */
+	struct list_head		list;
+	/* 512-519 8 bytes next/prev ptrs for ccb lists */
+	struct scsi_cmnd *		pcmd;
+	/* 520-523 4 bytes pointer of linux scsi command */
+	struct AdapterControlBlock *	acb;
+	/* 524-527 4 bytes pointer of acb */
+
+	uint16_t			ccb_flags;
+	/* 528-529 */
+	#define		CCB_FLAG_READ			0x0000
+	#define		CCB_FLAG_WRITE			0x0001
+	#define		CCB_FLAG_ERROR			0x0002
+	#define		CCB_FLAG_FLUSHCACHE		0x0004
+	#define		CCB_FLAG_MASTER_ABORTED		0x0008
+	uint16_t			startdone;
+	/* 530-531 */
+	#define		ARCMSR_CCB_DONE			0x0000
+	#define		ARCMSR_CCB_START		0x55AA
+	#define		ARCMSR_CCB_ABORTED		0xAA55
+	#define		ARCMSR_CCB_ILLEGAL		0xFFFF
+	uint32_t			reserved2[3];
+	/* 532-535 536-539 540-543 */
+#endif
+	/*  ==========================================================  */
+};
+/*
+*******************************************************************************
+**    ARECA SCSI sense data
+*******************************************************************************
+*/
+struct SENSE_DATA
+{
+	uint8_t				ErrorCode:7;
+#define SCSI_SENSE_CURRENT_ERRORS	0x70
+#define SCSI_SENSE_DEFERRED_ERRORS	0x71
+	uint8_t				Valid:1;
+	uint8_t				SegmentNumber;
+	uint8_t				SenseKey:4;
+	uint8_t				Reserved:1;
+	uint8_t				IncorrectLength:1;
+	uint8_t				EndOfMedia:1;
+	uint8_t				FileMark:1;
+	uint8_t				Information[4];
+	uint8_t				AdditionalSenseLength;
+	uint8_t				CommandSpecificInformation[4];
+	uint8_t				AdditionalSenseCode;
+	uint8_t				AdditionalSenseCodeQualifier;
+	uint8_t				FieldReplaceableUnitCode;
+	uint8_t				SenseKeySpecific[3];
+};
+/*
+*******************************************************************************
+**  Outbound Interrupt Status Register - OISR
+*******************************************************************************
+*/
+#define     ARCMSR_MU_OUTBOUND_INTERRUPT_STATUS_REG                 0x30
+#define     ARCMSR_MU_OUTBOUND_PCI_INT                              0x10
+#define     ARCMSR_MU_OUTBOUND_POSTQUEUE_INT                        0x08
+#define     ARCMSR_MU_OUTBOUND_DOORBELL_INT                         0x04
+#define     ARCMSR_MU_OUTBOUND_MESSAGE1_INT                         0x02
+#define     ARCMSR_MU_OUTBOUND_MESSAGE0_INT                         0x01
+#define     ARCMSR_MU_OUTBOUND_HANDLE_INT                 \
+                    (ARCMSR_MU_OUTBOUND_MESSAGE0_INT      \
+                     |ARCMSR_MU_OUTBOUND_MESSAGE1_INT     \
+                     |ARCMSR_MU_OUTBOUND_DOORBELL_INT     \
+                     |ARCMSR_MU_OUTBOUND_POSTQUEUE_INT    \
+                     |ARCMSR_MU_OUTBOUND_PCI_INT)
+/*
+*******************************************************************************
+**  Outbound Interrupt Mask Register - OIMR
+*******************************************************************************
+*/
+#define     ARCMSR_MU_OUTBOUND_INTERRUPT_MASK_REG                   0x34
+#define     ARCMSR_MU_OUTBOUND_PCI_INTMASKENABLE                    0x10
+#define     ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE              0x08
+#define     ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE               0x04
+#define     ARCMSR_MU_OUTBOUND_MESSAGE1_INTMASKENABLE               0x02
+#define     ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE               0x01
+#define     ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE                    0x1F
+
+extern void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb);
+extern struct class_device_attribute *arcmsr_host_attrs[];
+extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb);
+void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb);
+
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
new file mode 100644
index 0000000000000..0459f4194d7c6
--- /dev/null
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -0,0 +1,392 @@
+/*
+*******************************************************************************
+**        O.S   : Linux
+**   FILE NAME  : arcmsr_attr.c
+**        BY    : Erich Chen
+**   Description: attributes exported to sysfs and device host
+*******************************************************************************
+** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved
+**
+**     Web site: www.areca.com.tw
+**       E-mail: erich@areca.com.tw
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License version 2 as
+** published by the Free Software Foundation.
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*******************************************************************************
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions
+** are met:
+** 1. Redistributions of source code must retain the above copyright
+**    notice, this list of conditions and the following disclaimer.
+** 2. Redistributions in binary form must reproduce the above copyright
+**    notice, this list of conditions and the following disclaimer in the
+**    documentation and/or other materials provided with the distribution.
+** 3. The name of the author may not be used to endorse or promote products
+**    derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT
+** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
+** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************
+** For history of changes, see Documentation/scsi/ChangeLog.arcmsr
+**     Firmware Specification, see Documentation/scsi/arcmsr_spec.txt
+*******************************************************************************
+*/
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport.h>
+#include "arcmsr.h"
+
+struct class_device_attribute *arcmsr_host_attrs[];
+
+/*
+ * sysfs "mu_read": drain acb->rqbuffer into buf, at most min(count, 1031)
+ * bytes, then refill from the IOP's QBUFFER if an overflow was flagged.
+ * Returns the byte count copied, or -EACCES without CAP_SYS_ADMIN.
+ */
+static ssize_t
+arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off,
+    size_t count)
+{
+	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	struct MessageUnit __iomem *reg = acb->pmu;
+	uint8_t *ptmpQbuffer = (uint8_t *)buf;
+	int32_t max_len = (count < 1031) ? (int32_t)count : 1031;
+	int32_t allxfer_len = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex)
+		&& (allxfer_len < max_len)) {
+		ptmpQbuffer[allxfer_len++] = acb->rqbuffer[acb->rqbuf_firstindex];
+		acb->rqbuf_firstindex++;
+		acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+	}
+	if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+		struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *)
+					&reg->message_rbuffer;
+		uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data;
+		int32_t iop_len;
+
+		acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+		iop_len = readl(&prbuffer->data_len);
+		while (iop_len > 0) {
+			acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data);
+			acb->rqbuf_lastindex++;
+			acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+			iop_data++;
+			iop_len--;
+		}
+		writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+	}
+	return (allxfer_len);
+}
+
+/*
+ * sysfs "mu_write": queue up to 1032 bytes from buf into acb->wqbuffer.
+ * Returns count when queued, 0 when the caller must retry, -EACCES without
+ * CAP_SYS_ADMIN, or -EINVAL when count is larger than 1032.
+ */
+static ssize_t
+arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off,
+    size_t count)
+{
+	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	const uint8_t *src = (uint8_t *)buf;
+	int32_t head, tail, room, remaining;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (count > 1032)
+		return -EINVAL;
+
+	remaining = (int32_t)count;
+	tail = acb->wqbuf_lastindex;
+	head = acb->wqbuf_firstindex;
+	if (tail != head) {
+		/* earlier data is still queued: push it out first */
+		arcmsr_post_Qbuffer(acb);
+		return 0;	/*need retry*/
+	}
+
+	/* bytes of free space left in the write ring */
+	room = (head - tail - 1) & (ARCMSR_MAX_QBUFFER - 1);
+	if (room < remaining)
+		return 0;	/*need retry*/
+
+	for (; remaining > 0; remaining--) {
+		acb->wqbuffer[acb->wqbuf_lastindex] = *src++;
+		acb->wqbuf_lastindex++;
+		acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+	}
+	/* post the queue only when the WQBUFFER_CLEARED flag was set */
+	if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) {
+		acb->acb_flags &= ~ACB_F_MESSAGE_WQBUFFER_CLEARED;
+		arcmsr_post_Qbuffer(acb);
+	}
+	return count;
+}
+
+/*
+ * sysfs "mu_clear": reset both message rings and mark them cleared.
+ * Only the first sizeof(struct QBUFFER) bytes of each ring are zeroed.
+ * Returns 1, or -EACCES without CAP_SYS_ADMIN.
+ */
+static ssize_t
+arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off,
+    size_t count)
+{
+	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	struct MessageUnit __iomem *reg = acb->pmu;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+		acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+		writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+	}
+	acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_CLEARED
+			| ACB_F_MESSAGE_RQBUFFER_CLEARED
+			| ACB_F_MESSAGE_WQBUFFER_READED;
+	acb->rqbuf_firstindex = 0;
+	acb->rqbuf_lastindex = 0;
+	acb->wqbuf_firstindex = 0;
+	acb->wqbuf_lastindex = 0;
+	memset(acb->rqbuffer, 0, sizeof (struct QBUFFER));
+	memset(acb->wqbuffer, 0, sizeof (struct QBUFFER));
+	return 1;
+}
+
+static struct bin_attribute arcmsr_sysfs_message_read_attr = {
+	.attr = {
+		.name = "mu_read",
+		.mode = S_IRUSR ,
+		.owner = THIS_MODULE,
+	},
+	.size = 1032,
+	.read = arcmsr_sysfs_iop_message_read,
+};
+
+static struct bin_attribute arcmsr_sysfs_message_write_attr = {
+	.attr = {
+		.name = "mu_write",
+		.mode = S_IWUSR,
+		.owner = THIS_MODULE,
+	},
+	.size = 1032,
+	.write = arcmsr_sysfs_iop_message_write,
+};
+
+static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
+	.attr = {
+		.name = "mu_clear",
+		.mode = S_IWUSR,
+		.owner = THIS_MODULE,
+	},
+	.size = 1,
+	.write = arcmsr_sysfs_iop_message_clear,
+};
+
+/*
+ * Create the mu_read, mu_write and mu_clear binary sysfs files on the host.
+ * Returns 0 on success. On failure the files created so far are removed and
+ * the sysfs_create_bin_file() error is returned; the status of the cleanup
+ * removals is only logged, so it can no longer overwrite that error.
+ */
+int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb)
+{
+	struct kobject *kobj = &acb->host->shost_classdev.kobj;
+	int error;
+
+	error = sysfs_create_bin_file(kobj, &arcmsr_sysfs_message_read_attr);
+	if (error) {
+		printk(KERN_ERR "arcmsr: alloc sysfs mu_read failed\n");
+		goto error_bin_file_message_read;
+	}
+	error = sysfs_create_bin_file(kobj, &arcmsr_sysfs_message_write_attr);
+	if (error) {
+		printk(KERN_ERR "arcmsr: alloc sysfs mu_write failed\n");
+		goto error_bin_file_message_write;
+	}
+	error = sysfs_create_bin_file(kobj, &arcmsr_sysfs_message_clear_attr);
+	if (error) {
+		printk(KERN_ERR "arcmsr: alloc sysfs mu_clear failed\n");
+		goto error_bin_file_message_clear;
+	}
+	return 0;
+	/* unwind in reverse order of creation */
+error_bin_file_message_clear:
+	if (sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_write_attr))
+		printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_write failed\n");
+error_bin_file_message_write:
+	if (sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_read_attr))
+		printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_read failed\n");
+error_bin_file_message_read:
+	return error;
+}
+
+/*
+ * Remove the mu_clear, mu_write and mu_read sysfs files; failures are logged.
+ */
+void
+arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb) {
+	struct kobject *kobj = &acb->host->shost_classdev.kobj;
+	int error;
+
+	error = sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_clear_attr);
+	if (error)
+		printk(KERN_ERR "arcmsr: free sysfs mu_clear failed\n");
+	error = sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_write_attr);
+	if (error)
+		printk(KERN_ERR "arcmsr: free sysfs mu_write failed\n");
+	error = sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_read_attr);
+	if (error)
+		printk(KERN_ERR "arcmsr: free sysfs mu_read failed\n");
+}
+
+
+/* sysfs show: the compiled-in ARCMSR_DRIVER_VERSION string. */
+static ssize_t
+arcmsr_attr_host_driver_version(struct class_device *cdev, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "ARCMSR: %s\n", ARCMSR_DRIVER_VERSION);
+}
+
+/* sysfs show: value of the acb->ccboutstandingcount atomic counter. */
+static ssize_t
+arcmsr_attr_host_driver_posted_cmd(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) shost->hostdata;
+	return snprintf(buf, PAGE_SIZE, "Current commands posted:     %4d\n",
+			atomic_read(&acb->ccboutstandingcount));
+}
+
+/* sysfs show: acb->num_resets is uint32_t, so it is printed with %u. */
+static ssize_t
+arcmsr_attr_host_driver_reset(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	return snprintf(buf, PAGE_SIZE, "SCSI Host Resets:            %4u\n",
+			acb->num_resets);
+}
+
+/* sysfs show: acb->num_aborts is uint32_t, so it is printed with %u. */
+static ssize_t
+arcmsr_attr_host_driver_abort(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	return snprintf(buf, PAGE_SIZE, "SCSI Aborts/Timeouts:        %4u\n",
+			acb->num_aborts);
+}
+
+/* sysfs show: firmware model string stored in acb->firm_model. */
+static ssize_t
+arcmsr_attr_host_fw_model(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) shost->hostdata;
+	return snprintf(buf, PAGE_SIZE, "Adapter Model: %s\n",
+			acb->firm_model);
+}
+
+/* sysfs show: firmware version string stored in acb->firm_version. */
+static ssize_t
+arcmsr_attr_host_fw_version(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *shost = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) shost->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "Firmware Version:  %s\n",
+			acb->firm_version);
+}
+
+/* sysfs show: acb->firm_request_len (uint32_t, hence %u); label spelling fixed. */
+static ssize_t
+arcmsr_attr_host_fw_request_len(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "Request Length: %4u\n",
+			acb->firm_request_len);
+}
+
+/* sysfs show: acb->firm_numbers_queue is uint32_t, so it is printed with %u. */
+static ssize_t
+arcmsr_attr_host_fw_numbers_queue(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "Numbers of Queue: %4u\n",
+			acb->firm_numbers_queue);
+}
+
+/* sysfs show: acb->firm_sdram_size is uint32_t, so it is printed with %u. */
+static ssize_t
+arcmsr_attr_host_fw_sdram_size(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "SDRAM Size: %4u\n",
+			acb->firm_sdram_size);
+}
+
+/* sysfs show: acb->firm_hd_channels is uint32_t, so it is printed with %u. */
+static ssize_t
+arcmsr_attr_host_fw_hd_channels(struct class_device *cdev, char *buf) {
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "Hard Disk Channels: %4u\n",
+			acb->firm_hd_channels);
+}
+
+static CLASS_DEVICE_ATTR(host_driver_version, S_IRUGO, arcmsr_attr_host_driver_version, NULL);
+static CLASS_DEVICE_ATTR(host_driver_posted_cmd, S_IRUGO, arcmsr_attr_host_driver_posted_cmd, NULL);
+static CLASS_DEVICE_ATTR(host_driver_reset, S_IRUGO, arcmsr_attr_host_driver_reset, NULL);
+static CLASS_DEVICE_ATTR(host_driver_abort, S_IRUGO, arcmsr_attr_host_driver_abort, NULL);
+static CLASS_DEVICE_ATTR(host_fw_model, S_IRUGO, arcmsr_attr_host_fw_model, NULL);
+static CLASS_DEVICE_ATTR(host_fw_version, S_IRUGO, arcmsr_attr_host_fw_version, NULL);
+static CLASS_DEVICE_ATTR(host_fw_request_len, S_IRUGO, arcmsr_attr_host_fw_request_len, NULL);
+static CLASS_DEVICE_ATTR(host_fw_numbers_queue, S_IRUGO, arcmsr_attr_host_fw_numbers_queue, NULL);
+static CLASS_DEVICE_ATTR(host_fw_sdram_size, S_IRUGO, arcmsr_attr_host_fw_sdram_size, NULL);
+static CLASS_DEVICE_ATTR(host_fw_hd_channels, S_IRUGO, arcmsr_attr_host_fw_hd_channels, NULL);
+
+struct class_device_attribute *arcmsr_host_attrs[] = {
+	&class_device_attr_host_driver_version,
+	&class_device_attr_host_driver_posted_cmd,
+	&class_device_attr_host_driver_reset,
+	&class_device_attr_host_driver_abort,
+	&class_device_attr_host_fw_model,
+	&class_device_attr_host_fw_version,
+	&class_device_attr_host_fw_request_len,
+	&class_device_attr_host_fw_numbers_queue,
+	&class_device_attr_host_fw_sdram_size,
+	&class_device_attr_host_fw_hd_channels,
+	NULL,
+};
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
new file mode 100644
index 0000000000000..475f978ff8f0d
--- /dev/null
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -0,0 +1,1496 @@
+/*
+*******************************************************************************
+**        O.S   : Linux
+**   FILE NAME  : arcmsr_hba.c
+**        BY    : Erich Chen
+**   Description: SCSI RAID Device Driver for
+**                ARECA RAID Host adapter
+*******************************************************************************
+** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved
+**
+**     Web site: www.areca.com.tw
+**       E-mail: erich@areca.com.tw
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License version 2 as
+** published by the Free Software Foundation.
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*******************************************************************************
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions
+** are met:
+** 1. Redistributions of source code must retain the above copyright
+**    notice, this list of conditions and the following disclaimer.
+** 2. Redistributions in binary form must reproduce the above copyright
+**    notice, this list of conditions and the following disclaimer in the
+**    documentation and/or other materials provided with the distribution.
+** 3. The name of the author may not be used to endorse or promote products
+**    derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT
+** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
+** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************
+** For history of changes, see Documentation/scsi/ChangeLog.arcmsr
+**     Firmware Specification, see Documentation/scsi/arcmsr_spec.txt
+*******************************************************************************
+*/
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/spinlock.h>
+#include <linux/pci_ids.h>
+#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsicam.h>
+#include "arcmsr.h"
+
+MODULE_AUTHOR("Erich Chen <erich@areca.com.tw>");
+MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ARCMSR_DRIVER_VERSION);
+
+static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd);
+static int arcmsr_abort(struct scsi_cmnd *);
+static int arcmsr_bus_reset(struct scsi_cmnd *);
+static int arcmsr_bios_param(struct scsi_device *sdev,
+				struct block_device *bdev, sector_t capacity, int *info);
+static int arcmsr_queue_command(struct scsi_cmnd * cmd,
+				void (*done) (struct scsi_cmnd *));
+static int arcmsr_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id);
+static void arcmsr_remove(struct pci_dev *pdev);
+static void arcmsr_shutdown(struct pci_dev *pdev);
+static void arcmsr_iop_init(struct AdapterControlBlock *acb);
+static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb);
+static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb);
+static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb);
+static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb);
+static const char *arcmsr_info(struct Scsi_Host *);
+static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb);
+
+/* Clamp the requested depth to ARCMSR_MAX_CMD_PERLUN, apply it, and return it. */
+static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth)
+{
+	int depth = queue_depth > ARCMSR_MAX_CMD_PERLUN ? ARCMSR_MAX_CMD_PERLUN : queue_depth;
+	scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth);
+	return depth;
+}
+
+static struct scsi_host_template arcmsr_scsi_host_template = {	/* passed to scsi_host_alloc() in arcmsr_probe() */
+	.module			= THIS_MODULE,
+	.name			= "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION,
+	.info			= arcmsr_info,
+	.queuecommand		= arcmsr_queue_command,
+	.eh_abort_handler	= arcmsr_abort,
+	.eh_bus_reset_handler	= arcmsr_bus_reset,
+	.bios_param		= arcmsr_bios_param,
+	.change_queue_depth	= arcmsr_adjust_disk_queue_depth,
+	.can_queue		= ARCMSR_MAX_OUTSTANDING_CMD,	/* arcmsr_probe() sets host->can_queue to ARCMSR_MAX_FREECCB_NUM */
+	.this_id		= ARCMSR_SCSI_INITIATOR_ID,
+	.sg_tablesize		= ARCMSR_MAX_SG_ENTRIES,
+	.max_sectors    	= ARCMSR_MAX_XFER_SECTORS,
+	.cmd_per_lun		= ARCMSR_MAX_CMD_PERLUN,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.shost_attrs		= arcmsr_host_attrs,	/* sysfs attribute list */
+};
+
+static struct pci_device_id arcmsr_device_id_table[] = {	/* id_table of arcmsr_pci_driver */
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1120)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1130)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1160)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1170)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1210)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1220)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1230)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1260)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1270)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1280)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1380)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1381)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1680)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1681)},
+	{0, 0}, /* Terminating entry */
+};
+MODULE_DEVICE_TABLE(pci, arcmsr_device_id_table);	/* export the ID table in the module's device table */
+static struct pci_driver arcmsr_pci_driver = {	/* registered in arcmsr_module_init() */
+	.name			= "arcmsr",
+	.id_table		= arcmsr_device_id_table,
+	.probe			= arcmsr_probe,
+	.remove			= arcmsr_remove,
+	.shutdown		= arcmsr_shutdown	/* stops background rebuild and flushes the cache */
+};
+
+/* IRQ entry point: run arcmsr_interrupt() for dev_id while holding the host lock. */
+static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id,
+	struct pt_regs *regs)
+{
+	struct AdapterControlBlock *pacb = (struct AdapterControlBlock *)dev_id;
+	struct Scsi_Host *shost = pacb->host;
+	unsigned long irq_flags;
+	irqreturn_t rc;
+
+	spin_lock_irqsave(shost->host_lock, irq_flags);
+	rc = arcmsr_interrupt(pacb);
+	spin_unlock_irqrestore(shost->host_lock, irq_flags);
+	return rc;
+}
+
+/* Fill geom[] with {heads, sectors, cylinders}: use the partition table's
+ * geometry when scsi_partsize() finds one, else 64/32, or 255/63 past 1024 cyl. */
+static int arcmsr_bios_param(struct scsi_device *sdev,
+		struct block_device *bdev, sector_t capacity, int *geom)
+{
+	int ret, heads = 64, sectors = 32;
+	sector_t cylinders = capacity;	/* stay sector_t: an int truncates >= 2^31 sectors */
+	unsigned char *buffer = scsi_bios_ptable(bdev);	/* copy of the partition table */
+
+	if (buffer) {
+		ret = scsi_partsize(buffer, capacity, &geom[2], &geom[0], &geom[1]);
+		kfree(buffer);
+		if (ret != -1)
+			return ret;
+	}
+	sector_div(cylinders, heads * sectors);	/* leaves the quotient in cylinders */
+	if (cylinders > 1024) {
+		heads = 255;
+		sectors = 63;
+		cylinders = capacity;
+		sector_div(cylinders, heads * sectors);
+	}
+	geom[0] = heads;
+	geom[1] = sectors;
+	geom[2] = cylinders;
+	return 0;
+}
+
+static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
+{
+	struct pci_dev *pdev = acb->pdev;
+	struct MessageUnit __iomem *reg = acb->pmu;
+	u32 ccb_phyaddr_hi32;
+	void *dma_coherent;
+	dma_addr_t dma_coherent_handle, dma_addr;
+	struct CommandControlBlock *ccb_tmp;
+	int i, j;
+	/* Build the CCB pool in one coherent region (+0x20 alignment slack); returns 0 or -ENOMEM. */
+	dma_coherent = dma_alloc_coherent(&pdev->dev,
+			ARCMSR_MAX_FREECCB_NUM *
+			sizeof (struct CommandControlBlock) + 0x20,
+			&dma_coherent_handle, GFP_KERNEL);
+	if (!dma_coherent)
+		return -ENOMEM;
+	/* keep the unaligned pointer/handle: arcmsr_free_ccb_pool() frees with these */
+	acb->dma_coherent = dma_coherent;
+	acb->dma_coherent_handle = dma_coherent_handle;
+	/* round the CPU pointer and the bus handle up to a 32-byte boundary */
+	if (((unsigned long)dma_coherent & 0x1F)) {
+		dma_coherent = dma_coherent +
+			(0x20 - ((unsigned long)dma_coherent & 0x1F));
+		dma_coherent_handle = dma_coherent_handle +
+			(0x20 - ((unsigned long)dma_coherent_handle & 0x1F));
+	}
+	/* carve the region into CCBs; each one records its bus address >> 5 */
+	dma_addr = dma_coherent_handle;
+	ccb_tmp = (struct CommandControlBlock *)dma_coherent;
+	for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+		ccb_tmp->cdb_shifted_phyaddr = dma_addr >> 5;
+		ccb_tmp->acb = acb;
+		acb->pccb_pool[i] = ccb_tmp;
+		list_add_tail(&ccb_tmp->list, &acb->ccb_free_list);
+		dma_addr = dma_addr + sizeof (struct CommandControlBlock);
+		ccb_tmp++;
+	}
+	/* CPU-minus-bus delta; arcmsr_interrupt() adds it to a reply address to find the CCB */
+	acb->vir2phy_offset = (unsigned long)ccb_tmp -
+			      (unsigned long)dma_addr;
+	for (i = 0; i < ARCMSR_MAX_TARGETID; i++)
+		for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++)
+			acb->devstate[i][j] = ARECA_RAID_GOOD;
+
+	/*
+	** Tell the iop 331 the upper 32 bits of the CCB pool's bus address,
+	** but only when those bits are not zero.
+	*/
+	ccb_phyaddr_hi32 = (uint32_t) ((dma_coherent_handle >> 16) >> 16);
+	if (ccb_phyaddr_hi32 != 0) {
+		writel(ARCMSR_SIGNATURE_SET_CONFIG, &reg->message_rwbuffer[0]);
+		writel(ccb_phyaddr_hi32, &reg->message_rwbuffer[1]);
+		writel(ARCMSR_INBOUND_MESG0_SET_CONFIG, &reg->inbound_msgaddr0);
+		if (arcmsr_wait_msgint_ready(acb))
+			printk(KERN_NOTICE "arcmsr%d: "
+			       "'set ccb high part physical address' timeout\n",
+				acb->host->host_no);
+	}
+	/* set every outbound interrupt-mask bit before returning */
+	writel(readl(&reg->outbound_intmask) |
+			ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,
+	       &reg->outbound_intmask);
+	return 0;
+}
+
+/* PCI probe: bring up one adapter and register its SCSI host; returns 0 or a negative errno. */
+static int arcmsr_probe(struct pci_dev *pdev,
+	const struct pci_device_id *id)
+{
+	struct Scsi_Host *host;
+	struct AdapterControlBlock *acb;
+	int error;
+
+	error = pci_enable_device(pdev);
+	if (error)
+		goto out;
+	pci_set_master(pdev);
+
+	host = scsi_host_alloc(&arcmsr_scsi_host_template,
+			sizeof(struct AdapterControlBlock));
+	if (!host) {
+		error = -ENOMEM;
+		goto out_disable_device;
+	}
+	acb = (struct AdapterControlBlock *)host->hostdata;
+	memset(acb, 0, sizeof (struct AdapterControlBlock));
+
+	error = pci_set_dma_mask(pdev, DMA_64BIT_MASK);	/* prefer 64-bit DMA, fall back to 32-bit */
+	if (error) {
+		error = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+		if (error) {
+			printk(KERN_WARNING
+			       "scsi%d: No suitable DMA mask available\n",
+			       host->host_no);
+			goto out_host_put;
+		}
+	}
+	acb->host = host;
+	acb->pdev = pdev;
+	host->max_sectors = ARCMSR_MAX_XFER_SECTORS;
+	host->max_lun = ARCMSR_MAX_TARGETLUN;
+	host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/
+	host->max_cmd_len = 16;    /*this is issue of 64bit LBA, over 2T byte*/
+	host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES;
+	host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */
+	host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN;
+	host->this_id = ARCMSR_SCSI_INITIATOR_ID;
+	host->unique_id = (pdev->bus->number << 8) | (uint8_t)pdev->devfn;	/* bus:devfn */
+	host->irq = pdev->irq;
+	error = pci_request_regions(pdev, "arcmsr");
+	if (error)
+		goto out_host_put;
+
+	acb->pmu = ioremap(pci_resource_start(pdev, 0),
+			   pci_resource_len(pdev, 0));
+	if (!acb->pmu) {
+		printk(KERN_NOTICE "arcmsr%d: memory"
+			" mapping region fail \n", acb->host->host_no);
+		error = -ENOMEM;	/* error is still 0 here; without this probe would report success */
+		goto out_release_regions;
+	}
+	acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED |
+			   ACB_F_MESSAGE_RQBUFFER_CLEARED |
+			   ACB_F_MESSAGE_WQBUFFER_READED);
+	acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
+	INIT_LIST_HEAD(&acb->ccb_free_list);
+
+	error = arcmsr_alloc_ccb_pool(acb);
+	if (error)
+		goto out_iounmap;
+
+	error = request_irq(pdev->irq, arcmsr_do_interrupt,
+			SA_INTERRUPT | SA_SHIRQ, "arcmsr", acb);
+	if (error)
+		goto out_free_ccb_pool;
+
+	arcmsr_iop_init(acb);
+	pci_set_drvdata(pdev, host);
+
+	error = scsi_add_host(host, &pdev->dev);
+	if (error)
+		goto out_free_irq;
+
+	error = arcmsr_alloc_sysfs_attr(acb);
+	if (error)
+		goto out_free_sysfs;
+
+	scsi_scan_host(host);
+	return 0;
+ out_free_sysfs:
+	scsi_remove_host(host);	/* undo the successful scsi_add_host() above */
+ out_free_irq:
+	free_irq(pdev->irq, acb);
+ out_free_ccb_pool:
+	arcmsr_free_ccb_pool(acb);
+ out_iounmap:
+	iounmap(acb->pmu);
+ out_release_regions:
+	pci_release_regions(pdev);
+ out_host_put:
+	scsi_host_put(host);
+ out_disable_device:
+	pci_disable_device(pdev);
+ out:
+	return error;
+}
+
+/* Post the ABORT_CMD message to the IOP; log if the acknowledge wait times out. */
+static void arcmsr_abort_allcmd(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+
+	writel(ARCMSR_INBOUND_MESG0_ABORT_CMD, &mu->inbound_msgaddr0);
+	if (arcmsr_wait_msgint_ready(acb) != 0)
+		printk(KERN_NOTICE "arcmsr%d: wait 'abort all outstanding command' timeout \n",
+			acb->host->host_no);
+}
+
+/* Undo the DMA mapping of ccb->pcmd: scatterlist, single buffer, or nothing. */
+static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb)
+{
+	struct scsi_cmnd *cmd = ccb->pcmd;
+	struct pci_dev *pdev = ccb->acb->pdev;
+
+	if (cmd->use_sg) {
+		pci_unmap_sg(pdev, (struct scatterlist *)cmd->request_buffer,
+			cmd->use_sg, cmd->sc_data_direction);
+		return;
+	}
+	if (!cmd->request_bufflen)
+		return;
+	pci_unmap_single(pdev, cmd->SCp.dma_handle,
+		cmd->request_bufflen, cmd->sc_data_direction);
+}
+
+static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag)
+{
+	struct AdapterControlBlock *acb = ccb->acb;
+	struct scsi_cmnd *pcmd = ccb->pcmd;
+	/* Finish one CCB: unmap its DMA, recycle it, then call the command's scsi_done. */
+	arcmsr_pci_unmap_dma(ccb);
+	if (stand_flag == 1)	/* only then was the CCB counted as outstanding */
+		atomic_dec(&acb->ccboutstandingcount);
+	ccb->startdone = ARCMSR_CCB_DONE;
+	ccb->ccb_flags = 0;
+	list_add_tail(&ccb->list, &acb->ccb_free_list);	/* back on the free list before completion is reported */
+	pcmd->scsi_done(pcmd);
+}
+
+static void arcmsr_remove(struct pci_dev *pdev)
+{
+	struct Scsi_Host *host = pci_get_drvdata(pdev);
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *) host->hostdata;
+	struct MessageUnit __iomem *reg = acb->pmu;
+	int poll_count = 0;
+	/* PCI remove: unregister the host, quiesce the adapter, finish or abort CCBs, free resources. */
+	arcmsr_free_sysfs_attr(acb);
+	scsi_remove_host(host);
+	arcmsr_stop_adapter_bgrb(acb);
+	arcmsr_flush_adapter_cache(acb);
+	writel(readl(&reg->outbound_intmask) |
+		ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,
+		&reg->outbound_intmask);
+	acb->acb_flags |= ACB_F_SCSISTOPADAPTER;
+	acb->acb_flags &= ~ACB_F_IOP_INITED;
+	/* interrupt mask bits are set above, so poll completions by hand: up to 256 passes, 25 ms apart */
+	for (poll_count = 0; poll_count < 256; poll_count++) {
+		if (!atomic_read(&acb->ccboutstandingcount))
+			break;
+		arcmsr_interrupt(acb);
+		msleep(25);
+	}
+	/* anything still outstanding is aborted and completed with DID_ABORT */
+	if (atomic_read(&acb->ccboutstandingcount)) {
+		int i;
+
+		arcmsr_abort_allcmd(acb);
+		for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++)
+			readl(&reg->outbound_queueport);	/* read and discard queued replies */
+		for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+			struct CommandControlBlock *ccb = acb->pccb_pool[i];
+			if (ccb->startdone == ARCMSR_CCB_START) {
+				ccb->startdone = ARCMSR_CCB_ABORTED;
+				ccb->pcmd->result = DID_ABORT << 16;
+				arcmsr_ccb_complete(ccb, 1);
+			}
+		}
+	}
+
+	free_irq(pdev->irq, acb);
+	iounmap(acb->pmu);
+	arcmsr_free_ccb_pool(acb);
+	pci_release_regions(pdev);
+
+	scsi_host_put(host);	/* acb lives in host->hostdata: do not touch it after this */
+
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+/* PCI shutdown hook: stop the background rebuild, then flush the adapter cache. */
+static void arcmsr_shutdown(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct AdapterControlBlock *pacb;
+	pacb = (struct AdapterControlBlock *)shost->hostdata;
+	arcmsr_stop_adapter_bgrb(pacb);
+	arcmsr_flush_adapter_cache(pacb);
+}
+
+/*
+ * Module load/unload hooks: register the arcmsr PCI driver (passing its status back) and unregister it.
+ */
+static int arcmsr_module_init(void)
+{
+	return pci_register_driver(&arcmsr_pci_driver);
+}
+
+static void arcmsr_module_exit(void)
+{
+	pci_unregister_driver(&arcmsr_pci_driver);
+}
+module_init(arcmsr_module_init);
+module_exit(arcmsr_module_exit);
+
+/* Set all outbound interrupt-mask bits; return the mask value read beforehand. */
+static u32 arcmsr_disable_outbound_ints(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	u32 saved = readl(&mu->outbound_intmask);
+
+	writel(saved | ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, &mu->outbound_intmask);
+	return saved;
+}
+
+/* Write orig_mask back with the post-queue and doorbell mask bits cleared. */
+static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb,
+		u32 orig_mask)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	u32 unmask = ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE |
+		     ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE;
+
+	writel(orig_mask & ~unmask, &mu->outbound_intmask);
+}
+
+/* Post the FLUSH_CACHE message to the IOP; log if the acknowledge wait times out. */
+static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+
+	writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, &mu->inbound_msgaddr0);
+	if (arcmsr_wait_msgint_ready(acb) != 0)
+		printk(KERN_NOTICE "arcmsr%d: wait 'flush adapter cache' timeout \n",
+			acb->host->host_no);
+}
+
+/* Set result to DID_OK and copy the CCB's sense data into the command's sense buffer. */
+static void arcmsr_report_sense_info(struct CommandControlBlock *ccb)
+{
+	struct scsi_cmnd *cmd = ccb->pcmd;
+	struct SENSE_DATA *sense = (struct SENSE_DATA *)cmd->sense_buffer;
+	int copy_len = sizeof (struct SENSE_DATA);
+	cmd->result = DID_OK << 16;
+	if (!sense)
+		return;
+	if (sizeof (cmd->sense_buffer) <= sizeof (struct SENSE_DATA))
+		copy_len = sizeof (cmd->sense_buffer);	/* copy the smaller of the two sizes */
+	memset(sense, 0, sizeof (cmd->sense_buffer));
+	memcpy(sense, ccb->arcmsr_cdb.SenseData, copy_len);
+	sense->ErrorCode = SCSI_SENSE_CURRENT_ERRORS;
+	sense->Valid = 1;
+}
+
+/* Poll for the MESSAGE0 outbound interrupt bit and acknowledge it when seen.
+ * Returns 0x00 once acknowledged, 0xff if it never shows up. */
+static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	int round, poll;
+
+	/* 21 rounds of 100 polls, sleeping 10 ms between polls */
+	for (round = 0; round <= 20; round++) {
+		for (poll = 0; poll < 100; poll++) {
+			if (readl(&mu->outbound_intstatus) & ARCMSR_MU_OUTBOUND_MESSAGE0_INT) {
+				writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT, &mu->outbound_intstatus);
+				return 0x00;
+			}
+			msleep_interruptible(10);
+		}
+	}
+	return 0xff;
+}
+
+static void arcmsr_build_ccb(struct AdapterControlBlock *acb,
+	struct CommandControlBlock *ccb, struct scsi_cmnd *pcmd)
+{
+	struct ARCMSR_CDB *arcmsr_cdb = (struct ARCMSR_CDB *)&ccb->arcmsr_cdb;
+	int8_t *psge = (int8_t *)&arcmsr_cdb->u;
+	uint32_t address_lo, address_hi;
+	int arccdbsize = 0x30;	/* running CDB size: starts at 0x30, grows with each SG entry */
+	/* Fill ccb's ARCMSR_CDB from pcmd and DMA-map its data buffer into the CDB's SG entries. */
+	ccb->pcmd = pcmd;
+	memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB));
+	arcmsr_cdb->Bus = 0;
+	arcmsr_cdb->TargetID = pcmd->device->id;
+	arcmsr_cdb->LUN = pcmd->device->lun;
+	arcmsr_cdb->Function = 1;
+	arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len;
+	arcmsr_cdb->Context = (unsigned long)arcmsr_cdb;
+	memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len);
+	if (pcmd->use_sg) {
+		int length, sgcount, i, cdb_sgcount = 0;
+		struct scatterlist *sl;
+
+		/* With use_sg set, request_buffer holds the scatterlist. */
+		sl = (struct scatterlist *) pcmd->request_buffer;
+		sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg,
+				pcmd->sc_data_direction);	/* NOTE(review): a zero return (mapping failure) is not checked */
+		/* Translate each mapped segment into one IOP SG entry. */
+		for (i = 0; i < sgcount; i++) {
+			/* 32-bit entry when the high address word is zero, else a 64-bit entry */
+			length = cpu_to_le32(sg_dma_len(sl));	/* NOTE(review): IS_SG64_ADDR is OR-ed into this swapped value below - confirm on big-endian */
+			address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl)));
+			address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl)));
+			if (address_hi == 0) {
+				struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+
+				pdma_sg->address = address_lo;
+				pdma_sg->length = length;
+				psge += sizeof (struct SG32ENTRY);
+				arccdbsize += sizeof (struct SG32ENTRY);
+			} else {
+				struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge;
+
+				pdma_sg->addresshigh = address_hi;
+				pdma_sg->address = address_lo;
+				pdma_sg->length = length|IS_SG64_ADDR;
+				psge += sizeof (struct SG64ENTRY);
+				arccdbsize += sizeof (struct SG64ENTRY);
+			}
+			sl++;
+			cdb_sgcount++;
+		}
+		arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount;
+		arcmsr_cdb->DataLength = pcmd->request_bufflen;
+		if ( arccdbsize > 256)	/* arcmsr_post_ccb() tests this flag when posting */
+			arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE;
+	} else if (pcmd->request_bufflen) {
+		dma_addr_t dma_addr;
+		dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer,
+				pcmd->request_bufflen, pcmd->sc_data_direction);
+		pcmd->SCp.dma_handle = dma_addr;	/* arcmsr_pci_unmap_dma() unmaps with this handle */
+		address_lo = cpu_to_le32(dma_addr_lo32(dma_addr));
+		address_hi = cpu_to_le32(dma_addr_hi32(dma_addr));
+		if (address_hi == 0) {
+			struct  SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+			pdma_sg->address = address_lo;
+			pdma_sg->length = pcmd->request_bufflen;	/* NOTE(review): no cpu_to_le32 here, unlike the SG path - confirm */
+		} else {
+			struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge;
+			pdma_sg->addresshigh = address_hi;
+			pdma_sg->address = address_lo;
+			pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR;
+		}
+		arcmsr_cdb->sgcount = 1;
+		arcmsr_cdb->DataLength = pcmd->request_bufflen;
+	}
+	if (pcmd->sc_data_direction == DMA_TO_DEVICE ) {
+		arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE;
+		ccb->ccb_flags |= CCB_FLAG_WRITE;
+	}
+}
+
+/* Count the CCB as outstanding, mark it started, and write its shifted
+ * bus address to the inbound queue port. */
+static void arcmsr_post_ccb(struct AdapterControlBlock *acb, struct CommandControlBlock *ccb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	uint32_t post_word = ccb->cdb_shifted_phyaddr;
+
+	atomic_inc(&acb->ccboutstandingcount);
+	ccb->startdone = ARCMSR_CCB_START;
+	/* carry the CDB's SGL_BSIZE flag over into the posted word */
+	if (ccb->arcmsr_cdb.Flags & ARCMSR_CDB_FLAG_SGL_BSIZE)
+		post_word |= ARCMSR_CCBPOST_FLAG_SGL_BSIZE;
+	writel(post_word, &mu->inbound_queueport);
+}
+
+/*
+ * When ACB_F_MESSAGE_WQBUFFER_READED is set, clear it, copy up to 124 queued bytes from
+ * acb->wqbuffer into the IOP's message_wbuffer, store the count and ring the write-ok doorbell.
+ */
+void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	struct QBUFFER __iomem *wbuf = (struct QBUFFER __iomem *) &mu->message_wbuffer;
+	uint8_t __iomem *dst = (uint8_t __iomem *) wbuf->data;
+	int32_t sent;
+
+	if (!(acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_READED))
+		return;
+	acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED);
+	for (sent = 0; acb->wqbuf_firstindex != acb->wqbuf_lastindex && sent < 124; sent++) {
+		writeb(acb->wqbuffer[acb->wqbuf_firstindex], dst++);
+		acb->wqbuf_firstindex++;
+		acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+	}
+	writel(sent, &wbuf->data_len);
+	writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK, &mu->inbound_doorbell);
+}
+
+/* Clear ACB_F_MSG_START_BGRB and post STOP_BGRB; log if the acknowledge wait times out. */
+static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+
+	acb->acb_flags &= ~ACB_F_MSG_START_BGRB;
+	writel(ARCMSR_INBOUND_MESG0_STOP_BGRB, &mu->inbound_msgaddr0);
+	if (arcmsr_wait_msgint_ready(acb) != 0)
+		printk(KERN_NOTICE "arcmsr%d: wait 'stop adapter background rebulid' timeout \n",
+			acb->host->host_no);
+}
+
+/* Free the coherent CCB region using the size, pointer and handle saved at allocation. */
+static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb)
+{
+	size_t pool_bytes = ARCMSR_MAX_FREECCB_NUM * sizeof (struct CommandControlBlock) + 0x20;
+
+	dma_free_coherent(&acb->pdev->dev, pool_bytes, acb->dma_coherent, acb->dma_coherent_handle);
+}
+
+/*
+ * Service one adapter interrupt: acknowledge the enabled status bits, move
+ * message-buffer data on doorbell events, and complete every CCB the IOP has
+ * posted to the outbound queue. Returns IRQ_NONE if no handled bit was set.
+ */
+static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	struct CommandControlBlock *ccb;
+	uint32_t flag_ccb, outbound_intstatus, outbound_doorbell;
+
+	outbound_intstatus = readl(&reg->outbound_intstatus)
+		& acb->outbound_int_enable;
+	writel(outbound_intstatus, &reg->outbound_intstatus);
+	if (outbound_intstatus & ARCMSR_MU_OUTBOUND_DOORBELL_INT) {
+		outbound_doorbell = readl(&reg->outbound_doorbell);
+		writel(outbound_doorbell, &reg->outbound_doorbell);
+		if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK) {
+			struct QBUFFER __iomem * prbuffer =
+				(struct QBUFFER __iomem *) &reg->message_rbuffer;
+			uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data;
+			int32_t my_empty_len, iop_len, rqbuf_firstindex, rqbuf_lastindex;
+
+			rqbuf_lastindex = acb->rqbuf_lastindex;
+			rqbuf_firstindex = acb->rqbuf_firstindex;
+			iop_len = readl(&prbuffer->data_len);
+			my_empty_len = (rqbuf_firstindex - rqbuf_lastindex - 1)
+					&(ARCMSR_MAX_QBUFFER - 1);
+			if (my_empty_len >= iop_len) {
+				while (iop_len > 0) {
+					acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data);
+					acb->rqbuf_lastindex++;
+					acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+					iop_data++;
+					iop_len--;
+				}
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,
+					&reg->inbound_doorbell);
+			} else
+				acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW;
+		}
+		if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_READ_OK) {
+			acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_READED;
+			if (acb->wqbuf_firstindex != acb->wqbuf_lastindex) {
+				struct QBUFFER __iomem * pwbuffer =
+						(struct QBUFFER __iomem *) &reg->message_wbuffer;
+				uint8_t __iomem * iop_data = (uint8_t __iomem *) pwbuffer->data;
+				int32_t allxfer_len = 0;
+
+				acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED);
+				while ((acb->wqbuf_firstindex != acb->wqbuf_lastindex)
+					&& (allxfer_len < 124)) {
+					writeb(acb->wqbuffer[acb->wqbuf_firstindex], iop_data);
+					acb->wqbuf_firstindex++;
+					acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+					iop_data++;
+					allxfer_len++;
+				}
+				writel(allxfer_len, &pwbuffer->data_len);
+				writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK,
+					&reg->inbound_doorbell);
+			}
+			if (acb->wqbuf_firstindex == acb->wqbuf_lastindex)
+				acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_CLEARED;
+		}
+	}
+	if (outbound_intstatus & ARCMSR_MU_OUTBOUND_POSTQUEUE_INT) {
+		int id, lun;
+		/* areca cdb command done: drain the reply FIFO */
+		while (1) {
+			if ((flag_ccb = readl(&reg->outbound_queueport)) == 0xFFFFFFFF)
+				break;/*chip FIFO no ccb for completion already*/
+			/* the reply word carries the CCB bus address >> 5; map it back
+			 * to the driver's CCB pointer through vir2phy_offset */
+			ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
+				(flag_ccb << 5));
+			if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) {
+				if (ccb->startdone == ARCMSR_CCB_ABORTED) {
+					struct scsi_cmnd *abortcmd=ccb->pcmd;
+					if (abortcmd) {
+						/* host byte sits at bit 16: shift left (>> 16 gave 0) */
+						abortcmd->result |= DID_ABORT << 16;
+						arcmsr_ccb_complete(ccb, 1);
+						printk(KERN_NOTICE "arcmsr%d: ccb='0x%p' isr got aborted command \n",
+							acb->host->host_no, ccb);
+					}
+					continue;
+				}
+				/* not a started CCB of this adapter and not aborted: report and skip */
+				printk(KERN_NOTICE
+					"arcmsr%d: isr get an illegal ccb command done acb='0x%p'"
+					"ccb='0x%p' ccbacb='0x%p' startdone = 0x%x"
+					" ccboutstandingcount=%d \n"
+					, acb->host->host_no, acb, ccb, ccb->acb
+					, ccb->startdone
+					, atomic_read(&acb->ccboutstandingcount));
+				continue;
+			}
+			id = ccb->pcmd->device->id;
+			lun = ccb->pcmd->device->lun;
+			if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
+				if (acb->devstate[id][lun] == ARECA_RAID_GONE)
+					acb->devstate[id][lun] = ARECA_RAID_GOOD;
+				ccb->pcmd->result = DID_OK << 16;
+				arcmsr_ccb_complete(ccb, 1);
+			} else {
+				switch(ccb->arcmsr_cdb.DeviceStatus) {
+				case ARCMSR_DEV_SELECT_TIMEOUT: {
+						acb->devstate[id][lun] = ARECA_RAID_GONE;
+						ccb->pcmd->result = DID_TIME_OUT << 16;
+						arcmsr_ccb_complete(ccb, 1);
+					}
+					break;
+				case ARCMSR_DEV_ABORTED:
+				case ARCMSR_DEV_INIT_FAIL: {
+						acb->devstate[id][lun] = ARECA_RAID_GONE;
+						ccb->pcmd->result = DID_BAD_TARGET << 16;
+						arcmsr_ccb_complete(ccb, 1);
+					}
+					break;
+				case ARCMSR_DEV_CHECK_CONDITION: {
+						acb->devstate[id][lun] = ARECA_RAID_GOOD;
+						arcmsr_report_sense_info(ccb);
+						arcmsr_ccb_complete(ccb, 1);
+					}
+					break;
+				default:
+					printk(KERN_NOTICE
+						"arcmsr%d: scsi id=%d lun=%d"
+						" isr get command error done,"
+						"but got unknown DeviceStatus = 0x%x \n"
+						, acb->host->host_no
+						, id
+						, lun
+						, ccb->arcmsr_cdb.DeviceStatus);
+					acb->devstate[id][lun] = ARECA_RAID_GONE;
+					ccb->pcmd->result = DID_NO_CONNECT << 16;
+					arcmsr_ccb_complete(ccb, 1);
+					break;
+				}
+			}
+		}/*drain reply FIFO*/
+	}
+	if (!(outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT))
+		return IRQ_NONE;
+	return IRQ_HANDLED;
+}
+
+/* If the START_BGRB flag is set, clear it, stop the background rebuild and flush the cache. */
+static void arcmsr_iop_parking(struct AdapterControlBlock *acb)
+{
+	if (!acb)
+		return;
+	if (!(acb->acb_flags & ACB_F_MSG_START_BGRB))
+		return;
+	acb->acb_flags &= ~ACB_F_MSG_START_BGRB;
+	arcmsr_stop_adapter_bgrb(acb);
+	arcmsr_flush_adapter_cache(acb);
+}
+
+static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	struct CMD_MESSAGE_FIELD *pcmdmessagefld;
+	int retvalue = 0, transfer_len = 0;
+	char *buffer;
+	uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 |
+						(uint32_t ) cmd->cmnd[6] << 16 |
+						(uint32_t ) cmd->cmnd[7] << 8  |
+						(uint32_t ) cmd->cmnd[8];
+					/* 4 bytes: Areca io control code */
+	if (cmd->use_sg) {
+		struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer;
+
+		buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+		if (cmd->use_sg > 1) {
+			retvalue = ARCMSR_MESSAGE_FAIL;
+			goto message_out;
+		}
+		transfer_len += sg->length;
+	} else {
+		buffer = cmd->request_buffer;
+		transfer_len = cmd->request_bufflen;
+	}
+	if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) {
+		retvalue = ARCMSR_MESSAGE_FAIL;
+		goto message_out;
+	}
+	pcmdmessagefld = (struct CMD_MESSAGE_FIELD *) buffer;
+	switch(controlcode) {
+	case ARCMSR_MESSAGE_READ_RQBUFFER: {
+			unsigned long *ver_addr;
+			dma_addr_t buf_handle;
+			uint8_t *pQbuffer, *ptmpQbuffer;
+			int32_t allxfer_len = 0;
+
+			ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle);
+			if (!ver_addr) {
+				retvalue = ARCMSR_MESSAGE_FAIL;
+				goto message_out;
+			}
+			ptmpQbuffer = (uint8_t *) ver_addr;
+			while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex)
+				&& (allxfer_len < 1031)) {
+				pQbuffer = &acb->rqbuffer[acb->rqbuf_firstindex];
+				memcpy(ptmpQbuffer, pQbuffer, 1);
+				acb->rqbuf_firstindex++;
+				acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+				ptmpQbuffer++;
+				allxfer_len++;
+			}
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *)
+							&reg->message_rbuffer;
+				uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data;
+				int32_t iop_len;
+
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				iop_len = readl(&prbuffer->data_len);
+				while (iop_len > 0) {
+					acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data);
+					acb->rqbuf_lastindex++;
+					acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+					iop_data++;
+					iop_len--;
+				}
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,
+						&reg->inbound_doorbell);
+			}
+			memcpy(pcmdmessagefld->messagedatabuffer,
+				(uint8_t *)ver_addr, allxfer_len);
+			pcmdmessagefld->cmdmessage.Length = allxfer_len;
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+			pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle);
+		}
+		break;
+	case ARCMSR_MESSAGE_WRITE_WQBUFFER: {
+			unsigned long *ver_addr;
+			dma_addr_t buf_handle;
+			int32_t my_empty_len, user_len, wqbuf_firstindex, wqbuf_lastindex;
+			uint8_t *pQbuffer, *ptmpuserbuffer;
+
+			ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle);
+			if (!ver_addr) {
+				retvalue = ARCMSR_MESSAGE_FAIL;
+				goto message_out;
+			}
+			ptmpuserbuffer = (uint8_t *)ver_addr;
+			user_len = pcmdmessagefld->cmdmessage.Length;
+			memcpy(ptmpuserbuffer, pcmdmessagefld->messagedatabuffer, user_len);
+			wqbuf_lastindex = acb->wqbuf_lastindex;
+			wqbuf_firstindex = acb->wqbuf_firstindex;
+			if (wqbuf_lastindex != wqbuf_firstindex) {
+				struct SENSE_DATA *sensebuffer =
+					(struct SENSE_DATA *)cmd->sense_buffer;
+				arcmsr_post_Qbuffer(acb);
+				/* has error report sensedata */
+				sensebuffer->ErrorCode = 0x70;
+				sensebuffer->SenseKey = ILLEGAL_REQUEST;
+				sensebuffer->AdditionalSenseLength = 0x0A;
+				sensebuffer->AdditionalSenseCode = 0x20;
+				sensebuffer->Valid = 1;
+				retvalue = ARCMSR_MESSAGE_FAIL;
+			} else {
+				my_empty_len = (wqbuf_firstindex-wqbuf_lastindex - 1)
+						&(ARCMSR_MAX_QBUFFER - 1);
+				if (my_empty_len >= user_len) {
+					while (user_len > 0) {
+						pQbuffer =
+						&acb->wqbuffer[acb->wqbuf_lastindex];
+						memcpy(pQbuffer, ptmpuserbuffer, 1);
+						acb->wqbuf_lastindex++;
+						acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+						ptmpuserbuffer++;
+						user_len--;
+					}
+					if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) {
+						acb->acb_flags &=
+							~ACB_F_MESSAGE_WQBUFFER_CLEARED;
+						arcmsr_post_Qbuffer(acb);
+					}
+				} else {
+					/* has error report sensedata */
+					struct SENSE_DATA *sensebuffer =
+						(struct SENSE_DATA *)cmd->sense_buffer;
+					sensebuffer->ErrorCode = 0x70;
+					sensebuffer->SenseKey = ILLEGAL_REQUEST;
+					sensebuffer->AdditionalSenseLength = 0x0A;
+					sensebuffer->AdditionalSenseCode = 0x20;
+					sensebuffer->Valid = 1;
+					retvalue = ARCMSR_MESSAGE_FAIL;
+				}
+			}
+			pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle);
+		}
+		break;
+	case ARCMSR_MESSAGE_CLEAR_RQBUFFER: {
+			uint8_t *pQbuffer = acb->rqbuffer;
+
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,
+					&reg->inbound_doorbell);
+			}
+			acb->acb_flags |= ACB_F_MESSAGE_RQBUFFER_CLEARED;
+			acb->rqbuf_firstindex = 0;
+			acb->rqbuf_lastindex = 0;
+			memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER);
+			pcmdmessagefld->cmdmessage.ReturnCode =
+				ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_CLEAR_WQBUFFER: {
+			uint8_t *pQbuffer = acb->wqbuffer;
+
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK
+						, &reg->inbound_doorbell);
+			}
+			acb->acb_flags |=
+				(ACB_F_MESSAGE_WQBUFFER_CLEARED |
+					ACB_F_MESSAGE_WQBUFFER_READED);
+			acb->wqbuf_firstindex = 0;
+			acb->wqbuf_lastindex = 0;
+			memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER);
+			pcmdmessagefld->cmdmessage.ReturnCode =
+				ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_CLEAR_ALLQBUFFER: {
+			uint8_t *pQbuffer;
+
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK
+						, &reg->inbound_doorbell);
+			}
+			acb->acb_flags |=
+				(ACB_F_MESSAGE_WQBUFFER_CLEARED
+				| ACB_F_MESSAGE_RQBUFFER_CLEARED
+				| ACB_F_MESSAGE_WQBUFFER_READED);
+			acb->rqbuf_firstindex = 0;
+			acb->rqbuf_lastindex = 0;
+			acb->wqbuf_firstindex = 0;
+			acb->wqbuf_lastindex = 0;
+			pQbuffer = acb->rqbuffer;
+			memset(pQbuffer, 0, sizeof (struct QBUFFER));
+			pQbuffer = acb->wqbuffer;
+			memset(pQbuffer, 0, sizeof (struct QBUFFER));
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_RETURN_CODE_3F: {
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_3F;
+		}
+		break;
+	case ARCMSR_MESSAGE_SAY_HELLO: {
+			int8_t * hello_string = "Hello! I am ARCMSR";
+
+			memcpy(pcmdmessagefld->messagedatabuffer, hello_string
+				, (int16_t)strlen(hello_string));
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_SAY_GOODBYE:
+		arcmsr_iop_parking(acb);
+		break;
+	case ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE:
+		arcmsr_flush_adapter_cache(acb);
+		break;
+	default:
+		retvalue = ARCMSR_MESSAGE_FAIL;
+	}
+ message_out:
+	if (cmd->use_sg) {
+		struct scatterlist *sg;
+
+		sg = (struct scatterlist *) cmd->request_buffer;
+		kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+	}
+	return retvalue;
+}
+
+/* Take the first CCB off acb->ccb_free_list; returns NULL when the list is empty. */
+static struct CommandControlBlock *arcmsr_get_freeccb(struct AdapterControlBlock *acb)
+{
+	struct CommandControlBlock *free_ccb;
+
+	if (list_empty(&acb->ccb_free_list))
+		return NULL;
+	free_ccb = list_entry(acb->ccb_free_list.next, struct CommandControlBlock, list);
+	list_del(&free_ccb->list);
+	return free_ccb;
+}
+
+/* Serve a command addressed to the driver's virtual IOP message-transfer device. */
+static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb,
+		struct scsi_cmnd *cmd)
+{
+	switch (cmd->cmnd[0]) {
+	case INQUIRY: {
+		/* zeroed so bytes not set below never expose stale stack data */
+		unsigned char inqdata[36] = { 0 };
+		struct scatterlist *sg = NULL;
+		char *buffer;
+
+		if (cmd->device->lun) {
+			cmd->result = (DID_TIME_OUT << 16);
+			cmd->scsi_done(cmd);
+			return;
+		}
+		/* Periph Qualifier & Periph Dev Type */
+		inqdata[0] = TYPE_PROCESSOR;
+		/* rem media bit & Dev Type Modifier */
+		inqdata[1] = 0;
+		/* ISO,ECMA,& ANSI versions */
+		inqdata[2] = 0;
+		/* length of additional data */
+		inqdata[4] = 31;
+		/* Vendor Identification */
+		strncpy(&inqdata[8], "Areca   ", 8);
+		/* Product Identification */
+		strncpy(&inqdata[16], "RAID controller ", 16);
+		/* Product Revision */
+		strncpy(&inqdata[32], "R001", 4);
+		if (cmd->use_sg) {
+			/* the reply is written through the first scatterlist entry */
+			sg = (struct scatterlist *) cmd->request_buffer;
+			buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+		} else {
+			buffer = cmd->request_buffer;
+		}
+		memcpy(buffer, inqdata, sizeof(inqdata));
+		if (sg)
+			kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+		cmd->scsi_done(cmd);
+	}
+	break;
+	case WRITE_BUFFER:
+	case READ_BUFFER: {
+		/* a non-zero return from the message transfer is reported as DID_ERROR */
+		if (arcmsr_iop_message_xfer(acb, cmd))
+			cmd->result = (DID_ERROR << 16);
+		cmd->scsi_done(cmd);
+	}
+	break;
+	default:
+		cmd->scsi_done(cmd);
+	}
+}
+/* Queue @cmd: virtual target 16 is served inline, all others are posted as CCBs. */
+static int arcmsr_queue_command(struct scsi_cmnd *cmd,
+	void (* done)(struct scsi_cmnd *))
+{
+	struct Scsi_Host *host = cmd->device->host;
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *) host->hostdata;
+	struct CommandControlBlock *ccb;
+	int target = cmd->device->id;
+	int lun = cmd->device->lun;
+
+	cmd->scsi_done = done;	/* completion callback, invoked as cmd->scsi_done() */
+	cmd->host_scribble = NULL;
+	cmd->result = 0;
+	if (acb->acb_flags & ACB_F_BUS_RESET) {	/* set while arcmsr_bus_reset() runs */
+		printk(KERN_NOTICE "arcmsr%d: bus reset"
+			" and return busy \n"
+			, acb->host->host_no);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
+	if(target == 16) {
+		/* virtual device for iop message transfer */
+		arcmsr_handle_virtual_command(acb, cmd);
+		return 0;
+	}
+	if (acb->devstate[target][lun] == ARECA_RAID_GONE) {
+		uint8_t block_cmd;
+
+		block_cmd = cmd->cmnd[0] & 0x0f;	/* low nibble of the opcode */
+		if (block_cmd == 0x08 || block_cmd == 0x0a) {	/* read/write, per the log text */
+			printk(KERN_NOTICE
+				"arcmsr%d: block 'read/write'"
+				"command with gone raid volume"
+				" Cmd=%2x, TargetId=%d, Lun=%d \n"
+				, acb->host->host_no
+				, cmd->cmnd[0]
+				, target, lun);
+			cmd->result = (DID_NO_CONNECT << 16);
+			cmd->scsi_done(cmd);
+			return 0;
+		}
+	}
+	if (atomic_read(&acb->ccboutstandingcount) >=
+			ARCMSR_MAX_OUTSTANDING_CMD)
+		return SCSI_MLQUEUE_HOST_BUSY;	/* outstanding count is at its limit */
+
+	ccb = arcmsr_get_freeccb(acb);
+	if (!ccb)
+		return SCSI_MLQUEUE_HOST_BUSY;	/* free list is empty */
+	arcmsr_build_ccb(acb, ccb, cmd);
+	arcmsr_post_ccb(acb, ccb);
+	return 0;
+}
+/* Send GET_CONFIG to the IOP and copy model, version and limits into @acb. */
+static void arcmsr_get_firmware_spec(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	char *acb_firm_model = acb->firm_model;
+	char *acb_firm_version = acb->firm_version;
+	char __iomem *iop_firm_model = (char __iomem *) &reg->message_rwbuffer[15];
+	char __iomem *iop_firm_version = (char __iomem *) &reg->message_rwbuffer[17];
+	int count;
+
+	writel(ARCMSR_INBOUND_MESG0_GET_CONFIG, &reg->inbound_msgaddr0);
+	if (arcmsr_wait_msgint_ready(acb))	/* non-zero is only logged; the reads below still run */
+		printk(KERN_NOTICE
+			"arcmsr%d: wait "
+			"'get adapter firmware miscellaneous data' timeout \n"
+			, acb->host->host_no);
+	count = 8;	/* model: 8 bytes, read one at a time from message_rwbuffer[15] */
+	while (count) {
+		*acb_firm_model = readb(iop_firm_model);
+		acb_firm_model++;
+		iop_firm_model++;
+		count--;
+	}
+	count = 16;	/* version: 16 bytes, read one at a time from message_rwbuffer[17] */
+	while (count) {
+		*acb_firm_version = readb(iop_firm_version);
+		acb_firm_version++;
+		iop_firm_version++;
+		count--;
+	}
+	printk(KERN_INFO	/* NOTE(review): %s needs firm_version NUL-terminated - confirm */
+		"ARECA RAID ADAPTER%d: FIRMWARE VERSION %s \n"
+		, acb->host->host_no
+		, acb->firm_version);
+	acb->firm_request_len = readl(&reg->message_rwbuffer[1]);
+	acb->firm_numbers_queue = readl(&reg->message_rwbuffer[2]);
+	acb->firm_sdram_size = readl(&reg->message_rwbuffer[3]);
+	acb->firm_hd_channels = readl(&reg->message_rwbuffer[4]);
+}
+/* Drain the outbound queue by polling, completing CCBs, while aborting @poll_ccb. */
+static void arcmsr_polling_ccbdone(struct AdapterControlBlock *acb,
+	struct CommandControlBlock *poll_ccb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	struct CommandControlBlock *ccb;
+	uint32_t flag_ccb, outbound_intstatus, poll_ccb_done = 0, poll_count = 0;
+	int id, lun;
+
+ polling_ccb_retry:
+	poll_count++;
+	outbound_intstatus = readl(&reg->outbound_intstatus)
+					& acb->outbound_int_enable;
+	writel(outbound_intstatus, &reg->outbound_intstatus);/*clear interrupt*/
+	while (1) {
+		if ((flag_ccb = readl(&reg->outbound_queueport)) == 0xFFFFFFFF) {
+			if (poll_ccb_done)	/* 0xFFFFFFFF is treated as "no more replies" */
+				break;
+			else {
+				msleep(25);
+				if (poll_count > 100)	/* give up after about 100 passes */
+					break;
+				goto polling_ccb_retry;
+			}
+		}
+		ccb = (struct CommandControlBlock *)	/* presumably flag_ccb is the CCB address >> 5 */
+			(acb->vir2phy_offset + (flag_ccb << 5));
+		if ((ccb->acb != acb) ||	/* not a CCB this adapter has in the started state */
+			(ccb->startdone != ARCMSR_CCB_START)) {
+			if ((ccb->startdone == ARCMSR_CCB_ABORTED) ||
+				(ccb == poll_ccb)) {
+				printk(KERN_NOTICE
+					"arcmsr%d: scsi id=%d lun=%d ccb='0x%p'"
+					" poll command abort successfully \n"
+					, acb->host->host_no
+					, ccb->pcmd->device->id
+					, ccb->pcmd->device->lun
+					, ccb);
+				ccb->pcmd->result = DID_ABORT << 16;
+				arcmsr_ccb_complete(ccb, 1);
+				poll_ccb_done = 1;	/* lets the loop exit once the queue reads empty */
+				continue;
+			}
+			printk(KERN_NOTICE
+				"arcmsr%d: polling get an illegal ccb"
+				" command done ccb='0x%p'"
+				"ccboutstandingcount=%d \n"
+				, acb->host->host_no
+				, ccb
+				, atomic_read(&acb->ccboutstandingcount));
+			continue;	/* unexpected CCB: logged and skipped, not completed */
+		}
+		id = ccb->pcmd->device->id;
+		lun = ccb->pcmd->device->lun;
+		if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
+			if (acb->devstate[id][lun] == ARECA_RAID_GONE)	/* a good reply revives the volume */
+				acb->devstate[id][lun] = ARECA_RAID_GOOD;
+			ccb->pcmd->result = DID_OK << 16;
+			arcmsr_ccb_complete(ccb, 1);
+		} else {
+			switch(ccb->arcmsr_cdb.DeviceStatus) {	/* map the device status to a SCSI result */
+			case ARCMSR_DEV_SELECT_TIMEOUT: {
+					acb->devstate[id][lun] = ARECA_RAID_GONE;
+					ccb->pcmd->result = DID_TIME_OUT << 16;
+					arcmsr_ccb_complete(ccb, 1);
+				}
+				break;
+			case ARCMSR_DEV_ABORTED:
+			case ARCMSR_DEV_INIT_FAIL: {
+					acb->devstate[id][lun] = ARECA_RAID_GONE;
+					ccb->pcmd->result = DID_BAD_TARGET << 16;
+					arcmsr_ccb_complete(ccb, 1);
+				}
+				break;
+			case ARCMSR_DEV_CHECK_CONDITION: {
+					acb->devstate[id][lun] = ARECA_RAID_GOOD;
+					arcmsr_report_sense_info(ccb);
+					arcmsr_ccb_complete(ccb, 1);
+				}
+				break;
+			default:	/* unknown status is handled like a failed target */
+				printk(KERN_NOTICE
+					"arcmsr%d: scsi id=%d lun=%d"
+					" polling and getting command error done"
+					"but got unknown DeviceStatus = 0x%x \n"
+					, acb->host->host_no
+					, id
+					, lun
+					, ccb->arcmsr_cdb.DeviceStatus);
+				acb->devstate[id][lun] = ARECA_RAID_GONE;
+				ccb->pcmd->result = DID_BAD_TARGET << 16;
+				arcmsr_ccb_complete(ccb, 1);
+				break;
+			}
+		}
+	}
+}
+/* Wait for firmware OK, read its spec, start background rebuild, unmask ints. */
+static void arcmsr_iop_init(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	uint32_t intmask_org, mask, outbound_doorbell, firmware_state = 0;
+
+	do {	/* NOTE(review): spins with no timeout until the firmware-OK bit is set */
+		firmware_state = readl(&reg->outbound_msgaddr1);
+	} while (!(firmware_state & ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK));
+	intmask_org = readl(&reg->outbound_intmask)
+			| ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE;
+	arcmsr_get_firmware_spec(acb);
+
+	acb->acb_flags |= ACB_F_MSG_START_BGRB;
+	writel(ARCMSR_INBOUND_MESG0_START_BGRB, &reg->inbound_msgaddr0);
+	if (arcmsr_wait_msgint_ready(acb)) {	/* non-zero is only logged; init continues */
+		printk(KERN_NOTICE "arcmsr%d: "
+			"wait 'start adapter background rebulid' timeout\n",
+			acb->host->host_no);
+	}
+
+	outbound_doorbell = readl(&reg->outbound_doorbell);
+	writel(outbound_doorbell, &reg->outbound_doorbell);	/* write back what was read; presumably an ack */
+	writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+	mask = ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE
+			| ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE);
+	writel(intmask_org & mask, &reg->outbound_intmask);	/* clear the post-queue and doorbell mask bits */
+	acb->outbound_int_enable = ~(intmask_org & mask) & 0x000000ff;	/* low byte of the inverted mask */
+	acb->acb_flags |= ACB_F_IOP_INITED;
+}
+/* Abort all outstanding commands, drain the reply queue, complete stale CCBs. */
+static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	struct CommandControlBlock *ccb;
+	uint32_t intmask_org;
+	int i = 0;
+
+	if (atomic_read(&acb->ccboutstandingcount) != 0) {
+		/* tell the iop 331 to abort all outstanding commands */
+		arcmsr_abort_allcmd(acb);
+		/* sleep up to 3 sec (interruptible) while the commands are aborted */
+		msleep_interruptible(3000);
+		/* disable all outbound interrupts, keeping the previous mask */
+		intmask_org = arcmsr_disable_outbound_ints(acb);
+		/* clear the outbound posted queue by reading and discarding entries */
+		for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++)
+			readl(&reg->outbound_queueport);
+		for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+			ccb = acb->pccb_pool[i];
+			if ((ccb->startdone == ARCMSR_CCB_START) ||	/* still started or already aborted */
+				(ccb->startdone == ARCMSR_CCB_ABORTED)) {
+				ccb->startdone = ARCMSR_CCB_ABORTED;
+				ccb->pcmd->result = DID_ABORT << 16;
+				arcmsr_ccb_complete(ccb, 1);
+			}
+		}
+		/* re-enable outbound interrupts using the saved mask */
+		arcmsr_enable_outbound_ints(acb, intmask_org);
+	}
+	atomic_set(&acb->ccboutstandingcount, 0);	/* reset the count on every call */
+}
+/* Bus reset: poll until commands drain (at most 400 x 25 ms), then reset the IOP. */
+static int arcmsr_bus_reset(struct scsi_cmnd *cmd)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)cmd->device->host->hostdata;
+	int i;
+
+	acb->num_resets++;
+	acb->acb_flags |= ACB_F_BUS_RESET;	/* arcmsr_queue_command() returns busy while set */
+	for (i = 0; i < 400; i++) {
+		if (!atomic_read(&acb->ccboutstandingcount))
+			break;
+		arcmsr_interrupt(acb);	/* call the interrupt handler directly on each pass */
+		msleep(25);
+	}
+	arcmsr_iop_reset(acb);
+	acb->acb_flags &= ~ACB_F_BUS_RESET;
+	return SUCCESS;	/* always reports success */
+}
+/* Mark @ccb aborted, wait, then poll for its completion with outbound ints off. */
+static void arcmsr_abort_one_cmd(struct AdapterControlBlock *acb,
+		struct CommandControlBlock *ccb)
+{
+	u32 intmask;
+
+	ccb->startdone = ARCMSR_CCB_ABORTED;
+
+	/*
+	** Sleep up to 3 sec (interruptible) to give the command time to finish.
+	*/
+	msleep_interruptible(3000);
+
+	intmask = arcmsr_disable_outbound_ints(acb);	/* keep the old mask for the restore below */
+	arcmsr_polling_ccbdone(acb, ccb);
+	arcmsr_enable_outbound_ints(acb, intmask);
+}
+
+/* Abort @cmd if a started CCB in the pool still carries it; always returns SUCCESS. */
+static int arcmsr_abort(struct scsi_cmnd *cmd)
+{
+	struct Scsi_Host *host = cmd->device->host;
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *) host->hostdata;
+	struct CommandControlBlock *ccb;
+	int slot;
+
+	printk(KERN_NOTICE
+		"arcmsr%d: abort device command of scsi id=%d lun=%d \n",
+		acb->host->host_no, cmd->device->id, cmd->device->lun);
+	acb->num_aborts++;
+
+	/*
+	 * Interrupt service is locked out while we are here, so finish
+	 * quickly: with nothing outstanding there is nothing to abort.
+	 */
+	if (atomic_read(&acb->ccboutstandingcount) == 0)
+		return SUCCESS;
+
+	for (slot = 0; slot < ARCMSR_MAX_FREECCB_NUM; slot++) {
+		ccb = acb->pccb_pool[slot];
+		if (ccb->startdone != ARCMSR_CCB_START || ccb->pcmd != cmd)
+			continue;
+		arcmsr_abort_one_cmd(acb, ccb);
+		break;
+	}
+	return SUCCESS;
+}
+
+/* Build the adapter description: bus type, RAID6 capability and driver version. */
+static const char *arcmsr_info(struct Scsi_Host *host)
+{
+	struct AdapterControlBlock *adapter =
+		(struct AdapterControlBlock *) host->hostdata;
+	static char buf[256];	/* static: this buffer is what gets returned */
+	const char *raid6_note = "( RAID6 capable)";
+	char *bus_type;
+
+	switch (adapter->pdev->device) {
+	case PCI_DEVICE_ID_ARECA_1110:
+	case PCI_DEVICE_ID_ARECA_1210:
+		raid6_note = "";	/* the only two models not reported as RAID6 capable */
+		/*FALLTHRU*/
+	case PCI_DEVICE_ID_ARECA_1120:
+	case PCI_DEVICE_ID_ARECA_1130:
+	case PCI_DEVICE_ID_ARECA_1160:
+	case PCI_DEVICE_ID_ARECA_1170:
+	case PCI_DEVICE_ID_ARECA_1220:
+	case PCI_DEVICE_ID_ARECA_1230:
+	case PCI_DEVICE_ID_ARECA_1260:
+	case PCI_DEVICE_ID_ARECA_1270:
+	case PCI_DEVICE_ID_ARECA_1280:
+		bus_type = "SATA";
+		break;
+	case PCI_DEVICE_ID_ARECA_1380:
+	case PCI_DEVICE_ID_ARECA_1381:
+	case PCI_DEVICE_ID_ARECA_1680:
+	case PCI_DEVICE_ID_ARECA_1681:
+		bus_type = "SAS";
+		break;
+	default:
+		bus_type = "X-TYPE";
+		break;
+	}
+	sprintf(buf, "Areca %s Host Adapter RAID Controller%s\n        %s",
+			bus_type, raid6_note, ARCMSR_DRIVER_VERSION);
+	return buf;
+}
+
+
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c09396d2c77b6..df7b62676d875 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2004,6 +2004,23 @@
 #define PCI_DEVICE_ID_ALTIMA_AC9100	0x03ea
 #define PCI_DEVICE_ID_ALTIMA_AC1003	0x03eb
 
+#define PCI_VENDOR_ID_ARECA		0x17d3
+#define PCI_DEVICE_ID_ARECA_1110	0x1110
+#define PCI_DEVICE_ID_ARECA_1120	0x1120
+#define PCI_DEVICE_ID_ARECA_1130	0x1130
+#define PCI_DEVICE_ID_ARECA_1160	0x1160
+#define PCI_DEVICE_ID_ARECA_1170	0x1170
+#define PCI_DEVICE_ID_ARECA_1210	0x1210
+#define PCI_DEVICE_ID_ARECA_1220	0x1220
+#define PCI_DEVICE_ID_ARECA_1230	0x1230
+#define PCI_DEVICE_ID_ARECA_1260	0x1260
+#define PCI_DEVICE_ID_ARECA_1270	0x1270
+#define PCI_DEVICE_ID_ARECA_1280	0x1280
+#define PCI_DEVICE_ID_ARECA_1380	0x1380
+#define PCI_DEVICE_ID_ARECA_1381	0x1381
+#define PCI_DEVICE_ID_ARECA_1680	0x1680
+#define PCI_DEVICE_ID_ARECA_1681	0x1681
+
 #define PCI_VENDOR_ID_S2IO		0x17d5
 #define	PCI_DEVICE_ID_S2IO_WIN		0x5731
 #define	PCI_DEVICE_ID_S2IO_UNI		0x5831
-- 
GitLab


From 4288b92b9644fdb4c6168273873fe08f32090d7a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 8 Jul 2006 22:38:56 -0700
Subject: [PATCH 0043/1063] [POWERPC] briq_panel Kconfig fix

drivers/char/briq_panel.c:28:22: error: asm/prom.h: No such file or directory

Cc: Jeremy Kerr <jk@ozlabs.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/char/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index a7ef542afbc20..320ad7ba11d44 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -497,6 +497,7 @@ config LEGACY_PTY_COUNT
 
 config BRIQ_PANEL
 	tristate 'Total Impact briQ front panel driver'
+	depends on PPC
 	---help---
 	  The briQ is a small footprint CHRP computer with a frontpanel VFD, a
 	  tristate led and two switches. It is the size of a CDROM drive.
-- 
GitLab


From a7f67bdf2c9f24509b8e81e0f35573b611987c80 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:35:54 +1000
Subject: [PATCH 0044/1063] [POWERPC] Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

powerpc core changes.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/btext.c            | 24 ++++-----
 arch/powerpc/kernel/ibmebus.c          |  6 +--
 arch/powerpc/kernel/legacy_serial.c    | 35 ++++++-------
 arch/powerpc/kernel/lparcfg.c          | 11 ++---
 arch/powerpc/kernel/machine_kexec_64.c | 10 ++--
 arch/powerpc/kernel/pci_32.c           | 39 ++++++++-------
 arch/powerpc/kernel/pci_64.c           | 28 ++++++-----
 arch/powerpc/kernel/pci_dn.c           | 13 ++---
 arch/powerpc/kernel/prom.c             | 23 +++++----
 arch/powerpc/kernel/prom_parse.c       | 68 +++++++++++++-------------
 arch/powerpc/kernel/rtas-proc.c        | 25 +++++-----
 arch/powerpc/kernel/rtas.c             | 28 ++++++-----
 arch/powerpc/kernel/rtas_pci.c         | 22 ++++-----
 arch/powerpc/kernel/setup-common.c     | 19 +++----
 arch/powerpc/kernel/setup_64.c         | 14 +++---
 arch/powerpc/kernel/sysfs.c            |  5 +-
 arch/powerpc/kernel/time.c             |  4 +-
 arch/powerpc/kernel/vio.c              | 16 +++---
 arch/powerpc/mm/numa.c                 | 31 ++++++------
 arch/powerpc/sysdev/fsl_soc.c          | 30 ++++++------
 arch/powerpc/sysdev/mmio_nvram.c       |  4 +-
 include/asm-powerpc/ibmebus.h          |  2 +-
 include/asm-powerpc/prom.h             | 16 +++---
 include/asm-powerpc/vio.h              |  4 +-
 24 files changed, 239 insertions(+), 238 deletions(-)

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index f4e5e14ee2b6b..995fcef156fd8 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -158,35 +158,35 @@ int btext_initialize(struct device_node *np)
 {
 	unsigned int width, height, depth, pitch;
 	unsigned long address = 0;
-	u32 *prop;
+	const u32 *prop;
 
-	prop = (u32 *)get_property(np, "linux,bootx-width", NULL);
+	prop = get_property(np, "linux,bootx-width", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "width", NULL);
+		prop = get_property(np, "width", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	width = *prop;
-	prop = (u32 *)get_property(np, "linux,bootx-height", NULL);
+	prop = get_property(np, "linux,bootx-height", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "height", NULL);
+		prop = get_property(np, "height", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	height = *prop;
-	prop = (u32 *)get_property(np, "linux,bootx-depth", NULL);
+	prop = get_property(np, "linux,bootx-depth", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "depth", NULL);
+		prop = get_property(np, "depth", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	depth = *prop;
 	pitch = width * ((depth + 7) / 8);
-	prop = (u32 *)get_property(np, "linux,bootx-linebytes", NULL);
+	prop = get_property(np, "linux,bootx-linebytes", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "linebytes", NULL);
+		prop = get_property(np, "linebytes", NULL);
 	if (prop)
 		pitch = *prop;
 	if (pitch == 1)
 		pitch = 0x1000;
-	prop = (u32 *)get_property(np, "address", NULL);
+	prop = get_property(np, "address", NULL);
 	if (prop)
 		address = *prop;
 
@@ -214,11 +214,11 @@ int btext_initialize(struct device_node *np)
 
 int __init btext_find_display(int allow_nonstdout)
 {
-	char *name;
+	const char *name;
 	struct device_node *np = NULL; 
 	int rc = -ENODEV;
 
-	name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	name = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (name != NULL) {
 		np = of_find_node_by_path(name);
 		if (np != NULL) {
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 97ddc02a3d42e..d9a0b087fa7fc 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -167,7 +167,7 @@ static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name,
 		   NULL);
 
 static struct ibmebus_dev* __devinit ibmebus_register_device_common(
-	struct ibmebus_dev *dev, char *name)
+	struct ibmebus_dev *dev, const char *name)
 {
 	int err = 0;
 
@@ -194,10 +194,10 @@ static struct ibmebus_dev* __devinit ibmebus_register_device_node(
 	struct device_node *dn)
 {
 	struct ibmebus_dev *dev;
-	char *loc_code;
+	const char *loc_code;
 	int length;
 
-	loc_code = (char *)get_property(dn, "ibm,loc-code", NULL);
+	loc_code = get_property(dn, "ibm,loc-code", NULL);
 	if (!loc_code) {
                 printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n",
 		       __FUNCTION__, dn->name ? dn->name : "<unknown>");
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 359ab89748e05..ee1e0b8c7f1fc 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -39,16 +39,17 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 				  phys_addr_t taddr, unsigned long irq,
 				  upf_t flags, int irq_check_parent)
 {
-	u32 *clk, *spd, clock = BASE_BAUD * 16;
+	const u32 *clk, *spd;
+	u32 clock = BASE_BAUD * 16;
 	int index;
 
 	/* get clock freq. if present */
-	clk = (u32 *)get_property(np, "clock-frequency", NULL);
+	clk = get_property(np, "clock-frequency", NULL);
 	if (clk && *clk)
 		clock = *clk;
 
 	/* get default speed if present */
-	spd = (u32 *)get_property(np, "current-speed", NULL);
+	spd = get_property(np, "current-speed", NULL);
 
 	/* If we have a location index, then try to use it */
 	if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
@@ -113,7 +114,7 @@ static int __init add_legacy_soc_port(struct device_node *np,
 				      struct device_node *soc_dev)
 {
 	u64 addr;
-	u32 *addrp;
+	const u32 *addrp;
 	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
 
 	/* We only support ports that have a clock frequency properly
@@ -140,15 +141,15 @@ static int __init add_legacy_soc_port(struct device_node *np,
 static int __init add_legacy_isa_port(struct device_node *np,
 				      struct device_node *isa_brg)
 {
-	u32 *reg;
-	char *typep;
+	const u32 *reg;
+	const char *typep;
 	int index = -1;
 	u64 taddr;
 
 	DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
 
 	/* Get the ISA port number */
-	reg = (u32 *)get_property(np, "reg", NULL);
+	reg = get_property(np, "reg", NULL);
 	if (reg == NULL)
 		return -1;
 
@@ -159,7 +160,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
 	/* Now look for an "ibm,aix-loc" property that gives us ordering
 	 * if any...
 	 */
-	typep = (char *)get_property(np, "ibm,aix-loc", NULL);
+	typep = get_property(np, "ibm,aix-loc", NULL);
 
 	/* If we have a location index, then use it */
 	if (typep && *typep == 'S')
@@ -184,7 +185,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
 				      struct device_node *pci_dev)
 {
 	u64 addr, base;
-	u32 *addrp;
+	const u32 *addrp;
 	unsigned int flags;
 	int iotype, index = -1, lindex = 0;
 
@@ -223,7 +224,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
 	 * we get to their "reg" property
 	 */
 	if (np != pci_dev) {
-		u32 *reg = (u32 *)get_property(np, "reg", NULL);
+		const u32 *reg = get_property(np, "reg", NULL);
 		if (reg && (*reg < 4))
 			index = lindex = *reg;
 	}
@@ -281,13 +282,13 @@ static void __init setup_legacy_serial_console(int console)
 void __init find_legacy_serial_ports(void)
 {
 	struct device_node *np, *stdout = NULL;
-	char *path;
+	const char *path;
 	int index;
 
 	DBG(" -> find_legacy_serial_port()\n");
 
 	/* Now find out if one of these is out firmware console */
-	path = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	path = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (path != NULL) {
 		stdout = of_find_node_by_path(path);
 		if (stdout)
@@ -487,8 +488,8 @@ static int __init check_legacy_serial_console(void)
 {
 	struct device_node *prom_stdout = NULL;
 	int speed = 0, offset = 0;
-	char *name;
-	u32 *spd;
+	const char *name;
+	const u32 *spd;
 
 	DBG(" -> check_legacy_serial_console()\n");
 
@@ -509,7 +510,7 @@ static int __init check_legacy_serial_console(void)
 	}
 	/* We are getting a weird phandle from OF ... */
 	/* ... So use the full path instead */
-	name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	name = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (name == NULL) {
 		DBG(" no linux,stdout-path !\n");
 		return -ENODEV;
@@ -521,12 +522,12 @@ static int __init check_legacy_serial_console(void)
 	}
 	DBG("stdout is %s\n", prom_stdout->full_name);
 
-	name = (char *)get_property(prom_stdout, "name", NULL);
+	name = get_property(prom_stdout, "name", NULL);
 	if (!name) {
 		DBG(" stdout package has no name !\n");
 		goto not_found;
 	}
-	spd = (u32 *)get_property(prom_stdout, "current-speed", NULL);
+	spd = get_property(prom_stdout, "current-speed", NULL);
 	if (spd)
 		speed = *spd;
 
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 2d94b372d49b2..3ce3a2d56fa88 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -309,12 +309,11 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	int partition_potential_processors;
 	int partition_active_processors;
 	struct device_node *rtas_node;
-	int *lrdrp = NULL;
+	const int *lrdrp = NULL;
 
 	rtas_node = find_path_device("/rtas");
 	if (rtas_node)
-		lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity",
-		                            NULL);
+		lrdrp = get_property(rtas_node, "ibm,lrdr-capacity", NULL);
 
 	if (lrdrp == NULL) {
 		partition_potential_processors = vdso_data->processorCount;
@@ -519,7 +518,8 @@ static int lparcfg_data(struct seq_file *m, void *v)
 	const char *model = "";
 	const char *system_id = "";
 	const char *tmp;
-	unsigned int *lp_index_ptr, lp_index = 0;
+	const unsigned int *lp_index_ptr;
+	unsigned int lp_index = 0;
 
 	seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
 
@@ -539,8 +539,7 @@ static int lparcfg_data(struct seq_file *m, void *v)
 			if (firmware_has_feature(FW_FEATURE_ISERIES))
 				system_id += 4;
 		}
-		lp_index_ptr = (unsigned int *)
-			get_property(rootdn, "ibm,partition-no", NULL);
+		lp_index_ptr = get_property(rootdn, "ibm,partition-no", NULL);
 		if (lp_index_ptr)
 			lp_index = *lp_index_ptr;
 	}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index b438d45a068c6..4efdaa9d3f43f 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -33,8 +33,8 @@ int default_machine_kexec_prepare(struct kimage *image)
 	unsigned long begin, end;	/* limits of segment */
 	unsigned long low, high;	/* limits of blocked memory range */
 	struct device_node *node;
-	unsigned long *basep;
-	unsigned int *sizep;
+	const unsigned long *basep;
+	const unsigned int *sizep;
 
 	if (!ppc_md.hpte_clear_all)
 		return -ENOENT;
@@ -74,10 +74,8 @@ int default_machine_kexec_prepare(struct kimage *image)
 	/* We also should not overwrite the tce tables */
 	for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
 			node = of_find_node_by_type(node, "pci")) {
-		basep = (unsigned long *)get_property(node, "linux,tce-base",
-							NULL);
-		sizep = (unsigned int *)get_property(node, "linux,tce-size",
-							NULL);
+		basep = get_property(node, "linux,tce-base", NULL);
+		sizep = get_property(node, "linux,tce-size", NULL);
 		if (basep == NULL || sizep == NULL)
 			continue;
 
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 898dae8ab6d91..3f6bd36e9e14e 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -633,12 +633,12 @@ pcibios_alloc_controller(void)
 static void
 make_one_node_map(struct device_node* node, u8 pci_bus)
 {
-	int *bus_range;
+	const int *bus_range;
 	int len;
 
 	if (pci_bus >= pci_bus_count)
 		return;
-	bus_range = (int *) get_property(node, "bus-range", &len);
+	bus_range = get_property(node, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, "
 		       "assuming it starts at 0\n", node->full_name);
@@ -648,13 +648,13 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
 
 	for (node=node->child; node != 0;node = node->sibling) {
 		struct pci_dev* dev;
-		unsigned int *class_code, *reg;
+		const unsigned int *class_code, *reg;
 	
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
 			continue;
-		reg = (unsigned int *)get_property(node, "reg", NULL);
+		reg = get_property(node, "reg", NULL);
 		if (!reg)
 			continue;
 		dev = pci_find_slot(pci_bus, ((reg[0] >> 8) & 0xff));
@@ -669,7 +669,7 @@ pcibios_make_OF_bus_map(void)
 {
 	int i;
 	struct pci_controller* hose;
-	u8* of_prop_map;
+	struct property *map_prop;
 
 	pci_to_OF_bus_map = (u8*)kmalloc(pci_bus_count, GFP_KERNEL);
 	if (!pci_to_OF_bus_map) {
@@ -691,9 +691,12 @@ pcibios_make_OF_bus_map(void)
 			continue;
 		make_one_node_map(node, hose->first_busno);
 	}
-	of_prop_map = get_property(find_path_device("/"), "pci-OF-bus-map", NULL);
-	if (of_prop_map)
-		memcpy(of_prop_map, pci_to_OF_bus_map, pci_bus_count);
+	map_prop = of_find_property(find_path_device("/"),
+			"pci-OF-bus-map", NULL);
+	if (map_prop) {
+		BUG_ON(pci_bus_count > map_prop->length);
+		memcpy(map_prop->value, pci_to_OF_bus_map, pci_bus_count);
+	}
 #ifdef DEBUG
 	printk("PCI->OF bus map:\n");
 	for (i=0; i<pci_bus_count; i++) {
@@ -712,7 +715,7 @@ scan_OF_pci_childs(struct device_node* node, pci_OF_scan_iterator filter, void*
 	struct device_node* sub_node;
 
 	for (; node != 0;node = node->sibling) {
-		unsigned int *class_code;
+		const unsigned int *class_code;
 	
 		if (filter(node, data))
 			return node;
@@ -722,7 +725,7 @@ scan_OF_pci_childs(struct device_node* node, pci_OF_scan_iterator filter, void*
 		 * a fake root for all functions of a multi-function device,
 		 * we go down them as well.
 		 */
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if ((!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) &&
 			strcmp(node->name, "multifunc-device"))
@@ -737,10 +740,10 @@ scan_OF_pci_childs(struct device_node* node, pci_OF_scan_iterator filter, void*
 static int
 scan_OF_pci_childs_iterator(struct device_node* node, void* data)
 {
-	unsigned int *reg;
+	const unsigned int *reg;
 	u8* fdata = (u8*)data;
 	
-	reg = (unsigned int *) get_property(node, "reg", NULL);
+	reg = get_property(node, "reg", NULL);
 	if (reg && ((reg[0] >> 8) & 0xff) == fdata[1]
 		&& ((reg[0] >> 16) & 0xff) == fdata[0])
 		return 1;
@@ -841,7 +844,7 @@ find_OF_pci_device_filter(struct device_node* node, void* data)
 int
 pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn)
 {
-	unsigned int *reg;
+	const unsigned int *reg;
 	struct pci_controller* hose;
 	struct pci_dev* dev = NULL;
 	
@@ -854,7 +857,7 @@ pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn)
 	if (!scan_OF_pci_childs(((struct device_node*)hose->arch_data)->child,
 			find_OF_pci_device_filter, (void *)node))
 		return -ENODEV;
-	reg = (unsigned int *) get_property(node, "reg", NULL);
+	reg = get_property(node, "reg", NULL);
 	if (!reg)
 		return -ENODEV;
 	*bus = (reg[0] >> 16) & 0xff;
@@ -885,8 +888,8 @@ pci_process_bridge_OF_ranges(struct pci_controller *hose,
 			   struct device_node *dev, int primary)
 {
 	static unsigned int static_lc_ranges[256] __initdata;
-	unsigned int *dt_ranges, *lc_ranges, *ranges, *prev;
-	unsigned int size;
+	const unsigned int *dt_ranges;
+	unsigned int *lc_ranges, *ranges, *prev, size;
 	int rlen = 0, orig_rlen;
 	int memno = 0;
 	struct resource *res;
@@ -897,7 +900,7 @@ pci_process_bridge_OF_ranges(struct pci_controller *hose,
 	 * that can have more than 3 ranges, fortunately using contiguous
 	 * addresses -- BenH
 	 */
-	dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+	dt_ranges = get_property(dev, "ranges", &rlen);
 	if (!dt_ranges)
 		return;
 	/* Sanity check, though hopefully that never happens */
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 1d85fcba51e4d..e795a7e2a38e4 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -246,10 +246,10 @@ static void __init pcibios_claim_of_setup(void)
 #ifdef CONFIG_PPC_MULTIPLATFORM
 static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
 {
-	u32 *prop;
+	const u32 *prop;
 	int len;
 
-	prop = (u32 *) get_property(np, name, &len);
+	prop = get_property(np, name, &len);
 	if (prop && len >= 4)
 		return *prop;
 	return def;
@@ -278,10 +278,11 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
 	u64 base, size;
 	unsigned int flags;
 	struct resource *res;
-	u32 *addrs, i;
+	const u32 *addrs;
+	u32 i;
 	int proplen;
 
-	addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
+	addrs = get_property(node, "assigned-addresses", &proplen);
 	if (!addrs)
 		return;
 	DBG("    parse addresses (%d bytes) @ %p\n", proplen, addrs);
@@ -381,7 +382,7 @@ void __devinit of_scan_bus(struct device_node *node,
 				  struct pci_bus *bus)
 {
 	struct device_node *child = NULL;
-	u32 *reg;
+	const u32 *reg;
 	int reglen, devfn;
 	struct pci_dev *dev;
 
@@ -389,7 +390,7 @@ void __devinit of_scan_bus(struct device_node *node,
 
 	while ((child = of_get_next_child(node, child)) != NULL) {
 		DBG("  * %s\n", child->full_name);
-		reg = (u32 *) get_property(child, "reg", &reglen);
+		reg = get_property(child, "reg", &reglen);
 		if (reg == NULL || reglen < 20)
 			continue;
 		devfn = (reg[0] >> 8) & 0xff;
@@ -413,7 +414,7 @@ void __devinit of_scan_pci_bridge(struct device_node *node,
 			 	struct pci_dev *dev)
 {
 	struct pci_bus *bus;
-	u32 *busrange, *ranges;
+	const u32 *busrange, *ranges;
 	int len, i, mode;
 	struct resource *res;
 	unsigned int flags;
@@ -422,13 +423,13 @@ void __devinit of_scan_pci_bridge(struct device_node *node,
 	DBG("of_scan_pci_bridge(%s)\n", node->full_name);
 
 	/* parse bus-range property */
-	busrange = (u32 *) get_property(node, "bus-range", &len);
+	busrange = get_property(node, "bus-range", &len);
 	if (busrange == NULL || len != 8) {
 		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
 		       node->full_name);
 		return;
 	}
-	ranges = (u32 *) get_property(node, "ranges", &len);
+	ranges = get_property(node, "ranges", &len);
 	if (ranges == NULL) {
 		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
 		       node->full_name);
@@ -892,13 +893,13 @@ static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
 		unsigned int size;
 	};
 
-	struct isa_range *range;
+	const struct isa_range *range;
 	unsigned long pci_addr;
 	unsigned int isa_addr;
 	unsigned int size;
 	int rlen = 0;
 
-	range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
+	range = get_property(isa_node, "ranges", &rlen);
 	if (range == NULL || (rlen < sizeof(struct isa_range))) {
 		printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
 		       "mapping 64k\n");
@@ -939,7 +940,8 @@ static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
 void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 					    struct device_node *dev, int prim)
 {
-	unsigned int *ranges, pci_space;
+	const unsigned int *ranges;
+	unsigned int pci_space;
 	unsigned long size;
 	int rlen = 0;
 	int memno = 0;
@@ -957,7 +959,7 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 	 *			(size depending on dev->n_addr_cells)
 	 *   cells 4+5 or 5+6:	the size of the range
 	 */
-	ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+	ranges = get_property(dev, "ranges", &rlen);
 	if (ranges == NULL)
 		return;
 	hose->io_base_phys = 0;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 1c18953514c3d..68df018dae0ea 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -40,8 +40,8 @@
 static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
 {
 	struct pci_controller *phb = data;
-	int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
-	u32 *regs;
+	const int *type = get_property(dn, "ibm,pci-config-space-type", NULL);
+	const u32 *regs;
 	struct pci_dn *pdn;
 
 	if (mem_init_done)
@@ -54,14 +54,14 @@ static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
 	dn->data = pdn;
 	pdn->node = dn;
 	pdn->phb = phb;
-	regs = (u32 *)get_property(dn, "reg", NULL);
+	regs = get_property(dn, "reg", NULL);
 	if (regs) {
 		/* First register entry is addr (00BBSS00)  */
 		pdn->busno = (regs[0] >> 16) & 0xff;
 		pdn->devfn = (regs[0] >> 8) & 0xff;
 	}
 	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
-		u32 *busp = (u32 *)get_property(dn, "linux,subbus", NULL);
+		const u32 *busp = get_property(dn, "linux,subbus", NULL);
 		if (busp)
 			pdn->bussubno = *busp;
 	}
@@ -96,10 +96,11 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
 
 	/* We started with a phb, iterate all childs */
 	for (dn = start->child; dn; dn = nextdn) {
-		u32 *classp, class;
+		const u32 *classp;
+		u32 class;
 
 		nextdn = NULL;
-		classp = (u32 *)get_property(dn, "class-code", NULL);
+		classp = get_property(dn, "class-code", NULL);
 		class = classp ? *classp : 0;
 
 		if (pre && ((ret = pre(dn, data)) != NULL))
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index a1787ffb6319b..2a3d84a39cb5a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -942,11 +942,11 @@ void __init early_init_devtree(void *params)
 int
 prom_n_addr_cells(struct device_node* np)
 {
-	int* ip;
+	const int *ip;
 	do {
 		if (np->parent)
 			np = np->parent;
-		ip = (int *) get_property(np, "#address-cells", NULL);
+		ip = get_property(np, "#address-cells", NULL);
 		if (ip != NULL)
 			return *ip;
 	} while (np->parent);
@@ -958,11 +958,11 @@ EXPORT_SYMBOL(prom_n_addr_cells);
 int
 prom_n_size_cells(struct device_node* np)
 {
-	int* ip;
+	const int* ip;
 	do {
 		if (np->parent)
 			np = np->parent;
-		ip = (int *) get_property(np, "#size-cells", NULL);
+		ip = get_property(np, "#size-cells", NULL);
 		if (ip != NULL)
 			return *ip;
 	} while (np->parent);
@@ -1034,7 +1034,7 @@ int device_is_compatible(struct device_node *device, const char *compat)
 	const char* cp;
 	int cplen, l;
 
-	cp = (char *) get_property(device, "compatible", &cplen);
+	cp = get_property(device, "compatible", &cplen);
 	if (cp == NULL)
 		return 0;
 	while (cplen > 0) {
@@ -1449,7 +1449,7 @@ static int of_finish_dynamic_node(struct device_node *node)
 {
 	struct device_node *parent = of_get_parent(node);
 	int err = 0;
-	phandle *ibm_phandle;
+	const phandle *ibm_phandle;
 
 	node->name = get_property(node, "name", NULL);
 	node->type = get_property(node, "device_type", NULL);
@@ -1466,8 +1466,7 @@ static int of_finish_dynamic_node(struct device_node *node)
 		return -ENODEV;
 
 	/* fix up new node's linux_phandle field */
-	if ((ibm_phandle = (unsigned int *)get_property(node,
-							"ibm,phandle", NULL)))
+	if ((ibm_phandle = get_property(node, "ibm,phandle", NULL)))
 		node->linux_phandle = *ibm_phandle;
 
 out:
@@ -1658,16 +1657,16 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
 	hardid = get_hard_smp_processor_id(cpu);
 
 	for_each_node_by_type(np, "cpu") {
-		u32 *intserv;
+		const u32 *intserv;
 		unsigned int plen, t;
 
 		/* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
 		 * fallback to "reg" property and assume no threads
 		 */
-		intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s",
-					      &plen);
+		intserv = get_property(np, "ibm,ppc-interrupt-server#s",
+				&plen);
 		if (intserv == NULL) {
-			u32 *reg = (u32 *)get_property(np, "reg", NULL);
+			const u32 *reg = get_property(np, "reg", NULL);
 			if (reg == NULL)
 				continue;
 			if (*reg == hardid) {
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index e9960170667b2..cdcd5d665468f 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -270,7 +270,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
 			    struct of_bus *pbus, u32 *addr,
 			    int na, int ns, int pna)
 {
-	u32 *ranges;
+	const u32 *ranges;
 	unsigned int rlen;
 	int rone;
 	u64 offset = OF_BAD_ADDR;
@@ -287,7 +287,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
 	 * to translate addresses that aren't supposed to be translated in
 	 * the first place. --BenH.
 	 */
-	ranges = (u32 *)get_property(parent, "ranges", &rlen);
+	ranges = get_property(parent, "ranges", &rlen);
 	if (ranges == NULL || rlen == 0) {
 		offset = of_read_number(addr, na);
 		memset(addr, 0, pna * 4);
@@ -330,7 +330,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
  * that can be mapped to a cpu physical address). This is not really specified
  * that way, but this is traditionally the way IBM at least do things
  */
-u64 of_translate_address(struct device_node *dev, u32 *in_addr)
+u64 of_translate_address(struct device_node *dev, const u32 *in_addr)
 {
 	struct device_node *parent = NULL;
 	struct of_bus *bus, *pbus;
@@ -407,10 +407,10 @@ u64 of_translate_address(struct device_node *dev, u32 *in_addr)
 }
 EXPORT_SYMBOL(of_translate_address);
 
-u32 *of_get_address(struct device_node *dev, int index, u64 *size,
+const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 		    unsigned int *flags)
 {
-	u32 *prop;
+	const u32 *prop;
 	unsigned int psize;
 	struct device_node *parent;
 	struct of_bus *bus;
@@ -427,7 +427,7 @@ u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 		return NULL;
 
 	/* Get "reg" or "assigned-addresses" property */
-	prop = (u32 *)get_property(dev, bus->addresses, &psize);
+	prop = get_property(dev, bus->addresses, &psize);
 	if (prop == NULL)
 		return NULL;
 	psize /= 4;
@@ -445,10 +445,10 @@ u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 }
 EXPORT_SYMBOL(of_get_address);
 
-u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
+const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
 			unsigned int *flags)
 {
-	u32 *prop;
+	const u32 *prop;
 	unsigned int psize;
 	struct device_node *parent;
 	struct of_bus *bus;
@@ -469,7 +469,7 @@ u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
 		return NULL;
 
 	/* Get "reg" or "assigned-addresses" property */
-	prop = (u32 *)get_property(dev, bus->addresses, &psize);
+	prop = get_property(dev, bus->addresses, &psize);
 	if (prop == NULL)
 		return NULL;
 	psize /= 4;
@@ -487,7 +487,7 @@ u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
 }
 EXPORT_SYMBOL(of_get_pci_address);
 
-static int __of_address_to_resource(struct device_node *dev, u32 *addrp,
+static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
 				    u64 size, unsigned int flags,
 				    struct resource *r)
 {
@@ -518,7 +518,7 @@ static int __of_address_to_resource(struct device_node *dev, u32 *addrp,
 int of_address_to_resource(struct device_node *dev, int index,
 			   struct resource *r)
 {
-	u32		*addrp;
+	const u32	*addrp;
 	u64		size;
 	unsigned int	flags;
 
@@ -532,7 +532,7 @@ EXPORT_SYMBOL_GPL(of_address_to_resource);
 int of_pci_address_to_resource(struct device_node *dev, int bar,
 			       struct resource *r)
 {
-	u32		*addrp;
+	const u32	*addrp;
 	u64		size;
 	unsigned int	flags;
 
@@ -543,13 +543,14 @@ int of_pci_address_to_resource(struct device_node *dev, int bar,
 }
 EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
 
-void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop,
+void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size)
 {
-	u32 *dma_window, cells;
-	unsigned char *prop;
+	const u32 *dma_window;
+	u32 cells;
+	const unsigned char *prop;
 
-	dma_window = (u32 *)dma_window_prop;
+	dma_window = dma_window_prop;
 
 	/* busno is always one cell */
 	*busno = *(dma_window++);
@@ -578,13 +579,13 @@ static struct device_node *of_irq_dflt_pic;
 static struct device_node *of_irq_find_parent(struct device_node *child)
 {
 	struct device_node *p;
-	phandle *parp;
+	const phandle *parp;
 
 	if (!of_node_get(child))
 		return NULL;
 
 	do {
-		parp = (phandle *)get_property(child, "interrupt-parent", NULL);
+		parp = get_property(child, "interrupt-parent", NULL);
 		if (parp == NULL)
 			p = of_get_parent(child);
 		else {
@@ -646,11 +647,11 @@ void of_irq_map_init(unsigned int flags)
 
 }
 
-int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
-		   struct of_irq *out_irq)
+int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
+		const u32 *addr, struct of_irq *out_irq)
 {
 	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
-	u32 *tmp, *imap, *imask;
+	const u32 *tmp, *imap, *imask;
 	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
 	int imaplen, match, i;
 
@@ -661,7 +662,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
 	 * is none, we are nice and just walk up the tree
 	 */
 	do {
-		tmp = (u32 *)get_property(ipar, "#interrupt-cells", NULL);
+		tmp = get_property(ipar, "#interrupt-cells", NULL);
 		if (tmp != NULL) {
 			intsize = *tmp;
 			break;
@@ -682,7 +683,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
 	 */
 	old = of_node_get(ipar);
 	do {
-		tmp = (u32 *)get_property(old, "#address-cells", NULL);
+		tmp = get_property(old, "#address-cells", NULL);
 		tnode = of_get_parent(old);
 		of_node_put(old);
 		old = tnode;
@@ -709,7 +710,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
 		}
 
 		/* Now look for an interrupt-map */
-		imap = (u32 *)get_property(ipar, "interrupt-map", &imaplen);
+		imap = get_property(ipar, "interrupt-map", &imaplen);
 		/* No interrupt map, check for an interrupt parent */
 		if (imap == NULL) {
 			DBG(" -> no map, getting parent\n");
@@ -719,7 +720,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
 		imaplen /= sizeof(u32);
 
 		/* Look for a mask */
-		imask = (u32 *)get_property(ipar, "interrupt-map-mask", NULL);
+		imask = get_property(ipar, "interrupt-map-mask", NULL);
 
 		/* If we were passed no "reg" property and we attempt to parse
 		 * an interrupt-map, then #address-cells must be 0.
@@ -766,14 +767,14 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
 			/* Get #interrupt-cells and #address-cells of new
 			 * parent
 			 */
-			tmp = (u32 *)get_property(newpar, "#interrupt-cells",
+			tmp = get_property(newpar, "#interrupt-cells",
 						  NULL);
 			if (tmp == NULL) {
 				DBG(" -> parent lacks #interrupt-cells !\n");
 				goto fail;
 			}
 			newintsize = *tmp;
-			tmp = (u32 *)get_property(newpar, "#address-cells",
+			tmp = get_property(newpar, "#address-cells",
 						  NULL);
 			newaddrsize = (tmp == NULL) ? 0 : *tmp;
 
@@ -819,14 +820,14 @@ EXPORT_SYMBOL_GPL(of_irq_map_raw);
 static int of_irq_map_oldworld(struct device_node *device, int index,
 			       struct of_irq *out_irq)
 {
-	u32 *ints;
+	const u32 *ints;
 	int intlen;
 
 	/*
 	 * Old machines just have a list of interrupt numbers
 	 * and no interrupt-controller nodes.
 	 */
-	ints = (u32 *) get_property(device, "AAPL,interrupts", &intlen);
+	ints = get_property(device, "AAPL,interrupts", &intlen);
 	if (ints == NULL)
 		return -EINVAL;
 	intlen /= sizeof(u32);
@@ -851,7 +852,8 @@ static int of_irq_map_oldworld(struct device_node *device, int index,
 int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
 {
 	struct device_node *p;
-	u32 *intspec, *tmp, intsize, intlen, *addr;
+	const u32 *intspec, *tmp, *addr;
+	u32 intsize, intlen;
 	int res;
 
 	DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index);
@@ -861,13 +863,13 @@ int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq
 		return of_irq_map_oldworld(device, index, out_irq);
 
 	/* Get the interrupts property */
-	intspec = (u32 *)get_property(device, "interrupts", &intlen);
+	intspec = get_property(device, "interrupts", &intlen);
 	if (intspec == NULL)
 		return -EINVAL;
 	intlen /= sizeof(u32);
 
 	/* Get the reg property (if any) */
-	addr = (u32 *)get_property(device, "reg", NULL);
+	addr = get_property(device, "reg", NULL);
 
 	/* Look for the interrupt parent. */
 	p = of_irq_find_parent(device);
@@ -875,7 +877,7 @@ int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq
 		return -EINVAL;
 
 	/* Get size of interrupt specifier */
-	tmp = (u32 *)get_property(p, "#interrupt-cells", NULL);
+	tmp = get_property(p, "#interrupt-cells", NULL);
 	if (tmp == NULL) {
 		of_node_put(p);
 		return -EINVAL;
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 9c9ad1fa9cce9..2fe82abf1c528 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -246,12 +246,12 @@ struct file_operations ppc_rtas_rmo_buf_ops = {
 
 static int ppc_rtas_find_all_sensors(void);
 static void ppc_rtas_process_sensor(struct seq_file *m,
-	struct individual_sensor *s, int state, int error, char *loc);
+	struct individual_sensor *s, int state, int error, const char *loc);
 static char *ppc_rtas_process_error(int error);
 static void get_location_code(struct seq_file *m,
-	struct individual_sensor *s, char *loc);
-static void check_location_string(struct seq_file *m, char *c);
-static void check_location(struct seq_file *m, char *c);
+	struct individual_sensor *s, const char *loc);
+static void check_location_string(struct seq_file *m, const char *c);
+static void check_location(struct seq_file *m, const char *c);
 
 static int __init proc_rtas_init(void)
 {
@@ -446,11 +446,11 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v)
 	for (i=0; i<sensors.quant; i++) {
 		struct individual_sensor *p = &sensors.sensor[i];
 		char rstr[64];
-		char *loc;
+		const char *loc;
 		int llen, offs;
 
 		sprintf (rstr, SENSOR_PREFIX"%04d", p->token);
-		loc = (char *) get_property(rtas_node, rstr, &llen);
+		loc = get_property(rtas_node, rstr, &llen);
 
 		/* A sensor may have multiple instances */
 		for (j = 0, offs = 0; j <= p->quant; j++) {
@@ -474,10 +474,10 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v)
 
 static int ppc_rtas_find_all_sensors(void)
 {
-	unsigned int *utmp;
+	const unsigned int *utmp;
 	int len, i;
 
-	utmp = (unsigned int *) get_property(rtas_node, "rtas-sensors", &len);
+	utmp = get_property(rtas_node, "rtas-sensors", &len);
 	if (utmp == NULL) {
 		printk (KERN_ERR "error: could not get rtas-sensors\n");
 		return 1;
@@ -530,7 +530,7 @@ static char *ppc_rtas_process_error(int error)
  */
 
 static void ppc_rtas_process_sensor(struct seq_file *m,
-	struct individual_sensor *s, int state, int error, char *loc)
+	struct individual_sensor *s, int state, int error, const char *loc)
 {
 	/* Defined return vales */
 	const char * key_switch[]        = { "Off\t", "Normal\t", "Secure\t", 
@@ -682,7 +682,7 @@ static void ppc_rtas_process_sensor(struct seq_file *m,
 
 /* ****************************************************************** */
 
-static void check_location(struct seq_file *m, char *c)
+static void check_location(struct seq_file *m, const char *c)
 {
 	switch (c[0]) {
 		case LOC_PLANAR:
@@ -719,7 +719,7 @@ static void check_location(struct seq_file *m, char *c)
  * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ]
  * the '.' may be an abbrevation
  */
-static void check_location_string(struct seq_file *m, char *c)
+static void check_location_string(struct seq_file *m, const char *c)
 {
 	while (*c) {
 		if (isalpha(*c) || *c == '.')
@@ -733,7 +733,8 @@ static void check_location_string(struct seq_file *m, char *c)
 
 /* ****************************************************************** */
 
-static void get_location_code(struct seq_file *m, struct individual_sensor *s, char *loc)
+static void get_location_code(struct seq_file *m, struct individual_sensor *s,
+		const char *loc)
 {
 	if (!loc || !*loc) {
 		seq_printf(m, "---");/* does not have a location */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 4a4cb55984027..10e10be324c9a 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -177,10 +177,12 @@ void __init udbg_init_rtas_console(void)
 void rtas_progress(char *s, unsigned short hex)
 {
 	struct device_node *root;
-	int width, *p;
+	int width;
+	const int *p;
 	char *os;
 	static int display_character, set_indicator;
-	static int display_width, display_lines, *row_width, form_feed;
+	static int display_width, display_lines, form_feed;
+	static const int *row_width;
 	static DEFINE_SPINLOCK(progress_lock);
 	static int current_line;
 	static int pending_newline = 0;  /* did last write end with unprinted newline? */
@@ -191,16 +193,16 @@ void rtas_progress(char *s, unsigned short hex)
 	if (display_width == 0) {
 		display_width = 0x10;
 		if ((root = find_path_device("/rtas"))) {
-			if ((p = (unsigned int *)get_property(root,
+			if ((p = get_property(root,
 					"ibm,display-line-length", NULL)))
 				display_width = *p;
-			if ((p = (unsigned int *)get_property(root,
+			if ((p = get_property(root,
 					"ibm,form-feed", NULL)))
 				form_feed = *p;
-			if ((p = (unsigned int *)get_property(root,
+			if ((p = get_property(root,
 					"ibm,display-number-of-lines", NULL)))
 				display_lines = *p;
-			row_width = (unsigned int *)get_property(root,
+			row_width = get_property(root,
 					"ibm,display-truncation-length", NULL);
 		}
 		display_character = rtas_token("display-character");
@@ -293,10 +295,10 @@ EXPORT_SYMBOL(rtas_progress);		/* needed by rtas_flash module */
 
 int rtas_token(const char *service)
 {
-	int *tokp;
+	const int *tokp;
 	if (rtas.dev == NULL)
 		return RTAS_UNKNOWN_SERVICE;
-	tokp = (int *) get_property(rtas.dev, service, NULL);
+	tokp = get_property(rtas.dev, service, NULL);
 	return tokp ? *tokp : RTAS_UNKNOWN_SERVICE;
 }
 EXPORT_SYMBOL(rtas_token);
@@ -824,15 +826,15 @@ void __init rtas_initialize(void)
 	 */
 	rtas.dev = of_find_node_by_name(NULL, "rtas");
 	if (rtas.dev) {
-		u32 *basep, *entryp;
-		u32 *sizep;
+		const u32 *basep, *entryp, *sizep;
 
-		basep = (u32 *)get_property(rtas.dev, "linux,rtas-base", NULL);
-		sizep = (u32 *)get_property(rtas.dev, "rtas-size", NULL);
+		basep = get_property(rtas.dev, "linux,rtas-base", NULL);
+		sizep = get_property(rtas.dev, "rtas-size", NULL);
 		if (basep != NULL && sizep != NULL) {
 			rtas.base = *basep;
 			rtas.size = *sizep;
-			entryp = (u32 *)get_property(rtas.dev, "linux,rtas-entry", NULL);
+			entryp = get_property(rtas.dev,
+					"linux,rtas-entry", NULL);
 			if (entryp == NULL) /* Ugh */
 				rtas.entry = rtas.base;
 			else
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index cda0226573243..5a798ac6aecfb 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -57,7 +57,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where)
 
 static int of_device_available(struct device_node * dn)
 {
-        char * status;
+        const char *status;
 
         status = get_property(dn, "status", NULL);
 
@@ -178,7 +178,7 @@ struct pci_ops rtas_pci_ops = {
 
 int is_python(struct device_node *dev)
 {
-	char *model = (char *)get_property(dev, "model", NULL);
+	const char *model = get_property(dev, "model", NULL);
 
 	if (model && strstr(model, "Python"))
 		return 1;
@@ -234,7 +234,7 @@ void __init init_pci_config_tokens (void)
 unsigned long __devinit get_phb_buid (struct device_node *phb)
 {
 	int addr_cells;
-	unsigned int *buid_vals;
+	const unsigned int *buid_vals;
 	unsigned int len;
 	unsigned long buid;
 
@@ -247,7 +247,7 @@ unsigned long __devinit get_phb_buid (struct device_node *phb)
 	if (phb->parent->parent)
 		return 0;
 
-	buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+	buid_vals = get_property(phb, "reg", &len);
 	if (buid_vals == NULL)
 		return 0;
 
@@ -264,10 +264,10 @@ unsigned long __devinit get_phb_buid (struct device_node *phb)
 static int phb_set_bus_ranges(struct device_node *dev,
 			      struct pci_controller *phb)
 {
-	int *bus_range;
+	const int *bus_range;
 	unsigned int len;
 
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		return 1;
  	}
@@ -325,15 +325,15 @@ unsigned long __init find_and_init_phbs(void)
 	 * in chosen.
 	 */
 	if (of_chosen) {
-		int *prop;
+		const int *prop;
 
-		prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
-					   NULL);
+		prop = get_property(of_chosen,
+				"linux,pci-probe-only", NULL);
 		if (prop)
 			pci_probe_only = *prop;
 
-		prop = (int *)get_property(of_chosen,
-					   "linux,pci-assign-all-buses", NULL);
+		prop = get_property(of_chosen,
+				"linux,pci-assign-all-buses", NULL);
 		if (prop)
 			pci_assign_all_buses = *prop;
 	}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index c6d7b98af7d5b..aaf1727b35705 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -304,16 +304,15 @@ struct seq_operations cpuinfo_op = {
 void __init check_for_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
-	unsigned long *prop;
+	const unsigned long *prop;
 
 	DBG(" -> check_for_initrd()\n");
 
 	if (of_chosen) {
-		prop = (unsigned long *)get_property(of_chosen,
-				"linux,initrd-start", NULL);
+		prop = get_property(of_chosen, "linux,initrd-start", NULL);
 		if (prop != NULL) {
 			initrd_start = (unsigned long)__va(*prop);
-			prop = (unsigned long *)get_property(of_chosen,
+			prop = get_property(of_chosen,
 					"linux,initrd-end", NULL);
 			if (prop != NULL) {
 				initrd_end = (unsigned long)__va(*prop);
@@ -366,15 +365,14 @@ void __init smp_setup_cpu_maps(void)
 	int cpu = 0;
 
 	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
-		int *intserv;
+		const int *intserv;
 		int j, len = sizeof(u32), nthreads = 1;
 
-		intserv = (int *)get_property(dn, "ibm,ppc-interrupt-server#s",
-					      &len);
+		intserv = get_property(dn, "ibm,ppc-interrupt-server#s", &len);
 		if (intserv)
 			nthreads = len / sizeof(int);
 		else {
-			intserv = (int *) get_property(dn, "reg", NULL);
+			intserv = get_property(dn, "reg", NULL);
 			if (!intserv)
 				intserv = &cpu;	/* assume logical == phys */
 		}
@@ -395,13 +393,12 @@ void __init smp_setup_cpu_maps(void)
 	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) &&
 	    (dn = of_find_node_by_path("/rtas"))) {
 		int num_addr_cell, num_size_cell, maxcpus;
-		unsigned int *ireg;
+		const unsigned int *ireg;
 
 		num_addr_cell = prom_n_addr_cells(dn);
 		num_size_cell = prom_n_size_cells(dn);
 
-		ireg = (unsigned int *)
-			get_property(dn, "ibm,lrdr-capacity", NULL);
+		ireg = get_property(dn, "ibm,lrdr-capacity", NULL);
 
 		if (!ireg)
 			goto out;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e2447aef3a8fb..77efe19ccd2c8 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -106,7 +106,7 @@ static int smt_enabled_cmdline;
 static void check_smt_enabled(void)
 {
 	struct device_node *dn;
-	char *smt_option;
+	const char *smt_option;
 
 	/* Allow the command line to overrule the OF option */
 	if (smt_enabled_cmdline)
@@ -115,7 +115,7 @@ static void check_smt_enabled(void)
 	dn = of_find_node_by_path("/options");
 
 	if (dn) {
-		smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL);
+		smt_option = get_property(dn, "ibm,smt-enabled", NULL);
 
                 if (smt_option) {
 			if (!strcmp(smt_option, "on"))
@@ -292,7 +292,7 @@ static void __init initialize_cache_info(void)
 		 */
 
 		if ( num_cpus == 1 ) {
-			u32 *sizep, *lsizep;
+			const u32 *sizep, *lsizep;
 			u32 size, lsize;
 			const char *dc, *ic;
 
@@ -307,10 +307,10 @@ static void __init initialize_cache_info(void)
 
 			size = 0;
 			lsize = cur_cpu_spec->dcache_bsize;
-			sizep = (u32 *)get_property(np, "d-cache-size", NULL);
+			sizep = get_property(np, "d-cache-size", NULL);
 			if (sizep != NULL)
 				size = *sizep;
-			lsizep = (u32 *) get_property(np, dc, NULL);
+			lsizep = get_property(np, dc, NULL);
 			if (lsizep != NULL)
 				lsize = *lsizep;
 			if (sizep == 0 || lsizep == 0)
@@ -324,10 +324,10 @@ static void __init initialize_cache_info(void)
 
 			size = 0;
 			lsize = cur_cpu_spec->icache_bsize;
-			sizep = (u32 *)get_property(np, "i-cache-size", NULL);
+			sizep = get_property(np, "i-cache-size", NULL);
 			if (sizep != NULL)
 				size = *sizep;
-			lsizep = (u32 *)get_property(np, ic, NULL);
+			lsizep = get_property(np, ic, NULL);
 			if (lsizep != NULL)
 				lsize = *lsizep;
 			if (sizep == 0 || lsizep == 0)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0104350955501..1d724aef438ac 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -60,7 +60,7 @@ static int smt_snooze_cmdline;
 static int __init smt_setup(void)
 {
 	struct device_node *options;
-	unsigned int *val;
+	const unsigned int *val;
 	unsigned int cpu;
 
 	if (!cpu_has_feature(CPU_FTR_SMT))
@@ -70,8 +70,7 @@ static int __init smt_setup(void)
 	if (!options)
 		return -ENODEV;
 
-	val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
-					   NULL);
+	val = get_property(options, "ibm,smt-snooze-delay", NULL);
 	if (!smt_snooze_cmdline && val) {
 		for_each_possible_cpu(cpu)
 			per_cpu(smt_snooze_delay, cpu) = *val;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 774c0a3c50191..8d4ccf061a4dd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -859,14 +859,14 @@ EXPORT_SYMBOL(do_settimeofday);
 static int __init get_freq(char *name, int cells, unsigned long *val)
 {
 	struct device_node *cpu;
-	unsigned int *fp;
+	const unsigned int *fp;
 	int found = 0;
 
 	/* The cpu node should have timebase and clock frequency properties */
 	cpu = of_find_node_by_type(NULL, "cpu");
 
 	if (cpu) {
-		fp = (unsigned int *)get_property(cpu, name, NULL);
+		fp = get_property(cpu, name, NULL);
 		if (fp) {
 			found = 1;
 			*val = 0;
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index fad8580f9081c..cb87e71eec665 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -77,7 +77,7 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
 	} else
 #endif
 	{
-		unsigned char *dma_window;
+		const unsigned char *dma_window;
 		struct iommu_table *tbl;
 		unsigned long offset, size;
 
@@ -217,7 +217,7 @@ static void __devinit vio_dev_release(struct device *dev)
 struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
 {
 	struct vio_dev *viodev;
-	unsigned int *unit_address;
+	const unsigned int *unit_address;
 
 	/* we need the 'device_type' property, in order to match with drivers */
 	if (of_node->type == NULL) {
@@ -227,7 +227,7 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
 		return NULL;
 	}
 
-	unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
+	unit_address = get_property(of_node, "reg", NULL);
 	if (unit_address == NULL) {
 		printk(KERN_WARNING "%s: node %s missing 'reg'\n",
 				__FUNCTION__,
@@ -249,7 +249,7 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
 	viodev->type = of_node->type;
 	viodev->unit_address = *unit_address;
 	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
-		unit_address = (unsigned int *)get_property(of_node,
+		unit_address = get_property(of_node,
 				"linux,unit_address", NULL);
 		if (unit_address != NULL)
 			viodev->unit_address = *unit_address;
@@ -423,7 +423,7 @@ static int vio_hotplug(struct device *dev, char **envp, int num_envp,
 {
 	const struct vio_dev *vio_dev = to_vio_dev(dev);
 	struct device_node *dn = dev->platform_data;
-	char *cp;
+	const char *cp;
 	int length;
 
 	if (!num_envp)
@@ -431,7 +431,7 @@ static int vio_hotplug(struct device *dev, char **envp, int num_envp,
 
 	if (!dn)
 		return -ENODEV;
-	cp = (char *)get_property(dn, "compatible", &length);
+	cp = get_property(dn, "compatible", &length);
 	if (!cp)
 		return -ENODEV;
 
@@ -493,11 +493,11 @@ static struct vio_dev *vio_find_name(const char *kobj_name)
  */
 struct vio_dev *vio_find_node(struct device_node *vnode)
 {
-	uint32_t *unit_address;
+	const uint32_t *unit_address;
 	char kobj_name[BUS_ID_SIZE];
 
 	/* construct the kobject name from the device node */
-	unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
+	unit_address = get_property(vnode, "reg", NULL);
 	if (!unit_address)
 		return NULL;
 	snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fbe23933f7319..6c0f1c7d83e50 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -159,12 +159,12 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
 {
 	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
 	struct device_node *cpu_node = NULL;
-	unsigned int *interrupt_server, *reg;
+	const unsigned int *interrupt_server, *reg;
 	int len;
 
 	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
 		/* Try interrupt server first */
-		interrupt_server = (unsigned int *)get_property(cpu_node,
+		interrupt_server = get_property(cpu_node,
 					"ibm,ppc-interrupt-server#s", &len);
 
 		len = len / sizeof(u32);
@@ -175,8 +175,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
 					return cpu_node;
 			}
 		} else {
-			reg = (unsigned int *)get_property(cpu_node,
-							   "reg", &len);
+			reg = get_property(cpu_node, "reg", &len);
 			if (reg && (len > 0) && (reg[0] == hw_cpuid))
 				return cpu_node;
 		}
@@ -186,9 +185,9 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
 }
 
 /* must hold reference to node during call */
-static int *of_get_associativity(struct device_node *dev)
+static const int *of_get_associativity(struct device_node *dev)
 {
-	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
+	return get_property(dev, "ibm,associativity", NULL);
 }
 
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
@@ -197,7 +196,7 @@ static int *of_get_associativity(struct device_node *dev)
 static int of_node_to_nid_single(struct device_node *device)
 {
 	int nid = -1;
-	unsigned int *tmp;
+	const unsigned int *tmp;
 
 	if (min_common_depth == -1)
 		goto out;
@@ -255,7 +254,7 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
 static int __init find_min_common_depth(void)
 {
 	int depth;
-	unsigned int *ref_points;
+	const unsigned int *ref_points;
 	struct device_node *rtas_root;
 	unsigned int len;
 
@@ -270,7 +269,7 @@ static int __init find_min_common_depth(void)
 	 * configuration (should be all 0's) and the second is for a normal
 	 * NUMA configuration.
 	 */
-	ref_points = (unsigned int *)get_property(rtas_root,
+	ref_points = get_property(rtas_root,
 			"ibm,associativity-reference-points", &len);
 
 	if ((len >= 1) && ref_points) {
@@ -297,7 +296,7 @@ static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
 	of_node_put(memory);
 }
 
-static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
+static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
 {
 	unsigned long result = 0;
 
@@ -435,15 +434,13 @@ static int __init parse_numa_properties(void)
 		unsigned long size;
 		int nid;
 		int ranges;
-		unsigned int *memcell_buf;
+		const unsigned int *memcell_buf;
 		unsigned int len;
 
-		memcell_buf = (unsigned int *)get_property(memory,
+		memcell_buf = get_property(memory,
 			"linux,usable-memory", &len);
 		if (!memcell_buf || len <= 0)
-			memcell_buf =
-				(unsigned int *)get_property(memory, "reg",
-					&len);
+			memcell_buf = get_property(memory, "reg", &len);
 		if (!memcell_buf || len <= 0)
 			continue;
 
@@ -787,10 +784,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
 		unsigned long start, size;
 		int ranges;
-		unsigned int *memcell_buf;
+		const unsigned int *memcell_buf;
 		unsigned int len;
 
-		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
+		memcell_buf = get_property(memory, "reg", &len);
 		if (!memcell_buf || len <= 0)
 			continue;
 
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index e983972132d8c..07c47e8309eda 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -41,7 +41,7 @@ phys_addr_t get_immrbase(void)
 	soc = of_find_node_by_type(NULL, "soc");
 	if (soc) {
 		unsigned int size;
-		void *prop = get_property(soc, "reg", &size);
+		const void *prop = get_property(soc, "reg", &size);
 		immrbase = of_translate_address(soc, prop);
 		of_node_put(soc);
 	};
@@ -86,8 +86,8 @@ static int __init gfar_mdio_of_init(void)
 
 		while ((child = of_get_next_child(np, child)) != NULL) {
 			if (child->n_intrs) {
-				u32 *id =
-				    (u32 *) get_property(child, "reg", NULL);
+				const u32 *id =
+					get_property(child, "reg", NULL);
 				mdio_data.irq[*id] = child->intrs[0].line;
 			}
 		}
@@ -127,10 +127,10 @@ static int __init gfar_of_init(void)
 		struct resource r[4];
 		struct device_node *phy, *mdio;
 		struct gianfar_platform_data gfar_data;
-		unsigned int *id;
-		char *model;
-		void *mac_addr;
-		phandle *ph;
+		const unsigned int *id;
+		const char *model;
+		const void *mac_addr;
+		const phandle *ph;
 
 		memset(r, 0, sizeof(r));
 		memset(&gfar_data, 0, sizeof(gfar_data));
@@ -188,7 +188,7 @@ static int __init gfar_of_init(void)
 			    FSL_GIANFAR_DEV_HAS_VLAN |
 			    FSL_GIANFAR_DEV_HAS_EXTENDED_HASH;
 
-		ph = (phandle *) get_property(np, "phy-handle", NULL);
+		ph = get_property(np, "phy-handle", NULL);
 		phy = of_find_node_by_phandle(*ph);
 
 		if (phy == NULL) {
@@ -198,7 +198,7 @@ static int __init gfar_of_init(void)
 
 		mdio = of_get_parent(phy);
 
-		id = (u32 *) get_property(phy, "reg", NULL);
+		id = get_property(phy, "reg", NULL);
 		ret = of_address_to_resource(mdio, 0, &res);
 		if (ret) {
 			of_node_put(phy);
@@ -242,7 +242,7 @@ static int __init fsl_i2c_of_init(void)
 	     i++) {
 		struct resource r[2];
 		struct fsl_i2c_platform_data i2c_data;
-		unsigned char *flags = NULL;
+		const unsigned char *flags = NULL;
 
 		memset(&r, 0, sizeof(r));
 		memset(&i2c_data, 0, sizeof(i2c_data));
@@ -294,7 +294,7 @@ static int __init mpc83xx_wdt_init(void)
 	struct resource r;
 	struct device_node *soc, *np;
 	struct platform_device *dev;
-	unsigned int *freq;
+	const unsigned int *freq;
 	int ret;
 
 	np = of_find_compatible_node(NULL, "watchdog", "mpc83xx_wdt");
@@ -311,7 +311,7 @@ static int __init mpc83xx_wdt_init(void)
 		goto nosoc;
 	}
 
-	freq = (unsigned int *)get_property(soc, "bus-frequency", NULL);
+	freq = get_property(soc, "bus-frequency", NULL);
 	if (!freq) {
 		ret = -ENODEV;
 		goto err;
@@ -351,7 +351,7 @@ static int __init mpc83xx_wdt_init(void)
 arch_initcall(mpc83xx_wdt_init);
 #endif
 
-static enum fsl_usb2_phy_modes determine_usb_phy(char * phy_type)
+static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type)
 {
 	if (!phy_type)
 		return FSL_USB2_PHY_NONE;
@@ -379,7 +379,7 @@ static int __init fsl_usb_of_init(void)
 	     i++) {
 		struct resource r[2];
 		struct fsl_usb2_platform_data usb_data;
-		unsigned char *prop = NULL;
+		const unsigned char *prop = NULL;
 
 		memset(&r, 0, sizeof(r));
 		memset(&usb_data, 0, sizeof(usb_data));
@@ -428,7 +428,7 @@ static int __init fsl_usb_of_init(void)
 	     i++) {
 		struct resource r[2];
 		struct fsl_usb2_platform_data usb_data;
-		unsigned char *prop = NULL;
+		const unsigned char *prop = NULL;
 
 		memset(&r, 0, sizeof(r));
 		memset(&usb_data, 0, sizeof(usb_data));
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
index 615350d46b526..ff23f5a4d4b91 100644
--- a/arch/powerpc/sysdev/mmio_nvram.c
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -80,7 +80,7 @@ static ssize_t mmio_nvram_get_size(void)
 int __init mmio_nvram_init(void)
 {
 	struct device_node *nvram_node;
-	unsigned long *buffer;
+	const unsigned long *buffer;
 	int proplen;
 	unsigned long nvram_addr;
 	int ret;
@@ -91,7 +91,7 @@ int __init mmio_nvram_init(void)
 		goto out;
 
 	ret = -EIO;
-	buffer = (unsigned long *)get_property(nvram_node, "reg", &proplen);
+	buffer = get_property(nvram_node, "reg", &proplen);
 	if (proplen != 2*sizeof(unsigned long))
 		goto out;
 
diff --git a/include/asm-powerpc/ibmebus.h b/include/asm-powerpc/ibmebus.h
index 7a42723d107c2..7ab195a278885 100644
--- a/include/asm-powerpc/ibmebus.h
+++ b/include/asm-powerpc/ibmebus.h
@@ -48,7 +48,7 @@ extern struct dma_mapping_ops ibmebus_dma_ops;
 extern struct bus_type ibmebus_bus_type;
 
 struct ibmebus_dev {	
-	char *name;
+	const char *name;
 	struct of_device ofdev;
 };
 
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index 56f6ea0c76de9..abdf1be66e970 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -72,8 +72,8 @@ struct property {
 };
 
 struct device_node {
-	char	*name;
-	char	*type;
+	const char *name;
+	const char *type;
 	phandle	node;
 	phandle linux_phandle;
 	char	*full_name;
@@ -209,15 +209,15 @@ static inline u64 of_read_number(const u32 *cell, int size)
 /* Translate an OF address block into a CPU physical address
  */
 #define OF_BAD_ADDR	((u64)-1)
-extern u64 of_translate_address(struct device_node *np, u32 *addr);
+extern u64 of_translate_address(struct device_node *np, const u32 *addr);
 
 /* Extract an address from a device, returns the region size and
  * the address space flags too. The PCI version uses a BAR number
  * instead of an absolute index
  */
-extern u32 *of_get_address(struct device_node *dev, int index,
+extern const u32 *of_get_address(struct device_node *dev, int index,
 			   u64 *size, unsigned int *flags);
-extern u32 *of_get_pci_address(struct device_node *dev, int bar_no,
+extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no,
 			       u64 *size, unsigned int *flags);
 
 /* Get an address as a resource. Note that if your address is
@@ -234,7 +234,7 @@ extern int of_pci_address_to_resource(struct device_node *dev, int bar,
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
-void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop,
+void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size);
 
 extern void kdump_move_device_tree(void);
@@ -288,8 +288,8 @@ extern void of_irq_map_init(unsigned int flags);
  *
  */
 
-extern int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
-			  struct of_irq *out_irq);
+extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
+			  const u32 *addr, struct of_irq *out_irq);
 
 
 /***
diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h
index dc9bd101ca14c..4b51d42e1419d 100644
--- a/include/asm-powerpc/vio.h
+++ b/include/asm-powerpc/vio.h
@@ -46,8 +46,8 @@ struct iommu_table;
  */
 struct vio_dev {
 	struct iommu_table *iommu_table;     /* vio_map_* uses this */
-	char *name;
-	char *type;
+	const char *name;
+	const char *type;
 	uint32_t unit_address;
 	unsigned int irq;
 	struct device dev;
-- 
GitLab


From 954a46e2d5aec6f59976ddeb1d232b486e59b54a Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:39:43 +1000
Subject: [PATCH 0045/1063] [POWERPC] pseries: Constify & voidify
 get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

pseries platform changes.

Built for pseries_defconfig

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/eeh.c        | 12 ++++----
 arch/powerpc/platforms/pseries/eeh_driver.c |  4 +--
 arch/powerpc/platforms/pseries/eeh_event.c  |  4 +--
 arch/powerpc/platforms/pseries/firmware.c   |  2 +-
 arch/powerpc/platforms/pseries/iommu.c      | 13 ++++-----
 arch/powerpc/platforms/pseries/lpar.c       | 10 +++----
 arch/powerpc/platforms/pseries/nvram.c      |  5 ++--
 arch/powerpc/platforms/pseries/pci.c        |  2 +-
 arch/powerpc/platforms/pseries/ras.c        |  4 +--
 arch/powerpc/platforms/pseries/rtasd.c      |  4 +--
 arch/powerpc/platforms/pseries/setup.c      | 12 ++++----
 arch/powerpc/platforms/pseries/smp.c        |  8 +++---
 arch/powerpc/platforms/pseries/xics.c       | 22 +++++++-------
 drivers/char/hvc_vio.c                      |  4 +--
 drivers/char/hvsi.c                         |  7 ++---
 drivers/pci/hotplug/rpaphp_core.c           | 32 ++++++++++-----------
 16 files changed, 72 insertions(+), 73 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 32eaddfa5470a..5a23ce5e16ff1 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -691,11 +691,11 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 {
 	struct eeh_early_enable_info *info = data;
 	int ret;
-	char *status = get_property(dn, "status", NULL);
-	u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
-	u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
-	u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
-	u32 *regs;
+	const char *status = get_property(dn, "status", NULL);
+	const u32 *class_code = get_property(dn, "class-code", NULL);
+	const u32 *vendor_id = get_property(dn, "vendor-id", NULL);
+	const u32 *device_id = get_property(dn, "device-id", NULL);
+	const u32 *regs;
 	int enable;
 	struct pci_dn *pdn = PCI_DN(dn);
 
@@ -737,7 +737,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 
 	/* Ok... see if this device supports EEH.  Some do, some don't,
 	 * and the only way to find out is to check each and every one. */
-	regs = (u32 *)get_property(dn, "reg", NULL);
+	regs = get_property(dn, "reg", NULL);
 	if (regs) {
 		/* First register entry is addr (00BBSS00)  */
 		/* Try to enable eeh */
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index aaad2c0afcbf9..3269d2cd428bc 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -268,14 +268,14 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 
 	if (!frozen_dn) {
 
-		location = (char *) get_property(event->dn, "ibm,loc-code", NULL);
+		location = get_property(event->dn, "ibm,loc-code", NULL);
 		location = location ? location : "unknown";
 		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
 		                "for location=%s pci addr=%s\n",
 		        location, pci_name(event->dev));
 		return NULL;
 	}
-	location = (char *) get_property(frozen_dn, "ibm,loc-code", NULL);
+	location = get_property(frozen_dn, "ibm,loc-code", NULL);
 	location = location ? location : "unknown";
 
 	/* There are two different styles for coming up with the PE.
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 45ccc687e57cb..137077451316b 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -124,11 +124,11 @@ int eeh_send_failure_event (struct device_node *dn,
 {
 	unsigned long flags;
 	struct eeh_event *event;
-	char *location;
+	const char *location;
 
 	if (!mem_init_done) {
 		printk(KERN_ERR "EEH: event during early boot not handled\n");
-		location = (char *) get_property(dn, "ibm,loc-code", NULL);
+		location = get_property(dn, "ibm,loc-code", NULL);
 		printk(KERN_ERR "EEH: device node = %s\n", dn->full_name);
 		printk(KERN_ERR "EEH: PCI location = %s\n", location);
 		return 1;
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index c01d8f0cbe6d0..1c7b2baa5f73c 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -68,7 +68,7 @@ firmware_features_table[FIRMWARE_MAX_FEATURES] = {
 void __init fw_feature_init(void)
 {
 	struct device_node *dn;
-	char *hypertas, *s;
+	const char *hypertas, *s;
 	int len, i;
 
 	DBG(" -> fw_feature_init()\n");
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index d67af2c657544..bbf2e34dc3582 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -267,13 +267,13 @@ static void iommu_table_setparms(struct pci_controller *phb,
 				 struct iommu_table *tbl)
 {
 	struct device_node *node;
-	unsigned long *basep;
-	unsigned int *sizep;
+	const unsigned long *basep;
+	const unsigned int *sizep;	/* unsigned int, not long */
 
 	node = (struct device_node *)phb->arch_data;
 
-	basep = (unsigned long *)get_property(node, "linux,tce-base", NULL);
-	sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL);
+	basep = get_property(node, "linux,tce-base", NULL);
+	sizep = get_property(node, "linux,tce-size", NULL);
 	if (basep == NULL || sizep == NULL) {
 		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
 				"missing tce entries !\n", dn->full_name);
@@ -315,7 +314,7 @@ static void iommu_table_setparms(struct pci_controller *phb,
 static void iommu_table_setparms_lpar(struct pci_controller *phb,
 				      struct device_node *dn,
 				      struct iommu_table *tbl,
-				      unsigned char *dma_window)
+				      const void *dma_window)
 {
 	unsigned long offset, size;
 
@@ -415,7 +414,7 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus)
 	struct iommu_table *tbl;
 	struct device_node *dn, *pdn;
 	struct pci_dn *ppci;
-	unsigned char *dma_window = NULL;
+	const void *dma_window = NULL;
 
 	DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self);
 
@@ -519,7 +518,7 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev)
 {
 	struct device_node *pdn, *dn;
 	struct iommu_table *tbl;
-	unsigned char *dma_window = NULL;
+	const void *dma_window = NULL;
 	struct pci_dn *pci;
 
 	DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev));
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3aeb406990420..4cb7ff227f72e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -204,20 +204,20 @@ void __init udbg_init_debug_lpar(void)
 void __init find_udbg_vterm(void)
 {
 	struct device_node *stdout_node;
-	u32 *termno;
-	char *name;
+	const u32 *termno;
+	const char *name;
 	int add_console;
 
 	/* find the boot console from /chosen/stdout */
 	if (!of_chosen)
 		return;
-	name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	name = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (name == NULL)
 		return;
 	stdout_node = of_find_node_by_path(name);
 	if (!stdout_node)
 		return;
-	name = (char *)get_property(stdout_node, "name", NULL);
+	name = get_property(stdout_node, "name", NULL);
 	if (!name) {
 		printk(KERN_WARNING "stdout node missing 'name' property!\n");
 		goto out;
@@ -228,7 +228,7 @@ void __init find_udbg_vterm(void)
 	/* Check if it's a virtual terminal */
 	if (strncmp(name, "vty", 3) != 0)
 		goto out;
-	termno = (u32 *)get_property(stdout_node, "reg", NULL);
+	termno = get_property(stdout_node, "reg", NULL);
 	if (termno == NULL)
 		goto out;
 	vtermno = termno[0];
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 18abfb1f4e248..64163cecdf93f 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -123,13 +123,14 @@ static ssize_t pSeries_nvram_get_size(void)
 int __init pSeries_nvram_init(void)
 {
 	struct device_node *nvram;
-	unsigned int *nbytes_p, proplen;
+	const unsigned int *nbytes_p;
+	unsigned int proplen;
 
 	nvram = of_find_node_by_type(NULL, "nvram");
 	if (nvram == NULL)
 		return -ENODEV;
 
-	nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
+	nbytes_p = get_property(nvram, "#bytes", &proplen);
 	if (nbytes_p == NULL || proplen != sizeof(unsigned int))
 		return -EIO;
 
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index e97e67f5e0791..410a6bcc4ca00 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -60,7 +60,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
 static void __devinit check_s7a(void)
 {
 	struct device_node *root;
-	char *model;
+	const char *model;
 
 	s7a_workaround = 0;
 	root = of_find_node_by_path("/");
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9df783088b618..0e6339ee45a15 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -79,7 +79,7 @@ static void request_ras_irqs(struct device_node *np,
 {
 	int i, index, count = 0;
 	struct of_irq oirq;
-	u32 *opicprop;
+	const u32 *opicprop;
 	unsigned int opicplen;
 	unsigned int virqs[16];
 
@@ -87,7 +87,7 @@ static void request_ras_irqs(struct device_node *np,
 	 * map those interrupts using the default interrupt host and default
 	 * trigger
 	 */
-	opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen);
+	opicprop = get_property(np, "open-pic-interrupt", &opicplen);
 	if (opicprop) {
 		opicplen /= sizeof(u32);
 		for (i = 0; i < opicplen; i++) {
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index 2e4e04042d857..8ca2612221d65 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -359,11 +359,11 @@ static int enable_surveillance(int timeout)
 static int get_eventscan_parms(void)
 {
 	struct device_node *node;
-	int *ip;
+	const int *ip;
 
 	node = of_find_node_by_path("/rtas");
 
-	ip = (int *)get_property(node, "rtas-event-scan-rate", NULL);
+	ip = get_property(node, "rtas-event-scan-rate", NULL);
 	if (ip == NULL) {
 		printk(KERN_ERR "rtasd: no rtas-event-scan-rate\n");
 		of_node_put(node);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 54a52437265c5..927e0a423b87a 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -133,9 +133,9 @@ void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc,
 static void __init pseries_mpic_init_IRQ(void)
 {
 	struct device_node *np, *old, *cascade = NULL;
-        unsigned int *addrp;
+        const unsigned int *addrp;
 	unsigned long intack = 0;
-	unsigned int *opprop;
+	const unsigned int *opprop;
 	unsigned long openpic_addr = 0;
 	unsigned int cascade_irq;
 	int naddr, n, i, opplen;
@@ -143,7 +143,7 @@ static void __init pseries_mpic_init_IRQ(void)
 
 	np = of_find_node_by_path("/");
 	naddr = prom_n_addr_cells(np);
-	opprop = (unsigned int *) get_property(np, "platform-open-pic", &opplen);
+	opprop = get_property(np, "platform-open-pic", &opplen);
 	if (opprop != 0) {
 		openpic_addr = of_read_number(opprop, naddr);
 		printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
@@ -192,7 +192,7 @@ static void __init pseries_mpic_init_IRQ(void)
 			break;
 		if (strcmp(np->name, "pci") != 0)
 			continue;
-		addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge",
+		addrp = get_property(np, "8259-interrupt-acknowledge",
 					    NULL);
 		if (addrp == NULL)
 			continue;
@@ -249,11 +249,11 @@ static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
 static void __init pseries_discover_pic(void)
 {
 	struct device_node *np;
-	char *typep;
+	const char *typep;
 
 	for (np = NULL; (np = of_find_node_by_name(np,
 						   "interrupt-controller"));) {
-		typep = (char *)get_property(np, "compatible", NULL);
+		typep = get_property(np, "compatible", NULL);
 		if (strstr(typep, "open-pic")) {
 			pSeries_mpic_node = of_node_get(np);
 			ppc_md.init_IRQ       = pseries_mpic_init_IRQ;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index ac61098ff401c..f39dad8b99e0d 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -145,9 +145,9 @@ static int pSeries_add_processor(struct device_node *np)
 	unsigned int cpu;
 	cpumask_t candidate_map, tmp = CPU_MASK_NONE;
 	int err = -ENOSPC, len, nthreads, i;
-	u32 *intserv;
+	const u32 *intserv;
 
-	intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
+	intserv = get_property(np, "ibm,ppc-interrupt-server#s", &len);
 	if (!intserv)
 		return 0;
 
@@ -205,9 +205,9 @@ static void pSeries_remove_processor(struct device_node *np)
 {
 	unsigned int cpu;
 	int len, nthreads, i;
-	u32 *intserv;
+	const u32 *intserv;
 
-	intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
+	intserv = get_property(np, "ibm,ppc-interrupt-server#s", &len);
 	if (!intserv)
 		return;
 
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 716972aa97772..7564210494414 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -604,14 +604,14 @@ static void __init xics_init_one_node(struct device_node *np,
 				      unsigned int *indx)
 {
 	unsigned int ilen;
-	u32 *ireg;
+	const u32 *ireg;
 
 	/* This code does the theorically broken assumption that the interrupt
 	 * server numbers are the same as the hard CPU numbers.
 	 * This happens to be the case so far but we are playing with fire...
 	 * should be fixed one of these days. -BenH.
 	 */
-	ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL);
+	ireg = get_property(np, "ibm,interrupt-server-ranges", NULL);
 
 	/* Do that ever happen ? we'll know soon enough... but even good'old
 	 * f80 does have that property ..
@@ -623,7 +623,7 @@ static void __init xics_init_one_node(struct device_node *np,
 		 */
 		*indx = *ireg;
 	}
-	ireg = (u32 *)get_property(np, "reg", &ilen);
+	ireg = get_property(np, "reg", &ilen);
 	if (!ireg)
 		panic("xics_init_IRQ: can't find interrupt reg property");
 
@@ -649,7 +649,7 @@ static void __init xics_setup_8259_cascade(void)
 {
 	struct device_node *np, *old, *found = NULL;
 	int cascade, naddr;
-	u32 *addrp;
+	const u32 *addrp;
 	unsigned long intack = 0;
 
 	for_each_node_by_type(np, "interrupt-controller")
@@ -675,7 +675,7 @@ static void __init xics_setup_8259_cascade(void)
 			break;
 		if (strcmp(np->name, "pci") != 0)
 			continue;
-		addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL);
+		addrp = get_property(np, "8259-interrupt-acknowledge", NULL);
 		if (addrp == NULL)
 			continue;
 		naddr = prom_n_addr_cells(np);
@@ -694,7 +694,8 @@ void __init xics_init_IRQ(void)
 {
 	int i;
 	struct device_node *np;
-	u32 *ireg, ilen, indx = 0;
+	u32 ilen, indx = 0;
+	const u32 *ireg;
 	int found = 0;
 
 	ppc64_boot_msg(0x20, "XICS Init");
@@ -719,18 +720,17 @@ void __init xics_init_IRQ(void)
 	for (np = of_find_node_by_type(NULL, "cpu");
 	     np;
 	     np = of_find_node_by_type(np, "cpu")) {
-		ireg = (u32 *)get_property(np, "reg", &ilen);
+		ireg = get_property(np, "reg", &ilen);
 		if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) {
-			ireg = (u32 *)get_property(np,
-						  "ibm,ppc-interrupt-gserver#s",
-						   &ilen);
+			ireg = get_property(np,
+					"ibm,ppc-interrupt-gserver#s", &ilen);
 			i = ilen / sizeof(int);
 			if (ireg && i > 0) {
 				default_server = ireg[0];
 				/* take last element */
 				default_distrib_server = ireg[i-1];
 			}
-			ireg = (u32 *)get_property(np,
+			ireg = get_property(np,
 					"ibm,interrupt-server#-size", NULL);
 			if (ireg)
 				interrupt_server_size = *ireg;
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 651e5d25f58b6..cc95941148fbf 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -141,7 +141,7 @@ static int hvc_find_vtys(void)
 
 	for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL;
 			vty = of_find_node_by_name(vty, "vty")) {
-		uint32_t *vtermno;
+		const uint32_t *vtermno;
 
 		/* We have statically defined space for only a certain number
 		 * of console adapters.
@@ -149,7 +149,7 @@ static int hvc_find_vtys(void)
 		if (num_found >= MAX_NR_HVC_CONSOLES)
 			break;
 
-		vtermno = (uint32_t *)get_property(vty, "reg", NULL);
+		vtermno = get_property(vty, "reg", NULL);
 		if (!vtermno)
 			continue;
 
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index 56612a2dca6be..542de0e51f353 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -1276,11 +1276,10 @@ static int __init hvsi_console_init(void)
 			vty != NULL;
 			vty = of_find_compatible_node(vty, "serial", "hvterm-protocol")) {
 		struct hvsi_struct *hp;
-		uint32_t *vtermno;
-		uint32_t *irq;
+		const uint32_t *vtermno, *irq;
 
-		vtermno = (uint32_t *)get_property(vty, "reg", NULL);
-		irq = (uint32_t *)get_property(vty, "interrupts", NULL);
+		vtermno = get_property(vty, "reg", NULL);
+		irq = get_property(vty, "interrupts", NULL);
 		if (!vtermno || !irq)
 			continue;
 
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 076bd6dcafae6..7288a3eccfb3e 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -176,16 +176,16 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
 	return 0;
 }
 
-static int get_children_props(struct device_node *dn, int **drc_indexes,
-		int **drc_names, int **drc_types, int **drc_power_domains)
+static int get_children_props(struct device_node *dn, const int **drc_indexes,
+		const int **drc_names, const int **drc_types,
+		const int **drc_power_domains)
 {
-	int *indexes, *names;
-	int *types, *domains;
+	const int *indexes, *names, *types, *domains;
 
-	indexes = (int *) get_property(dn, "ibm,drc-indexes", NULL);
-	names = (int *) get_property(dn, "ibm,drc-names", NULL);
-	types = (int *) get_property(dn, "ibm,drc-types", NULL);
-	domains = (int *) get_property(dn, "ibm,drc-power-domains", NULL);
+	indexes = get_property(dn, "ibm,drc-indexes", NULL);
+	names = get_property(dn, "ibm,drc-names", NULL);
+	types = get_property(dn, "ibm,drc-types", NULL);
+	domains = get_property(dn, "ibm,drc-power-domains", NULL);
 
 	if (!indexes || !names || !types || !domains) {
 		/* Slot does not have dynamically-removable children */
@@ -212,13 +212,13 @@ static int get_children_props(struct device_node *dn, int **drc_indexes,
 int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
 		char **drc_name, char **drc_type, int *drc_power_domain)
 {
-	int *indexes, *names;
-	int *types, *domains;
-	unsigned int *my_index;
+	const int *indexes, *names;
+	const int *types, *domains;
+	const unsigned int *my_index;
 	char *name_tmp, *type_tmp;
 	int i, rc;
 
-	my_index = (int *) get_property(dn, "ibm,my-drc-index", NULL);
+	my_index = get_property(dn, "ibm,my-drc-index", NULL);
 	if (!my_index) {
 		/* Node isn't DLPAR/hotplug capable */
 		return -EINVAL;
@@ -265,10 +265,10 @@ static int is_php_type(char *drc_type)
 	return 1;
 }
 
-static int is_php_dn(struct device_node *dn, int **indexes, int **names,
-		int **types, int **power_domains)
+static int is_php_dn(struct device_node *dn, const int **indexes,
+		const int **names, const int **types, const int **power_domains)
 {
-	int *drc_types;
+	const int *drc_types;
 	int rc;
 
 	rc = get_children_props(dn, indexes, names, &drc_types, power_domains);
@@ -296,7 +296,7 @@ int rpaphp_add_slot(struct device_node *dn)
 	struct slot *slot;
 	int retval = 0;
 	int i;
-	int *indexes, *names, *types, *power_domains;
+	const int *indexes, *names, *types, *power_domains;
 	char *name, *type;
 
 	dbg("Entry %s: dn->full_name=%s\n", __FUNCTION__, dn->full_name);
-- 
GitLab


From c4c7cba90cf9f180a2c45f7e54143f786360f3dd Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:39:42 +1000
Subject: [PATCH 0046/1063] [POWERPC] iseries: Constify & voidify
 get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

iseries platform changes.

Built for iseries_defconfig

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/iseries/iommu.c | 2 +-
 arch/powerpc/platforms/iseries/pci.c   | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 2c3dbcd4613cb..f4cbbcf8773a7 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -179,7 +179,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn)
 {
 	struct iommu_table *tbl;
 	struct pci_dn *pdn = PCI_DN(dn);
-	u32 *lsn = (u32 *)get_property(dn, "linux,logical-slot-number", NULL);
+	const u32 *lsn = get_property(dn, "linux,logical-slot-number", NULL);
 
 	BUG_ON(lsn == NULL);
 
diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c
index 35bcc98111f5b..f4d427a7bb2d6 100644
--- a/arch/powerpc/platforms/iseries/pci.c
+++ b/arch/powerpc/platforms/iseries/pci.c
@@ -176,12 +176,12 @@ void iSeries_pcibios_init(void)
 	}
 	while ((node = of_get_next_child(root, node)) != NULL) {
 		HvBusNumber bus;
-		u32 *busp;
+		const u32 *busp;
 
 		if ((node->type == NULL) || (strcmp(node->type, "pci") != 0))
 			continue;
 
-		busp = (u32 *)get_property(node, "bus-range", NULL);
+		busp = get_property(node, "bus-range", NULL);
 		if (busp == NULL)
 			continue;
 		bus = *busp;
@@ -221,10 +221,9 @@ void __init iSeries_pci_final_fixup(void)
 
 		if (node != NULL) {
 			struct pci_dn *pdn = PCI_DN(node);
-			u32 *agent;
+			const u32 *agent;
 
-			agent = (u32 *)get_property(node, "linux,agent-id",
-					NULL);
+			agent = get_property(node, "linux,agent-id", NULL);
 			if ((pdn != NULL) && (agent != NULL)) {
 				u8 irq = iSeries_allocate_IRQ(pdn->busno, 0,
 						pdn->bussubno);
-- 
GitLab


From 8efca49329a50710d656a8bb78d6f0f0e2f48a26 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:39:42 +1000
Subject: [PATCH 0047/1063] [POWERPC] mpc: Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

mpc* platform changes.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/83xx/mpc834x_itx.c         | 4 ++--
 arch/powerpc/platforms/83xx/mpc834x_sys.c         | 4 ++--
 arch/powerpc/platforms/83xx/pci.c                 | 4 ++--
 arch/powerpc/platforms/85xx/mpc85xx_ads.c         | 4 ++--
 arch/powerpc/platforms/85xx/mpc85xx_cds.c         | 4 ++--
 arch/powerpc/platforms/85xx/pci.c                 | 4 ++--
 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c        | 4 ++--
 arch/powerpc/platforms/86xx/pci.c                 | 4 ++--
 arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 8 ++++----
 9 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
index b46305645d381..d9675f9b9766f 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_itx.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -80,8 +80,8 @@ static void __init mpc834x_itx_setup_arch(void)
 
 	np = of_find_node_by_type(NULL, "cpu");
 	if (np != 0) {
-		unsigned int *fp =
-		    (int *)get_property(np, "clock-frequency", NULL);
+		const unsigned int *fp =
+			get_property(np, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/83xx/mpc834x_sys.c b/arch/powerpc/platforms/83xx/mpc834x_sys.c
index 3e1c16eb4a639..5eadf9d035f13 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_sys.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_sys.c
@@ -84,8 +84,8 @@ static void __init mpc834x_sys_setup_arch(void)
 
 	np = of_find_node_by_type(NULL, "cpu");
 	if (np != 0) {
-		unsigned int *fp =
-		    (int *)get_property(np, "clock-frequency", NULL);
+		const unsigned int *fp =
+			get_property(np, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/83xx/pci.c b/arch/powerpc/platforms/83xx/pci.c
index 3b5e563c279f9..9c3650555144d 100644
--- a/arch/powerpc/platforms/83xx/pci.c
+++ b/arch/powerpc/platforms/83xx/pci.c
@@ -50,7 +50,7 @@ int __init add_bridge(struct device_node *dev)
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1, has_address = 0;
 	phys_addr_t immr = get_immrbase();
 
@@ -60,7 +60,7 @@ int __init add_bridge(struct device_node *dev)
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *)get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 06a497676c992..d0cfcdb1d1b5a 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -172,9 +172,9 @@ static void __init mpc85xx_ads_setup_arch(void)
 
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 18e6e11f70202..5fd53eba6912d 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -273,9 +273,9 @@ mpc85xx_cds_setup_arch(void)
 
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/85xx/pci.c b/arch/powerpc/platforms/85xx/pci.c
index 1d51f3242ab1b..05930eeb6e7f8 100644
--- a/arch/powerpc/platforms/85xx/pci.c
+++ b/arch/powerpc/platforms/85xx/pci.c
@@ -41,7 +41,7 @@ int __init add_bridge(struct device_node *dev)
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1, has_address = 0;
 	phys_addr_t immr = get_immrbase();
 
@@ -51,7 +51,7 @@ int __init add_bridge(struct device_node *dev)
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index ebae73eb00630..839090682ab21 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -323,9 +323,9 @@ mpc86xx_hpcn_setup_arch(void)
 
 	np = of_find_node_by_type(NULL, "cpu");
 	if (np != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(np, "clock-frequency", NULL);
+		fp = get_property(np, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/86xx/pci.c b/arch/powerpc/platforms/86xx/pci.c
index bc5139043112e..d7050c1108ff1 100644
--- a/arch/powerpc/platforms/86xx/pci.c
+++ b/arch/powerpc/platforms/86xx/pci.c
@@ -153,7 +153,7 @@ int __init add_bridge(struct device_node *dev)
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int has_address = 0;
 	int primary = 0;
 
@@ -163,7 +163,7 @@ int __init add_bridge(struct device_node *dev)
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int))
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index d7a4fc7ca238a..69c998cb4f1b9 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -130,7 +130,7 @@ void mpc7448_hpc2_fixup_irq(struct pci_dev *dev)
 {
 	struct pci_controller *hose;
 	struct device_node *node;
-	unsigned int *interrupt;
+	const unsigned int *interrupt;
 	int busnr;
 	int len;
 	u8 slot;
@@ -147,7 +147,7 @@ void mpc7448_hpc2_fixup_irq(struct pci_dev *dev)
 	if (!node)
 		printk(KERN_ERR "No pci node found\n");
 
-	interrupt = (unsigned int *) get_property(node, "interrupt-map", &len);
+	interrupt = get_property(node, "interrupt-map", &len);
 	slot = find_slot_by_devfn(interrupt, dev->devfn);
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 	if (pin == 0 || pin > 4)
@@ -176,9 +176,9 @@ static void __init mpc7448_hpc2_setup_arch(void)
 
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
-- 
GitLab


From c61c27d58af61e5b78257019b173732c29ce0c64 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:39:54 +1000
Subject: [PATCH 0048/1063] [POWERPC] cell: Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

cell platform changes.

Built for cell_defconfig

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/cbe_regs.c   |  8 +++-----
 arch/powerpc/platforms/cell/interrupt.c  |  5 ++---
 arch/powerpc/platforms/cell/iommu.c      | 22 ++++++++++++----------
 arch/powerpc/platforms/cell/spider-pic.c | 10 +++++-----
 arch/powerpc/platforms/cell/spu_base.c   | 18 +++++++++---------
 include/asm-powerpc/spu.h                |  2 +-
 6 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index ce696c1cca75b..3f3859d12e003 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -97,7 +97,7 @@ void __init cbe_regs_init(void)
 		struct cbe_regs_map *map = &cbe_regs_maps[cbe_regs_map_count++];
 
 		/* That hack must die die die ! */
-		struct address_prop {
+		const struct address_prop {
 			unsigned long address;
 			unsigned int len;
 		} __attribute__((packed)) *prop;
@@ -114,13 +114,11 @@ void __init cbe_regs_init(void)
 			if (cbe_thread_map[i].cpu_node == cpu)
 				cbe_thread_map[i].regs = map;
 
-		prop = (struct address_prop *)get_property(cpu, "pervasive",
-							   NULL);
+		prop = get_property(cpu, "pervasive", NULL);
 		if (prop != NULL)
 			map->pmd_regs = ioremap(prop->address, prop->len);
 
-		prop = (struct address_prop *)get_property(cpu, "iic",
-							   NULL);
+		prop = get_property(cpu, "iic", NULL);
 		if (prop != NULL)
 			map->iic_regs = ioremap(prop->address, prop->len);
 	}
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 9d5da78968927..b26b496f65487 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -250,15 +250,14 @@ static int __init setup_iic(void)
 	struct resource r0, r1;
 	struct irq_host *host;
 	int found = 0;
- 	u32 *np;
+ 	const u32 *np;
 
 	for (dn = NULL;
 	     (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
 		if (!device_is_compatible(dn,
 				     "IBM,CBEA-Internal-Interrupt-Controller"))
 			continue;
- 		np = (u32 *)get_property(dn, "ibm,interrupt-server-ranges",
-					 NULL);
+		np = get_property(dn, "ibm,interrupt-server-ranges", NULL);
  		if (np == NULL) {
 			printk(KERN_WARNING "IIC: CPU association not found\n");
 			of_node_put(dn);
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index a35004e14c699..d2b20eba5b872 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -308,15 +308,16 @@ static void cell_do_map_iommu(struct cell_iommu *iommu,
 
 static void iommu_devnode_setup(struct device_node *d)
 {
-	unsigned int *ioid;
-	unsigned long *dma_window, map_start, map_size, token;
+	const unsigned int *ioid;
+	unsigned long map_start, map_size, token;
+	const unsigned long *dma_window;
 	struct cell_iommu *iommu;
 
-	ioid = (unsigned int *)get_property(d, "ioid", NULL);
+	ioid = get_property(d, "ioid", NULL);
 	if (!ioid)
 		pr_debug("No ioid entry found !\n");
 
-	dma_window = (unsigned long *)get_property(d, "ibm,dma-window", NULL);
+	dma_window = get_property(d, "ibm,dma-window", NULL);
 	if (!dma_window)
 		pr_debug("No ibm,dma-window entry found !\n");
 
@@ -371,8 +372,9 @@ static int cell_map_iommu_hardcoded(int num_nodes)
 
 static int cell_map_iommu(void)
 {
-	unsigned int num_nodes = 0, *node_id;
-	unsigned long *base, *mmio_base;
+	unsigned int num_nodes = 0;
+	const unsigned int *node_id;
+	const unsigned long *base, *mmio_base;
 	struct device_node *dn;
 	struct cell_iommu *iommu = NULL;
 
@@ -381,7 +383,7 @@ static int cell_map_iommu(void)
 	for(dn = of_find_node_by_type(NULL, "cpu");
 	    dn;
 	    dn = of_find_node_by_type(dn, "cpu")) {
-		node_id = (unsigned int *)get_property(dn, "node-id", NULL);
+		node_id = get_property(dn, "node-id", NULL);
 
 		if (num_nodes < *node_id)
 			num_nodes = *node_id;
@@ -396,9 +398,9 @@ static int cell_map_iommu(void)
 	    dn;
 	    dn = of_find_node_by_type(dn, "cpu")) {
 
-		node_id = (unsigned int *)get_property(dn, "node-id", NULL);
-		base = (unsigned long *)get_property(dn, "ioc-cache", NULL);
-		mmio_base = (unsigned long *)get_property(dn, "ioc-translation", NULL);
+		node_id = get_property(dn, "node-id", NULL);
+		base = get_property(dn, "ioc-cache", NULL);
+		mmio_base = get_property(dn, "ioc-translation", NULL);
 
 		if (!base || !mmio_base || !node_id)
 			return cell_map_iommu_hardcoded(num_nodes);
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index ae7ef88f1a372..ab4c252a4d9b7 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -230,7 +230,7 @@ static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc,
 static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 {
 	unsigned int virq;
-	u32 *imap, *tmp;
+	const u32 *imap, *tmp;
 	int imaplen, intsize, unit;
 	struct device_node *iic;
 	struct irq_host *iic_host;
@@ -248,25 +248,25 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 #endif
 
 	/* Now do the horrible hacks */
-	tmp = (u32 *)get_property(pic->of_node, "#interrupt-cells", NULL);
+	tmp = get_property(pic->of_node, "#interrupt-cells", NULL);
 	if (tmp == NULL)
 		return NO_IRQ;
 	intsize = *tmp;
-	imap = (u32 *)get_property(pic->of_node, "interrupt-map", &imaplen);
+	imap = get_property(pic->of_node, "interrupt-map", &imaplen);
 	if (imap == NULL || imaplen < (intsize + 1))
 		return NO_IRQ;
 	iic = of_find_node_by_phandle(imap[intsize]);
 	if (iic == NULL)
 		return NO_IRQ;
 	imap += intsize + 1;
-	tmp = (u32 *)get_property(iic, "#interrupt-cells", NULL);
+	tmp = get_property(iic, "#interrupt-cells", NULL);
 	if (tmp == NULL)
 		return NO_IRQ;
 	intsize = *tmp;
 	/* Assume unit is last entry of interrupt specifier */
 	unit = imap[intsize - 1];
 	/* Ok, we have a unit, now let's try to get the node */
-	tmp = (u32 *)get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	tmp = get_property(iic, "ibm,interrupt-server-ranges", NULL);
 	if (tmp == NULL) {
 		of_node_put(iic);
 		return NO_IRQ;
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 5d2313a6c82bf..86d55675e1d2b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -488,10 +488,10 @@ int spu_irq_class_1_bottom(struct spu *spu)
 
 static int __init find_spu_node_id(struct device_node *spe)
 {
-	unsigned int *id;
+	const unsigned int *id;
 	struct device_node *cpu;
 	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
+	id = get_property(cpu, "node-id", NULL);
 	return id ? *id : 0;
 }
 
@@ -500,7 +500,7 @@ static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe,
 {
 	static DEFINE_MUTEX(add_spumem_mutex);
 
-	struct address_prop {
+	const struct address_prop {
 		unsigned long address;
 		unsigned int len;
 	} __attribute__((packed)) *p;
@@ -511,7 +511,7 @@ static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe,
 	struct zone *zone;
 	int ret;
 
-	p = (void*)get_property(spe, prop, &proplen);
+	p = get_property(spe, prop, &proplen);
 	WARN_ON(proplen != sizeof (*p));
 
 	start_pfn = p->address >> PAGE_SHIFT;
@@ -531,12 +531,12 @@ static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe,
 static void __iomem * __init map_spe_prop(struct spu *spu,
 		struct device_node *n, const char *name)
 {
-	struct address_prop {
+	const struct address_prop {
 		unsigned long address;
 		unsigned int len;
 	} __attribute__((packed)) *prop;
 
-	void *p;
+	const void *p;
 	int proplen;
 	void* ret = NULL;
 	int err = 0;
@@ -570,14 +570,14 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 {
 	struct irq_host *host;
 	unsigned int isrc;
-	u32 *tmp;
+	const u32 *tmp;
 
 	host = iic_get_irq_host(spu->node);
 	if (host == NULL)
 		return -ENODEV;
 
 	/* Get the interrupt source from the device-tree */
-	tmp = (u32 *)get_property(np, "isrc", NULL);
+	tmp = get_property(np, "isrc", NULL);
 	if (!tmp)
 		return -ENODEV;
 	spu->isrc = isrc = tmp[0];
@@ -593,7 +593,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 
 static int __init spu_map_device(struct spu *spu, struct device_node *node)
 {
-	char *prop;
+	const char *prop;
 	int ret;
 
 	ret = -ENODEV;
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index c02d105d8294d..b42b53c40f5dc 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -106,7 +106,7 @@ struct spu_context;
 struct spu_runqueue;
 
 struct spu {
-	char *name;
+	const char *name;
 	unsigned long local_store_phys;
 	u8 *local_store;
 	unsigned long problem_phys;
-- 
GitLab


From ae6b4101e53dcf8a41f3432dacca9d3eb34e9cc3 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:40:05 +1000
Subject: [PATCH 0049/1063] [POWERPC] chrp: Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

chrp platform changes.

Built for chrp32_defconfig

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/chrp/nvram.c |  5 +++--
 arch/powerpc/platforms/chrp/pci.c   | 11 +++++------
 arch/powerpc/platforms/chrp/setup.c | 21 +++++++++------------
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
index 150f67d6f90cd..0dd4a64757d92 100644
--- a/arch/powerpc/platforms/chrp/nvram.c
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -67,13 +67,14 @@ static void chrp_nvram_write(int addr, unsigned char val)
 void __init chrp_nvram_init(void)
 {
 	struct device_node *nvram;
-	unsigned int *nbytes_p, proplen;
+	const unsigned int *nbytes_p;
+	unsigned int proplen;
 
 	nvram = of_find_node_by_type(NULL, "nvram");
 	if (nvram == NULL)
 		return;
 
-	nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
+	nbytes_p = get_property(nvram, "#bytes", &proplen);
 	if (nbytes_p == NULL || proplen != sizeof(unsigned int))
 		return;
 
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 6d7ac649b45e8..0f4340506c758 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -214,11 +214,11 @@ void __init
 chrp_find_bridges(void)
 {
 	struct device_node *dev;
-	int *bus_range;
+	const int *bus_range;
 	int len, index = -1;
 	struct pci_controller *hose;
-	unsigned int *dma;
-	char *model, *machine;
+	const unsigned int *dma;
+	const char *model, *machine;
 	int is_longtrail = 0, is_mot = 0, is_pegasos = 0;
 	struct device_node *root = find_path_device("/");
 	struct resource r;
@@ -246,7 +246,7 @@ chrp_find_bridges(void)
 			       dev->full_name);
 			continue;
 		}
-		bus_range = (int *) get_property(dev, "bus-range", &len);
+		bus_range = get_property(dev, "bus-range", &len);
 		if (bus_range == NULL || len < 2 * sizeof(int)) {
 			printk(KERN_WARNING "Can't get bus-range for %s\n",
 				dev->full_name);
@@ -312,8 +312,7 @@ chrp_find_bridges(void)
 
 		/* check the first bridge for a property that we can
 		   use to set pci_dram_offset */
-		dma = (unsigned int *)
-			get_property(dev, "ibm,dma-ranges", &len);
+		dma = get_property(dev, "ibm,dma-ranges", &len);
 		if (index == 0 && dma != NULL && len >= 6 * sizeof(*dma)) {
 			pci_dram_offset = dma[2] - dma[3];
 			printk("pci_dram_offset = %lx\n", pci_dram_offset);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index be39742db809b..488dbd9b51ae1 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -226,8 +226,7 @@ static void __init pegasos_set_l2cr(void)
 	/* Enable L2 cache if needed */
 	np = find_type_devices("cpu");
 	if (np != NULL) {
-		unsigned int *l2cr = (unsigned int *)
-			get_property (np, "l2cr", NULL);
+		const unsigned int *l2cr = get_property(np, "l2cr", NULL);
 		if (l2cr == NULL) {
 			printk ("Pegasos l2cr : no cpu l2cr property found\n");
 			return;
@@ -252,7 +251,7 @@ static void briq_restart(char *cmd)
 void __init chrp_setup_arch(void)
 {
 	struct device_node *root = find_path_device ("/");
-	char *machine = NULL;
+	const char *machine = NULL;
 
 	/* init to some ~sane value until calibrate_delay() runs */
 	loops_per_jiffy = 50000000/HZ;
@@ -353,7 +352,7 @@ static void __init chrp_find_openpic(void)
 	struct device_node *np, *root;
 	int len, i, j;
 	int isu_size, idu_size;
-	unsigned int *iranges, *opprop = NULL;
+	const unsigned int *iranges, *opprop = NULL;
 	int oplen = 0;
 	unsigned long opaddr;
 	int na = 1;
@@ -363,8 +362,7 @@ static void __init chrp_find_openpic(void)
 		return;
 	root = of_find_node_by_path("/");
 	if (root) {
-		opprop = (unsigned int *) get_property
-			(root, "platform-open-pic", &oplen);
+		opprop = get_property(root, "platform-open-pic", &oplen);
 		na = prom_n_addr_cells(root);
 	}
 	if (opprop && oplen >= na * sizeof(unsigned int)) {
@@ -381,7 +379,7 @@ static void __init chrp_find_openpic(void)
 
 	printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
 
-	iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len);
+	iranges = get_property(np, "interrupt-ranges", &len);
 	if (iranges == NULL)
 		len = 0;	/* non-distributed mpic */
 	else
@@ -467,8 +465,8 @@ static void __init chrp_find_8259(void)
 	 * from anyway
 	 */
 	for (np = find_devices("pci"); np != NULL; np = np->next) {
-		unsigned int *addrp = (unsigned int *)
-			get_property(np, "8259-interrupt-acknowledge", NULL);
+		const unsigned int *addrp = get_property(np,
+				"8259-interrupt-acknowledge", NULL);
 
 		if (addrp == NULL)
 			continue;
@@ -527,7 +525,7 @@ void __init
 chrp_init2(void)
 {
 	struct device_node *device;
-	unsigned int *p = NULL;
+	const unsigned int *p = NULL;
 
 #ifdef CONFIG_NVRAM
 	chrp_nvram_init();
@@ -545,8 +543,7 @@ chrp_init2(void)
 	 */
 	device = find_devices("rtas");
 	if (device)
-		p = (unsigned int *) get_property
-			(device, "rtas-event-scan-rate", NULL);
+		p = get_property(device, "rtas-event-scan-rate", NULL);
 	if (p && *p) {
 		/*
 		 * Arrange to call chrp_event_scan at least *p times
-- 
GitLab


From eeb2b723ef5100fafa381d92eb70d83e98516a44 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:40:17 +1000
Subject: [PATCH 0050/1063] [POWERPC] maple: Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

maple platform changes.

Built for maple_defconfig

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/maple/pci.c   | 20 +++++++++++---------
 arch/powerpc/platforms/maple/setup.c | 27 ++++++++++-----------------
 2 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 63a1670d3bfd3..dc05af5156a99 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -38,16 +38,16 @@ static struct pci_controller *u3_agp, *u3_ht;
 static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
 {
 	for (; node != 0;node = node->sibling) {
-		int * bus_range;
-		unsigned int *class_code;
+		const int *bus_range;
+		const unsigned int *class_code;
 		int len;
 
 		/* For PCI<->PCI bridges or CardBus bridges, we go down */
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
 			continue;
-		bus_range = (int *) get_property(node, "bus-range", &len);
+		bus_range = get_property(node, "bus-range", &len);
 		if (bus_range != NULL && len > 2 * sizeof(int)) {
 			if (bus_range[1] > higher)
 				higher = bus_range[1];
@@ -65,16 +65,18 @@ static int __init fixup_one_level_bus_range(struct device_node *node, int higher
  */
 static void __init fixup_bus_range(struct device_node *bridge)
 {
-	int * bus_range;
+	int *bus_range;
+	struct property *prop;
 	int len;
 
 	/* Lookup the "bus-range" property for the hose */
-	bus_range = (int *) get_property(bridge, "bus-range", &len);
-	if (bus_range == NULL || len < 2 * sizeof(int)) {
+	prop = of_find_property(bridge, "bus-range", &len);
+	if (prop == NULL  || prop->value == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s\n",
 			       bridge->full_name);
 		return;
 	}
+	bus_range = (int *)prop->value;
 	bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
 }
 
@@ -314,12 +316,12 @@ static int __init add_bridge(struct device_node *dev)
 	int len;
 	struct pci_controller *hose;
 	char* disp_name;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1;
 
 	DBG("Adding PCI host bridge %s\n", dev->full_name);
 
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
 		dev->full_name);
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index cb528c9de4c36..ecc764a3ff3a3 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -99,8 +99,7 @@ static unsigned long maple_find_nvram_base(void)
 static void maple_restart(char *cmd)
 {
 	unsigned int maple_nvram_base;
-	unsigned int maple_nvram_offset;
-	unsigned int maple_nvram_command;
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
 	struct device_node *sp;
 
 	maple_nvram_base = maple_find_nvram_base();
@@ -113,14 +112,12 @@ static void maple_restart(char *cmd)
 		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
 		goto fail;
 	}
-	maple_nvram_offset = *(unsigned int*) get_property(sp,
-			"restart-addr", NULL);
-	maple_nvram_command = *(unsigned int*) get_property(sp,
-			"restart-value", NULL);
+	maple_nvram_offset = get_property(sp, "restart-addr", NULL);
+	maple_nvram_command = get_property(sp, "restart-value", NULL);
 	of_node_put(sp);
 
 	/* send command */
-	outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset);
+	outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset);
 	for (;;) ;
  fail:
 	printk(KERN_EMERG "Maple: Manual Restart Required\n");
@@ -129,8 +126,7 @@ static void maple_restart(char *cmd)
 static void maple_power_off(void)
 {
 	unsigned int maple_nvram_base;
-	unsigned int maple_nvram_offset;
-	unsigned int maple_nvram_command;
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
 	struct device_node *sp;
 
 	maple_nvram_base = maple_find_nvram_base();
@@ -143,14 +139,12 @@ static void maple_power_off(void)
 		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
 		goto fail;
 	}
-	maple_nvram_offset = *(unsigned int*) get_property(sp,
-			"power-off-addr", NULL);
-	maple_nvram_command = *(unsigned int*) get_property(sp,
-			"power-off-value", NULL);
+	maple_nvram_offset = get_property(sp, "power-off-addr", NULL);
+	maple_nvram_command = get_property(sp, "power-off-value", NULL);
 	of_node_put(sp);
 
 	/* send command */
-	outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset);
+	outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset);
 	for (;;) ;
  fail:
 	printk(KERN_EMERG "Maple: Manual Power-Down Required\n");
@@ -211,7 +205,7 @@ static void __init maple_init_early(void)
 static void __init maple_init_IRQ(void)
 {
 	struct device_node *root, *np, *mpic_node = NULL;
-	unsigned int *opprop;
+	const unsigned int *opprop;
 	unsigned long openpic_addr = 0;
 	int naddr, n, i, opplen, has_isus = 0;
 	struct mpic *mpic;
@@ -234,8 +228,7 @@ static void __init maple_init_IRQ(void)
 	/* Find address list in /platform-open-pic */
 	root = of_find_node_by_path("/");
 	naddr = prom_n_addr_cells(root);
-	opprop = (unsigned int *) get_property(root, "platform-open-pic",
-					       &opplen);
+	opprop = get_property(root, "platform-open-pic", &opplen);
 	if (opprop != 0) {
 		openpic_addr = of_read_number(opprop, naddr);
 		has_isus = (opplen > naddr);
-- 
GitLab


From 018a3d1db7cdb6127656c1622ee1d2302e16436d Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:40:29 +1000
Subject: [PATCH 0051/1063] [POWERPC] powermac: Constify & voidify
 get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

powermac platform & macintosh driver changes.

Built for pmac32_defconfig, g5_defconfig

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/powermac/backlight.c  |  3 +-
 arch/powerpc/platforms/powermac/cpufreq_32.c | 23 ++++++------
 arch/powerpc/platforms/powermac/cpufreq_64.c | 27 +++++++-------
 arch/powerpc/platforms/powermac/feature.c    | 30 ++++++++--------
 arch/powerpc/platforms/powermac/low_i2c.c    | 24 ++++++-------
 arch/powerpc/platforms/powermac/pci.c        | 37 +++++++++++---------
 arch/powerpc/platforms/powermac/pfunc_base.c |  2 +-
 arch/powerpc/platforms/powermac/pfunc_core.c |  5 +--
 arch/powerpc/platforms/powermac/setup.c      | 18 +++++-----
 arch/powerpc/platforms/powermac/smp.c        |  7 ++--
 arch/powerpc/platforms/powermac/udbg_scc.c   | 10 +++---
 drivers/i2c/busses/i2c-powermac.c            |  3 +-
 drivers/ide/ppc/pmac.c                       |  6 ++--
 drivers/macintosh/macio_asic.c               | 10 +++---
 drivers/macintosh/macio_sysfs.c              |  8 ++---
 drivers/macintosh/smu.c                      | 19 +++++-----
 drivers/macintosh/therm_adt746x.c            |  8 ++---
 drivers/macintosh/therm_pm72.c               | 14 ++++----
 drivers/macintosh/therm_windtunnel.c         |  4 +--
 drivers/macintosh/via-cuda.c                 |  4 +--
 drivers/macintosh/via-pmu-led.c              |  2 +-
 drivers/macintosh/via-pmu.c                  | 10 +++---
 drivers/macintosh/windfarm_pm81.c            |  4 +--
 drivers/macintosh/windfarm_pm91.c            |  2 +-
 drivers/macintosh/windfarm_smu_controls.c    | 13 +++----
 drivers/macintosh/windfarm_smu_sat.c         |  8 ++---
 drivers/macintosh/windfarm_smu_sensors.c     | 12 +++----
 drivers/serial/pmac_zilog.c                  |  9 ++---
 include/asm-powerpc/smu.h                    |  2 +-
 29 files changed, 166 insertions(+), 158 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c
index 69f65e215a5c8..205b4a3928622 100644
--- a/arch/powerpc/platforms/powermac/backlight.c
+++ b/arch/powerpc/platforms/powermac/backlight.c
@@ -38,7 +38,8 @@ int pmac_has_backlight_type(const char *type)
 	struct device_node* bk_node = find_devices("backlight");
 
 	if (bk_node) {
-		char *prop = get_property(bk_node, "backlight-control", NULL);
+		const char *prop = get_property(bk_node,
+				"backlight-control", NULL);
 		if (prop && strncmp(prop, type, strlen(type)) == 0)
 			return 1;
 	}
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index 62926248bdb83..c2b6b4134f684 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -421,7 +421,7 @@ static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 static u32 read_gpio(struct device_node *np)
 {
-	u32 *reg = (u32 *)get_property(np, "reg", NULL);
+	const u32 *reg = get_property(np, "reg", NULL);
 	u32 offset;
 
 	if (reg == NULL)
@@ -497,7 +497,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
 								"frequency-gpio");
 	struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL,
 								     "slewing-done");
-	u32 *value;
+	const u32 *value;
 
 	/*
 	 * Check to see if it's GPIO driven or PMU only
@@ -519,15 +519,15 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
 	 */
 	if (frequency_gpio && slew_done_gpio) {
 		int lenp, rc;
-		u32 *freqs, *ratio;
+		const u32 *freqs, *ratio;
 
-		freqs = (u32 *)get_property(cpunode, "bus-frequencies", &lenp);
+		freqs = get_property(cpunode, "bus-frequencies", &lenp);
 		lenp /= sizeof(u32);
 		if (freqs == NULL || lenp != 2) {
 			printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
 			return 1;
 		}
-		ratio = (u32 *)get_property(cpunode, "processor-to-bus-ratio*2", NULL);
+		ratio = get_property(cpunode, "processor-to-bus-ratio*2", NULL);
 		if (ratio == NULL) {
 			printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
 			return 1;
@@ -562,7 +562,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
 	/* If we use the PMU, look for the min & max frequencies in the
 	 * device-tree
 	 */
-	value = (u32 *)get_property(cpunode, "min-clock-frequency", NULL);
+	value = get_property(cpunode, "min-clock-frequency", NULL);
 	if (!value)
 		return 1;
 	low_freq = (*value) / 1000;
@@ -571,7 +571,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
 	if (low_freq < 100000)
 		low_freq *= 10;
 
-	value = (u32 *)get_property(cpunode, "max-clock-frequency", NULL);
+	value = get_property(cpunode, "max-clock-frequency", NULL);
 	if (!value)
 		return 1;
 	hi_freq = (*value) / 1000;
@@ -611,13 +611,14 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
 static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
 {
 	struct device_node *volt_gpio_np;
-	u32 pvr, *value;
+	u32 pvr;
+	const u32 *value;
 
 	if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
 		return 1;
 
 	hi_freq = cur_freq;
-	value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL);
+	value = get_property(cpunode, "reduced-clock-frequency", NULL);
 	if (!value)
 		return 1;
 	low_freq = (*value) / 1000;
@@ -650,7 +651,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
 static int __init pmac_cpufreq_setup(void)
 {
 	struct device_node	*cpunode;
-	u32			*value;
+	const u32		*value;
 
 	if (strstr(cmd_line, "nocpufreq"))
 		return 0;
@@ -661,7 +662,7 @@ static int __init pmac_cpufreq_setup(void)
 		goto out;
 
 	/* Get current cpu clock freq */
-	value = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	value = get_property(cpunode, "clock-frequency", NULL);
 	if (!value)
 		goto out;
 	cur_freq = (*value) / 1000;
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
index a6a84ac5433e8..c364c89adb4e9 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -89,7 +89,7 @@ static DEFINE_MUTEX(g5_switch_mutex);
 
 #ifdef CONFIG_PPC_SMU
 
-static u32 *g5_pmode_data;
+static const u32 *g5_pmode_data;
 static int g5_pmode_max;
 
 static struct smu_sdbp_fvt *g5_fvt_table;	/* table of op. points */
@@ -391,7 +391,8 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	unsigned int psize, ssize;
 	unsigned long max_freq;
 	char *freq_method, *volt_method;
-	u32 *valp, pvr_hi;
+	const u32 *valp;
+	u32 pvr_hi;
 	int use_volts_vdnap = 0;
 	int use_volts_smu = 0;
 	int rc = -ENODEV;
@@ -409,8 +410,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	/* Get first CPU node */
 	for (cpunode = NULL;
 	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
-		u32 *reg =
-			(u32 *)get_property(cpunode, "reg", NULL);
+		const u32 *reg = get_property(cpunode, "reg", NULL);
 		if (reg == NULL || (*reg) != 0)
 			continue;
 		if (!strcmp(cpunode->type, "cpu"))
@@ -422,7 +422,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	}
 
 	/* Check 970FX for now */
-	valp = (u32 *)get_property(cpunode, "cpu-version", NULL);
+	valp = get_property(cpunode, "cpu-version", NULL);
 	if (!valp) {
 		DBG("No cpu-version property !\n");
 		goto bail_noprops;
@@ -434,7 +434,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	}
 
 	/* Look for the powertune data in the device-tree */
-	g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize);
+	g5_pmode_data = get_property(cpunode, "power-mode-data",&psize);
 	if (!g5_pmode_data) {
 		DBG("No power-mode-data !\n");
 		goto bail_noprops;
@@ -442,7 +442,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	g5_pmode_max = psize / sizeof(u32) - 1;
 
 	if (use_volts_smu) {
-		struct smu_sdbp_header *shdr;
+		const struct smu_sdbp_header *shdr;
 
 		/* Look for the FVT table */
 		shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL);
@@ -493,7 +493,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	 * half freq in this version. So far, I haven't yet seen a machine
 	 * supporting anything else.
 	 */
-	valp = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	valp = get_property(cpunode, "clock-frequency", NULL);
 	if (!valp)
 		return -ENODEV;
 	max_freq = (*valp)/1000;
@@ -541,8 +541,8 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 {
 	struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL;
-	u8 *eeprom = NULL;
-	u32 *valp;
+	const u8 *eeprom = NULL;
+	const u32 *valp;
 	u64 max_freq, min_freq, ih, il;
 	int has_volt = 1, rc = 0;
 
@@ -563,7 +563,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 	/* Lookup the cpuid eeprom node */
         cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0");
 	if (cpuid != NULL)
-		eeprom = (u8 *)get_property(cpuid, "cpuid", NULL);
+		eeprom = get_property(cpuid, "cpuid", NULL);
 	if (eeprom == NULL) {
 		printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
 		rc = -ENODEV;
@@ -573,7 +573,8 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 	/* Lookup the i2c hwclock */
 	for (hwclock = NULL;
 	     (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
-		char *loc = get_property(hwclock, "hwctrl-location", NULL);
+		const char *loc = get_property(hwclock,
+				"hwctrl-location", NULL);
 		if (loc == NULL)
 			continue;
 		if (strcmp(loc, "CPU CLOCK"))
@@ -637,7 +638,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 	 */
 
 	/* Get max frequency from device-tree */
-	valp = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	valp = get_property(cpunode, "clock-frequency", NULL);
 	if (!valp) {
 		printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
 		rc = -ENODEV;
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index f8313bf9a9f75..13fcaf5b17960 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1058,8 +1058,8 @@ core99_reset_cpu(struct device_node *node, long param, long value)
 	if (np == NULL)
 		return -ENODEV;
 	for (np = np->child; np != NULL; np = np->sibling) {
-		u32 *num = (u32 *)get_property(np, "reg", NULL);
-		u32 *rst = (u32 *)get_property(np, "soft-reset", NULL);
+		const u32 *num = get_property(np, "reg", NULL);
+		const u32 *rst = get_property(np, "soft-reset", NULL);
 		if (num == NULL || rst == NULL)
 			continue;
 		if (param == *num) {
@@ -1087,7 +1087,7 @@ core99_usb_enable(struct device_node *node, long param, long value)
 {
 	struct macio_chip *macio;
 	unsigned long flags;
-	char *prop;
+	const char *prop;
 	int number;
 	u32 reg;
 
@@ -1096,7 +1096,7 @@ core99_usb_enable(struct device_node *node, long param, long value)
 	    macio->type != macio_intrepid)
 		return -ENODEV;
 
-	prop = (char *)get_property(node, "AAPL,clock-id", NULL);
+	prop = get_property(node, "AAPL,clock-id", NULL);
 	if (!prop)
 		return -ENODEV;
 	if (strncmp(prop, "usb0u048", 8) == 0)
@@ -1507,8 +1507,8 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
 	if (np == NULL)
 		return -ENODEV;
 	for (np = np->child; np != NULL; np = np->sibling) {
-		u32 *num = (u32 *)get_property(np, "reg", NULL);
-		u32 *rst = (u32 *)get_property(np, "soft-reset", NULL);
+		const u32 *num = get_property(np, "reg", NULL);
+		const u32 *rst = get_property(np, "soft-reset", NULL);
 		if (num == NULL || rst == NULL)
 			continue;
 		if (param == *num) {
@@ -2408,7 +2408,7 @@ static int __init probe_motherboard(void)
 	 */
 	dt = find_devices("device-tree");
 	if (dt != NULL)
-		model = (const char *) get_property(dt, "model", NULL);
+		model = get_property(dt, "model", NULL);
 	for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
 	    if (strcmp(model, pmac_mb_defs[i].model_string) == 0) {
 		pmac_mb = pmac_mb_defs[i];
@@ -2536,7 +2536,7 @@ static int __init probe_motherboard(void)
  */
 static void __init probe_uninorth(void)
 {
-	u32 *addrp;
+	const u32 *addrp;
 	phys_addr_t address;
 	unsigned long actrl;
 
@@ -2555,7 +2555,7 @@ static void __init probe_uninorth(void)
 	if (uninorth_node == NULL)
 		return;
 
-	addrp = (u32 *)get_property(uninorth_node, "reg", NULL);
+	addrp = get_property(uninorth_node, "reg", NULL);
 	if (addrp == NULL)
 		return;
 	address = of_translate_address(uninorth_node, addrp);
@@ -2596,7 +2596,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
 	struct device_node*	node;
 	int			i;
 	volatile u32 __iomem	*base;
-	u32			*addrp, *revp;
+	const u32		*addrp, *revp;
 	phys_addr_t		addr;
 	u64			size;
 
@@ -2639,7 +2639,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
 		return;
 	}
 	if (type == macio_keylargo || type == macio_keylargo2) {
-		u32 *did = (u32 *)get_property(node, "device-id", NULL);
+		const u32 *did = get_property(node, "device-id", NULL);
 		if (*did == 0x00000025)
 			type = macio_pangea;
 		if (*did == 0x0000003e)
@@ -2652,7 +2652,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
 	macio_chips[i].base	= base;
 	macio_chips[i].flags	= MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON;
 	macio_chips[i].name	= macio_names[type];
-	revp = (u32 *)get_property(node, "revision-id", NULL);
+	revp = get_property(node, "revision-id", NULL);
 	if (revp)
 		macio_chips[i].rev = *revp;
 	printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n",
@@ -2695,15 +2695,15 @@ static void __init
 initial_serial_shutdown(struct device_node *np)
 {
 	int len;
-	struct slot_names_prop {
+	const struct slot_names_prop {
 		int	count;
 		char	name[1];
 	} *slots;
-	char *conn;
+	const char *conn;
 	int port_type = PMAC_SCC_ASYNC;
 	int modem = 0;
 
-	slots = (struct slot_names_prop *)get_property(np, "slot-names", &len);
+	slots = get_property(np, "slot-names", &len);
 	conn = get_property(np, "AAPL,connector", &len);
 	if (conn && (strcmp(conn, "infrared") == 0))
 		port_type = PMAC_SCC_IRDA;
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 8677f50c25860..c2c7cf75dd5fa 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -477,7 +477,8 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
 static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 {
 	struct pmac_i2c_host_kw *host;
-	u32			*psteps, *prate, *addrp, steps;
+	const u32		*psteps, *prate, *addrp;
+	u32			steps;
 
 	host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
 	if (host == NULL) {
@@ -490,7 +491,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 	 * on all i2c keywest nodes so far ... we would have to fallback
 	 * to macio parsing if that wasn't the case
 	 */
-	addrp = (u32 *)get_property(np, "AAPL,address", NULL);
+	addrp = get_property(np, "AAPL,address", NULL);
 	if (addrp == NULL) {
 		printk(KERN_ERR "low_i2c: Can't find address for %s\n",
 		       np->full_name);
@@ -504,13 +505,13 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 	host->timeout_timer.function = kw_i2c_timeout;
 	host->timeout_timer.data = (unsigned long)host;
 
-	psteps = (u32 *)get_property(np, "AAPL,address-step", NULL);
+	psteps = get_property(np, "AAPL,address-step", NULL);
 	steps = psteps ? (*psteps) : 0x10;
 	for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++)
 		steps >>= 1;
 	/* Select interface rate */
 	host->speed = KW_I2C_MODE_25KHZ;
-	prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL);
+	prate = get_property(np, "AAPL,i2c-rate", NULL);
 	if (prate) switch(*prate) {
 	case 100:
 		host->speed = KW_I2C_MODE_100KHZ;
@@ -618,8 +619,8 @@ static void __init kw_i2c_probe(void)
 		} else {
 			for (child = NULL;
 			     (child = of_get_next_child(np, child)) != NULL;) {
-				u32 *reg =
-					(u32 *)get_property(child, "reg", NULL);
+				const u32 *reg = get_property(child,
+						"reg", NULL);
 				if (reg == NULL)
 					continue;
 				kw_i2c_add(host, np, child, *reg);
@@ -881,7 +882,7 @@ static void __init smu_i2c_probe(void)
 {
 	struct device_node *controller, *busnode;
 	struct pmac_i2c_bus *bus;
-	u32 *reg;
+	const u32 *reg;
 	int sz;
 
 	if (!smu_present())
@@ -904,7 +905,7 @@ static void __init smu_i2c_probe(void)
 		if (strcmp(busnode->type, "i2c") &&
 		    strcmp(busnode->type, "i2c-bus"))
 			continue;
-		reg = (u32 *)get_property(busnode, "reg", NULL);
+		reg = get_property(busnode, "reg", NULL);
 		if (reg == NULL)
 			continue;
 
@@ -948,9 +949,8 @@ struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node)
 		list_for_each_entry(bus, &pmac_i2c_busses, link) {
 			if (p == bus->busnode) {
 				if (prev && bus->flags & pmac_i2c_multibus) {
-					u32 *reg;
-					reg = (u32 *)get_property(prev, "reg",
-								  NULL);
+					const u32 *reg;
+					reg = get_property(prev, "reg", NULL);
 					if (!reg)
 						continue;
 					if (((*reg) >> 8) != bus->channel)
@@ -971,7 +971,7 @@ EXPORT_SYMBOL_GPL(pmac_i2c_find_bus);
 
 u8 pmac_i2c_get_dev_addr(struct device_node *device)
 {
-	u32 *reg = (u32 *)get_property(device, "reg", NULL);
+	const u32 *reg = get_property(device, "reg", NULL);
 
 	if (reg == NULL)
 		return 0;
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 556b349797e85..787ffd999bc2e 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -69,16 +69,16 @@ struct device_node *k2_skiplist[2];
 static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
 {
 	for (; node != 0;node = node->sibling) {
-		int * bus_range;
-		unsigned int *class_code;
+		const int * bus_range;
+		const unsigned int *class_code;
 		int len;
 
 		/* For PCI<->PCI bridges or CardBus bridges, we go down */
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
 			continue;
-		bus_range = (int *) get_property(node, "bus-range", &len);
+		bus_range = get_property(node, "bus-range", &len);
 		if (bus_range != NULL && len > 2 * sizeof(int)) {
 			if (bus_range[1] > higher)
 				higher = bus_range[1];
@@ -96,13 +96,15 @@ static int __init fixup_one_level_bus_range(struct device_node *node, int higher
  */
 static void __init fixup_bus_range(struct device_node *bridge)
 {
-	int * bus_range;
-	int len;
+	int *bus_range, len;
+	struct property *prop;
 
 	/* Lookup the "bus-range" property for the hose */
-	bus_range = (int *) get_property(bridge, "bus-range", &len);
-	if (bus_range == NULL || len < 2 * sizeof(int))
+	prop = of_find_property(bridge, "bus-range", &len);
+	if (prop == NULL || prop->length < 2 * sizeof(int))
 		return;
+
+	bus_range = (int *)prop->value;
 	bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
 }
 
@@ -240,7 +242,7 @@ static struct pci_ops macrisc_pci_ops =
 static int chaos_validate_dev(struct pci_bus *bus, int devfn, int offset)
 {
 	struct device_node *np;
-	u32 *vendor, *device;
+	const u32 *vendor, *device;
 
 	if (offset >= 0x100)
 		return  PCIBIOS_BAD_REGISTER_NUMBER;
@@ -248,8 +250,8 @@ static int chaos_validate_dev(struct pci_bus *bus, int devfn, int offset)
 	if (np == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	vendor = (u32 *)get_property(np, "vendor-id", NULL);
-	device = (u32 *)get_property(np, "device-id", NULL);
+	vendor = get_property(np, "vendor-id", NULL);
+	device = get_property(np, "device-id", NULL);
 	if (vendor == NULL || device == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
@@ -689,20 +691,21 @@ static void __init fixup_nec_usb2(void)
 
 	for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) {
 		struct pci_controller *hose;
-		u32 data, *prop;
+		u32 data;
+		const u32 *prop;
 		u8 bus, devfn;
 
-		prop = (u32 *)get_property(nec, "vendor-id", NULL);
+		prop = get_property(nec, "vendor-id", NULL);
 		if (prop == NULL)
 			continue;
 		if (0x1033 != *prop)
 			continue;
-		prop = (u32 *)get_property(nec, "device-id", NULL);
+		prop = get_property(nec, "device-id", NULL);
 		if (prop == NULL)
 			continue;
 		if (0x0035 != *prop)
 			continue;
-		prop = (u32 *)get_property(nec, "reg", NULL);
+		prop = get_property(nec, "reg", NULL);
 		if (prop == NULL)
 			continue;
 		devfn = (prop[0] >> 8) & 0xff;
@@ -901,7 +904,7 @@ static int __init add_bridge(struct device_node *dev)
 	struct pci_controller *hose;
 	struct resource rsrc;
 	char *disp_name;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1, has_address = 0;
 
 	DBG("Adding PCI host bridge %s\n", dev->full_name);
@@ -910,7 +913,7 @@ static int __init add_bridge(struct device_node *dev)
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index 6d66359ec8c89..829dacec96e55 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -114,7 +114,7 @@ static void macio_gpio_init_one(struct macio_chip *macio)
 	 * we just create them all
 	 */
 	for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) {
-		u32 *reg = (u32 *)get_property(gp, "reg", NULL);
+		const u32 *reg = get_property(gp, "reg", NULL);
 		unsigned long offset;
 		if (reg == NULL)
 			continue;
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index b117adbf95718..7651f278615a3 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -813,14 +813,15 @@ struct pmf_function *__pmf_find_function(struct device_node *target,
 	struct pmf_device *dev;
 	struct pmf_function *func, *result = NULL;
 	char fname[64];
-	u32 *prop, ph;
+	const u32 *prop;
+	u32 ph;
 
 	/*
 	 * Look for a "platform-*" function reference. If we can't find
 	 * one, then we fallback to a direct call attempt
 	 */
 	snprintf(fname, 63, "platform-%s", name);
-	prop = (u32 *)get_property(target, fname, NULL);
+	prop = get_property(target, fname, NULL);
 	if (prop == NULL)
 		goto find_it;
 	ph = *prop;
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 31a9da769fa23..824a618396ab1 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -116,7 +116,7 @@ extern struct smp_ops_t core99_smp_ops;
 static void pmac_show_cpuinfo(struct seq_file *m)
 {
 	struct device_node *np;
-	char *pp;
+	const char *pp;
 	int plen;
 	int mbmodel;
 	unsigned int mbflags;
@@ -134,12 +134,12 @@ static void pmac_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "machine\t\t: ");
 	np = of_find_node_by_path("/");
 	if (np != NULL) {
-		pp = (char *) get_property(np, "model", NULL);
+		pp = get_property(np, "model", NULL);
 		if (pp != NULL)
 			seq_printf(m, "%s\n", pp);
 		else
 			seq_printf(m, "PowerMac\n");
-		pp = (char *) get_property(np, "compatible", &plen);
+		pp = get_property(np, "compatible", &plen);
 		if (pp != NULL) {
 			seq_printf(m, "motherboard\t:");
 			while (plen > 0) {
@@ -163,10 +163,8 @@ static void pmac_show_cpuinfo(struct seq_file *m)
 	if (np == NULL)
 		np = of_find_node_by_type(NULL, "cache");
 	if (np != NULL) {
-		unsigned int *ic = (unsigned int *)
-			get_property(np, "i-cache-size", NULL);
-		unsigned int *dc = (unsigned int *)
-			get_property(np, "d-cache-size", NULL);
+		const unsigned int *ic = get_property(np, "i-cache-size", NULL);
+		const unsigned int *dc = get_property(np, "d-cache-size", NULL);
 		seq_printf(m, "L2 cache\t:");
 		has_l2cache = 1;
 		if (get_property(np, "cache-unified", NULL) != 0 && dc) {
@@ -254,7 +252,7 @@ static void __init l2cr_init(void)
 		if (np == 0)
 			np = find_type_devices("cpu");
 		if (np != 0) {
-			unsigned int *l2cr = (unsigned int *)
+			const unsigned int *l2cr =
 				get_property(np, "l2cr-value", NULL);
 			if (l2cr != 0) {
 				ppc_override_l2cr = 1;
@@ -277,7 +275,7 @@ static void __init l2cr_init(void)
 static void __init pmac_setup_arch(void)
 {
 	struct device_node *cpu, *ic;
-	int *fp;
+	const int *fp;
 	unsigned long pvr;
 
 	pvr = PVR_VER(mfspr(SPRN_PVR));
@@ -287,7 +285,7 @@ static void __init pmac_setup_arch(void)
 	loops_per_jiffy = 50000000 / HZ;
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != NULL) {
-		fp = (int *) get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != NULL) {
 			if (pvr >= 0x30 && pvr < 0x80)
 				/* PPC970 etc. */
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 827b7121ffb84..653eeb64d1e28 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -548,7 +548,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus)
 	struct device_node *cc = NULL;	
 	struct device_node *p;
 	const char *name = NULL;
-	u32 *reg;
+	const u32 *reg;
 	int ok;
 
 	/* Look for the clock chip */
@@ -562,7 +562,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus)
 		pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc);
 		if (pmac_tb_clock_chip_host == NULL)
 			continue;
-		reg = (u32 *)get_property(cc, "reg", NULL);
+		reg = get_property(cc, "reg", NULL);
 		if (reg == NULL)
 			continue;
 		switch (*reg) {
@@ -707,8 +707,7 @@ static void __init smp_core99_setup(int ncpus)
 		core99_tb_gpio = KL_GPIO_TB_ENABLE;	/* default value */
 		cpu = of_find_node_by_type(NULL, "cpu");
 		if (cpu != NULL) {
-			tbprop = (u32 *)get_property(cpu, "timebase-enable",
-						     NULL);
+			tbprop = get_property(cpu, "timebase-enable", NULL);
 			if (tbprop)
 				core99_tb_gpio = *tbprop;
 			of_node_put(cpu);
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 37e5b1eff911f..ce1a235855f75 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -68,11 +68,11 @@ static unsigned char scc_inittab[] = {
 
 void udbg_scc_init(int force_scc)
 {
-	u32 *reg;
+	const u32 *reg;
 	unsigned long addr;
 	struct device_node *stdout = NULL, *escc = NULL, *macio = NULL;
 	struct device_node *ch, *ch_def = NULL, *ch_a = NULL;
-	char *path;
+	const char *path;
 	int i, x;
 
 	escc = of_find_node_by_name(NULL, "escc");
@@ -81,7 +81,7 @@ void udbg_scc_init(int force_scc)
 	macio = of_get_parent(escc);
 	if (macio == NULL)
 		goto bail;
-	path = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	path = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (path != NULL)
 		stdout = of_find_node_by_path(path);
 	for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) {
@@ -96,13 +96,13 @@ void udbg_scc_init(int force_scc)
 	ch = ch_def ? ch_def : ch_a;
 
 	/* Get address within mac-io ASIC */
-	reg = (u32 *)get_property(escc, "reg", NULL);
+	reg = get_property(escc, "reg", NULL);
 	if (reg == NULL)
 		goto bail;
 	addr = reg[0];
 
 	/* Get address of mac-io PCI itself */
-	reg = (u32 *)get_property(macio, "assigned-addresses", NULL);
+	reg = get_property(macio, "assigned-addresses", NULL);
 	if (reg == NULL)
 		goto bail;
 	addr += reg[2];
diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c
index 2a0b3be7cdd08..e8a6de5a1517e 100644
--- a/drivers/i2c/busses/i2c-powermac.c
+++ b/drivers/i2c/busses/i2c-powermac.c
@@ -209,7 +209,8 @@ static int i2c_powermac_probe(struct device *dev)
 	struct pmac_i2c_bus *bus = dev->platform_data;
 	struct device_node *parent = NULL;
 	struct i2c_adapter *adapter;
-	char name[32], *basename;
+	char name[32];
+	const char *basename;
 	int rc;
 
 	if (bus == NULL)
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index ebf961f1718d8..fa46856e8068c 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1154,7 +1154,7 @@ static int
 pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 {
 	struct device_node *np = pmif->node;
-	int *bidp;
+	const int *bidp;
 
 	pmif->cable_80 = 0;
 	pmif->broken_dma = pmif->broken_dma_warn = 0;
@@ -1176,14 +1176,14 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 		pmif->broken_dma = 1;
 	}
 
-	bidp = (int *)get_property(np, "AAPL,bus-id", NULL);
+	bidp = get_property(np, "AAPL,bus-id", NULL);
 	pmif->aapl_bus_id =  bidp ? *bidp : 0;
 
 	/* Get cable type from device-tree */
 	if (pmif->kind == controller_kl_ata4 || pmif->kind == controller_un_ata6
 	    || pmif->kind == controller_k2_ata6
 	    || pmif->kind == controller_sh_ata6) {
-		char* cable = get_property(np, "cable-type", NULL);
+		const char* cable = get_property(np, "cable-type", NULL);
 		if (cable && !strncmp(cable, "80-", 3))
 			pmif->cable_80 = 1;
 	}
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 80c0c665b5f6b..7817cf286d0cf 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -139,7 +139,9 @@ static int macio_uevent(struct device *dev, char **envp, int num_envp,
 {
 	struct macio_dev * macio_dev;
 	struct of_device * of;
-	char *scratch, *compat, *compat2;
+	char *scratch;
+	const char *compat, *compat2;
+
 	int i = 0;
 	int length, cplen, cplen2, seen = 0;
 
@@ -173,7 +175,7 @@ static int macio_uevent(struct device *dev, char **envp, int num_envp,
          * it's not really legal to split it out with commas. We split it
          * up using a number of environment variables instead. */
 
-	compat = (char *) get_property(of->node, "compatible", &cplen);
+	compat = get_property(of->node, "compatible", &cplen);
 	compat2 = compat;
 	cplen2= cplen;
 	while (compat && cplen > 0) {
@@ -454,7 +456,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
 					       struct resource *parent_res)
 {
 	struct macio_dev *dev;
-	u32 *reg;
+	const u32 *reg;
 	
 	if (np == NULL)
 		return NULL;
@@ -489,7 +491,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
 #endif
 			MAX_NODE_NAME_SIZE, np->name);
 	} else {
-		reg = (u32 *)get_property(np, "reg", NULL);
+		reg = get_property(np, "reg", NULL);
 		sprintf(dev->ofdev.dev.bus_id, "%1d.%08x:%.*s",
 			chip->lbus.index,
 			reg ? *reg : 0, MAX_NODE_NAME_SIZE, np->name);
diff --git a/drivers/macintosh/macio_sysfs.c b/drivers/macintosh/macio_sysfs.c
index cae24a13526a3..8566bdfdd4b88 100644
--- a/drivers/macintosh/macio_sysfs.c
+++ b/drivers/macintosh/macio_sysfs.c
@@ -16,12 +16,12 @@ static ssize_t
 compatible_show (struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct of_device *of;
-	char *compat;
+	const char *compat;
 	int cplen;
 	int length = 0;
 
 	of = &to_macio_device (dev)->ofdev;
-	compat = (char *) get_property(of->node, "compatible", &cplen);
+	compat = get_property(of->node, "compatible", &cplen);
 	if (!compat) {
 		*buf = '\0';
 		return 0;
@@ -42,12 +42,12 @@ static ssize_t modalias_show (struct device *dev, struct device_attribute *attr,
 			      char *buf)
 {
 	struct of_device *of;
-	char *compat;
+	const char *compat;
 	int cplen;
 	int length;
 
 	of = &to_macio_device (dev)->ofdev;
-	compat = (char *) get_property (of->node, "compatible", &cplen);
+	compat = get_property(of->node, "compatible", &cplen);
 	if (!compat) compat = "", cplen = 1;
 	length = sprintf (buf, "of:N%sT%s", of->node->name, of->node->type);
 	buf += length;
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index f139a74696fe8..6f358600536e6 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -447,7 +447,7 @@ EXPORT_SYMBOL(smu_present);
 int __init smu_init (void)
 {
 	struct device_node *np;
-	u32 *data;
+	const u32 *data;
 
         np = of_find_node_by_type(NULL, "smu");
         if (np == NULL)
@@ -483,7 +483,7 @@ int __init smu_init (void)
 		printk(KERN_ERR "SMU: Can't find doorbell GPIO !\n");
 		goto fail;
 	}
-	data = (u32 *)get_property(np, "reg", NULL);
+	data = get_property(np, "reg", NULL);
 	if (data == NULL) {
 		of_node_put(np);
 		printk(KERN_ERR "SMU: Can't find doorbell GPIO address !\n");
@@ -506,7 +506,7 @@ int __init smu_init (void)
 		np = of_find_node_by_name(NULL, "smu-interrupt");
 		if (np == NULL)
 			break;
-		data = (u32 *)get_property(np, "reg", NULL);
+		data = get_property(np, "reg", NULL);
 		if (data == NULL) {
 			of_node_put(np);
 			break;
@@ -959,11 +959,11 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id)
 /* Note: Only allowed to return error code in pointers (using ERR_PTR)
  * when interruptible is 1
  */
-struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size,
-						int interruptible)
+const struct smu_sdbp_header *__smu_get_sdb_partition(int id,
+		unsigned int *size, int interruptible)
 {
 	char pname[32];
-	struct smu_sdbp_header *part;
+	const struct smu_sdbp_header *part;
 
 	if (!smu)
 		return NULL;
@@ -980,8 +980,7 @@ struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size,
 	} else
 		mutex_lock(&smu_part_access);
 
-	part = (struct smu_sdbp_header *)get_property(smu->of_node,
-						      pname, size);
+	part = get_property(smu->of_node, pname, size);
 	if (part == NULL) {
 		DPRINTK("trying to extract from SMU ...\n");
 		part = smu_create_sdb_partition(id);
@@ -992,7 +991,7 @@ struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size,
 	return part;
 }
 
-struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size)
+const struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size)
 {
 	return __smu_get_sdb_partition(id, size, 0);
 }
@@ -1071,7 +1070,7 @@ static ssize_t smu_write(struct file *file, const char __user *buf,
 		pp->mode = smu_file_events;
 		return 0;
 	} else if (hdr.cmdtype == SMU_CMDTYPE_GET_PARTITION) {
-		struct smu_sdbp_header *part;
+		const struct smu_sdbp_header *part;
 		part = __smu_get_sdb_partition(hdr.cmd, NULL, 1);
 		if (part == NULL)
 			return -EINVAL;
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 7f86478bdd366..a0f30d0853ea2 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -47,7 +47,7 @@ static u8 FAN_SPD_SET[2] = {0x30, 0x31};
 
 static u8 default_limits_local[3] = {70, 50, 70};    /* local, sensor1, sensor2 */
 static u8 default_limits_chip[3] = {80, 65, 80};    /* local, sensor1, sensor2 */
-static char *sensor_location[3] = {NULL, NULL, NULL};
+static const char *sensor_location[3] = {NULL, NULL, NULL};
 
 static int limit_adjust = 0;
 static int fan_speed = -1;
@@ -553,7 +553,7 @@ static int __init
 thermostat_init(void)
 {
 	struct device_node* np;
-	u32 *prop;
+	const u32 *prop;
 	int i = 0, offset = 0;
 	
 	np = of_find_node_by_name(NULL, "fan");
@@ -566,13 +566,13 @@ thermostat_init(void)
 	else
 		return -ENODEV;
 
-	prop = (u32 *)get_property(np, "hwsensor-params-version", NULL);
+	prop = get_property(np, "hwsensor-params-version", NULL);
 	printk(KERN_INFO "adt746x: version %d (%ssupported)\n", *prop,
 			 (*prop == 1)?"":"un");
 	if (*prop != 1)
 		return -ENODEV;
 
-	prop = (u32 *)get_property(np, "reg", NULL);
+	prop = get_property(np, "reg", NULL);
 	if (!prop)
 		return -ENODEV;
 
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 20bf67244e2c1..d00c0c37a12e2 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -660,7 +660,7 @@ static int read_eeprom(int cpu, struct mpu_data *out)
 {
 	struct device_node *np;
 	char nodename[64];
-	u8 *data;
+	const u8 *data;
 	int len;
 
 	/* prom.c routine for finding a node by path is a bit brain dead
@@ -673,7 +673,7 @@ static int read_eeprom(int cpu, struct mpu_data *out)
 		printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid node from device-tree\n");
 		return -ENODEV;
 	}
-	data = (u8 *)get_property(np, "cpuid", &len);
+	data = get_property(np, "cpuid", &len);
 	if (data == NULL) {
 		printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid property from device-tree\n");
 		of_node_put(np);
@@ -1336,7 +1336,7 @@ static int init_backside_state(struct backside_pid_state *state)
 	 */
 	u3 = of_find_node_by_path("/u3@0,f8000000");
 	if (u3 != NULL) {
-		u32 *vers = (u32 *)get_property(u3, "device-rev", NULL);
+		const u32 *vers = get_property(u3, "device-rev", NULL);
 		if (vers)
 			if (((*vers) & 0x3f) < 0x34)
 				u3h = 0;
@@ -2111,8 +2111,8 @@ static void fcu_lookup_fans(struct device_node *fcu_node)
 
 	while ((np = of_get_next_child(fcu_node, np)) != NULL) {
 		int type = -1;
-		char *loc;
-		u32 *reg;
+		const char *loc;
+		const u32 *reg;
 
 		DBG(" control: %s, type: %s\n", np->name, np->type);
 
@@ -2128,8 +2128,8 @@ static void fcu_lookup_fans(struct device_node *fcu_node)
 			continue;
 
 		/* Lookup for a matching location */
-		loc = (char *)get_property(np, "location", NULL);
-		reg = (u32 *)get_property(np, "reg", NULL);
+		loc = get_property(np, "location", NULL);
+		reg = get_property(np, "reg", NULL);
 		if (loc == NULL || reg == NULL)
 			continue;
 		DBG(" matching location: %s, reg: 0x%08x\n", loc, *reg);
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index c7d1c290cb0c1..738faab1b22c2 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -484,14 +484,14 @@ struct apple_thermal_info {
 static int __init
 g4fan_init( void )
 {
-	struct apple_thermal_info *info;
+	const struct apple_thermal_info *info;
 	struct device_node *np;
 
 	init_MUTEX( &x.lock );
 
 	if( !(np=of_find_node_by_name(NULL, "power-mgt")) )
 		return -ENODEV;
-	info = (struct apple_thermal_info*)get_property(np, "thermal-info", NULL);
+	info = get_property(np, "thermal-info", NULL);
 	of_node_put(np);
 
 	if( !info || !machine_is_compatible("PowerMac3,6") )
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 69d5452fd22fc..7512d1c152076 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -123,7 +123,7 @@ int __init find_via_cuda(void)
 {
     struct adb_request req;
     phys_addr_t taddr;
-    u32 *reg;
+    const u32 *reg;
     int err;
 
     if (vias != 0)
@@ -132,7 +132,7 @@ int __init find_via_cuda(void)
     if (vias == 0)
 	return 0;
 
-    reg = (u32 *)get_property(vias, "reg", NULL);
+    reg = get_property(vias, "reg", NULL);
     if (reg == NULL) {
 	    printk(KERN_ERR "via-cuda: No \"reg\" property !\n");
 	    goto fail;
diff --git a/drivers/macintosh/via-pmu-led.c b/drivers/macintosh/via-pmu-led.c
index 5189d5454b1f2..179af10105d96 100644
--- a/drivers/macintosh/via-pmu-led.c
+++ b/drivers/macintosh/via-pmu-led.c
@@ -120,7 +120,7 @@ static int __init via_pmu_led_init(void)
 	dt = of_find_node_by_path("/");
 	if (dt == NULL)
 		return -ENODEV;
-	model = (const char *)get_property(dt, "model", NULL);
+	model = get_property(dt, "model", NULL);
 	if (model == NULL)
 		return -ENODEV;
 	if (strncmp(model, "PowerBook", strlen("PowerBook")) != 0 &&
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 06ca80bfd6b98..80e88b4a1d072 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -287,7 +287,7 @@ static char *pbook_type[] = {
 int __init find_via_pmu(void)
 {
 	u64 taddr;
-	u32 *reg;
+	const u32 *reg;
 
 	if (via != 0)
 		return 1;
@@ -295,7 +295,7 @@ int __init find_via_pmu(void)
 	if (vias == NULL)
 		return 0;
 
-	reg = (u32 *)get_property(vias, "reg", NULL);
+	reg = get_property(vias, "reg", NULL);
 	if (reg == NULL) {
 		printk(KERN_ERR "via-pmu: No \"reg\" property !\n");
 		goto fail;
@@ -337,7 +337,7 @@ int __init find_via_pmu(void)
 		
 		gpiop = of_find_node_by_name(NULL, "gpio");
 		if (gpiop) {
-			reg = (u32 *)get_property(gpiop, "reg", NULL);
+			reg = get_property(gpiop, "reg", NULL);
 			if (reg)
 				gaddr = of_translate_address(gpiop, reg);
 			if (gaddr != OF_BAD_ADDR)
@@ -486,9 +486,9 @@ static int __init via_pmu_dev_init(void)
 		pmu_batteries[1].flags |= PMU_BATT_TYPE_SMART;
 	} else {
 		struct device_node* prim = find_devices("power-mgt");
-		u32 *prim_info = NULL;
+		const u32 *prim_info = NULL;
 		if (prim)
-			prim_info = (u32 *)get_property(prim, "prim-info", NULL);
+			prim_info = get_property(prim, "prim-info", NULL);
 		if (prim_info) {
 			/* Other stuffs here yet unknown */
 			pmu_battery_count = (prim_info[6] >> 16) & 0xff;
diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c
index f1df6efcbe683..2ff546e4c92f7 100644
--- a/drivers/macintosh/windfarm_pm81.c
+++ b/drivers/macintosh/windfarm_pm81.c
@@ -396,7 +396,7 @@ static void wf_smu_sys_fans_tick(struct wf_smu_sys_fans_state *st)
 static void wf_smu_create_cpu_fans(void)
 {
 	struct wf_cpu_pid_param pid_param;
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 	struct smu_sdbp_cpupiddata *piddata;
 	struct smu_sdbp_fvt *fvt;
 	s32 tmax, tdelta, maxpow, powadj;
@@ -702,7 +702,7 @@ static struct notifier_block wf_smu_events = {
 
 static int wf_init_pm(void)
 {
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 
 	hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL);
 	if (hdr != 0) {
diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c
index 0d6372e96d32e..59e9ffe37c395 100644
--- a/drivers/macintosh/windfarm_pm91.c
+++ b/drivers/macintosh/windfarm_pm91.c
@@ -144,7 +144,7 @@ static struct wf_smu_slots_fans_state *wf_smu_slots_fans;
 static void wf_smu_create_cpu_fans(void)
 {
 	struct wf_cpu_pid_param pid_param;
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 	struct smu_sdbp_cpupiddata *piddata;
 	struct smu_sdbp_fvt *fvt;
 	s32 tmax, tdelta, maxpow, powadj;
diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c
index a9e88edc0c72f..bff1f372f1885 100644
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -159,14 +159,15 @@ static struct smu_fan_control *smu_fan_create(struct device_node *node,
 					      int pwm_fan)
 {
 	struct smu_fan_control *fct;
-	s32 *v; u32 *reg;
-	char *l;
+	const s32 *v;
+	const u32 *reg;
+	const char *l;
 
 	fct = kmalloc(sizeof(struct smu_fan_control), GFP_KERNEL);
 	if (fct == NULL)
 		return NULL;
 	fct->ctrl.ops = &smu_fan_ops;
-	l = (char *)get_property(node, "location", NULL);
+	l = get_property(node, "location", NULL);
 	if (l == NULL)
 		goto fail;
 
@@ -223,17 +224,17 @@ static struct smu_fan_control *smu_fan_create(struct device_node *node,
 		goto fail;
 
 	/* Get min & max values*/
-	v = (s32 *)get_property(node, "min-value", NULL);
+	v = get_property(node, "min-value", NULL);
 	if (v == NULL)
 		goto fail;
 	fct->min = *v;
-	v = (s32 *)get_property(node, "max-value", NULL);
+	v = get_property(node, "max-value", NULL);
 	if (v == NULL)
 		goto fail;
 	fct->max = *v;
 
 	/* Get "reg" value */
-	reg = (u32 *)get_property(node, "reg", NULL);
+	reg = get_property(node, "reg", NULL);
 	if (reg == NULL)
 		goto fail;
 	fct->reg = *reg;
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index e295a07a1ebca..aceb61d9fbc8d 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -233,15 +233,15 @@ static void wf_sat_create(struct i2c_adapter *adapter, struct device_node *dev)
 {
 	struct wf_sat *sat;
 	struct wf_sat_sensor *sens;
-	u32 *reg;
-	char *loc, *type;
+	const u32 *reg;
+	const char *loc, *type;
 	u8 addr, chip, core;
 	struct device_node *child;
 	int shift, cpu, index;
 	char *name;
 	int vsens[2], isens[2];
 
-	reg = (u32 *) get_property(dev, "reg", NULL);
+	reg = get_property(dev, "reg", NULL);
 	if (reg == NULL)
 		return;
 	addr = *reg;
@@ -268,7 +268,7 @@ static void wf_sat_create(struct i2c_adapter *adapter, struct device_node *dev)
 	isens[0] = isens[1] = -1;
 	child = NULL;
 	while ((child = of_get_next_child(dev, child)) != NULL) {
-		reg = (u32 *) get_property(child, "reg", NULL);
+		reg = get_property(child, "reg", NULL);
 		type = get_property(child, "device_type", NULL);
 		loc = get_property(child, "location", NULL);
 		if (reg == NULL || loc == NULL)
diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c
index bed25dcf8a1ea..defe9922ebd1b 100644
--- a/drivers/macintosh/windfarm_smu_sensors.c
+++ b/drivers/macintosh/windfarm_smu_sensors.c
@@ -198,14 +198,14 @@ static struct wf_sensor_ops smu_slotspow_ops = {
 static struct smu_ad_sensor *smu_ads_create(struct device_node *node)
 {
 	struct smu_ad_sensor *ads;
-	char *c, *l;
-	u32 *v;
+	const char *c, *l;
+	const u32 *v;
 
 	ads = kmalloc(sizeof(struct smu_ad_sensor), GFP_KERNEL);
 	if (ads == NULL)
 		return NULL;
-	c = (char *)get_property(node, "device_type", NULL);
-	l = (char *)get_property(node, "location", NULL);
+	c = get_property(node, "device_type", NULL);
+	l = get_property(node, "location", NULL);
 	if (c == NULL || l == NULL)
 		goto fail;
 
@@ -255,7 +255,7 @@ static struct smu_ad_sensor *smu_ads_create(struct device_node *node)
 	} else
 		goto fail;
 
-	v = (u32 *)get_property(node, "reg", NULL);
+	v = get_property(node, "reg", NULL);
 	if (v == NULL)
 		goto fail;
 	ads->reg = *v;
@@ -382,7 +382,7 @@ smu_cpu_power_create(struct wf_sensor *volts, struct wf_sensor *amps)
 
 static void smu_fetch_param_partitions(void)
 {
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 
 	/* Get CPU voltage/current/power calibration data */
 	hdr = smu_get_sdb_partition(SMU_SDB_CPUVCP_ID, NULL);
diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c
index bfd2a22759ebb..a3b99caf80e64 100644
--- a/drivers/serial/pmac_zilog.c
+++ b/drivers/serial/pmac_zilog.c
@@ -1400,8 +1400,8 @@ static struct uart_ops pmz_pops = {
 static int __init pmz_init_port(struct uart_pmac_port *uap)
 {
 	struct device_node *np = uap->node;
-	char *conn;
-	struct slot_names_prop {
+	const char *conn;
+	const struct slot_names_prop {
 		int	count;
 		char	name[1];
 	} *slots;
@@ -1458,7 +1458,7 @@ static int __init pmz_init_port(struct uart_pmac_port *uap)
 		uap->flags |= PMACZILOG_FLAG_IS_IRDA;
 	uap->port_type = PMAC_SCC_ASYNC;
 	/* 1999 Powerbook G3 has slot-names property instead */
-	slots = (struct slot_names_prop *)get_property(np, "slot-names", &len);
+	slots = get_property(np, "slot-names", &len);
 	if (slots && slots->count > 0) {
 		if (strcmp(slots->name, "IrDA") == 0)
 			uap->flags |= PMACZILOG_FLAG_IS_IRDA;
@@ -1470,7 +1470,8 @@ static int __init pmz_init_port(struct uart_pmac_port *uap)
 	if (ZS_IS_INTMODEM(uap)) {
 		struct device_node* i2c_modem = find_devices("i2c-modem");
 		if (i2c_modem) {
-			char* mid = get_property(i2c_modem, "modem-id", NULL);
+			const char* mid =
+				get_property(i2c_modem, "modem-id", NULL);
 			if (mid) switch(*mid) {
 			case 0x04 :
 			case 0x05 :
diff --git a/include/asm-powerpc/smu.h b/include/asm-powerpc/smu.h
index 51e65fc46a038..e49f644ca63a1 100644
--- a/include/asm-powerpc/smu.h
+++ b/include/asm-powerpc/smu.h
@@ -517,7 +517,7 @@ struct smu_sdbp_cpupiddata {
  * This returns the pointer to an SMU "sdb" partition data or NULL
  * if not found. The data format is described below
  */
-extern struct smu_sdbp_header *smu_get_sdb_partition(int id,
+extern const struct smu_sdbp_header *smu_get_sdb_partition(int id,
 					unsigned int *size);
 
 /* Get "sdb" partition data from an SMU satellite */
-- 
GitLab


From b04e3dd4ab4c7763a4ca8f751caaf69ce8dabbba Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:40:40 +1000
Subject: [PATCH 0052/1063] [POWERPC] video & agp: Constify & voidify
 get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

powerpc-specific video & agp driver changes.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/char/agp/uninorth-agp.c    |  4 ++--
 drivers/video/S3triofb.c           | 12 ++++++------
 drivers/video/aty/radeon_base.c    |  8 ++++----
 drivers/video/aty/radeon_monitor.c | 12 ++++++------
 drivers/video/aty/radeon_pm.c      |  4 ++--
 drivers/video/nvidia/nv_of.c       | 12 +++++-------
 drivers/video/offb.c               | 22 +++++++++++-----------
 drivers/video/riva/fbdev.c         |  5 ++---
 8 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index 1de1b12043bf0..91b71e750ee15 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -601,8 +601,8 @@ static int __devinit agp_uninorth_probe(struct pci_dev *pdev,
 		uninorth_node = of_find_node_by_name(NULL, "u3");
 	}
 	if (uninorth_node) {
-		int *revprop = (int *)
-			get_property(uninorth_node, "device-rev", NULL);
+		const int *revprop = get_property(uninorth_node,
+				"device-rev", NULL);
 		if (revprop != NULL)
 			uninorth_rev = *revprop & 0x3f;
 		of_node_put(uninorth_node);
diff --git a/drivers/video/S3triofb.c b/drivers/video/S3triofb.c
index e714e8449c1d1..0f2ed75a681f8 100644
--- a/drivers/video/S3triofb.c
+++ b/drivers/video/S3triofb.c
@@ -350,30 +350,30 @@ static void __init s3triofb_of_init(struct device_node *dp)
     s3trio_name[sizeof(s3trio_name)-1] = '\0';
     strcpy(fb_fix.id, s3trio_name);
 
-    if((pp = (int *)get_property(dp, "vendor-id", &len)) != NULL
+    if((pp = get_property(dp, "vendor-id", &len)) != NULL
 	&& *pp!=PCI_VENDOR_ID_S3) {
 	printk("%s: can't find S3 Trio board\n", dp->full_name);
 	return;
     }
 
-    if((pp = (int *)get_property(dp, "device-id", &len)) != NULL
+    if((pp = get_property(dp, "device-id", &len)) != NULL
 	&& *pp!=PCI_DEVICE_ID_S3_TRIO) {
 	printk("%s: can't find S3 Trio board\n", dp->full_name);
 	return;
     }
 
-    if ((pp = (int *)get_property(dp, "depth", &len)) != NULL
+    if ((pp = get_property(dp, "depth", &len)) != NULL
 	&& len == sizeof(int) && *pp != 8) {
 	printk("%s: can't use depth = %d\n", dp->full_name, *pp);
 	return;
     }
-    if ((pp = (int *)get_property(dp, "width", &len)) != NULL
+    if ((pp = get_property(dp, "width", &len)) != NULL
 	&& len == sizeof(int))
 	fb_var.xres = fb_var.xres_virtual = *pp;
-    if ((pp = (int *)get_property(dp, "height", &len)) != NULL
+    if ((pp = get_property(dp, "height", &len)) != NULL
 	&& len == sizeof(int))
 	fb_var.yres = fb_var.yres_virtual = *pp;
-    if ((pp = (int *)get_property(dp, "linebytes", &len)) != NULL
+    if ((pp = get_property(dp, "linebytes", &len)) != NULL
 	&& len == sizeof(int))
 	fb_fix.line_length = *pp;
     else
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 51b78f8de949a..60c37add2579a 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -412,11 +412,11 @@ static int  __devinit radeon_find_mem_vbios(struct radeonfb_info *rinfo)
 static int __devinit radeon_read_xtal_OF (struct radeonfb_info *rinfo)
 {
 	struct device_node *dp = rinfo->of_node;
-	u32 *val;
+	const u32 *val;
 
 	if (dp == NULL)
 		return -ENODEV;
-	val = (u32 *) get_property(dp, "ATY,RefCLK", NULL);
+	val = get_property(dp, "ATY,RefCLK", NULL);
 	if (!val || !*val) {
 		printk(KERN_WARNING "radeonfb: No ATY,RefCLK property !\n");
 		return -EINVAL;
@@ -424,11 +424,11 @@ static int __devinit radeon_read_xtal_OF (struct radeonfb_info *rinfo)
 
 	rinfo->pll.ref_clk = (*val) / 10;
 
-	val = (u32 *) get_property(dp, "ATY,SCLK", NULL);
+	val = get_property(dp, "ATY,SCLK", NULL);
 	if (val && *val)
 		rinfo->pll.sclk = (*val) / 10;
 
-	val = (u32 *) get_property(dp, "ATY,MCLK", NULL);
+	val = get_property(dp, "ATY,MCLK", NULL);
 	if (val && *val)
 		rinfo->pll.mclk = (*val) / 10;
 
diff --git a/drivers/video/aty/radeon_monitor.c b/drivers/video/aty/radeon_monitor.c
index 98c05bc0de44a..ea531a6f45d1f 100644
--- a/drivers/video/aty/radeon_monitor.c
+++ b/drivers/video/aty/radeon_monitor.c
@@ -64,13 +64,13 @@ static int __devinit radeon_parse_montype_prop(struct device_node *dp, u8 **out_
 {
         static char *propnames[] = { "DFP,EDID", "LCD,EDID", "EDID",
 				     "EDID1", "EDID2",  NULL };
-	u8 *pedid = NULL;
-	u8 *pmt = NULL;
+	const u8 *pedid = NULL;
+	const u8 *pmt = NULL;
 	u8 *tmp;
         int i, mt = MT_NONE;  
 	
 	RTRACE("analyzing OF properties...\n");
-	pmt = (u8 *)get_property(dp, "display-type", NULL);
+	pmt = get_property(dp, "display-type", NULL);
 	if (!pmt)
 		return MT_NONE;
 	RTRACE("display-type: %s\n", pmt);
@@ -89,7 +89,7 @@ static int __devinit radeon_parse_montype_prop(struct device_node *dp, u8 **out_
 	}
 
 	for (i = 0; propnames[i] != NULL; ++i) {
-		pedid = (u8 *)get_property(dp, propnames[i], NULL);
+		pedid = get_property(dp, propnames[i], NULL);
 		if (pedid != NULL)
 			break;
 	}
@@ -124,14 +124,14 @@ static int __devinit radeon_probe_OF_head(struct radeonfb_info *rinfo, int head_
 		return MT_NONE;
 
 	if (rinfo->has_CRTC2) {
-		char *pname;
+		const char *pname;
 		int len, second = 0;
 
 		dp = dp->child;
 		do {
 			if (!dp)
 				return MT_NONE;
-			pname = (char *)get_property(dp, "name", NULL);
+			pname = get_property(dp, "name", NULL);
 			if (!pname)
 				return MT_NONE;
 			len = strlen(pname);
diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c
index c7091761cef41..b9b9396d3bde1 100644
--- a/drivers/video/aty/radeon_pm.c
+++ b/drivers/video/aty/radeon_pm.c
@@ -1167,7 +1167,7 @@ static void radeon_pm_full_reset_sdram(struct radeonfb_info *rinfo)
 			  0x21320032, 0xa1320032, 0x21320032, 0xffffffff,
 			  0x31320032 };
 
-		u32 *mrtable = default_mrtable;
+		const u32 *mrtable = default_mrtable;
 		int i, mrtable_size = ARRAY_SIZE(default_mrtable);
 
 		mdelay(30);
@@ -1186,7 +1186,7 @@ static void radeon_pm_full_reset_sdram(struct radeonfb_info *rinfo)
 		if (rinfo->of_node != NULL) {
 			int size;
 
-			mrtable = (u32 *)get_property(rinfo->of_node, "ATY,MRT", &size);
+			mrtable = get_property(rinfo->of_node, "ATY,MRT", &size);
 			if (mrtable)
 				mrtable_size = size >> 2;
 			else
diff --git a/drivers/video/nvidia/nv_of.c b/drivers/video/nvidia/nv_of.c
index 8209106e26eee..d9af88c2b580e 100644
--- a/drivers/video/nvidia/nv_of.c
+++ b/drivers/video/nvidia/nv_of.c
@@ -32,7 +32,7 @@ int nvidia_probe_of_connector(struct fb_info *info, int conn, u8 **out_edid)
 {
 	struct nvidia_par *par = info->par;
 	struct device_node *parent, *dp;
-	unsigned char *pedid = NULL;
+	const unsigned char *pedid = NULL;
 	static char *propnames[] = {
 		"DFP,EDID", "LCD,EDID", "EDID", "EDID1",
 		"EDID,B", "EDID,A", NULL };
@@ -42,20 +42,19 @@ int nvidia_probe_of_connector(struct fb_info *info, int conn, u8 **out_edid)
 	if (parent == NULL)
 		return -1;
 	if (par->twoHeads) {
-		char *pname;
+		const char *pname;
 		int len;
 
 		for (dp = NULL;
 		     (dp = of_get_next_child(parent, dp)) != NULL;) {
-			pname = (char *)get_property(dp, "name", NULL);
+			pname = get_property(dp, "name", NULL);
 			if (!pname)
 				continue;
 			len = strlen(pname);
 			if ((pname[len-1] == 'A' && conn == 1) ||
 			    (pname[len-1] == 'B' && conn == 2)) {
 				for (i = 0; propnames[i] != NULL; ++i) {
-					pedid = (unsigned char *)
-						get_property(dp, propnames[i],
+					pedid = get_property(dp, propnames[i],
 							     NULL);
 					if (pedid != NULL)
 						break;
@@ -67,8 +66,7 @@ int nvidia_probe_of_connector(struct fb_info *info, int conn, u8 **out_edid)
 	}
 	if (pedid == NULL) {
 		for (i = 0; propnames[i] != NULL; ++i) {
-			pedid = (unsigned char *)
-				get_property(parent, propnames[i], NULL);
+			pedid = get_property(parent, propnames[i], NULL);
 			if (pedid != NULL)
 				break;
 		}
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index faba672285260..0e750a8510fc5 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -410,30 +410,30 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node)
 	unsigned int flags, rsize, addr_prop = 0;
 	unsigned long max_size = 0;
 	u64 rstart, address = OF_BAD_ADDR;
-	u32 *pp, *addrp, *up;
+	const u32 *pp, *addrp, *up;
 	u64 asize;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-depth", &len);
+	pp = get_property(dp, "linux,bootx-depth", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "depth", &len);
+		pp = get_property(dp, "depth", &len);
 	if (pp && len == sizeof(u32))
 		depth = *pp;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-width", &len);
+	pp = get_property(dp, "linux,bootx-width", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "width", &len);
+		pp = get_property(dp, "width", &len);
 	if (pp && len == sizeof(u32))
 		width = *pp;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-height", &len);
+	pp = get_property(dp, "linux,bootx-height", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "height", &len);
+		pp = get_property(dp, "height", &len);
 	if (pp && len == sizeof(u32))
 		height = *pp;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len);
+	pp = get_property(dp, "linux,bootx-linebytes", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "linebytes", &len);
+		pp = get_property(dp, "linebytes", &len);
 	if (pp && len == sizeof(u32))
 		pitch = *pp;
 	else
@@ -451,9 +451,9 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node)
 	 * ranges and pick one that is both big enough and if possible encloses
 	 * the "address" property. If none match, we pick the biggest
 	 */
-	up = (u32 *)get_property(dp, "linux,bootx-addr", &len);
+	up = get_property(dp, "linux,bootx-addr", &len);
 	if (up == NULL)
-		up = (u32 *)get_property(dp, "address", &len);
+		up = get_property(dp, "address", &len);
 	if (up && len == sizeof(u32))
 		addr_prop = *up;
 
diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index 2788655e6e7de..6a30c0ca1de0a 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -1816,14 +1816,13 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd)
 	NVTRACE_ENTER();
 	dp = pci_device_to_OF_node(pd);
 	for (; dp != NULL; dp = dp->child) {
-		disptype = (unsigned char *)get_property(dp, "display-type", NULL);
+		disptype = get_property(dp, "display-type", NULL);
 		if (disptype == NULL)
 			continue;
 		if (strncmp(disptype, "LCD", 3) != 0)
 			continue;
 		for (i = 0; propnames[i] != NULL; ++i) {
-			pedid = (unsigned char *)
-				get_property(dp, propnames[i], NULL);
+			pedid = get_property(dp, propnames[i], NULL);
 			if (pedid != NULL) {
 				par->EDID = pedid;
 				NVTRACE("LCD found.\n");
-- 
GitLab


From 294ef16a2ee34d0d94aa63616f7552d3bc66c982 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:40:51 +1000
Subject: [PATCH 0053/1063] [POWERPC] scsi: Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

powerpc-specific scsi driver changes.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/scsi/ibmvscsi/rpa_vscsi.c | 11 ++++-------
 drivers/scsi/mac53c94.c           |  2 +-
 drivers/scsi/mesh.c               |  5 +++--
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c
index 242b8873b3336..cafef9cbbe2eb 100644
--- a/drivers/scsi/ibmvscsi/rpa_vscsi.c
+++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c
@@ -156,8 +156,8 @@ static void gather_partition_info(void)
 {
 	struct device_node *rootdn;
 
-	char *ppartition_name;
-	unsigned int *p_number_ptr;
+	const char *ppartition_name;
+	const unsigned int *p_number_ptr;
 
 	/* Retrieve information about this partition */
 	rootdn = find_path_device("/");
@@ -165,14 +165,11 @@ static void gather_partition_info(void)
 		return;
 	}
 
-	ppartition_name =
-		get_property(rootdn, "ibm,partition-name", NULL);
+	ppartition_name = get_property(rootdn, "ibm,partition-name", NULL);
 	if (ppartition_name)
 		strncpy(partition_name, ppartition_name,
 				sizeof(partition_name));
-	p_number_ptr =
-		(unsigned int *)get_property(rootdn, "ibm,partition-no",
-					     NULL);
+	p_number_ptr = get_property(rootdn, "ibm,partition-no", NULL);
 	if (p_number_ptr)
 		partition_number = *p_number_ptr;
 }
diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c
index 93edaa8696cf3..c77f6f2581f72 100644
--- a/drivers/scsi/mac53c94.c
+++ b/drivers/scsi/mac53c94.c
@@ -431,7 +431,7 @@ static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *mat
 	struct fsc_state *state;
 	struct Scsi_Host *host;
 	void *dma_cmd_space;
-	unsigned char *clkprop;
+	const unsigned char *clkprop;
 	int proplen, rc = -ENODEV;
 
 	if (macio_resource_count(mdev) != 2 || macio_irq_count(mdev) != 2) {
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index c88717727be8e..cee9758b9278a 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1850,7 +1850,8 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
 {
 	struct device_node *mesh = macio_get_of_node(mdev);
 	struct pci_dev* pdev = macio_get_pci_dev(mdev);
-	int tgt, *cfp, minper;
+	int tgt, minper;
+	const int *cfp;
 	struct mesh_state *ms;
 	struct Scsi_Host *mesh_host;
 	void *dma_cmd_space;
@@ -1939,7 +1940,7 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
 	       	ms->tgts[tgt].current_req = NULL;
        	}
 
-	if ((cfp = (int *) get_property(mesh, "clock-frequency", NULL)))
+	if ((cfp = get_property(mesh, "clock-frequency", NULL)))
        		ms->clk_freq = *cfp;
 	else {
        		printk(KERN_INFO "mesh: assuming 50MHz clock frequency\n");
-- 
GitLab


From 1a2509c946bfd4d4a4c5a6e816082d3a7de45db8 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:41:03 +1000
Subject: [PATCH 0054/1063] [POWERPC] netdevices: Constify & voidify
 get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

powerpc-specific network device driver changes.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/net/bmac.c       | 13 ++++++++-----
 drivers/net/mace.c       |  2 +-
 drivers/net/spider_net.c | 12 ++++++------
 drivers/net/sungem.c     |  2 +-
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
index 6fad83f24c4f5..711609665632a 100644
--- a/drivers/net/bmac.c
+++ b/drivers/net/bmac.c
@@ -1264,7 +1264,8 @@ static int __devinit bmac_probe(struct macio_dev *mdev, const struct of_device_i
 {
 	int j, rev, ret;
 	struct bmac_data *bp;
-	unsigned char *addr;
+	const unsigned char *prop_addr;
+	unsigned char addr[6];
 	struct net_device *dev;
 	int is_bmac_plus = ((int)match->data) != 0;
 
@@ -1272,14 +1273,16 @@ static int __devinit bmac_probe(struct macio_dev *mdev, const struct of_device_i
 		printk(KERN_ERR "BMAC: can't use, need 3 addrs and 3 intrs\n");
 		return -ENODEV;
 	}
-	addr = get_property(macio_get_of_node(mdev), "mac-address", NULL);
-	if (addr == NULL) {
-		addr = get_property(macio_get_of_node(mdev), "local-mac-address", NULL);
-		if (addr == NULL) {
+	prop_addr = get_property(macio_get_of_node(mdev), "mac-address", NULL);
+	if (prop_addr == NULL) {
+		prop_addr = get_property(macio_get_of_node(mdev),
+				"local-mac-address", NULL);
+		if (prop_addr == NULL) {
 			printk(KERN_ERR "BMAC: Can't get mac-address\n");
 			return -ENODEV;
 		}
 	}
+	memcpy(addr, prop_addr, sizeof(addr));
 
 	dev = alloc_etherdev(PRIV_BYTES);
 	if (!dev) {
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index 29e4b5aa6eadb..5d80e0e6a8e90 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -113,7 +113,7 @@ static int __devinit mace_probe(struct macio_dev *mdev, const struct of_device_i
 	struct device_node *mace = macio_get_of_node(mdev);
 	struct net_device *dev;
 	struct mace_data *mp;
-	unsigned char *addr;
+	const unsigned char *addr;
 	int j, rev, rc = -EBUSY;
 
 	if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) {
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index fb1d5a8a45cf9..b30290d53f79f 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -1812,10 +1812,10 @@ spider_net_setup_phy(struct spider_net_card *card)
  */
 static int
 spider_net_download_firmware(struct spider_net_card *card,
-			     u8 *firmware_ptr)
+			     const void *firmware_ptr)
 {
 	int sequencer, i;
-	u32 *fw_ptr = (u32 *)firmware_ptr;
+	const u32 *fw_ptr = firmware_ptr;
 
 	/* stop sequencers */
 	spider_net_write_reg(card, SPIDER_NET_GSINIT,
@@ -1872,7 +1872,7 @@ spider_net_init_firmware(struct spider_net_card *card)
 {
 	struct firmware *firmware = NULL;
 	struct device_node *dn;
-	u8 *fw_prop = NULL;
+	const u8 *fw_prop = NULL;
 	int err = -ENOENT;
 	int fw_size;
 
@@ -1898,7 +1898,7 @@ spider_net_init_firmware(struct spider_net_card *card)
 	if (!dn)
 		goto out_err;
 
-	fw_prop = (u8 *)get_property(dn, "firmware", &fw_size);
+	fw_prop = get_property(dn, "firmware", &fw_size);
 	if (!fw_prop)
 		goto out_err;
 
@@ -2058,7 +2058,7 @@ spider_net_setup_netdev(struct spider_net_card *card)
 	struct net_device *netdev = card->netdev;
 	struct device_node *dn;
 	struct sockaddr addr;
-	u8 *mac;
+	const u8 *mac;
 
 	SET_MODULE_OWNER(netdev);
 	SET_NETDEV_DEV(netdev, &card->pdev->dev);
@@ -2089,7 +2089,7 @@ spider_net_setup_netdev(struct spider_net_card *card)
 	if (!dn)
 		return -EIO;
 
-	mac = (u8 *)get_property(dn, "local-mac-address", NULL);
+	mac = get_property(dn, "local-mac-address", NULL);
 	if (!mac)
 		return -EIO;
 	memcpy(addr.sa_data, mac, ETH_ALEN);
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index b70bbd7489789..d7b1d1882cab5 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -2896,7 +2896,7 @@ static int __devinit gem_get_device_address(struct gem *gp)
 	if (use_idprom)
 		memcpy(dev->dev_addr, idprom->id_ethaddr, 6);
 #elif defined(CONFIG_PPC_PMAC)
-	unsigned char *addr;
+	const unsigned char *addr;
 
 	addr = get_property(gp->of_node, "local-mac-address", NULL);
 	if (addr == NULL) {
-- 
GitLab


From abddd185a0195988b8a5e802d55aff91783489de Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:41:18 +1000
Subject: [PATCH 0055/1063] [POWERPC] sound: Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

powerpc-specific sound driver changes.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 sound/aoa/core/snd-aoa-gpio-feature.c |  2 +-
 sound/oss/dmasound/dmasound_awacs.c   | 11 ++++++-----
 sound/ppc/tumbler.c                   | 15 ++++++++-------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/sound/aoa/core/snd-aoa-gpio-feature.c b/sound/aoa/core/snd-aoa-gpio-feature.c
index 7ae0c0bdfad8f..2ab55330b31cd 100644
--- a/sound/aoa/core/snd-aoa-gpio-feature.c
+++ b/sound/aoa/core/snd-aoa-gpio-feature.c
@@ -56,7 +56,7 @@ static struct device_node *get_gpio(char *name,
 {
 	struct device_node *np, *gpio;
 	u32 *reg;
-	char *audio_gpio;
+	const char *audio_gpio;
 
 	*gpioptr = -1;
 
diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c
index 4359903f43762..9ae659f824303 100644
--- a/sound/oss/dmasound/dmasound_awacs.c
+++ b/sound/oss/dmasound/dmasound_awacs.c
@@ -347,8 +347,8 @@ int
 setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int* gpio_pol)
 {
 	struct device_node *np;
-	u32* pp;
-	
+	const u32* pp;
+
 	np = find_devices("gpio");
 	if (!np)
 		return -ENODEV;
@@ -356,7 +356,8 @@ setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int*
 	np = np->child;
 	while(np != 0) {
 		if (name) {
-			char *property = get_property(np,"audio-gpio",NULL);
+			const char *property =
+				get_property(np,"audio-gpio",NULL);
 			if (property != 0 && strcmp(property,name) == 0)
 				break;
 		} else if (compatible && device_is_compatible(np, compatible))
@@ -365,11 +366,11 @@ setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int*
 	}
 	if (!np)
 		return -ENODEV;
-	pp = (u32 *)get_property(np, "AAPL,address", NULL);
+	pp = get_property(np, "AAPL,address", NULL);
 	if (!pp)
 		return -ENODEV;
 	*gpio_addr = (*pp) & 0x0000ffff;
-	pp = (u32 *)get_property(np, "audio-gpio-active-state", NULL);
+	pp = get_property(np, "audio-gpio-active-state", NULL);
 	if (pp)
 		*gpio_pol = *pp;
 	else
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 692c611776785..b94ecd0ebab2c 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -1035,7 +1035,7 @@ static struct device_node *find_audio_device(const char *name)
 		return NULL;
   
 	for (np = np->child; np; np = np->sibling) {
-		char *property = get_property(np, "audio-gpio", NULL);
+		const char *property = get_property(np, "audio-gpio", NULL);
 		if (property && strcmp(property, name) == 0)
 			return np;
 	}  
@@ -1062,7 +1062,8 @@ static long tumbler_find_device(const char *device, const char *platform,
 				struct pmac_gpio *gp, int is_compatible)
 {
 	struct device_node *node;
-	u32 *base, addr;
+	const u32 *base;
+	u32 addr;
 
 	if (is_compatible)
 		node = find_compatible_audio_device(device);
@@ -1074,9 +1075,9 @@ static long tumbler_find_device(const char *device, const char *platform,
 		return -ENODEV;
 	}
 
-	base = (u32 *)get_property(node, "AAPL,address", NULL);
+	base = get_property(node, "AAPL,address", NULL);
 	if (! base) {
-		base = (u32 *)get_property(node, "reg", NULL);
+		base = get_property(node, "reg", NULL);
 		if (!base) {
 			DBG("(E) cannot find address for device %s !\n", device);
 			snd_printd("cannot find address for device %s\n", device);
@@ -1090,13 +1091,13 @@ static long tumbler_find_device(const char *device, const char *platform,
 
 	gp->addr = addr & 0x0000ffff;
 	/* Try to find the active state, default to 0 ! */
-	base = (u32 *)get_property(node, "audio-gpio-active-state", NULL);
+	base = get_property(node, "audio-gpio-active-state", NULL);
 	if (base) {
 		gp->active_state = *base;
 		gp->active_val = (*base) ? 0x5 : 0x4;
 		gp->inactive_val = (*base) ? 0x4 : 0x5;
 	} else {
-		u32 *prop = NULL;
+		const u32 *prop = NULL;
 		gp->active_state = 0;
 		gp->active_val = 0x4;
 		gp->inactive_val = 0x5;
@@ -1105,7 +1106,7 @@ static long tumbler_find_device(const char *device, const char *platform,
 		 * as we don't yet have an interpreter for these things
 		 */
 		if (platform)
-			prop = (u32 *)get_property(node, platform, NULL);
+			prop = get_property(node, platform, NULL);
 		if (prop) {
 			if (prop[3] == 0x9 && prop[4] == 0x9) {
 				gp->active_val = 0xd;
-- 
GitLab


From 5c339e96a391476ebb7cc63d913445c8cee092ff Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:41:30 +1000
Subject: [PATCH 0056/1063] [POWERPC] tpm_atmel: Constify & voidify
 get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

tpm_atmel changes

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/char/tpm/tpm_atmel.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm_atmel.h b/drivers/char/tpm/tpm_atmel.h
index 2e68eeb8a2cdf..aefd683c60b7e 100644
--- a/drivers/char/tpm/tpm_atmel.h
+++ b/drivers/char/tpm/tpm_atmel.h
@@ -37,7 +37,7 @@ static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size)
 {
 	struct device_node *dn;
 	unsigned long address, size;
-	unsigned int *reg;
+	const unsigned int *reg;
 	int reglen;
 	int naddrc;
 	int nsizec;
@@ -52,7 +52,7 @@ static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size)
 		return NULL;
 	}
 
-	reg = (unsigned int *) get_property(dn, "reg", &reglen);
+	reg = get_property(dn, "reg", &reglen);
 	naddrc = prom_n_addr_cells(dn);
 	nsizec = prom_n_size_cells(dn);
 
-- 
GitLab


From af5f92d881d783b47d1f993ddffa2bce8b2993fe Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:41:41 +1000
Subject: [PATCH 0057/1063] [POWERPC] sata_svw: Constify & voidify
 get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

sata_svw changes

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/scsi/sata_svw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c
index 7d0858095e1fe..6b70c3c76dfdb 100644
--- a/drivers/scsi/sata_svw.c
+++ b/drivers/scsi/sata_svw.c
@@ -268,7 +268,7 @@ static int k2_sata_proc_info(struct Scsi_Host *shost, char *page, char **start,
 	/* Match it to a port node */
 	index = (ap == ap->host_set->ports[0]) ? 0 : 1;
 	for (np = np->child; np != NULL; np = np->sibling) {
-		u32 *reg = (u32 *)get_property(np, "reg", NULL);
+		const u32 *reg = get_property(np, "reg", NULL);
 		if (!reg)
 			continue;
 		if (index == *reg)
-- 
GitLab


From 88c805940bb9a1478f06ed6dd5d6f660bdc38eaa Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:41:52 +1000
Subject: [PATCH 0058/1063] [POWERPC] tsi108: Constify & voidify get_property()

Now that get_property() returns a void *, there's no need to cast its
return value. Also, treat the return value as const, so we can
constify get_property later.

tsi108 driver changes.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/sysdev/tsi108_pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 3265d54c82ed8..f6c492f8ab955 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -195,7 +195,7 @@ int __init tsi108_setup_pci(struct device_node *dev)
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 0, has_address = 0;
 
 	/* PCI Config mapping */
@@ -208,7 +208,7 @@ int __init tsi108_setup_pci(struct device_node *dev)
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *)get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
-- 
GitLab


From 931b261f442e779b0656d9b04c7ffe4939ef8c0a Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 12 Jul 2006 15:42:06 +1000
Subject: [PATCH 0059/1063] [POWERPC] Make get_property() return a const void *

Previous changes have treated the return values of get_property as
const, so now we can make the actual change to get_property(). There
shouldn't be a need to cast the return values anymore.

We will now get compiler warnings when property values are assigned to
a non-const variable.

If properties need to be updated, there's still the of_find_property
function.

Built for cell_defconfig, chrp32_defconfig, g5_defconfig,
iseries_defconfig, maple_defconfig, pmac32_defconfig, ppc64_defconfig
and pseries_defconfig.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/prom.c | 2 +-
 include/asm-powerpc/prom.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 2a3d84a39cb5a..bf2005b2feb61 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1527,7 +1527,7 @@ struct property *of_find_property(struct device_node *np, const char *name,
  * Find a property with a given name for a given node
  * and return the value.
  */
-void *get_property(struct device_node *np, const char *name, int *lenp)
+const void *get_property(struct device_node *np, const char *name, int *lenp)
 {
 	struct property *pp = of_find_property(np,name,lenp);
 	return pp ? pp->value : NULL;
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index abdf1be66e970..31bfea4686a6c 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -160,7 +160,7 @@ extern void unflatten_device_tree(void);
 extern void early_init_devtree(void *);
 extern int device_is_compatible(struct device_node *device, const char *);
 extern int machine_is_compatible(const char *compat);
-extern void *get_property(struct device_node *node, const char *name,
+extern const void *get_property(struct device_node *node, const char *name,
 		int *lenp);
 extern void print_properties(struct device_node *node);
 extern int prom_n_addr_cells(struct device_node* np);
-- 
GitLab


From 5d33eebee83784f5f03bc3861fa92ee5cd831922 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Thu, 13 Jul 2006 16:32:52 +1000
Subject: [PATCH 0060/1063] [POWERPC] Simplify dma_ops bug conditions

Use BUG_ON rather than BUG to simplify the dma_ops handling,
and remove the now-unnecessary return cases.

Booted on pseries.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/dma_64.c | 65 ++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 36aaa7663f028..6c168f6ea1428 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -35,10 +35,9 @@ int dma_supported(struct device *dev, u64 mask)
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->dma_supported(dev, mask);
-	BUG();
-	return 0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->dma_supported(dev, mask);
 }
 EXPORT_SYMBOL(dma_supported);
 
@@ -66,10 +65,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-	BUG();
-	return NULL;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
 }
 EXPORT_SYMBOL(dma_alloc_coherent);
 
@@ -78,10 +76,9 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
@@ -90,10 +87,9 @@ dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->map_single(dev, cpu_addr, size, direction);
-	BUG();
-	return (dma_addr_t)0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->map_single(dev, cpu_addr, size, direction);
 }
 EXPORT_SYMBOL(dma_map_single);
 
@@ -102,10 +98,9 @@ void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->unmap_single(dev, dma_addr, size, direction);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->unmap_single(dev, dma_addr, size, direction);
 }
 EXPORT_SYMBOL(dma_unmap_single);
 
@@ -115,11 +110,10 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->map_single(dev,
-				(page_address(page) + offset), size, direction);
-	BUG();
-	return (dma_addr_t)0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->map_single(dev, page_address(page) + offset, size,
+			direction);
 }
 EXPORT_SYMBOL(dma_map_page);
 
@@ -128,10 +122,9 @@ void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->unmap_single(dev, dma_address, size, direction);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->unmap_single(dev, dma_address, size, direction);
 }
 EXPORT_SYMBOL(dma_unmap_page);
 
@@ -140,10 +133,9 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->map_sg(dev, sg, nents, direction);
-	BUG();
-	return 0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->map_sg(dev, sg, nents, direction);
 }
 EXPORT_SYMBOL(dma_map_sg);
 
@@ -152,9 +144,8 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->unmap_sg(dev, sg, nhwentries, direction);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->unmap_sg(dev, sg, nhwentries, direction);
 }
 EXPORT_SYMBOL(dma_unmap_sg);
-- 
GitLab


From cb18bd40030c879cd93fef02fd579f74dbab473d Mon Sep 17 00:00:00 2001
From: Mike Kravetz <kravetz@us.ibm.com>
Date: Thu, 20 Jul 2006 23:39:51 -0700
Subject: [PATCH 0061/1063] [POWERPC] Instrument Hypervisor Calls: merge
 headers

Move all the Hypervisor call definitions to a single header file.

Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/net/ibmveth.h        | 10 ----------
 include/asm-powerpc/hvcall.h |  8 ++++++++
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index 8385bf8365071..149191cef2f0a 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -41,16 +41,6 @@
 #define IbmVethMcastRemoveFilter     0x2UL
 #define IbmVethMcastClearFilterTable 0x3UL
 
-/* hcall numbers */
-#define H_VIO_SIGNAL             0x104
-#define H_REGISTER_LOGICAL_LAN   0x114
-#define H_FREE_LOGICAL_LAN       0x118
-#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
-#define H_SEND_LOGICAL_LAN       0x120
-#define H_MULTICAST_CTRL         0x130
-#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
-#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
-
 /* hcall macros */
 #define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \
   plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac)
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index 0d3c4e85711a6..f07ae50cbc2ce 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -164,9 +164,15 @@
 #define H_VIO_SIGNAL		0x104
 #define H_SEND_CRQ		0x108
 #define H_COPY_RDMA		0x110
+#define H_REGISTER_LOGICAL_LAN	0x114
+#define H_FREE_LOGICAL_LAN	0x118
+#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
+#define H_SEND_LOGICAL_LAN	0x120
+#define H_MULTICAST_CTRL	0x130
 #define H_SET_XDABR		0x134
 #define H_STUFF_TCE		0x138
 #define H_PUT_TCE_INDIRECT	0x13C
+#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
 #define H_VTERM_PARTNER_INFO	0x150
 #define H_REGISTER_VTERM	0x154
 #define H_FREE_VTERM		0x158
@@ -196,11 +202,13 @@
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING		0x1D8
 #define H_JOIN			0x298
 #define H_VASI_STATE            0x2A4
 #define H_ENABLE_CRQ		0x2B0
+#define MAX_HCALL_OPCODES	(H_ENABLE_CRQ >> 2)
 
 #ifndef __ASSEMBLY__
 
-- 
GitLab


From b9377ffc3a03cde558d76349a262a1adbb6d3112 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 19 Jul 2006 08:01:28 +1000
Subject: [PATCH 0062/1063] [POWERPC] clean up pseries hcall interfaces

Our pseries hcall interfaces are out of control:

	plpar_hcall_norets
	plpar_hcall
	plpar_hcall_8arg_2ret
	plpar_hcall_4out
	plpar_hcall_7arg_7ret
	plpar_hcall_9arg_9ret

Create 3 interfaces to cover all cases:

	plpar_hcall_norets:	7 arguments no returns
	plpar_hcall:		6 arguments 4 returns
	plpar_hcall9:		9 arguments 9 returns

There are only 2 cases in the kernel that need plpar_hcall9, hopefully
we can keep it that way.

Pass in a buffer to stash return parameters so we avoid the &dummy1,
&dummy2 madness.

Signed-off-by: Anton Blanchard <anton@samba.org>
--
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/lparcfg.c                 |  18 +-
 arch/powerpc/kernel/rtas.c                    |  11 +-
 arch/powerpc/platforms/pseries/hvCall.S       | 207 +++---------------
 arch/powerpc/platforms/pseries/hvconsole.c    |   5 +-
 arch/powerpc/platforms/pseries/lpar.c         |  12 +-
 .../platforms/pseries/plpar_wrappers.h        |  97 ++++++--
 arch/powerpc/platforms/pseries/xics.c         |  22 +-
 drivers/net/ibmveth.c                         |   3 +-
 drivers/net/ibmveth.h                         |  17 +-
 include/asm-powerpc/hvcall.h                  | 105 +++------
 10 files changed, 184 insertions(+), 313 deletions(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 3ce3a2d56fa88..41c05dcd68f40 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -182,8 +182,14 @@ static unsigned int h_get_ppp(unsigned long *entitled,
 			      unsigned long *resource)
 {
 	unsigned long rc;
-	rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
-			      aggregation, resource);
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_GET_PPP, retbuf);
+
+	*entitled = retbuf[0];
+	*unallocated = retbuf[1];
+	*aggregation = retbuf[2];
+	*resource = retbuf[3];
 
 	log_plpar_hcall_return(rc, "H_GET_PPP");
 
@@ -193,8 +199,12 @@ static unsigned int h_get_ppp(unsigned long *entitled,
 static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
 {
 	unsigned long rc;
-	unsigned long dummy;
-	rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_PIC, retbuf);
+
+	*pool_idle_time = retbuf[0];
+	*num_procs = retbuf[1];
 
 	if (rc != H_AUTHORITY)
 		log_plpar_hcall_return(rc, "H_PIC");
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 10e10be324c9a..14353b8789dde 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -668,15 +668,14 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
 	int i;
 	long state;
 	long rc;
-	unsigned long dummy;
-
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 	struct rtas_suspend_me_data data;
 
 	/* Make sure the state is valid */
-	rc = plpar_hcall(H_VASI_STATE,
-			 ((u64)args->args[0] << 32) | args->args[1],
-			 0, 0, 0,
-			 &state, &dummy, &dummy);
+	rc = plpar_hcall(H_VASI_STATE, retbuf,
+			 ((u64)args->args[0] << 32) | args->args[1]);
+
+	state = retbuf[0];
 
 	if (rc) {
 		printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc);
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c9ff547f9d251..9a99b056bd272 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -1,7 +1,6 @@
 /*
  * This file contains the generic code to perform a call to the
  * pSeries LPAR hypervisor.
- * NOTE: this file will go away when we move to inline this work.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -16,42 +15,6 @@
 
 	.text
 
-/* long plpar_hcall(unsigned long opcode,		R3
-			unsigned long arg1,		R4
-			unsigned long arg2,		R5
-			unsigned long arg3,		R6
-			unsigned long arg4,		R7
-			unsigned long *out1,		R8
-			unsigned long *out2,		R9
-			unsigned long *out3);		R10
- */
-_GLOBAL(plpar_hcall)
-	HMT_MEDIUM
-
-	mfcr	r0
-
-	std	r8,STK_PARM(r8)(r1)	/* Save out ptrs */
-	std	r9,STK_PARM(r9)(r1)
-	std	r10,STK_PARM(r10)(r1)
-
-	stw	r0,8(r1)
-
-	HVSC				/* invoke the hypervisor */
-
-	lwz	r0,8(r1)
-
-	ld	r8,STK_PARM(r8)(r1)	/* Fetch r4-r6 ret args */
-	ld	r9,STK_PARM(r9)(r1)
-	ld	r10,STK_PARM(r10)(r1)
-	std	r4,0(r8)
-	std	r5,0(r9)
-	std	r6,0(r10)
-
-	mtcrf	0xff,r0
-	blr				/* return r3 = status */
-
-
-/* Simple interface with no output values (other than status) */
 _GLOBAL(plpar_hcall_norets)
 	HMT_MEDIUM
 
@@ -64,164 +27,64 @@ _GLOBAL(plpar_hcall_norets)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
 
-
-/* long plpar_hcall_8arg_2ret(unsigned long opcode,	R3
-			unsigned long arg1,		R4
-			unsigned long arg2,		R5
-			unsigned long arg3,		R6
-			unsigned long arg4,		R7
-			unsigned long arg5,		R8
-			unsigned long arg6,		R9
-			unsigned long arg7,		R10
-			unsigned long arg8,		112(R1)
-			unsigned long *out1);		120(R1)
- */
-_GLOBAL(plpar_hcall_8arg_2ret)
+_GLOBAL(plpar_hcall)
 	HMT_MEDIUM
 
 	mfcr	r0
-	ld	r11,STK_PARM(r11)(r1)	/* put arg8 in R11 */
 	stw	r0,8(r1)
 
-	HVSC				/* invoke the hypervisor */
-
-	lwz	r0,8(r1)
-	ld	r10,STK_PARM(r12)(r1)	/* Fetch r4 ret arg */
-	std	r4,0(r10)
-	mtcrf	0xff,r0
-	blr				/* return r3 = status */
-
-
-/* long plpar_hcall_4out(unsigned long opcode,		R3
-		 	unsigned long arg1,		R4
-		 	unsigned long arg2,		R5
-		 	unsigned long arg3,		R6
-		 	unsigned long arg4,		R7
-		 	unsigned long *out1,		R8
-		 	unsigned long *out2,		R9
-		 	unsigned long *out3,		R10
-		 	unsigned long *out4);		112(R1)
- */
-_GLOBAL(plpar_hcall_4out)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
+	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
-	std	r8,STK_PARM(r8)(r1)	/* Save out ptrs */
-	std	r9,STK_PARM(r9)(r1)
-	std	r10,STK_PARM(r10)(r1)
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
 
 	HVSC				/* invoke the hypervisor */
 
-	lwz	r0,8(r1)
-
-	ld	r8,STK_PARM(r8)(r1)	/* Fetch r4-r7 ret args */
-	ld	r9,STK_PARM(r9)(r1)
-	ld	r10,STK_PARM(r10)(r1)
-	ld	r11,STK_PARM(r11)(r1)
-	std	r4,0(r8)
-	std	r5,0(r9)
-	std	r6,0(r10)
-	std	r7,0(r11)
-
-	mtcrf	0xff,r0
-	blr				/* return r3 = status */
-
-/* plpar_hcall_7arg_7ret(unsigned long opcode,		R3
-			 unsigned long arg1,		R4
-			 unsigned long arg2,		R5
-			 unsigned long arg3,		R6
-			 unsigned long arg4,		R7
-			 unsigned long arg5,		R8
-			 unsigned long arg6,		R9
-			 unsigned long arg7,		R10
-			 unsigned long *out1,		112(R1)
-			 unsigned long *out2,		110(R1)
-			 unsigned long *out3,		108(R1)
-			 unsigned long *out4,		106(R1)
-			 unsigned long *out5,		104(R1)
-			 unsigned long *out6,		102(R1)
-			 unsigned long *out7);		100(R1)
-*/
-_GLOBAL(plpar_hcall_7arg_7ret)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HVSC				/* invoke the hypervisor */
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
 
 	lwz	r0,8(r1)
-
-	ld	r11,STK_PARM(r11)(r1)	/* Fetch r4 ret arg */
-	std	r4,0(r11)
-	ld	r11,STK_PARM(r12)(r1)	/* Fetch r5 ret arg */
-	std	r5,0(r11)
-	ld	r11,STK_PARM(r13)(r1)	/* Fetch r6 ret arg */
-	std	r6,0(r11)
-	ld	r11,STK_PARM(r14)(r1)	/* Fetch r7 ret arg */
-	std	r7,0(r11)
-	ld	r11,STK_PARM(r15)(r1)	/* Fetch r8 ret arg */
-	std	r8,0(r11)
-	ld	r11,STK_PARM(r16)(r1)	/* Fetch r9 ret arg */
-	std	r9,0(r11)
-	ld	r11,STK_PARM(r17)(r1)	/* Fetch r10 ret arg */
-	std	r10,0(r11)
-
 	mtcrf	0xff,r0
 
 	blr				/* return r3 = status */
 
-/* plpar_hcall_9arg_9ret(unsigned long opcode,		R3
-			 unsigned long arg1,		R4
-			 unsigned long arg2,		R5
-			 unsigned long arg3,		R6
-			 unsigned long arg4,		R7
-			 unsigned long arg5,		R8
-			 unsigned long arg6,		R9
-			 unsigned long arg7,		R10
-			 unsigned long arg8,		112(R1)
-			 unsigned long arg9,		110(R1)
-			 unsigned long *out1,		108(R1)
-			 unsigned long *out2,		106(R1)
-			 unsigned long *out3,		104(R1)
-			 unsigned long *out4,		102(R1)
-			 unsigned long *out5,		100(R1)
-			 unsigned long *out6,		 98(R1)
-			 unsigned long *out7);		 96(R1)
-			 unsigned long *out8,		 94(R1)
-		         unsigned long *out9,            92(R1)
-*/
-_GLOBAL(plpar_hcall_9arg_9ret)
+_GLOBAL(plpar_hcall9)
 	HMT_MEDIUM
 
 	mfcr	r0
 	stw	r0,8(r1)
 
-	ld	r11,STK_PARM(r11)(r1)	 /* put arg8 in R11 */
-	ld	r12,STK_PARM(r12)(r1)    /* put arg9 in R12 */
+	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
+
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+	ld	r10,STK_PARM(r11)(r1)	 /* put arg7 in R10 */
+	ld	r11,STK_PARM(r12)(r1)	 /* put arg8 in R11 */
+	ld	r12,STK_PARM(r13)(r1)    /* put arg9 in R12 */
 
 	HVSC				/* invoke the hypervisor */
 
-	ld	r0,STK_PARM(r13)(r1)	/* Fetch r4 ret arg */
-	stdx	r4,r0,r0
-	ld	r0,STK_PARM(r14)(r1)	/* Fetch r5 ret arg */
-	stdx	r5,r0,r0
-	ld	r0,STK_PARM(r15)(r1)	/* Fetch r6 ret arg */
-	stdx	r6,r0,r0
-	ld	r0,STK_PARM(r16)(r1)	/* Fetch r7 ret arg */
-	stdx	r7,r0,r0
-	ld	r0,STK_PARM(r17)(r1)	/* Fetch r8 ret arg */
-	stdx	r8,r0,r0
-	ld	r0,STK_PARM(r18)(r1)	/* Fetch r9 ret arg */
-	stdx	r9,r0,r0
-	ld	r0,STK_PARM(r19)(r1)	/* Fetch r10 ret arg */
-	stdx	r10,r0,r0
-	ld	r0,STK_PARM(r20)(r1)	/* Fetch r11 ret arg */
-	stdx	r11,r0,r0
-	ld	r0,STK_PARM(r21)(r1)	/* Fetch r12 ret arg */
-	stdx	r12,r0,r0
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+	std	r8, 32(r12)
+	std	r9, 40(r12)
+	std	r10,48(r12)
+	std	r11,56(r12)
+	std	r12,64(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index a72a987f1d4d4..3f6a89b09816e 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <asm/hvcall.h>
 #include <asm/hvconsole.h>
+#include "plpar_wrappers.h"
 
 /**
  * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
@@ -40,9 +41,9 @@ int hvc_get_chars(uint32_t vtermno, char *buf, int count)
 {
 	unsigned long got;
 
-	if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
-		(unsigned long *)buf, (unsigned long *)buf+1) == H_SUCCESS)
+	if (plpar_get_term_char(vtermno, &got, buf) == H_SUCCESS)
 		return got;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 4cb7ff227f72e..6cbf14266d5e2 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -48,13 +48,11 @@
 #define DBG_LOW(fmt...) do { } while(0)
 #endif
 
-/* in pSeries_hvCall.S */
+/* in hvCall.S */
 EXPORT_SYMBOL(plpar_hcall);
-EXPORT_SYMBOL(plpar_hcall_4out);
+EXPORT_SYMBOL(plpar_hcall9);
 EXPORT_SYMBOL(plpar_hcall_norets);
-EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
-EXPORT_SYMBOL(plpar_hcall_7arg_7ret);
-EXPORT_SYMBOL(plpar_hcall_9arg_9ret);
+
 extern void pSeries_find_serial_port(void);
 
 
@@ -277,7 +275,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	unsigned long flags;
 	unsigned long slot;
 	unsigned long hpte_v, hpte_r;
-	unsigned long dummy0, dummy1;
 
 	if (!(vflags & HPTE_V_BOLTED))
 		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
@@ -302,8 +299,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
 		hpte_r &= ~_PAGE_COHERENT;
 
-	lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
-			      hpte_r, &slot, &dummy0, &dummy1);
+	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
 	if (unlikely(lpar_rc == H_PTEG_FULL)) {
 		if (!(vflags & HPTE_V_BOLTED))
 			DBG_LOW(" full\n");
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index 3bd1b3e060036..ebd15de7597ea 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -5,20 +5,17 @@
 
 static inline long poll_pending(void)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_POLL_PENDING, 0, 0, 0, 0, &dummy, &dummy, &dummy);
+	return plpar_hcall_norets(H_POLL_PENDING);
 }
 
 static inline long prod_processor(void)
 {
-	plpar_hcall_norets(H_PROD);
-	return 0;
+	return plpar_hcall_norets(H_PROD);
 }
 
 static inline long cede_processor(void)
 {
-	plpar_hcall_norets(H_CEDE);
-	return 0;
+	return plpar_hcall_norets(H_CEDE);
 }
 
 static inline long vpa_call(unsigned long flags, unsigned long cpu,
@@ -42,21 +39,47 @@ static inline long register_vpa(unsigned long cpu, unsigned long vpa)
 
 extern void vpa_init(int cpu);
 
+static inline long plpar_pte_enter(unsigned long flags,
+		unsigned long hpte_group, unsigned long hpte_v,
+		unsigned long hpte_r, unsigned long *slot)
+{
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_ENTER, retbuf, flags, hpte_group, hpte_v, hpte_r);
+
+	*slot = retbuf[0];
+
+	return rc;
+}
+
 static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex,
 		unsigned long avpn, unsigned long *old_pteh_ret,
 		unsigned long *old_ptel_ret)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0, old_pteh_ret,
-			old_ptel_ret, &dummy);
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_REMOVE, retbuf, flags, ptex, avpn);
+
+	*old_pteh_ret = retbuf[0];
+	*old_ptel_ret = retbuf[1];
+
+	return rc;
 }
 
 static inline long plpar_pte_read(unsigned long flags, unsigned long ptex,
 		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_READ, flags, ptex, 0, 0, old_pteh_ret,
-			old_ptel_ret, &dummy);
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_READ, retbuf, flags, ptex);
+
+	*old_pteh_ret = retbuf[0];
+	*old_ptel_ret = retbuf[1];
+
+	return rc;
 }
 
 static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
@@ -68,9 +91,14 @@ static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
 static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba,
 		unsigned long *tce_ret)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_GET_TCE, liobn, ioba, 0, 0, tce_ret, &dummy,
-			&dummy);
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_GET_TCE, retbuf, liobn, ioba);
+
+	*tce_ret = retbuf[0];
+
+	return rc;
 }
 
 static inline long plpar_tce_put(unsigned long liobn, unsigned long ioba,
@@ -94,9 +122,17 @@ static inline long plpar_tce_stuff(unsigned long liobn, unsigned long ioba,
 static inline long plpar_get_term_char(unsigned long termno,
 		unsigned long *len_ret, char *buf_ret)
 {
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 	unsigned long *lbuf = (unsigned long *)buf_ret;	/* TODO: alignment? */
-	return plpar_hcall(H_GET_TERM_CHAR, termno, 0, 0, 0, len_ret,
-			lbuf + 0, lbuf + 1);
+
+	rc = plpar_hcall(H_GET_TERM_CHAR, retbuf, termno);
+
+	*len_ret = retbuf[0];
+	lbuf[0] = retbuf[1];
+	lbuf[1] = retbuf[2];
+
+	return rc;
 }
 
 static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
@@ -107,4 +143,31 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
 			lbuf[1]);
 }
 
+static inline long plpar_eoi(unsigned long xirr)
+{
+	return plpar_hcall_norets(H_EOI, xirr);
+}
+
+static inline long plpar_cppr(unsigned long cppr)
+{
+	return plpar_hcall_norets(H_CPPR, cppr);
+}
+
+static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
+{
+	return plpar_hcall_norets(H_IPI, servernum, mfrr);
+}
+
+static inline long plpar_xirr(unsigned long *xirr_ret)
+{
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_XIRR, retbuf);
+
+	*xirr_ret = retbuf[0];
+
+	return rc;
+}
+
 #endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 1eab4688be178..c88ec63129f3c 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -34,6 +34,7 @@
 #include <asm/i8259.h>
 
 #include "xics.h"
+#include "plpar_wrappers.h"
 
 #define XICS_IPI		2
 #define XICS_IRQ_SPURIOUS	0
@@ -110,27 +111,6 @@ static inline void direct_qirr_info(int n_cpu, u8 value)
 /* LPAR low level accessors */
 
 
-static inline long plpar_eoi(unsigned long xirr)
-{
-	return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
-	return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
-	return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret)
-{
-	unsigned long dummy;
-	return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
-}
-
 static inline unsigned int lpar_xirr_info_get(int n_cpu)
 {
 	unsigned long lpar_rc;
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 0464e78f733ad..e56eac88b8093 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -702,7 +702,8 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 					     desc[3].desc,
 					     desc[4].desc,
 					     desc[5].desc,
-					     correlator);
+					     correlator,
+					     &correlator);
 	} while ((lpar_rc == H_BUSY) && (retry_count--));
 
 	if(lpar_rc != H_SUCCESS && lpar_rc != H_DROPPED) {
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index 149191cef2f0a..f5b25bff15403 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -51,8 +51,21 @@
 #define h_add_logical_lan_buffer(ua, buf) \
   plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf)
 
-#define h_send_logical_lan(ua, buf1, buf2, buf3, buf4, buf5, buf6, correlator) \
-  plpar_hcall_8arg_2ret(H_SEND_LOGICAL_LAN, ua, buf1, buf2, buf3, buf4, buf5, buf6, correlator, &correlator)
+static inline long h_send_logical_lan(unsigned long unit_address,
+		unsigned long desc1, unsigned long desc2, unsigned long desc3,
+		unsigned long desc4, unsigned long desc5, unsigned long desc6,
+		unsigned long corellator_in, unsigned long *corellator_out)
+{
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, desc1,
+			desc2, desc3, desc4, desc5, desc6, corellator_in);
+
+	*corellator_out = retbuf[0];
+
+	return rc;
+}
 
 #define h_multicast_ctrl(ua, cmd, mac) \
   plpar_hcall_norets(H_MULTICAST_CTRL, ua, cmd, mac)
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index f07ae50cbc2ce..63ce1ac8c1f48 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -212,94 +212,39 @@
 
 #ifndef __ASSEMBLY__
 
-/* plpar_hcall() -- Generic call interface using above opcodes
+/**
+ * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
+ * @opcode: The hypervisor call to make.
  *
- * The actual call interface is a hypervisor call instruction with
- * the opcode in R3 and input args in R4-R7.
- * Status is returned in R3 with variable output values in R4-R11.
- * Only H_PTE_READ with H_READ_4 uses R6-R11 so we ignore it for now
- * and return only two out args which MUST ALWAYS BE PROVIDED.
- */
-long plpar_hcall(unsigned long opcode,
-		 unsigned long arg1,
-		 unsigned long arg2,
-		 unsigned long arg3,
-		 unsigned long arg4,
-		 unsigned long *out1,
-		 unsigned long *out2,
-		 unsigned long *out3);
-
-/* Same as plpar_hcall but for those opcodes that return no values
- * other than status.  Slightly more efficient.
+ * This call supports up to 7 arguments and only returns the status of
+ * the hcall. Use this version where possible, it's slightly faster than
+ * the other plpar_hcalls.
  */
 long plpar_hcall_norets(unsigned long opcode, ...);
 
-/*
- * Special hcall interface for ibmveth support.
- * Takes 8 input parms. Returns a rc and stores the
- * R4 return value in *out1.
- */
-long plpar_hcall_8arg_2ret(unsigned long opcode,
-			   unsigned long arg1,
-			   unsigned long arg2,
-			   unsigned long arg3,
-			   unsigned long arg4,
-			   unsigned long arg5,
-			   unsigned long arg6,
-			   unsigned long arg7,
-			   unsigned long arg8,
-			   unsigned long *out1);
-
-/* plpar_hcall_4out()
+/**
+ * plpar_hcall: - Make a pseries hypervisor call
+ * @opcode: The hypervisor call to make.
+ * @retbuf: Buffer to store up to 4 return arguments in.
  *
- * same as plpar_hcall except with 4 output arguments.
+ * This call supports up to 6 arguments and 4 return arguments. Use
+ * PLPAR_HCALL_BUFSIZE to size the return argument buffer.
  *
+ * Used for all but the craziest of phyp interfaces (see plpar_hcall9)
  */
-long plpar_hcall_4out(unsigned long opcode,
-		      unsigned long arg1,
-		      unsigned long arg2,
-		      unsigned long arg3,
-		      unsigned long arg4,
-		      unsigned long *out1,
-		      unsigned long *out2,
-		      unsigned long *out3,
-		      unsigned long *out4);
+#define PLPAR_HCALL_BUFSIZE 4
+long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
 
-long plpar_hcall_7arg_7ret(unsigned long opcode,
-			   unsigned long arg1,
-			   unsigned long arg2,
-			   unsigned long arg3,
-			   unsigned long arg4,
-			   unsigned long arg5,
-			   unsigned long arg6,
-			   unsigned long arg7,
-			   unsigned long *out1,
-			   unsigned long *out2,
-			   unsigned long *out3,
-			   unsigned long *out4,
-			   unsigned long *out5,
-			   unsigned long *out6,
-			   unsigned long *out7);
-
-long plpar_hcall_9arg_9ret(unsigned long opcode,
-			   unsigned long arg1,
-			   unsigned long arg2,
-			   unsigned long arg3,
-			   unsigned long arg4,
-			   unsigned long arg5,
-			   unsigned long arg6,
-			   unsigned long arg7,
-			   unsigned long arg8,
-			   unsigned long arg9,
-			   unsigned long *out1,
-			   unsigned long *out2,
-			   unsigned long *out3,
-			   unsigned long *out4,
-			   unsigned long *out5,
-			   unsigned long *out6,
-			   unsigned long *out7,
-			   unsigned long *out8,
-			   unsigned long *out9);
+/**
+ * plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments
+ * @opcode: The hypervisor call to make.
+ * @retbuf: Buffer to store up to 9 return arguments in.
+ *
+ * This call supports up to 9 arguments and 9 return arguments. Use
+ * PLPAR_HCALL9_BUFSIZE to size the return argument buffer.
+ */
+#define PLPAR_HCALL9_BUFSIZE 9
+long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
 
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
-- 
GitLab


From 43d6b68dc38867e489995e21649bb82f6ee7b5d3 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 29 Jul 2006 11:14:08 -0700
Subject: [PATCH 0063/1063] [SCSI] areca sysfs fix

Remove sysfs_remove_bin_file() return-value checking from the areca driver.

There's nothing a driver can do if sysfs file removal fails, so we'll soon be
changing sysfs_remove_bin_file() to internally print a diagnostic and to
return void.

Cc: Erich Chen <erich@areca.com.tw>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/arcmsr/arcmsr_attr.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index 0459f4194d7c6..c96f7140cb621 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -240,15 +240,11 @@ int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb)
 	}
 	return 0;
 error_bin_file_message_clear:
-	error = sysfs_remove_bin_file(&host->shost_classdev.kobj,
+	sysfs_remove_bin_file(&host->shost_classdev.kobj,
 				&arcmsr_sysfs_message_write_attr);
-	if (error)
-		printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_write failed\n");
 error_bin_file_message_write:
-	error = sysfs_remove_bin_file(&host->shost_classdev.kobj,
+	sysfs_remove_bin_file(&host->shost_classdev.kobj,
 				&arcmsr_sysfs_message_read_attr);
-	if (error)
-		printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_read failed\n");
 error_bin_file_message_read:
 	return error;
 }
@@ -256,20 +252,13 @@ int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb)
 void
 arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb) {
 	struct Scsi_Host *host = acb->host;
-	int error;
 
-	error = sysfs_remove_bin_file(&host->shost_classdev.kobj,
+	sysfs_remove_bin_file(&host->shost_classdev.kobj,
 				&arcmsr_sysfs_message_clear_attr);
-	if (error)
-		printk(KERN_ERR "arcmsr: free sysfs mu_clear failed\n");
-	error = sysfs_remove_bin_file(&host->shost_classdev.kobj,
+	sysfs_remove_bin_file(&host->shost_classdev.kobj,
 				&arcmsr_sysfs_message_write_attr);
-	if (error)
-		printk(KERN_ERR "arcmsr: free sysfs mu_write failed\n");
-	error = sysfs_remove_bin_file(&host->shost_classdev.kobj,
+	sysfs_remove_bin_file(&host->shost_classdev.kobj,
 				&arcmsr_sysfs_message_read_attr);
-	if (error)
-		printk(KERN_ERR "arcmsr: free sysfss mu_read failed\n");
 }
 
 
-- 
GitLab


From d67a70aca200f67be42428e74eb3353f20ad1130 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Fri, 28 Jul 2006 17:36:46 -0500
Subject: [PATCH 0064/1063] [SCSI] arcmsr: fix up sysfs values

The sysfs files in arcmsr are non-standard in that they aren't simple
filename value pairs; the values actually contain preceding text which
would have to be parsed.  The idea of sysfs files is that the file name
is the description and the contents are a simple value.

Fix up arcmsr to conform to this standard.

Acked-By: Erich Chen <erich@areca.com.tw>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/arcmsr/arcmsr_attr.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index c96f7140cb621..12497da5529d8 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -265,7 +265,7 @@ arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb) {
 static ssize_t
 arcmsr_attr_host_driver_version(struct class_device *cdev, char *buf) {
 	return snprintf(buf, PAGE_SIZE,
-			"ARCMSR: %s\n",
+			"%s\n",
 			ARCMSR_DRIVER_VERSION);
 }
 
@@ -274,7 +274,7 @@ arcmsr_attr_host_driver_posted_cmd(struct class_device *cdev, char *buf) {
 	struct Scsi_Host *host = class_to_shost(cdev);
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 	return snprintf(buf, PAGE_SIZE,
-			"Current commands posted:     %4d\n",
+			"%4d\n",
 			atomic_read(&acb->ccboutstandingcount));
 }
 
@@ -283,7 +283,7 @@ arcmsr_attr_host_driver_reset(struct class_device *cdev, char *buf) {
 	struct Scsi_Host *host = class_to_shost(cdev);
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 	return snprintf(buf, PAGE_SIZE,
-			"SCSI Host Resets:            %4d\n",
+			"%4d\n",
 			acb->num_resets);
 }
 
@@ -292,7 +292,7 @@ arcmsr_attr_host_driver_abort(struct class_device *cdev, char *buf) {
 	struct Scsi_Host *host = class_to_shost(cdev);
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 	return snprintf(buf, PAGE_SIZE,
-			"SCSI Aborts/Timeouts:        %4d\n",
+			"%4d\n",
 			acb->num_aborts);
 }
 
@@ -301,7 +301,7 @@ arcmsr_attr_host_fw_model(struct class_device *cdev, char *buf) {
     struct Scsi_Host *host = class_to_shost(cdev);
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 	return snprintf(buf, PAGE_SIZE,
-			"Adapter Model: %s\n",
+			"%s\n",
 			acb->firm_model);
 }
 
@@ -311,7 +311,7 @@ arcmsr_attr_host_fw_version(struct class_device *cdev, char *buf) {
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 
 	return snprintf(buf, PAGE_SIZE,
-			"Firmware Version:  %s\n",
+			"%s\n",
 			acb->firm_version);
 }
 
@@ -321,7 +321,7 @@ arcmsr_attr_host_fw_request_len(struct class_device *cdev, char *buf) {
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 
 	return snprintf(buf, PAGE_SIZE,
-			"Reguest Lenth: %4d\n",
+			"%4d\n",
 			acb->firm_request_len);
 }
 
@@ -331,7 +331,7 @@ arcmsr_attr_host_fw_numbers_queue(struct class_device *cdev, char *buf) {
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 
 	return snprintf(buf, PAGE_SIZE,
-			"Numbers of Queue: %4d\n",
+			"%4d\n",
 			acb->firm_numbers_queue);
 }
 
@@ -341,7 +341,7 @@ arcmsr_attr_host_fw_sdram_size(struct class_device *cdev, char *buf) {
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 
 	return snprintf(buf, PAGE_SIZE,
-			"SDRAM Size: %4d\n",
+			"%4d\n",
 			acb->firm_sdram_size);
 }
 
@@ -351,7 +351,7 @@ arcmsr_attr_host_fw_hd_channels(struct class_device *cdev, char *buf) {
 	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
 
 	return snprintf(buf, PAGE_SIZE,
-			"Hard Disk Channels: %4d\n",
+			"%4d\n",
 			acb->firm_hd_channels);
 }
 
-- 
GitLab


From 2672ea86be26353108a72a28910df4dc61cdb5e2 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Wed, 2 Aug 2006 17:11:49 -0400
Subject: [PATCH 0065/1063] [SCSI] advansys pci tweaks.

Remove a lot of duplicate #defines from the advansys driver,
and make them look like PCI IDs as defined elsewhere in the kernel.
Also add a module table so that it automatically gets picked up
by tools relying on modinfo output (like say, distro installers).

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/advansys.c | 90 ++++++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 41 deletions(-)

diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index e32b4ab2f8fb2..773f02e3b10b0 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -888,10 +888,6 @@ typedef unsigned char uchar;
 #define ASC_PCI_ID2DEV(id)    (((id) >> 11) & 0x1F)
 #define ASC_PCI_ID2FUNC(id)   (((id) >> 8) & 0x7)
 #define ASC_PCI_MKID(bus, dev, func) ((((dev) & 0x1F) << 11) | (((func) & 0x7) << 8) | ((bus) & 0xFF))
-#define ASC_PCI_VENDORID                  0x10CD
-#define ASC_PCI_DEVICEID_1200A            0x1100
-#define ASC_PCI_DEVICEID_1200B            0x1200
-#define ASC_PCI_DEVICEID_ULTRA            0x1300
 #define ASC_PCI_REVISION_3150             0x02
 #define ASC_PCI_REVISION_3050             0x03
 
@@ -899,6 +895,14 @@ typedef unsigned char uchar;
 #define  ASC_DVCLIB_CALL_FAILED   (0)
 #define  ASC_DVCLIB_CALL_ERROR    (-1)
 
+#define PCI_VENDOR_ID_ASP		0x10cd
+#define PCI_DEVICE_ID_ASP_1200A		0x1100
+#define PCI_DEVICE_ID_ASP_ABP940	0x1200
+#define PCI_DEVICE_ID_ASP_ABP940U	0x1300
+#define PCI_DEVICE_ID_ASP_ABP940UW	0x2300
+#define PCI_DEVICE_ID_38C0800_REV1	0x2500
+#define PCI_DEVICE_ID_38C1600_REV1	0x2700
+
 /*
  * Enable CC_VERY_LONG_SG_LIST to support up to 64K element SG lists.
  * The SRB structure will have to be changed and the ASC_SRB2SCSIQ()
@@ -1492,8 +1496,6 @@ typedef struct asc_dvc_cfg {
 #define ASC_INIT_STATE_END_INQUIRY   0x0080
 #define ASC_INIT_RESET_SCSI_DONE     0x0100
 #define ASC_INIT_STATE_WITHOUT_EEP   0x8000
-#define ASC_PCI_DEVICE_ID_REV_A      0x1100
-#define ASC_PCI_DEVICE_ID_REV_B      0x1200
 #define ASC_BUG_FIX_IF_NOT_DWB       0x0001
 #define ASC_BUG_FIX_ASYN_USE_SYN     0x0002
 #define ASYN_SDTR_DATA_FIX_PCI_REV_AB 0x41
@@ -2100,12 +2102,6 @@ STATIC ASC_DCNT  AscGetMaxDmaCount(ushort);
 #define ADV_NUM_PAGE_CROSSING \
     ((ADV_SG_TOTAL_MEM_SIZE + (ADV_PAGE_SIZE - 1))/ADV_PAGE_SIZE)
 
-/* a_condor.h */
-#define ADV_PCI_VENDOR_ID               0x10CD
-#define ADV_PCI_DEVICE_ID_REV_A         0x2300
-#define ADV_PCI_DEVID_38C0800_REV1      0x2500
-#define ADV_PCI_DEVID_38C1600_REV1      0x2700
-
 #define ADV_EEP_DVC_CFG_BEGIN           (0x00)
 #define ADV_EEP_DVC_CFG_END             (0x15)
 #define ADV_EEP_DVC_CTL_BEGIN           (0x16)  /* location of OEM name */
@@ -3569,14 +3565,7 @@ typedef struct scsi_cmnd     REQ, *REQP;
 #define PCI_MAX_SLOT            0x1F
 #define PCI_MAX_BUS             0xFF
 #define PCI_IOADDRESS_MASK      0xFFFE
-#define ASC_PCI_VENDORID        0x10CD
 #define ASC_PCI_DEVICE_ID_CNT   6       /* PCI Device ID count. */
-#define ASC_PCI_DEVICE_ID_1100  0x1100
-#define ASC_PCI_DEVICE_ID_1200  0x1200
-#define ASC_PCI_DEVICE_ID_1300  0x1300
-#define ASC_PCI_DEVICE_ID_2300  0x2300  /* ASC-3550 */
-#define ASC_PCI_DEVICE_ID_2500  0x2500  /* ASC-38C0800 */
-#define ASC_PCI_DEVICE_ID_2700  0x2700  /* ASC-38C1600 */
 
 #ifndef ADVANSYS_STATS
 #define ASC_STATS(shp, counter)
@@ -4330,12 +4319,12 @@ advansys_detect(struct scsi_host_template *tpnt)
     struct pci_dev      *pci_devp = NULL;
     int                 pci_device_id_cnt = 0;
     unsigned int        pci_device_id[ASC_PCI_DEVICE_ID_CNT] = {
-                                    ASC_PCI_DEVICE_ID_1100,
-                                    ASC_PCI_DEVICE_ID_1200,
-                                    ASC_PCI_DEVICE_ID_1300,
-                                    ASC_PCI_DEVICE_ID_2300,
-                                    ASC_PCI_DEVICE_ID_2500,
-                                    ASC_PCI_DEVICE_ID_2700
+                                    PCI_DEVICE_ID_ASP_1200A,
+                                    PCI_DEVICE_ID_ASP_ABP940,
+                                    PCI_DEVICE_ID_ASP_ABP940U,
+                                    PCI_DEVICE_ID_ASP_ABP940UW,
+                                    PCI_DEVICE_ID_38C0800_REV1,
+                                    PCI_DEVICE_ID_38C1600_REV1
                         };
     ADV_PADDR           pci_memory_address;
 #endif /* CONFIG_PCI */
@@ -4471,7 +4460,7 @@ advansys_detect(struct scsi_host_template *tpnt)
 
                     /* Find all PCI cards. */
                     while (pci_device_id_cnt < ASC_PCI_DEVICE_ID_CNT) {
-                        if ((pci_devp = pci_find_device(ASC_PCI_VENDORID,
+                        if ((pci_devp = pci_find_device(PCI_VENDOR_ID_ASP,
                             pci_device_id[pci_device_id_cnt], pci_devp)) ==
                             NULL) {
                             pci_device_id_cnt++;
@@ -4575,9 +4564,9 @@ advansys_detect(struct scsi_host_template *tpnt)
              */
 #ifdef CONFIG_PCI
             if (asc_bus[bus] == ASC_IS_PCI &&
-                (pci_devp->device == ASC_PCI_DEVICE_ID_2300 ||
-                 pci_devp->device == ASC_PCI_DEVICE_ID_2500 ||
-                 pci_devp->device == ASC_PCI_DEVICE_ID_2700))
+                (pci_devp->device == PCI_DEVICE_ID_ASP_ABP940UW ||
+                 pci_devp->device == PCI_DEVICE_ID_38C0800_REV1 ||
+                 pci_devp->device == PCI_DEVICE_ID_38C1600_REV1))
             {
                 boardp->flags |= ASC_IS_WIDE_BOARD;
             }
@@ -4600,11 +4589,11 @@ advansys_detect(struct scsi_host_template *tpnt)
                 adv_dvc_varp->isr_callback = adv_isr_callback;
                 adv_dvc_varp->async_callback = adv_async_callback;
 #ifdef CONFIG_PCI
-                if (pci_devp->device == ASC_PCI_DEVICE_ID_2300)
+                if (pci_devp->device == PCI_DEVICE_ID_ASP_ABP940UW)
                 {
                     ASC_DBG(1, "advansys_detect: ASC-3550\n");
                     adv_dvc_varp->chip_type = ADV_CHIP_ASC3550;
-                } else if (pci_devp->device == ASC_PCI_DEVICE_ID_2500)
+                } else if (pci_devp->device == PCI_DEVICE_ID_38C0800_REV1)
                 {
                     ASC_DBG(1, "advansys_detect: ASC-38C0800\n");
                     adv_dvc_varp->chip_type = ADV_CHIP_ASC38C0800;
@@ -11922,7 +11911,7 @@ AscInitGetConfig(
         PCIRevisionID = DvcReadPCIConfigByte(asc_dvc,
                                     AscPCIConfigRevisionIDRegister);
 
-        if (PCIVendorID != ASC_PCI_VENDORID) {
+        if (PCIVendorID != PCI_VENDOR_ID_ASP) {
             warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE;
         }
         prevCmdRegBits = DvcReadPCIConfigByte(asc_dvc,
@@ -11942,15 +11931,15 @@ AscInitGetConfig(
                 warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE;
             }
         }
-        if ((PCIDeviceID == ASC_PCI_DEVICEID_1200A) ||
-            (PCIDeviceID == ASC_PCI_DEVICEID_1200B)) {
+        if ((PCIDeviceID == PCI_DEVICE_ID_ASP_1200A) ||
+            (PCIDeviceID == PCI_DEVICE_ID_ASP_ABP940)) {
             DvcWritePCIConfigByte(asc_dvc,
                             AscPCIConfigLatencyTimer, 0x00);
             if (DvcReadPCIConfigByte(asc_dvc, AscPCIConfigLatencyTimer)
                 != 0x00) {
                 warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE;
             }
-        } else if (PCIDeviceID == ASC_PCI_DEVICEID_ULTRA) {
+        } else if (PCIDeviceID == PCI_DEVICE_ID_ASP_ABP940U) {
             if (DvcReadPCIConfigByte(asc_dvc,
                                 AscPCIConfigLatencyTimer) < 0x20) {
                 DvcWritePCIConfigByte(asc_dvc,
@@ -12037,8 +12026,8 @@ AscInitFromAscDvcVar(
         AscSetChipCfgMsw(iop_base, cfg_msw);
         if ((asc_dvc->bus_type & ASC_IS_PCI_ULTRA) == ASC_IS_PCI_ULTRA) {
         } else {
-            if ((pci_device_id == ASC_PCI_DEVICE_ID_REV_A) ||
-                (pci_device_id == ASC_PCI_DEVICE_ID_REV_B)) {
+            if ((pci_device_id == PCI_DEVICE_ID_ASP_1200A) ||
+                (pci_device_id == PCI_DEVICE_ID_ASP_ABP940)) {
                 asc_dvc->bug_fix_cntl |= ASC_BUG_FIX_IF_NOT_DWB;
                 asc_dvc->bug_fix_cntl |= ASC_BUG_FIX_ASYN_USE_SYN;
             }
@@ -14275,8 +14264,8 @@ Default_38C0800_EEPROM_Config __initdata = {
     0,                          /* 55 reserved */
     0,                          /* 56 cisptr_lsw */
     0,                          /* 57 cisprt_msw */
-    ADV_PCI_VENDOR_ID,          /* 58 subsysvid */
-    ADV_PCI_DEVID_38C0800_REV1, /* 59 subsysid */
+    PCI_VENDOR_ID_ASP,          /* 58 subsysvid */
+    PCI_DEVICE_ID_38C0800_REV1, /* 59 subsysid */
     0,                          /* 60 reserved */
     0,                          /* 61 reserved */
     0,                          /* 62 reserved */
@@ -14405,8 +14394,8 @@ Default_38C1600_EEPROM_Config __initdata = {
     0,                          /* 55 reserved */
     0,                          /* 56 cisptr_lsw */
     0,                          /* 57 cisprt_msw */
-    ADV_PCI_VENDOR_ID,          /* 58 subsysvid */
-    ADV_PCI_DEVID_38C1600_REV1, /* 59 subsysid */
+    PCI_VENDOR_ID_ASP,          /* 58 subsysvid */
+    PCI_DEVICE_ID_38C1600_REV1, /* 59 subsysid */
     0,                          /* 60 reserved */
     0,                          /* 61 reserved */
     0,                          /* 62 reserved */
@@ -18225,3 +18214,22 @@ AdvInquiryHandling(
     }
 }
 MODULE_LICENSE("Dual BSD/GPL");
+
+/* PCI Devices supported by this driver */
+static struct pci_device_id advansys_pci_tbl[] __devinitdata = {
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_1200A,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940U,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940UW,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_38C0800_REV1,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_38C1600_REV1,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, advansys_pci_tbl);
+
-- 
GitLab


From fddafd3d21953d5ea740f7b2f27149f7dd493194 Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Thu, 3 Aug 2006 13:54:59 -0500
Subject: [PATCH 0066/1063] [SCSI] DAC960: PCI id table fixup

The PCI ID table in the DAC960 driver conflicts with some devices
that use the ipr driver. All ipr adapters that use this chip
have an IBM subvendor ID and all DAC960 adapters that use this
chip have a Mylex subvendor id.

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/block/DAC960.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 4cd23c3eab41a..a360215dbce79 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -7115,7 +7115,7 @@ static struct pci_device_id DAC960_id_table[] = {
 	{
 		.vendor 	= PCI_VENDOR_ID_MYLEX,
 		.device		= PCI_DEVICE_ID_MYLEX_DAC960_GEM,
-		.subvendor	= PCI_ANY_ID,
+		.subvendor	= PCI_VENDOR_ID_MYLEX,
 		.subdevice	= PCI_ANY_ID,
 		.driver_data	= (unsigned long) &DAC960_GEM_privdata,
 	},
-- 
GitLab


From 9e5c50fa8686ede7c37b939a0b950df50346eb3d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Aug 2006 17:18:30 +0200
Subject: [PATCH 0067/1063] [SCSI] remove SCSI_STATE_ #defines

These aren't used anymore since the field in scsi_cmnd where it was
stored has been removed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/scsi/scsi_cmnd.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 58e6444eebee7..be117f812deb9 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -118,20 +118,6 @@ struct scsi_cmnd {
 	unsigned long pid;	/* Process ID, starts at 0. Unique per host. */
 };
 
-/*
- * These are the values that scsi_cmd->state can take.
- */
-#define SCSI_STATE_TIMEOUT         0x1000
-#define SCSI_STATE_FINISHED        0x1001
-#define SCSI_STATE_FAILED          0x1002
-#define SCSI_STATE_QUEUED          0x1003
-#define SCSI_STATE_UNUSED          0x1006
-#define SCSI_STATE_DISCONNECTING   0x1008
-#define SCSI_STATE_INITIALIZING    0x1009
-#define SCSI_STATE_BHQUEUE         0x100a
-#define SCSI_STATE_MLQUEUE         0x100b
-
-
 extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t);
 extern void scsi_put_command(struct scsi_cmnd *);
 extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
-- 
GitLab


From dd7e2f2266acf66ec882baa6fbd79f853b5fe966 Mon Sep 17 00:00:00 2001
From: Michael Reed <mdr@sgi.com>
Date: Fri, 4 Aug 2006 12:09:24 -0500
Subject: [PATCH 0068/1063] [SCSI] scsi_queue_work() documented return value is
 incorrect

If you examine the queue_work() routine you'll see that it returns
1 on success, 0 if the work is already queued.

This patch corrects the source code documentation for the
scsi_queue_work function.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/hosts.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index dfcb96f3e60ce..f244d4f6597a4 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -487,7 +487,9 @@ EXPORT_SYMBOL(scsi_is_host_device);
  * @work:	Work to queue for execution.
  *
  * Return value:
- * 	0 on success / != 0 for error
+ * 	1 - work queued for execution
+ *	0 - work is already queued
+ *	-EINVAL - work queue doesn't exist
  **/
 int scsi_queue_work(struct Scsi_Host *shost, struct work_struct *work)
 {
-- 
GitLab


From 5d947f2b7607c4674d104accbd3768744aaa4154 Mon Sep 17 00:00:00 2001
From: Michael Reed <mdr@sgi.com>
Date: Mon, 31 Jul 2006 12:19:30 -0500
Subject: [PATCH 0069/1063] [SCSI] mptfc: add additional fc transport
 attributes

Add host_supported_speeds, host_maxframe_size, host_speed, host_fabric_name,
host_port_type, host_port_state, and host_symbolic_name transport attributes
to fusion fibre channel.

Signed-off-by: Michael Reed <mdr@sgi.com>
Acked-by: Moore, Eric <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/message/fusion/mptfc.c | 100 +++++++++++++++++++++++++++------
 1 file changed, 84 insertions(+), 16 deletions(-)

diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index 90da7d63b08e1..244a32c66f06c 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -162,7 +162,13 @@ static struct fc_function_template mptfc_transport_functions = {
 	.show_starget_port_id = 1,
 	.set_rport_dev_loss_tmo = mptfc_set_rport_loss_tmo,
 	.show_rport_dev_loss_tmo = 1,
-
+	.show_host_supported_speeds = 1,
+	.show_host_maxframe_size = 1,
+	.show_host_speed = 1,
+	.show_host_fabric_name = 1,
+	.show_host_port_type = 1,
+	.show_host_port_state = 1,
+	.show_host_symbolic_name = 1,
 };
 
 static void
@@ -836,33 +842,95 @@ mptfc_SetFcPortPage1_defaults(MPT_ADAPTER *ioc)
 static void
 mptfc_init_host_attr(MPT_ADAPTER *ioc,int portnum)
 {
-	unsigned class = 0, cos = 0;
+	unsigned	class = 0;
+	unsigned	cos = 0;
+	unsigned	speed;
+	unsigned	port_type;
+	unsigned	port_state;
+	FCPortPage0_t	*pp0;
+	struct Scsi_Host *sh;
+	char		*sn;
 
 	/* don't know what to do as only one scsi (fc) host was allocated */
 	if (portnum != 0)
 		return;
 
-	class = ioc->fc_port_page0[portnum].SupportedServiceClass;
+	pp0 = &ioc->fc_port_page0[portnum];
+	sh = ioc->sh;
+
+	sn = fc_host_symbolic_name(sh);
+	snprintf(sn, FC_SYMBOLIC_NAME_SIZE, "%s %s%08xh",
+	    ioc->prod_name,
+	    MPT_FW_REV_MAGIC_ID_STRING,
+	    ioc->facts.FWVersion.Word);
+
+	fc_host_tgtid_bind_type(sh) = FC_TGTID_BIND_BY_WWPN;
+
+	fc_host_maxframe_size(sh) = pp0->MaxFrameSize;
+
+	fc_host_node_name(sh) =
+	    	(u64)pp0->WWNN.High << 32 | (u64)pp0->WWNN.Low;
+
+	fc_host_port_name(sh) =
+	    	(u64)pp0->WWPN.High << 32 | (u64)pp0->WWPN.Low;
+
+	fc_host_port_id(sh) = pp0->PortIdentifier;
+
+	class = pp0->SupportedServiceClass;
 	if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_1)
 		cos |= FC_COS_CLASS1;
 	if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_2)
 		cos |= FC_COS_CLASS2;
 	if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_3)
 		cos |= FC_COS_CLASS3;
+	fc_host_supported_classes(sh) = cos;
+
+	if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_1GBIT)
+		speed = FC_PORTSPEED_1GBIT;
+	else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_2GBIT)
+		speed = FC_PORTSPEED_2GBIT;
+	else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_4GBIT)
+		speed = FC_PORTSPEED_4GBIT;
+	else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_10GBIT)
+		speed = FC_PORTSPEED_10GBIT;
+	else
+		speed = FC_PORTSPEED_UNKNOWN;
+	fc_host_speed(sh) = speed;
+
+	speed = 0;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_1GBIT_SPEED)
+		speed |= FC_PORTSPEED_1GBIT;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_2GBIT_SPEED)
+		speed |= FC_PORTSPEED_2GBIT;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_4GBIT_SPEED)
+		speed |= FC_PORTSPEED_4GBIT;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_10GBIT_SPEED)
+		speed |= FC_PORTSPEED_10GBIT;
+	fc_host_supported_speeds(sh) = speed;
+
+	port_state = FC_PORTSTATE_UNKNOWN;
+	if (pp0->PortState == MPI_FCPORTPAGE0_PORTSTATE_ONLINE)
+		port_state = FC_PORTSTATE_ONLINE;
+	else if (pp0->PortState == MPI_FCPORTPAGE0_PORTSTATE_OFFLINE)
+		port_state = FC_PORTSTATE_LINKDOWN;
+	fc_host_port_state(sh) = port_state;
+
+	port_type = FC_PORTTYPE_UNKNOWN;
+	if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_POINT_TO_POINT)
+		port_type = FC_PORTTYPE_PTP;
+	else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_PRIVATE_LOOP)
+		port_type = FC_PORTTYPE_LPORT;
+	else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_PUBLIC_LOOP)
+		port_type = FC_PORTTYPE_NLPORT;
+	else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_FABRIC_DIRECT)
+		port_type = FC_PORTTYPE_NPORT;
+	fc_host_port_type(sh) = port_type;
+
+	fc_host_fabric_name(sh) =	/* fabric WWNN if valid, else own WWNN */
+	    (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_FABRIC_WWN_VALID) ?
+		(u64) pp0->FabricWWNN.High << 32 | (u64) pp0->FabricWWNN.Low :
+		(u64)pp0->WWNN.High << 32 | (u64)pp0->WWNN.Low;
 
-	fc_host_node_name(ioc->sh) =
-	    	(u64)ioc->fc_port_page0[portnum].WWNN.High << 32
-		    | (u64)ioc->fc_port_page0[portnum].WWNN.Low;
-
-	fc_host_port_name(ioc->sh) =
-	    	(u64)ioc->fc_port_page0[portnum].WWPN.High << 32
-		    | (u64)ioc->fc_port_page0[portnum].WWPN.Low;
-
-	fc_host_port_id(ioc->sh) = ioc->fc_port_page0[portnum].PortIdentifier;
-
-	fc_host_supported_classes(ioc->sh) = cos;
-
-	fc_host_tgtid_bind_type(ioc->sh) = FC_TGTID_BIND_BY_WWPN;
 }
 
 static void
-- 
GitLab


From b5145d25f0d8eae21ad7969822f2d4ce7f22e72a Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Wed, 2 Aug 2006 14:57:36 -0500
Subject: [PATCH 0070/1063] [SCSI] ipr: Add some hardware defined types for
 SATA

Add some hardware defined types for SATA. This is required
by future patches to add SATA support to ipr.

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/ipr.h | 78 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index 1ad24df69d704..1e9c1227fdae6 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -45,6 +45,7 @@
  *	This can be adjusted at runtime through sysfs device attributes.
  */
 #define IPR_MAX_CMD_PER_LUN				6
+#define IPR_MAX_CMD_PER_ATA_LUN			1
 
 /*
  * IPR_NUM_BASE_CMD_BLKS: This defines the maximum number of
@@ -106,7 +107,7 @@
 #define IPR_IOA_BUS						0xff
 #define IPR_IOA_TARGET					0xff
 #define IPR_IOA_LUN						0xff
-#define IPR_MAX_NUM_BUSES				8
+#define IPR_MAX_NUM_BUSES				16
 #define IPR_MAX_BUS_TO_SCAN				IPR_MAX_NUM_BUSES
 
 #define IPR_NUM_RESET_RELOAD_RETRIES		3
@@ -145,6 +146,7 @@
 #define	IPR_LUN_RESET					0x40
 #define	IPR_TARGET_RESET					0x20
 #define	IPR_BUS_RESET					0x10
+#define	IPR_ATA_PHY_RESET					0x80
 #define IPR_ID_HOST_RR_Q				0xC4
 #define IPR_QUERY_IOA_CONFIG				0xC5
 #define IPR_CANCEL_ALL_REQUESTS			0xCE
@@ -295,7 +297,11 @@ struct ipr_std_inq_data {
 }__attribute__ ((packed));
 
 struct ipr_config_table_entry {
-	u8 service_level;
+	u8 proto;
+#define IPR_PROTO_SATA			0x02
+#define IPR_PROTO_SATA_ATAPI		0x03
+#define IPR_PROTO_SAS_STP		0x06
+#define IPR_PROTO_SAS_STP_ATAPI	0x07
 	u8 array_id;
 	u8 flags;
 #define IPR_IS_IOA_RESOURCE	0x80
@@ -307,6 +313,7 @@ struct ipr_config_table_entry {
 #define IPR_SUBTYPE_AF_DASD			0
 #define IPR_SUBTYPE_GENERIC_SCSI	1
 #define IPR_SUBTYPE_VOLUME_SET		2
+#define IPR_SUBTYPE_GENERIC_ATA	4
 
 #define IPR_QUEUEING_MODEL(res)	((((res)->cfgte.flags) & 0x70) >> 4)
 #define IPR_QUEUE_FROZEN_MODEL	0
@@ -350,6 +357,7 @@ struct ipr_cmd_pkt {
 #define IPR_RQTYPE_SCSICDB		0x00
 #define IPR_RQTYPE_IOACMD		0x01
 #define IPR_RQTYPE_HCAM			0x02
+#define IPR_RQTYPE_ATA_PASSTHRU	0x04
 
 	u8 luntar_luntrn;
 
@@ -373,6 +381,37 @@ struct ipr_cmd_pkt {
 	__be16 timeout;
 }__attribute__ ((packed, aligned(4)));
 
+struct ipr_ioarcb_ata_regs {
+	u8 flags;
+#define IPR_ATA_FLAG_PACKET_CMD			0x80
+#define IPR_ATA_FLAG_XFER_TYPE_DMA			0x40
+#define IPR_ATA_FLAG_STATUS_ON_GOOD_COMPLETION	0x20
+	u8 reserved[3];
+
+	__be16 data;
+	u8 feature;
+	u8 nsect;
+	u8 lbal;
+	u8 lbam;
+	u8 lbah;
+	u8 device;
+	u8 command;
+	u8 reserved2[3];
+	u8 hob_feature;
+	u8 hob_nsect;
+	u8 hob_lbal;
+	u8 hob_lbam;
+	u8 hob_lbah;
+	u8 ctl;
+}__attribute__ ((packed, aligned(4)));
+
+struct ipr_ioarcb_add_data {
+	union {
+		struct ipr_ioarcb_ata_regs regs;
+		__be32 add_cmd_parms[10];
+	}u;
+}__attribute__ ((packed, aligned(4)));
+
 /* IOA Request Control Block    128 bytes  */
 struct ipr_ioarcb {
 	__be32 ioarcb_host_pci_addr;
@@ -397,7 +436,7 @@ struct ipr_ioarcb {
 	struct ipr_cmd_pkt cmd_pkt;
 
 	__be32 add_cmd_parms_len;
-	__be32 add_cmd_parms[10];
+	struct ipr_ioarcb_add_data add_data;
 }__attribute__((packed, aligned (4)));
 
 struct ipr_ioadl_desc {
@@ -433,6 +472,21 @@ struct ipr_ioasa_gpdd {
 	__be32 ioa_data[2];
 }__attribute__((packed, aligned (4)));
 
+struct ipr_ioasa_gata {
+	u8 error;
+	u8 nsect;		/* Interrupt reason */
+	u8 lbal;
+	u8 lbam;
+	u8 lbah;
+	u8 device;
+	u8 status;
+	u8 alt_status;	/* ATA CTL */
+	u8 hob_nsect;
+	u8 hob_lbal;
+	u8 hob_lbam;
+	u8 hob_lbah;
+}__attribute__((packed, aligned (4)));
+
 struct ipr_auto_sense {
 	__be16 auto_sense_len;
 	__be16 ioa_data_len;
@@ -466,6 +520,7 @@ struct ipr_ioasa {
 	__be32 ioasc_specific;	/* status code specific field */
 #define IPR_ADDITIONAL_STATUS_FMT		0x80000000
 #define IPR_AUTOSENSE_VALID			0x40000000
+#define IPR_ATA_DEVICE_WAS_RESET		0x20000000
 #define IPR_IOASC_SPECIFIC_MASK		0x00ffffff
 #define IPR_FIELD_POINTER_VALID		(0x80000000 >> 8)
 #define IPR_FIELD_POINTER_MASK		0x0000ffff
@@ -474,6 +529,7 @@ struct ipr_ioasa {
 		struct ipr_ioasa_vset vset;
 		struct ipr_ioasa_af_dasd dasd;
 		struct ipr_ioasa_gpdd gpdd;
+		struct ipr_ioasa_gata gata;
 	} u;
 
 	struct ipr_auto_sense auto_sense;
@@ -1307,6 +1363,22 @@ static inline int ipr_is_scsi_disk(struct ipr_resource_entry *res)
 		return 0;
 }
 
+/**
+ * ipr_is_gata - Determine if a resource is a generic ATA resource
+ * @res:	resource entry struct
+ *
+ * Return value:
+ * 	1 if GATA / 0 if not GATA
+ **/
+static inline int ipr_is_gata(struct ipr_resource_entry *res)
+{
+	if (!ipr_is_ioa_resource(res) &&
+	    IPR_RES_SUBTYPE(res) == IPR_SUBTYPE_GENERIC_ATA)
+		return 1;
+	else
+		return 0;
+}
+
 /**
  * ipr_is_naca_model - Determine if a resource is using NACA queueing model
  * @res:	resource entry struct
-- 
GitLab


From 896bbd21408ddbfb9a57819404dbb04f4f0afb35 Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Wed, 2 Aug 2006 14:57:44 -0500
Subject: [PATCH 0071/1063] [SCSI] ipr: Handle new SAS error codes

Add definitions for some SAS error codes that can be
logged by ipr SAS adapters.

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/ipr.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 01080b3acf5e7..7f2c5cfc57bac 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -175,6 +175,8 @@ struct ipr_error_table_t ipr_error_table[] = {
 	"Qualified success"},
 	{0x01080000, 1, 1,
 	"FFFE: Soft device bus error recovered by the IOA"},
+	{0x01088100, 0, 1,
+	"4101: Soft device bus fabric error"},
 	{0x01170600, 0, 1,
 	"FFF9: Device sector reassign successful"},
 	{0x01170900, 0, 1,
@@ -225,6 +227,8 @@ struct ipr_error_table_t ipr_error_table[] = {
 	"3109: IOA timed out a device command"},
 	{0x04088000, 0, 0,
 	"3120: SCSI bus is not operational"},
+	{0x04088100, 0, 1,
+	"4100: Hard device bus fabric error"},
 	{0x04118000, 0, 1,
 	"9000: IOA reserved area data check"},
 	{0x04118100, 0, 1,
@@ -273,6 +277,14 @@ struct ipr_error_table_t ipr_error_table[] = {
 	"9091: Incorrect hardware configuration change has been detected"},
 	{0x04678000, 0, 1,
 	"9073: Invalid multi-adapter configuration"},
+	{0x04678100, 0, 1,
+	"4010: Incorrect connection between cascaded expanders"},
+	{0x04678200, 0, 1,
+	"4020: Connections exceed IOA design limits"},
+	{0x04678300, 0, 1,
+	"4030: Incorrect multipath connection"},
+	{0x04679000, 0, 1,
+	"4110: Unsupported enclosure function"},
 	{0x046E0000, 0, 1,
 	"FFF4: Command to logical unit failed"},
 	{0x05240000, 1, 0,
@@ -297,6 +309,8 @@ struct ipr_error_table_t ipr_error_table[] = {
 	"9031: Array protection temporarily suspended, protection resuming"},
 	{0x06040600, 0, 1,
 	"9040: Array protection temporarily suspended, protection resuming"},
+	{0x06288000, 0, 1,
+	"3140: Device bus not ready to ready transition"},
 	{0x06290000, 0, 1,
 	"FFFB: SCSI bus was reset"},
 	{0x06290500, 0, 0,
@@ -319,6 +333,16 @@ struct ipr_error_table_t ipr_error_table[] = {
 	"3150: SCSI bus configuration error"},
 	{0x06678100, 0, 1,
 	"9074: Asymmetric advanced function disk configuration"},
+	{0x06678300, 0, 1,
+	"4040: Incomplete multipath connection between IOA and enclosure"},
+	{0x06678400, 0, 1,
+	"4041: Incomplete multipath connection between enclosure and device"},
+	{0x06678500, 0, 1,
+	"9075: Incomplete multipath connection between IOA and remote IOA"},
+	{0x06678600, 0, 1,
+	"9076: Configuration error, missing remote IOA"},
+	{0x06679100, 0, 1,
+	"4050: Enclosure does not support a required multipath function"},
 	{0x06690200, 0, 1,
 	"9041: Array protection temporarily suspended"},
 	{0x06698200, 0, 1,
@@ -331,6 +355,10 @@ struct ipr_error_table_t ipr_error_table[] = {
 	"9072: Link not operational transition"},
 	{0x066B8200, 0, 1,
 	"9032: Array exposed but still protected"},
+	{0x066B9100, 0, 1,
+	"4061: Multipath redundancy level got better"},
+	{0x066B9200, 0, 1,
+	"4060: Multipath redundancy level got worse"},
 	{0x07270000, 0, 0,
 	"Failure due to other device"},
 	{0x07278000, 0, 1,
-- 
GitLab


From 5b7304fbfb74bfca6f7d5a88b28197e3f7f2743b Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Wed, 2 Aug 2006 14:57:51 -0500
Subject: [PATCH 0072/1063] [SCSI] ipr: Properly handle IOA recovered errors

The ipr driver currently translates adapter recovered errors
to DID_ERROR. This patch fixes this to translate these
errors to success instead.

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/ipr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 7f2c5cfc57bac..55c0156e36b06 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4218,7 +4218,8 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg,
 	case IPR_IOASC_NR_INIT_CMD_REQUIRED:
 		break;
 	default:
-		scsi_cmd->result |= (DID_ERROR << 16);
+		if (IPR_IOASC_SENSE_KEY(ioasc) > RECOVERED_ERROR)
+			scsi_cmd->result |= (DID_ERROR << 16);
 		if (!ipr_is_vset_device(res) && !ipr_is_naca_model(res))
 			res->needs_sync_complete = 1;
 		break;
-- 
GitLab


From 117d2ce1cea25fc94302ff418ccef644cd3e59af Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Wed, 2 Aug 2006 14:57:58 -0500
Subject: [PATCH 0073/1063] [SCSI] ipr: Auto sense handling fix

Fix up a logic error in the checking for valid sense data.

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/ipr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 55c0156e36b06..7ed4eef8347b2 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4127,8 +4127,7 @@ static int ipr_get_autosense(struct ipr_cmnd *ipr_cmd)
 {
 	struct ipr_ioasa *ioasa = &ipr_cmd->ioasa;
 
-	if ((be32_to_cpu(ioasa->ioasc_specific) &
-	     (IPR_ADDITIONAL_STATUS_FMT | IPR_AUTOSENSE_VALID)) == 0)
+	if ((be32_to_cpu(ioasa->ioasc_specific) & IPR_AUTOSENSE_VALID) == 0)
 		return 0;
 
 	memcpy(ipr_cmd->scsi_cmd->sense_buffer, ioasa->auto_sense.data,
-- 
GitLab


From 008cd5bbfb4763322837cd1f7c621f02ebe22fef Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Wed, 2 Aug 2006 14:58:04 -0500
Subject: [PATCH 0074/1063] [SCSI] ipr: Bump driver version to 2.1.4

Bump the ipr driver version to 2.1.4.

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/ipr.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index 1e9c1227fdae6..11eaff524327a 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -36,8 +36,8 @@
 /*
  * Literals
  */
-#define IPR_DRIVER_VERSION "2.1.3"
-#define IPR_DRIVER_DATE "(March 29, 2006)"
+#define IPR_DRIVER_VERSION "2.1.4"
+#define IPR_DRIVER_DATE "(August 2, 2006)"
 
 /*
  * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding
-- 
GitLab


From 4ff36718ede26ee2da73f2dae94d71e2b06845fc Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Tue, 4 Jul 2006 12:15:20 -0600
Subject: [PATCH 0075/1063] [SCSI] Improve inquiry printing

 - Replace scsi_device_types array API with scsi_device_type function API.
   Gets rid of a lot of common code, as well as being easier to use.
 - Add the new device types in SPC4 r05a, and rename some of the older ones.
 - Reformat the printing of inquiry data; now fits on one line and
   includes PQ.

I think I've addressed all the feedback from the previous versions.  My
current test box prints:

scsi 2:0:1:0: Direct access     HP 18.2G ATLAS10K3_18_SCA HP05 PQ: 0 ANSI: 2

Signed-off-by: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/block/cciss_scsi.c | 14 +++-----
 drivers/scsi/fcal.c        |  3 +-
 drivers/scsi/g_NCR5380.c   |  3 +-
 drivers/scsi/megaraid.c    |  4 +--
 drivers/scsi/scsi.c        | 36 +++++++++++++++------
 drivers/scsi/scsi_proc.c   |  4 +--
 drivers/scsi/scsi_scan.c   | 66 +++++---------------------------------
 include/scsi/scsi.h        | 10 ++----
 8 files changed, 46 insertions(+), 94 deletions(-)

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index afdff32f67247..05f79d7393f72 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -251,10 +251,6 @@ scsi_cmd_stack_free(int ctlr)
 	stk->pool = NULL;
 }
 
-/* scsi_device_types comes from scsi.h */
-#define DEVICETYPE(n) (n<0 || n>MAX_SCSI_DEVICE_CODE) ? \
-	"Unknown" : scsi_device_types[n]
-
 #if 0
 static int xmargin=8;
 static int amargin=60;
@@ -389,7 +385,7 @@ cciss_scsi_add_entry(int ctlr, int hostno,
 	   time anyway (the scsi layer's inquiries will show that info) */
 	if (hostno != -1)
 		printk("cciss%d: %s device c%db%dt%dl%d added.\n", 
-			ctlr, DEVICETYPE(sd->devtype), hostno, 
+			ctlr, scsi_device_type(sd->devtype), hostno,
 			sd->bus, sd->target, sd->lun);
 	return 0;
 }
@@ -407,7 +403,7 @@ cciss_scsi_remove_entry(int ctlr, int hostno, int entry)
 		ccissscsi[ctlr].dev[i] = ccissscsi[ctlr].dev[i+1];
 	ccissscsi[ctlr].ndevices--;
 	printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-		ctlr, DEVICETYPE(sd.devtype), hostno, 
+		ctlr, scsi_device_type(sd.devtype), hostno,
 			sd.bus, sd.target, sd.lun);
 }
 
@@ -458,7 +454,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
 		if (found == 0) { /* device no longer present. */ 
 			changes++;
 			/* printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-				ctlr, DEVICETYPE(csd->devtype), hostno, 
+				ctlr, scsi_device_type(csd->devtype), hostno,
 					csd->bus, csd->target, csd->lun); */
 			cciss_scsi_remove_entry(ctlr, hostno, i);
 			/* note, i not incremented */
@@ -468,7 +464,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
 			printk("cciss%d: device c%db%dt%dl%d type changed "
 				"(device type now %s).\n",
 				ctlr, hostno, csd->bus, csd->target, csd->lun,
-					DEVICETYPE(csd->devtype));
+					scsi_device_type(csd->devtype));
 			csd->devtype = sd[j].devtype;
 			i++;	/* so just move along. */
 		} else 		/* device is same as it ever was, */
@@ -1098,7 +1094,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
 			if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
 				printk(KERN_INFO "cciss%d: %s ignored, "
 					"too many devices.\n", cntl_num,
-					DEVICETYPE(devtype));
+					scsi_device_type(devtype));
 				break;
 			}
 			memcpy(&currentsd[ncurrent].scsi3addr[0], 
diff --git a/drivers/scsi/fcal.c b/drivers/scsi/fcal.c
index 7f891023aa15c..c4e16c0775def 100644
--- a/drivers/scsi/fcal.c
+++ b/drivers/scsi/fcal.c
@@ -248,8 +248,7 @@ int fcal_proc_info (struct Scsi_Host *host, char *buffer, char **start, off_t of
 				if (scd->id == target) {
 					SPRINTF ("  [AL-PA: %02x, Id: %02d, Port WWN: %08x%08x, Node WWN: %08x%08x]  ",
 						alpa, target, u1[0], u1[1], u2[0], u2[1]);
-					SPRINTF ("%s ", (scd->type < MAX_SCSI_DEVICE_CODE) ?
-						scsi_device_types[(short) scd->type] : "Unknown device");
+					SPRINTF ("%s ", scsi_device_type(scd->type));
 
 					for (j = 0; (j < 8) && (scd->vendor[j] >= 0x20); j++)
 						SPRINTF ("%c", scd->vendor[j]);
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 67f1100f31036..cdd893bb4e281 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -811,7 +811,6 @@ static int generic_NCR5380_proc_info(struct Scsi_Host *scsi_ptr, char *buffer, c
 	struct NCR5380_hostdata *hostdata;
 #ifdef NCR5380_STATS
 	struct scsi_device *dev;
-	extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
 #endif
 
 	NCR5380_setup(scsi_ptr);
@@ -851,7 +850,7 @@ static int generic_NCR5380_proc_info(struct Scsi_Host *scsi_ptr, char *buffer, c
 		long tr = hostdata->time_read[dev->id] / HZ;
 		long tw = hostdata->time_write[dev->id] / HZ;
 
-		PRINTP("  T:%d %s " ANDP dev->id ANDP(dev->type < MAX_SCSI_DEVICE_CODE) ? scsi_device_types[(int) dev->type] : "Unknown");
+		PRINTP("  T:%d %s " ANDP dev->id ANDP scsi_device_type(dev->type));
 		for (i = 0; i < 8; i++)
 			if (dev->vendor[i] >= 0x20)
 				*(buffer + (len++)) = dev->vendor[i];
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 76edbb639d373..ccb0055ac73ad 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -2822,9 +2822,7 @@ mega_print_inquiry(char *page, char *scsi_inq)
 
 	i = scsi_inq[0] & 0x1f;
 
-	len += sprintf(page+len, "  Type:   %s ",
-		i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] :
-		   "Unknown          ");
+	len += sprintf(page+len, "  Type:   %s ", scsi_device_type(i));
 
 	len += sprintf(page+len,
 	"                 ANSI SCSI revision: %02x", scsi_inq[2] & 0x07);
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index b332caddd5b37..94df671d776a3 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -96,24 +96,40 @@ unsigned int scsi_logging_level;
 EXPORT_SYMBOL(scsi_logging_level);
 #endif
 
-const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE] = {
-	"Direct-Access    ",
-	"Sequential-Access",
+static const char *const scsi_device_types[] = {
+	"Direct access    ",
+	"Sequential access",
 	"Printer          ",
 	"Processor        ",
 	"WORM             ",
-	"CD-ROM           ",
+	"CD/DVD           ",
 	"Scanner          ",
-	"Optical Device   ",
-	"Medium Changer   ",
+	"Optical memory   ",
+	"Media changer    ",
 	"Communications   ",
-	"Unknown          ",
-	"Unknown          ",
+	"ASC IT8          ",
+	"ASC IT8          ",
 	"RAID             ",
 	"Enclosure        ",
-	"Direct-Access-RBC",
+	"Direct access RBC",
+	"Optical card     ",
+	"Bridge controller",
+	"Object storage   ",
+	"Automation/Drive ",
 };
-EXPORT_SYMBOL(scsi_device_types);
+
+const char * scsi_device_type(unsigned type)
+{
+	if (type == 0x1e)	/* not in the table; handled explicitly */
+		return "Well-known LUN   ";
+	if (type == 0x1f)	/* not in the table; handled explicitly */
+		return "No Device        ";
+	if (type >= ARRAY_SIZE(scsi_device_types))	/* past table end */
+		return "Unknown          ";
+	return scsi_device_types[type];
+}
+
+EXPORT_SYMBOL(scsi_device_type);
 
 struct scsi_host_cmd_pool {
 	kmem_cache_t	*slab;
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 55200e4fdf11e..524a5f7a5193d 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -178,9 +178,7 @@ static int proc_print_scsidevice(struct device *dev, void *data)
 
 	seq_printf(s, "\n");
 
-	seq_printf(s, "  Type:   %s ",
-		     sdev->type < MAX_SCSI_DEVICE_CODE ?
-	       scsi_device_types[(int) sdev->type] : "Unknown          ");
+	seq_printf(s, "  Type:   %s ", scsi_device_type(sdev->type));
 	seq_printf(s, "               ANSI"
 		     " SCSI revision: %02x", (sdev->scsi_level - 1) ?
 		     sdev->scsi_level - 1 : 1);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 1bd92b9b46d9c..1803994065109 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -133,59 +133,6 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
 			 SCSI_TIMEOUT, 3);
 }
 
-/**
- * print_inquiry - printk the inquiry information
- * @inq_result:	printk this SCSI INQUIRY
- *
- * Description:
- *     printk the vendor, model, and other information found in the
- *     INQUIRY data in @inq_result.
- *
- * Notes:
- *     Remove this, and replace with a hotplug event that logs any
- *     relevant information.
- **/
-static void print_inquiry(unsigned char *inq_result)
-{
-	int i;
-
-	printk(KERN_NOTICE "  Vendor: ");
-	for (i = 8; i < 16; i++)
-		if (inq_result[i] >= 0x20 && i < inq_result[4] + 5)
-			printk("%c", inq_result[i]);
-		else
-			printk(" ");
-
-	printk("  Model: ");
-	for (i = 16; i < 32; i++)
-		if (inq_result[i] >= 0x20 && i < inq_result[4] + 5)
-			printk("%c", inq_result[i]);
-		else
-			printk(" ");
-
-	printk("  Rev: ");
-	for (i = 32; i < 36; i++)
-		if (inq_result[i] >= 0x20 && i < inq_result[4] + 5)
-			printk("%c", inq_result[i]);
-		else
-			printk(" ");
-
-	printk("\n");
-
-	i = inq_result[0] & 0x1f;
-
-	printk(KERN_NOTICE "  Type:   %s ",
-	       i <
-	       MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] :
-	       "Unknown          ");
-	printk("                 ANSI SCSI revision: %02x",
-	       inq_result[2] & 0x07);
-	if ((inq_result[2] & 0x07) == 1 && (inq_result[3] & 0x0f) == 1)
-		printk(" CCS\n");
-	else
-		printk("\n");
-}
-
 /**
  * scsi_alloc_sdev - allocate and setup a scsi_Device
  *
@@ -653,9 +600,8 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
 	if (*bflags & BLIST_ISROM) {
 		/*
 		 * It would be better to modify sdev->type, and set
-		 * sdev->removable, but then the print_inquiry() output
-		 * would not show TYPE_ROM; if print_inquiry() is removed
-		 * the issue goes away.
+		 * sdev->removable; this can now be done since
+		 * print_inquiry has gone away.
 		 */
 		inq_result[0] = TYPE_ROM;
 		inq_result[1] |= 0x80;	/* removable */
@@ -684,8 +630,6 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
 		printk(KERN_INFO "scsi: unknown device type %d\n", sdev->type);
 	}
 
-	print_inquiry(inq_result);
-
 	/*
 	 * For a peripheral qualifier (PQ) value of 1 (001b), the SCSI
 	 * spec says: The device server is capable of supporting the
@@ -715,6 +659,12 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
 	if (inq_result[7] & 0x10)
 		sdev->sdtr = 1;
 
+	sdev_printk(KERN_NOTICE "scsi", sdev, "%s %.8s %.16s %.4s PQ: %d "
+			"ANSI: %d%s\n", scsi_device_type(sdev->type),
+			sdev->vendor, sdev->model, sdev->rev,
+			sdev->inq_periph_qual, inq_result[2] & 0x07,
+			(inq_result[3] & 0x0f) == 1 ? " CCS" : "");
+
 	/*
 	 * End sysfs code.
 	 */
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index c60b8ff2f5e4f..1bc6752014139 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -24,13 +24,6 @@
 extern const unsigned char scsi_command_size[8];
 #define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7]
 
-/*
- *	SCSI device types
- */
-
-#define MAX_SCSI_DEVICE_CODE 15
-extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
-
 /*
  * Special value for scanning to specify scanning or rescanning of all
  * possible channels, (target) ids, or luns on a given shost.
@@ -225,6 +218,9 @@ static inline int scsi_status_is_good(int status)
 #define TYPE_RBC	    0x0e
 #define TYPE_NO_LUN         0x7f
 
+/* Returns a human-readable name for the device */
+extern const char * scsi_device_type(unsigned type);
+
 /*
  * standard mode-select header prepended to all mode-select commands
  */
-- 
GitLab


From 19ac0db3e22de3b00cc4aadc7efbad0420c7aa08 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Sun, 6 Aug 2006 18:15:22 -0500
Subject: [PATCH 0076/1063] [SCSI] fix up short inquiry printing

A recent drivers base commit:

3e95637a48820ff8bedb33e6439def96ccff1de5

Caused the bus to be added to dev_printk, so now our SCSI inquiry short
messages print like this:

scsiscsi 2:0:0:0: Direct access     IBM-ESXS ST973401SS       B519 PQ: 0 ANSI: 5

Just remove the "scsi" from the sdev_printk to compensate.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 1803994065109..114e2067dce57 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -659,7 +659,7 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
 	if (inq_result[7] & 0x10)
 		sdev->sdtr = 1;
 
-	sdev_printk(KERN_NOTICE "scsi", sdev, "%s %.8s %.16s %.4s PQ: %d "
+	sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d "
 			"ANSI: %d%s\n", scsi_device_type(sdev->type),
 			sdev->vendor, sdev->model, sdev->rev,
 			sdev->inq_periph_qual, inq_result[2] & 0x07,
-- 
GitLab


From afd05423e02bc7391a7489b686ba1e166b6e8349 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 28 Jul 2006 13:58:37 +1000
Subject: [PATCH 0077/1063] [POWERPC] Enable PURR sysfs entry correctly

We have CPU_FTR_PURR now, so let's use it.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index ae927a4e46e46..406f308ddeadd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -230,7 +230,7 @@ static void register_cpu_online(unsigned int cpu)
 	if (cur_cpu_spec->num_pmcs >= 8)
 		sysdev_create_file(s, &attr_pmc8);
 
-	if (cpu_has_feature(CPU_FTR_SMT))
+	if (cpu_has_feature(CPU_FTR_PURR))
 		sysdev_create_file(s, &attr_purr);
 }
 
@@ -272,7 +272,7 @@ static void unregister_cpu_online(unsigned int cpu)
 	if (cur_cpu_spec->num_pmcs >= 8)
 		sysdev_remove_file(s, &attr_pmc8);
 
-	if (cpu_has_feature(CPU_FTR_SMT))
+	if (cpu_has_feature(CPU_FTR_PURR))
 		sysdev_remove_file(s, &attr_purr);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
-- 
GitLab


From 919fede6edab94cccb3ca8c1c0b32fa62c9369a5 Mon Sep 17 00:00:00 2001
From: Jon Loeliger <jdl@freescale.com>
Date: Mon, 31 Jul 2006 15:35:41 -0500
Subject: [PATCH 0078/1063] [POWERPC] Rewrite the PPC 86xx IRQ handling to use
 Flat Device Tree

IRQ setup now comes from the Flat Device Tree and uses the new generic
IRQ code.  Fixed the fsl_soc.c IRQ OF interrupt node parsing.
Removed some unused MPC86xx macro definitions.

Signed-off-by: Zhang Wei <wei.zhang@freescale.com>
Signed-off-by: Jon Loeliger <jdl@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/86xx/mpc8641_hpcn.h |  32 --
 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 324 +++++++++++----------
 arch/powerpc/sysdev/fsl_soc.c              |  30 +-
 3 files changed, 188 insertions(+), 198 deletions(-)

diff --git a/arch/powerpc/platforms/86xx/mpc8641_hpcn.h b/arch/powerpc/platforms/86xx/mpc8641_hpcn.h
index 5d2bcf78cef70..41e554c4af94b 100644
--- a/arch/powerpc/platforms/86xx/mpc8641_hpcn.h
+++ b/arch/powerpc/platforms/86xx/mpc8641_hpcn.h
@@ -16,38 +16,6 @@
 
 #include <linux/init.h>
 
-/* PCI interrupt controller */
-#define PIRQA		3
-#define PIRQB		4
-#define PIRQC		5
-#define PIRQD		6
-#define PIRQ7		7
-#define PIRQE		9
-#define PIRQF		10
-#define PIRQG		11
-#define PIRQH		12
-
-/* PCI-Express memory map */
-#define MPC86XX_PCIE_LOWER_IO        0x00000000
-#define MPC86XX_PCIE_UPPER_IO        0x00ffffff
-
-#define MPC86XX_PCIE_LOWER_MEM       0x80000000
-#define MPC86XX_PCIE_UPPER_MEM       0x9fffffff
-
-#define MPC86XX_PCIE_IO_BASE         0xe2000000
-#define MPC86XX_PCIE_MEM_OFFSET      0x00000000
-
-#define MPC86XX_PCIE_IO_SIZE         0x01000000
-
-#define PCIE1_CFG_ADDR_OFFSET    (0x8000)
-#define PCIE1_CFG_DATA_OFFSET    (0x8004)
-
-#define PCIE2_CFG_ADDR_OFFSET    (0x9000)
-#define PCIE2_CFG_DATA_OFFSET    (0x9004)
-
-#define MPC86xx_PCIE_OFFSET PCIE1_CFG_ADDR_OFFSET
-#define MPC86xx_PCIE_SIZE	(0x1000)
-
 #define MPC86XX_RSTCR_OFFSET	(0xe00b0)	/* Reset Control Register */
 
 #endif	/* __MPC8641_HPCN_H__ */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 839090682ab21..4a33d95e7ad75 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -37,6 +37,14 @@
 #include "mpc86xx.h"
 #include "mpc8641_hpcn.h"
 
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
 #ifndef CONFIG_PCI
 unsigned long isa_io_base = 0;
 unsigned long isa_mem_base = 0;
@@ -44,205 +52,215 @@ unsigned long pci_dram_offset = 0;
 #endif
 
 
-/*
- * Internal interrupts are all Level Sensitive, and Positive Polarity
- */
-
-static u_char mpc86xx_hpcn_openpic_initsenses[] __initdata = {
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  0: Reserved */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  1: MCM */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  2: DDR DRAM */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  3: LBIU */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  4: DMA 0 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  5: DMA 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  6: DMA 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  7: DMA 3 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  8: PCIE1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  9: PCIE2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 10: Reserved */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 11: Reserved */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 12: DUART2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 13: TSEC 1 Transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 14: TSEC 1 Receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 15: TSEC 3 transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 16: TSEC 3 receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 17: TSEC 3 error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 18: TSEC 1 Receive/Transmit Error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 19: TSEC 2 Transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 20: TSEC 2 Receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 21: TSEC 4 transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 22: TSEC 4 receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 23: TSEC 4 error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 24: TSEC 2 Receive/Transmit Error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 25: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 26: DUART1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 27: I2C */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 28: Performance Monitor */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 29: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 30: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 31: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 32: SRIO error/write-port unit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 33: SRIO outbound doorbell */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 34: SRIO inbound doorbell */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 35: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 36: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 37: SRIO outbound message unit 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 38: SRIO inbound message unit 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 39: SRIO outbound message unit 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 40: SRIO inbound message unit 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 41: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 42: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 43: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 44: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 45: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 46: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 47: Unused */
-	0x0,						/* External  0: */
-	0x0,						/* External  1: */
-	0x0,						/* External  2: */
-	0x0,						/* External  3: */
-	0x0,						/* External  4: */
-	0x0,						/* External  5: */
-	0x0,						/* External  6: */
-	0x0,						/* External  7: */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* External  8: Pixis FPGA */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* External  9: ULI 8259 INTR Cascade */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* External 10: Quad ETH PHY */
-	0x0,						/* External 11: */
-	0x0,
-	0x0,
-	0x0,
-	0x0,
-};
-
+static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc,
+				 struct pt_regs *regs)
+{
+	unsigned int cascade_irq = i8259_irq(regs);
+	if (cascade_irq != NO_IRQ)
+		generic_handle_irq(cascade_irq, regs);
+	desc->chip->eoi(irq);
+}
 
 void __init
 mpc86xx_hpcn_init_irq(void)
 {
 	struct mpic *mpic1;
+	struct device_node *np, *cascade_node = NULL;
+	int cascade_irq;
 	phys_addr_t openpic_paddr;
 
+	np = of_find_node_by_type(NULL, "open-pic");
+	if (np == NULL)
+		return;
+
 	/* Determine the Physical Address of the OpenPIC regs */
 	openpic_paddr = get_immrbase() + MPC86xx_OPENPIC_OFFSET;
 
 	/* Alloc mpic structure and per isu has 16 INT entries. */
-	mpic1 = mpic_alloc(openpic_paddr,
+	mpic1 = mpic_alloc(np, openpic_paddr,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
-			16, MPC86xx_OPENPIC_IRQ_OFFSET, 0, 250,
-			mpc86xx_hpcn_openpic_initsenses,
-			sizeof(mpc86xx_hpcn_openpic_initsenses),
+			16, NR_IRQS - 4,
 			" MPIC     ");
 	BUG_ON(mpic1 == NULL);
 
+	mpic_assign_isu(mpic1, 0, openpic_paddr + 0x10000);
+
 	/* 48 Internal Interrupts */
-	mpic_assign_isu(mpic1, 0, openpic_paddr + 0x10200);
-	mpic_assign_isu(mpic1, 1, openpic_paddr + 0x10400);
-	mpic_assign_isu(mpic1, 2, openpic_paddr + 0x10600);
+	mpic_assign_isu(mpic1, 1, openpic_paddr + 0x10200);
+	mpic_assign_isu(mpic1, 2, openpic_paddr + 0x10400);
+	mpic_assign_isu(mpic1, 3, openpic_paddr + 0x10600);
 
-	/* 16 External interrupts */
-	mpic_assign_isu(mpic1, 3, openpic_paddr + 0x10000);
+	/* 16 External interrupts
+	 * Moving them from [0 - 15] to [64 - 79]
+	 */
+	mpic_assign_isu(mpic1, 4, openpic_paddr + 0x10000);
 
 	mpic_init(mpic1);
 
 #ifdef CONFIG_PCI
-	mpic_setup_cascade(MPC86xx_IRQ_EXT9, i8259_irq_cascade, NULL);
-	i8259_init(0, I8259_OFFSET);
-#endif
-}
+	/* Initialize i8259 controller */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "chrp,iic")) {
+			cascade_node = np;
+			break;
+		}
+	if (cascade_node == NULL) {
+		printk(KERN_DEBUG "mpc86xxhpcn: no ISA interrupt controller\n");
+		return;
+	}
 
+	cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+	if (cascade_irq == NO_IRQ) {
+		printk(KERN_ERR "mpc86xxhpcn: failed to map cascade interrupt");
+		return;
+	}
+	DBG("mpc86xxhpcn: cascade mapped to irq %d\n", cascade_irq);
 
+	i8259_init(cascade_node, 0);
+	set_irq_chained_handler(cascade_irq, mpc86xx_8259_cascade);
+#endif
+}
 
 #ifdef CONFIG_PCI
-/*
- * interrupt routing
- */
 
-int
-mpc86xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin)
+enum pirq{PIRQA = 8, PIRQB, PIRQC, PIRQD, PIRQE, PIRQF, PIRQG, PIRQH};
+const unsigned char uli1575_irq_route_table[16] = {
+	0, 	/* 0: Reserved */
+	0x8, 	/* 1: 0b1000 */
+	0, 	/* 2: Reserved */
+	0x2,	/* 3: 0b0010 */
+	0x4,	/* 4: 0b0100 */
+	0x5, 	/* 5: 0b0101 */
+	0x7,	/* 6: 0b0111 */
+	0x6,	/* 7: 0b0110 */
+	0, 	/* 8: Reserved */
+	0x1,	/* 9: 0b0001 */
+	0x3,	/* 10: 0b0011 */
+	0x9,	/* 11: 0b1001 */
+	0xb,	/* 12: 0b1011 */
+	0, 	/* 13: Reserved */
+	0xd,	/* 14, 0b1101 */
+	0xf,	/* 15, 0b1111 */
+};
+
+static int __devinit
+get_pci_irq_from_of(struct pci_controller *hose, int slot, int pin)
 {
-	static char pci_irq_table[][4] = {
-		/*
-		 *      PCI IDSEL/INTPIN->INTLINE
-		 *       A      B      C      D
-		 */
-		{PIRQA, PIRQB, PIRQC, PIRQD},   /* IDSEL 17 -- PCI Slot 1 */
-		{PIRQB, PIRQC, PIRQD, PIRQA},	/* IDSEL 18 -- PCI Slot 2 */
-		{0, 0, 0, 0},			/* IDSEL 19 */
-		{0, 0, 0, 0},			/* IDSEL 20 */
-		{0, 0, 0, 0},			/* IDSEL 21 */
-		{0, 0, 0, 0},			/* IDSEL 22 */
-		{0, 0, 0, 0},			/* IDSEL 23 */
-		{0, 0, 0, 0},			/* IDSEL 24 */
-		{0, 0, 0, 0},			/* IDSEL 25 */
-		{PIRQD, PIRQA, PIRQB, PIRQC},	/* IDSEL 26 -- PCI Bridge*/
-		{PIRQC, 0, 0, 0},		/* IDSEL 27 -- LAN */
-		{PIRQE, PIRQF, PIRQH, PIRQ7},	/* IDSEL 28 -- USB 1.1 */
-		{PIRQE, PIRQF, PIRQG, 0},	/* IDSEL 29 -- Audio & Modem */
-		{PIRQH, 0, 0, 0},		/* IDSEL 30 -- LPC & PMU*/
-		{PIRQD, 0, 0, 0},		/* IDSEL 31 -- ATA */
-	};
-
-	const long min_idsel = 17, max_idsel = 31, irqs_per_slot = 4;
-	return PCI_IRQ_TABLE_LOOKUP + I8259_OFFSET;
+	struct of_irq oirq;
+	u32 laddr[3];
+	struct device_node *hosenode = hose ? hose->arch_data : NULL;
+
+	if (!hosenode) return -EINVAL;
+
+	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(slot, 0) << 8);
+	laddr[1] = laddr[2] = 0;
+	of_irq_map_raw(hosenode, &pin, laddr, &oirq);
+	DBG("mpc86xx_hpcn: pci irq addr %x, slot %d, pin %d, irq %d\n",
+			laddr[0], slot, pin, oirq.specifier[0]);
+	return oirq.specifier[0];
 }
 
-static void __devinit quirk_ali1575(struct pci_dev *dev)
+static void __devinit quirk_uli1575(struct pci_dev *dev)
 {
 	unsigned short temp;
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	unsigned char irq2pin[16];
+	unsigned long pirq_map_word = 0;
+	u32 irq;
+	int i;
 
 	/*
-	 * ALI1575 interrupts route table setup:
+	 * ULI1575 interrupts route setup
+	 */
+	memset(irq2pin, 0, 16); /* Initialize default value 0 */
+
+	/*
+	 * PIRQA -> PIRQD mapping read from OF-tree
+	 *
+	 * interrupts for PCI slot0 -- PIRQA / PIRQB / PIRQC / PIRQD
+	 *                PCI slot1 -- PIRQB / PIRQC / PIRQD / PIRQA
+	 */
+	for (i = 0; i < 4; i++){
+		irq = get_pci_irq_from_of(hose, 17, i + 1);
+		if (irq > 0 && irq < 16)
+			irq2pin[irq] = PIRQA + i;
+		else
+			printk(KERN_WARNING "ULI1575 device"
+			    "(slot %d, pin %d) irq %d is invalid.\n",
+			    17, i, irq);
+	}
+
+	/*
+	 * PIRQE -> PIRQH mapping set manually
 	 *
 	 * IRQ pin   IRQ#
-	 * PIRQA ---- 3
-	 * PIRQB ---- 4
-	 * PIRQC ---- 5
-	 * PIRQD ---- 6
 	 * PIRQE ---- 9
 	 * PIRQF ---- 10
 	 * PIRQG ---- 11
 	 * PIRQH ---- 12
-	 *
-	 * interrupts for PCI slot0 -- PIRQA / PIRQB / PIRQC / PIRQD
-	 *                PCI slot1 -- PIRQB / PIRQC / PIRQD / PIRQA
 	 */
-	pci_write_config_dword(dev, 0x48, 0xb9317542);
+	for (i = 0; i < 4; i++) irq2pin[i + 9] = PIRQE + i;
+
+	/* Set IRQ-PIRQ Mapping to ULI1575 */
+	for (i = 0; i < 16; i++)
+		if (irq2pin[i])
+			pirq_map_word |= (uli1575_irq_route_table[i] & 0xf)
+				<< ((irq2pin[i] - PIRQA) * 4);
 
-	/* USB 1.1 OHCI controller 1, interrupt: PIRQE */
-	pci_write_config_byte(dev, 0x86, 0x0c);
+	/* ULI1575 IRQ mapping conf register default value is 0xb9317542 */
+	DBG("Setup ULI1575 IRQ mapping configuration register value = 0x%x\n",
+			pirq_map_word);
+	pci_write_config_dword(dev, 0x48, pirq_map_word);
 
-	/* USB 1.1 OHCI controller 2, interrupt: PIRQF */
-	pci_write_config_byte(dev, 0x87, 0x0d);
+#define ULI1575_SET_DEV_IRQ(slot, pin, reg) 				\
+	do { 								\
+		int irq; 						\
+		irq = get_pci_irq_from_of(hose, slot, pin); 		\
+		if (irq > 0 && irq < 16) 				\
+			pci_write_config_byte(dev, reg, irq2pin[irq]); 	\
+		else							\
+			printk(KERN_WARNING "ULI1575 device"		\
+			    "(slot %d, pin %d) irq %d is invalid.\n",	\
+			    slot, pin, irq);				\
+	} while(0)
 
-	/* USB 1.1 OHCI controller 3, interrupt: PIRQH */
-	pci_write_config_byte(dev, 0x88, 0x0f);
+	/* USB 1.1 OHCI controller 1, slot 28, pin 1 */
+	ULI1575_SET_DEV_IRQ(28, 1, 0x86);
 
-	/* USB 2.0 controller, interrupt: PIRQ7 */
-	pci_write_config_byte(dev, 0x74, 0x06);
+	/* USB 1.1 OHCI controller 2, slot 28, pin 2 */
+	ULI1575_SET_DEV_IRQ(28, 2, 0x87);
 
-	/* Audio controller, interrupt: PIRQE */
-	pci_write_config_byte(dev, 0x8a, 0x0c);
+	/* USB 1.1 OHCI controller 3, slot 28, pin 3 */
+	ULI1575_SET_DEV_IRQ(28, 3, 0x88);
 
-	/* Modem controller, interrupt: PIRQF */
-	pci_write_config_byte(dev, 0x8b, 0x0d);
+	/* USB 2.0 controller, slot 28, pin 4 */
+	irq = get_pci_irq_from_of(hose, 28, 4);
+	if (irq >= 0 && irq <=15)
+		pci_write_config_dword(dev, 0x74, uli1575_irq_route_table[irq]);
 
-	/* HD audio controller, interrupt: PIRQG */
-	pci_write_config_byte(dev, 0x8c, 0x0e);
+	/* Audio controller, slot 29, pin 1 */
+	ULI1575_SET_DEV_IRQ(29, 1, 0x8a);
 
-	/* Serial ATA interrupt: PIRQD */
-	pci_write_config_byte(dev, 0x8d, 0x0b);
+	/* Modem controller, slot 29, pin 2 */
+	ULI1575_SET_DEV_IRQ(29, 2, 0x8b);
 
-	/* SMB interrupt: PIRQH */
-	pci_write_config_byte(dev, 0x8e, 0x0f);
+	/* HD audio controller, slot 29, pin 3 */
+	ULI1575_SET_DEV_IRQ(29, 3, 0x8c);
 
-	/* PMU ACPI SCI interrupt: PIRQH */
-	pci_write_config_byte(dev, 0x8f, 0x0f);
+	/* SMB interrupt: slot 30, pin 1 */
+	ULI1575_SET_DEV_IRQ(30, 1, 0x8e);
+
+	/* PMU ACPI SCI interrupt: slot 30, pin 2 */
+	ULI1575_SET_DEV_IRQ(30, 2, 0x8f);
+
+	/* Serial ATA interrupt: slot 31, pin 1 */
+	ULI1575_SET_DEV_IRQ(31, 1, 0x8d);
 
 	/* Primary PATA IDE IRQ: 14
 	 * Secondary PATA IDE IRQ: 15
 	 */
-	pci_write_config_byte(dev, 0x44, 0x3d);
-	pci_write_config_byte(dev, 0x75, 0x0f);
+	pci_write_config_byte(dev, 0x44, 0x30 | uli1575_irq_route_table[14]);
+	pci_write_config_byte(dev, 0x75, uli1575_irq_route_table[15]);
 
 	/* Set IRQ14 and IRQ15 to legacy IRQs */
 	pci_read_config_word(dev, 0x46, &temp);
@@ -264,6 +282,8 @@ static void __devinit quirk_ali1575(struct pci_dev *dev)
 	 */
 	outb(0xfa, 0x4d0);
 	outb(0x1e, 0x4d1);
+
+#undef ULI1575_SET_DEV_IRQ
 }
 
 static void __devinit quirk_uli5288(struct pci_dev *dev)
@@ -306,7 +326,7 @@ static void __devinit early_uli5249(struct pci_dev *dev)
 	dev->class |= 0x1;
 }
 
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_ali1575);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249);
@@ -337,8 +357,6 @@ mpc86xx_hpcn_setup_arch(void)
 	for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;)
 		add_bridge(np);
 
-	ppc_md.pci_swizzle = common_swizzle;
-	ppc_md.pci_map_irq = mpc86xx_map_irq;
 	ppc_md.pci_exclude_device = mpc86xx_exclude_device;
 #endif
 
@@ -377,6 +395,15 @@ mpc86xx_hpcn_show_cpuinfo(struct seq_file *m)
 }
 
 
+void __init mpc86xx_hpcn_pcibios_fixup(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
+}
+
+
 /*
  * Called very early, device-tree isn't unflattened
  */
@@ -431,6 +458,7 @@ define_machine(mpc86xx_hpcn) {
 	.setup_arch		= mpc86xx_hpcn_setup_arch,
 	.init_IRQ		= mpc86xx_hpcn_init_irq,
 	.show_cpuinfo		= mpc86xx_hpcn_show_cpuinfo,
+	.pcibios_fixup		= mpc86xx_hpcn_pcibios_fixup,
 	.get_irq		= mpic_get_irq,
 	.restart		= mpc86xx_restart,
 	.time_init		= mpc86xx_time_init,
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 07c47e8309eda..4a6aa640ac135 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -85,11 +85,8 @@ static int __init gfar_mdio_of_init(void)
 			mdio_data.irq[k] = -1;
 
 		while ((child = of_get_next_child(np, child)) != NULL) {
-			if (child->n_intrs) {
-				const u32 *id =
-					get_property(child, "reg", NULL);
-				mdio_data.irq[*id] = child->intrs[0].line;
-			}
+			const u32 *id = get_property(child, "reg", NULL);
+			mdio_data.irq[*id] = irq_of_parse_and_map(child, 0);
 		}
 
 		ret =
@@ -131,6 +128,7 @@ static int __init gfar_of_init(void)
 		const char *model;
 		const void *mac_addr;
 		const phandle *ph;
+		int n_res = 1;
 
 		memset(r, 0, sizeof(r));
 		memset(&gfar_data, 0, sizeof(gfar_data));
@@ -139,8 +137,7 @@ static int __init gfar_of_init(void)
 		if (ret)
 			goto err;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		model = get_property(np, "model", NULL);
@@ -150,19 +147,19 @@ static int __init gfar_of_init(void)
 			r[1].name = gfar_tx_intr;
 
 			r[2].name = gfar_rx_intr;
-			r[2].start = np->intrs[1].line;
-			r[2].end = np->intrs[1].line;
+			r[2].start = r[2].end = irq_of_parse_and_map(np, 1);
 			r[2].flags = IORESOURCE_IRQ;
 
 			r[3].name = gfar_err_intr;
-			r[3].start = np->intrs[2].line;
-			r[3].end = np->intrs[2].line;
+			r[3].start = r[3].end = irq_of_parse_and_map(np, 2);
 			r[3].flags = IORESOURCE_IRQ;
+
+			n_res += 2;
 		}
 
 		gfar_dev =
 		    platform_device_register_simple("fsl-gianfar", i, &r[0],
-						    np->n_intrs + 1);
+						    n_res + 1);
 
 		if (IS_ERR(gfar_dev)) {
 			ret = PTR_ERR(gfar_dev);
@@ -251,8 +248,7 @@ static int __init fsl_i2c_of_init(void)
 		if (ret)
 			goto err;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		i2c_dev = platform_device_register_simple("fsl-i2c", i, r, 2);
@@ -388,8 +384,7 @@ static int __init fsl_usb_of_init(void)
 		if (ret)
 			goto err;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		usb_dev_mph =
@@ -437,8 +432,7 @@ static int __init fsl_usb_of_init(void)
 		if (ret)
 			goto unreg_mph;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		usb_dev_dr =
-- 
GitLab


From 45934c47237108903ec019f08e124e592ba0b6c2 Mon Sep 17 00:00:00 2001
From: Jake Moilanen <moilanen@austin.ibm.com>
Date: Thu, 27 Jul 2006 13:17:25 -0500
Subject: [PATCH 0079/1063] [POWERPC] Export msi symbols

Forgot to export symbols for MSI.

Signed-off-by: Jake Moilanen <moilanen@austin.ibm.com>
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/irq.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 01bdae35cb559..b2ded6460a860 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -52,6 +52,7 @@
 #include <linux/radix-tree.h>
 #include <linux/mutex.h>
 #include <linux/bootmem.h>
+#include <linux/pci.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -827,12 +828,14 @@ int pci_enable_msi(struct pci_dev * pdev)
 	else
 		return -1;
 }
+EXPORT_SYMBOL(pci_enable_msi);
 
 void pci_disable_msi(struct pci_dev * pdev)
 {
 	if (ppc_md.disable_msi)
 		ppc_md.disable_msi(pdev);
 }
+EXPORT_SYMBOL(pci_disable_msi);
 
 void pci_scan_msi_device(struct pci_dev *dev) {}
 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) {return -1;}
@@ -840,6 +843,8 @@ void pci_disable_msix(struct pci_dev *dev) {}
 void msi_remove_pci_irq_vectors(struct pci_dev *dev) {}
 void disable_msi_mode(struct pci_dev *dev, int pos, int type) {}
 void pci_no_msi(void) {}
+EXPORT_SYMBOL(pci_enable_msix);
+EXPORT_SYMBOL(pci_disable_msix);
 
 #endif
 
-- 
GitLab


From 3ab2b385c8a5cdf060c6a41582118a0cb27d0910 Mon Sep 17 00:00:00 2001
From: Amos Waterland <apw@us.ibm.com>
Date: Tue, 1 Aug 2006 15:44:11 -0400
Subject: [PATCH 0080/1063] [POWERPC] Turn on tigon3 support in maple_defconfig

I think that most people who use maple_defconfig are doing so for a JS21,
so it might make sense to turn Tigon3 support on by default.

Built and booted on a JS21.

Signed-off-by: Amos Waterland <apw@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/configs/maple_defconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index 80a0db43aeb7a..27b18ca1549c3 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -474,7 +474,7 @@ CONFIG_E1000=y
 # CONFIG_SKY2 is not set
 # CONFIG_SK98LIN is not set
 # CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
+CONFIG_TIGON3=y
 # CONFIG_BNX2 is not set
 # CONFIG_MV643XX_ETH is not set
 
-- 
GitLab


From 40681b95a4ef798bc38c92e0d9b8c06bbdd34409 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 2 Aug 2006 11:13:50 +1000
Subject: [PATCH 0081/1063] [POWERPC] Make doc comments extractable

We don't have much in the way of doc comments, but some of those we do have
don't work because they start with "/***" or "/*", not "/**" which is what
kernel-doc requires.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/crash_dump.c |  2 +-
 arch/powerpc/sysdev/i8259.c      |  2 +-
 include/asm-powerpc/irq.h        | 24 ++++++++++++------------
 include/asm-powerpc/prom.h       |  8 ++++----
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 371973be8d711..2f6f5a7bc69ed 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -80,7 +80,7 @@ static int __init parse_savemaxmem(char *p)
 }
 __setup("savemaxmem=", parse_savemaxmem);
 
-/*
+/**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
  * @buf: target memory address for the copy; this can be in kernel address
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 9855820b9548c..26a6a3becd66e 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -224,7 +224,7 @@ static struct irq_host_ops i8259_host_ops = {
 	.xlate = i8259_host_xlate,
 };
 
-/****
+/**
  * i8259_init - Initialize the legacy controller
  * @node: device node of the legacy PIC (can be NULL, but then, it will match
  *        all interrupts, so beware)
diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h
index d903a62959dab..4da41efb1319d 100644
--- a/include/asm-powerpc/irq.h
+++ b/include/asm-powerpc/irq.h
@@ -137,7 +137,7 @@ struct irq_map_entry {
 extern struct irq_map_entry irq_map[NR_IRQS];
 
 
-/***
+/**
  * irq_alloc_host - Allocate a new irq_host data structure
  * @node: device-tree node of the interrupt controller
  * @revmap_type: type of reverse mapping to use
@@ -159,14 +159,14 @@ extern struct irq_host *irq_alloc_host(unsigned int revmap_type,
 				       irq_hw_number_t inval_irq);
 
 
-/***
+/**
  * irq_find_host - Locates a host for a given device node
  * @node: device-tree node of the interrupt controller
  */
 extern struct irq_host *irq_find_host(struct device_node *node);
 
 
-/***
+/**
  * irq_set_default_host - Set a "default" host
  * @host: default host pointer
  *
@@ -178,7 +178,7 @@ extern struct irq_host *irq_find_host(struct device_node *node);
 extern void irq_set_default_host(struct irq_host *host);
 
 
-/***
+/**
  * irq_set_virq_count - Set the maximum number of virt irqs
  * @count: number of linux virtual irqs, capped with NR_IRQS
  *
@@ -188,7 +188,7 @@ extern void irq_set_default_host(struct irq_host *host);
 extern void irq_set_virq_count(unsigned int count);
 
 
-/***
+/**
  * irq_create_mapping - Map a hardware interrupt into linux virq space
  * @host: host owning this hardware interrupt or NULL for default host
  * @hwirq: hardware irq number in that host space
@@ -202,13 +202,13 @@ extern unsigned int irq_create_mapping(struct irq_host *host,
 				       irq_hw_number_t hwirq);
 
 
-/***
+/**
  * irq_dispose_mapping - Unmap an interrupt
  * @virq: linux virq number of the interrupt to unmap
  */
 extern void irq_dispose_mapping(unsigned int virq);
 
-/***
+/**
  * irq_find_mapping - Find a linux virq from an hw irq number.
  * @host: host owning this hardware interrupt
  * @hwirq: hardware irq number in that host space
@@ -221,7 +221,7 @@ extern unsigned int irq_find_mapping(struct irq_host *host,
 				     irq_hw_number_t hwirq);
 
 
-/***
+/**
  * irq_radix_revmap - Find a linux virq from a hw irq number.
  * @host: host owning this hardware interrupt
  * @hwirq: hardware irq number in that host space
@@ -232,7 +232,7 @@ extern unsigned int irq_find_mapping(struct irq_host *host,
 extern unsigned int irq_radix_revmap(struct irq_host *host,
 				     irq_hw_number_t hwirq);
 
-/***
+/**
  * irq_linear_revmap - Find a linux virq from a hw irq number.
  * @host: host owning this hardware interrupt
  * @hwirq: hardware irq number in that host space
@@ -247,7 +247,7 @@ extern unsigned int irq_linear_revmap(struct irq_host *host,
 
 
 
-/***
+/**
  * irq_alloc_virt - Allocate virtual irq numbers
  * @host: host owning these new virtual irqs
  * @count: number of consecutive numbers to allocate
@@ -261,7 +261,7 @@ extern unsigned int irq_alloc_virt(struct irq_host *host,
 				   unsigned int count,
 				   unsigned int hint);
 
-/***
+/**
  * irq_free_virt - Free virtual irq numbers
  * @virq: virtual irq number of the first interrupt to free
  * @count: number of interrupts to free
@@ -300,7 +300,7 @@ extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index);
 
 /* -- End OF helpers -- */
 
-/***
+/**
  * irq_early_init - Init irq remapping subsystem
  */
 extern void irq_early_init(void);
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index 31bfea4686a6c..7a457bd462a27 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -259,7 +259,7 @@ struct of_irq {
 	u32 specifier[OF_MAX_IRQ_SPEC];	/* Specifier copy */
 };
 
-/***
+/**
  * of_irq_map_init - Initialize the irq remapper
  * @flags:	flags defining workarounds to enable
  *
@@ -272,7 +272,7 @@ struct of_irq {
 
 extern void of_irq_map_init(unsigned int flags);
 
-/***
+/**
  * of_irq_map_raw - Low level interrupt tree parsing
  * @parent:	the device interrupt parent
  * @intspec:	interrupt specifier ("interrupts" property of the device)
@@ -292,7 +292,7 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
 			  const u32 *addr, struct of_irq *out_irq);
 
 
-/***
+/**
  * of_irq_map_one - Resolve an interrupt for a device
  * @device:	the device whose interrupt is to be resolved
  * @index:     	index of the interrupt to resolve
@@ -305,7 +305,7 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
 extern int of_irq_map_one(struct device_node *device, int index,
 			  struct of_irq *out_irq);
 
-/***
+/**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
  * @out_irq:	structure of_irq filled by this function
-- 
GitLab


From 3d7714867a8d240fae3ab0bde656a369de2b08ab Mon Sep 17 00:00:00 2001
From: Jon Loeliger <jdl@freescale.com>
Date: Thu, 3 Aug 2006 16:27:57 -0500
Subject: [PATCH 0082/1063] [POWERPC] Add MPC8641 HPCN Device Tree Source file.

As per list discussion, let's add device tree source files
under powerpc/boot/dts.  If nothing else, it is a starting point.

Signed-off-by: Jon Loeliger <jdl@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/boot/dts/mpc8641_hpcn.dts | 338 +++++++++++++++++++++++++
 1 file changed, 338 insertions(+)
 create mode 100644 arch/powerpc/boot/dts/mpc8641_hpcn.dts

diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
new file mode 100644
index 0000000000000..e832a884d7654
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
@@ -0,0 +1,338 @@
+/*
+ * MPC8641 HPCN Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+
+/ {
+	model = "MPC8641HPCN";
+	compatible = "mpc86xx";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#cpus = <2>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8641@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <20>;	// 32 bytes
+			i-cache-line-size = <20>;	// 32 bytes
+			d-cache-size = <8000>;		// L1, 32K
+			i-cache-size = <8000>;		// L1, 32K
+			timebase-frequency = <0>;	// 33 MHz, from uboot
+			bus-frequency = <0>;		// From uboot
+			clock-frequency = <0>;		// From uboot
+			32-bit;
+			linux,boot-cpu;
+		};
+		PowerPC,8641@1 {
+			device_type = "cpu";
+			reg = <1>;
+			d-cache-line-size = <20>;	// 32 bytes
+			i-cache-line-size = <20>;	// 32 bytes
+			d-cache-size = <8000>;		// L1, 32K
+			i-cache-size = <8000>;		// L1, 32K
+			timebase-frequency = <0>;	// 33 MHz, from uboot
+			bus-frequency = <0>;		// From uboot
+			clock-frequency = <0>;		// From uboot
+			32-bit;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <00000000 40000000>;	// 1G at 0x0
+	};
+
+	soc8641@f8000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		#interrupt-cells = <2>;
+		device_type = "soc";
+		ranges = <0 f8000000 00100000>;
+		reg = <f8000000 00100000>;	// CCSRBAR 1M
+		bus-frequency = <0>;
+
+		i2c@3000 {
+			device_type = "i2c";
+			compatible = "fsl-i2c";
+			reg = <3000 100>;
+			interrupts = <2b 2>;
+			interrupt-parent = <40000>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			device_type = "i2c";
+			compatible = "fsl-i2c";
+			reg = <3100 100>;
+			interrupts = <2b 2>;
+			interrupt-parent = <40000>;
+			dfsrr;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "mdio";
+			compatible = "gianfar";
+			reg = <24520 20>;
+			linux,phandle = <24520>;
+			ethernet-phy@0 {
+				linux,phandle = <2452000>;
+				interrupt-parent = <40000>;
+				interrupts = <4a 1>;
+				reg = <0>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@1 {
+				linux,phandle = <2452001>;
+				interrupt-parent = <40000>;
+				interrupts = <4a 1>;
+				reg = <1>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@2 {
+				linux,phandle = <2452002>;
+				interrupt-parent = <40000>;
+				interrupts = <4a 1>;
+				reg = <2>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@3 {
+				linux,phandle = <2452003>;
+				interrupt-parent = <40000>;
+				interrupts = <4a 1>;
+				reg = <3>;
+				device_type = "ethernet-phy";
+			};
+		};
+
+		ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <24000 1000>;
+			mac-address = [ 00 E0 0C 00 73 00 ];
+			interrupts = <1d 2 1e 2 22 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452000>;
+		};
+
+		ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <25000 1000>;
+			mac-address = [ 00 E0 0C 00 73 01 ];
+			interrupts = <23 2 24 2 28 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452001>;
+		};
+		
+		ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <26000 1000>;
+			mac-address = [ 00 E0 0C 00 02 FD ];
+			interrupts = <1F 2 20 2 21 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452002>;
+		};
+
+		ethernet@27000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <27000 1000>;
+			mac-address = [ 00 E0 0C 00 03 FD ];
+			interrupts = <25 2 26 2 27 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452003>;
+		};
+		serial@4500 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4500 100>;
+			clock-frequency = <0>;
+			interrupts = <2a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		serial@4600 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4600 100>;
+			clock-frequency = <0>;
+			interrupts = <1c 2>;
+			interrupt-parent = <40000>;
+		};
+
+		pci@8000 {
+			compatible = "86xx";
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <8000 1000>;
+			bus-range = <0 fe>;
+			ranges = <02000000 0 80000000 80000000 0 20000000
+				  01000000 0 00000000 e2000000 0 00100000>;
+			clock-frequency = <1fca055>;
+			interrupt-parent = <40000>;
+			interrupts = <18 2>;
+			interrupt-map-mask = <f800 0 0 7>;
+			interrupt-map = <
+				/* IDSEL 0x11 */
+				8800 0 0 1 4d0 3 2
+				8800 0 0 2 4d0 4 2
+				8800 0 0 3 4d0 5 2
+				8800 0 0 4 4d0 6 2
+
+				/* IDSEL 0x12 */
+				9000 0 0 1 4d0 4 2
+				9000 0 0 2 4d0 5 2
+				9000 0 0 3 4d0 6 2
+				9000 0 0 4 4d0 3 2
+
+				/* IDSEL 0x13 */
+				9800 0 0 1 4d0 0 0
+				9800 0 0 2 4d0 0 0
+				9800 0 0 3 4d0 0 0
+				9800 0 0 4 4d0 0 0
+
+				/* IDSEL 0x14 */
+				a000 0 0 1 4d0 0 0
+				a000 0 0 2 4d0 0 0
+				a000 0 0 3 4d0 0 0
+				a000 0 0 4 4d0 0 0
+
+				/* IDSEL 0x15 */
+				a800 0 0 1 4d0 0 0
+				a800 0 0 2 4d0 0 0
+				a800 0 0 3 4d0 0 0
+				a800 0 0 4 4d0 0 0
+
+				/* IDSEL 0x16 */
+				b000 0 0 1 4d0 0 0
+				b000 0 0 2 4d0 0 0
+				b000 0 0 3 4d0 0 0
+				b000 0 0 4 4d0 0 0
+
+				/* IDSEL 0x17 */
+				b800 0 0 1 4d0 0 0
+				b800 0 0 2 4d0 0 0
+				b800 0 0 3 4d0 0 0
+				b800 0 0 4 4d0 0 0
+
+				/* IDSEL 0x18 */
+				c000 0 0 1 4d0 0 0
+				c000 0 0 2 4d0 0 0
+				c000 0 0 3 4d0 0 0
+				c000 0 0 4 4d0 0 0
+
+				/* IDSEL 0x19 */
+				c800 0 0 1 4d0 0 0
+				c800 0 0 2 4d0 0 0
+				c800 0 0 3 4d0 0 0
+				c800 0 0 4 4d0 0 0
+
+				/* IDSEL 0x1a */
+				d000 0 0 1 4d0 6 2
+				d000 0 0 2 4d0 3 2
+				d000 0 0 3 4d0 4 2
+				d000 0 0 4 4d0 5 2
+
+
+				/* IDSEL 0x1b */
+				d800 0 0 1 4d0 5 2
+				d800 0 0 2 4d0 0 0
+				d800 0 0 3 4d0 0 0
+				d800 0 0 4 4d0 0 0
+
+				/* IDSEL 0x1c */
+				e000 0 0 1 4d0 9 2
+				e000 0 0 2 4d0 a 2
+				e000 0 0 3 4d0 c 2
+				e000 0 0 4 4d0 7 2
+
+				/* IDSEL 0x1d */
+				e800 0 0 1 4d0 9 2
+				e800 0 0 2 4d0 a 2
+				e800 0 0 3 4d0 b 2
+				e800 0 0 4 4d0 0 0
+
+				/* IDSEL 0x1e */
+				f000 0 0 1 4d0 c 2
+				f000 0 0 2 4d0 0 0
+				f000 0 0 3 4d0 0 0
+				f000 0 0 4 4d0 0 0
+
+				/* IDSEL 0x1f */
+				f800 0 0 1 4d0 6 2
+				f800 0 0 2 4d0 0 0
+				f800 0 0 3 4d0 0 0
+				f800 0 0 4 4d0 0 0
+				>;
+			i8259@4d0 {
+				clock-frequency = <0>;
+				interrupt-controller;
+				device_type = "interrupt-controller";
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				built-in;
+				compatible = "chrp,iic";
+                	        big-endian;
+				interrupts = <49 2>;
+				interrupt-parent = <40000>;
+			};
+
+		};
+		pic@40000 {
+			linux,phandle = <40000>;
+			clock-frequency = <0>;
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <40000 40000>;
+			built-in;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+                        big-endian;
+			interrupts = <
+				10 2 11 2 12 2 13 2
+				14 2 15 2 16 2 17 2
+				18 2 19 2 1a 2 1b 2
+				1c 2 1d 2 1e 2 1f 2
+				20 2 21 2 22 2 23 2
+				24 2 25 2 26 2 27 2
+				28 2 29 2 2a 2 2b 2
+				2c 2 2d 2 2e 2 2f 2
+				30 2 31 2 32 2 33 2
+				34 2 35 2 36 2 37 2
+				38 2 39 2 2a 2 3b 2
+				3c 2 3d 2 3e 2 3f 2
+				48 1 49 2 4a 1
+				>;
+			interrupt-parent = <40000>;
+		};
+	};
+};
-- 
GitLab


From f4dddce57c105c447c566be52c3d210dec570a27 Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@embeddedalley.com>
Date: Fri, 4 Aug 2006 11:41:51 -0500
Subject: [PATCH 0083/1063] [POWERPC] Remove flush_dcache_all export

Removes the flush_dcache_all export for non coherent platforms.
We removed the last in-kernel user of this years ago in arch/ppc
so it no longer serves a purpose. Plus, it breaks the build
at the moment.

Signed-off-by: Matt Porter <mporter@embeddedalley.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/ppc_ksyms.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index f6a05f090b25e..39d3bfcabcd26 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -126,10 +126,6 @@ EXPORT_SYMBOL(pci_bus_mem_base_phys);
 EXPORT_SYMBOL(pci_bus_to_hose);
 #endif /* CONFIG_PCI */
 
-#ifdef CONFIG_NOT_COHERENT_CACHE
-EXPORT_SYMBOL(flush_dcache_all);
-#endif
-
 EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(kernel_thread);
 
-- 
GitLab


From 452b5e21216011f2f068e80443568f5f3f3f4d63 Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@embeddedalley.com>
Date: Fri, 4 Aug 2006 11:44:01 -0500
Subject: [PATCH 0084/1063] [POWERPC] Fix powerpc 44x_mmu build

The PIN_SIZE definition name changed, update 44x_mmu.c accordingly.

Signed-off-by: Matt Porter <mporter@embeddedalley.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/mm/44x_mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 376829ed22110..0a0a0487b3343 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -103,7 +103,7 @@ unsigned long __init mmu_mapin_ram(void)
 
 	/* Determine number of entries necessary to cover lowmem */
 	pinned_tlbs = (unsigned int)
-		(_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT);
+		(_ALIGN(total_lowmem, PPC_PIN_SIZE) >> PPC44x_PIN_SHIFT);
 
 	/* Write upper watermark to save location */
 	tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs;
@@ -111,7 +111,7 @@ unsigned long __init mmu_mapin_ram(void)
 	/* If necessary, set additional pinned TLBs */
 	if (pinned_tlbs > 1)
 		for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) {
-			unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE;
+			unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC_PIN_SIZE;
 			ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr);
 		}
 
-- 
GitLab


From 2f6093c84730b4bad65bcd0f2f904a5769b1dfc5 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Mon, 7 Aug 2006 16:19:19 +1000
Subject: [PATCH 0085/1063] [POWERPC] Implement SLB shadow buffer

This adds a shadow buffer for the SLBs and registers it with PHYP.
Only the bolted SLB entries (top 3) are shadowed.

The SLB shadow buffer tells the hypervisor what the kernel needs to
have in the SLB for the kernel to be able to function.  The hypervisor
can use this information to speed up partition context switches.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/asm-offsets.c             |  2 +
 arch/powerpc/kernel/entry_64.S                | 13 +++++++
 arch/powerpc/kernel/paca.c                    | 15 +++++++-
 arch/powerpc/mm/slb.c                         | 37 +++++++++++++++++--
 arch/powerpc/platforms/pseries/lpar.c         | 24 ++++++++++--
 .../platforms/pseries/plpar_wrappers.h        | 10 +++++
 arch/powerpc/platforms/pseries/setup.c        | 12 +++++-
 include/asm-powerpc/lppaca.h                  | 19 ++++++++++
 include/asm-powerpc/paca.h                    |  3 ++
 9 files changed, 124 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ac0631958b20d..2ef7ea8603799 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -135,11 +135,13 @@ int main(void)
 	DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
+	DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
 
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
 	DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
 	DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
+	DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
 #endif /* CONFIG_PPC64 */
 
 	/* RTAS */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 54d9f5cdaab49..5baea498ea641 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -323,6 +323,11 @@ _GLOBAL(ret_from_fork)
  * The code which creates the new task context is in 'copy_thread'
  * in arch/powerpc/kernel/process.c 
  */
+#define SHADOW_SLB_BOLTED_STACK_ESID \
+		(SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1))
+#define SHADOW_SLB_BOLTED_STACK_VSID \
+		(SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1) + 8)
+
 	.align	7
 _GLOBAL(_switch)
 	mflr	r0
@@ -375,6 +380,14 @@ BEGIN_FTR_SECTION
 	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
 	oris	r0,r6,(SLB_ESID_V)@h
 	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
+
+	/* Update the last bolted SLB */
+	ld	r9,PACA_SLBSHADOWPTR(r13)
+ 	li	r12,0
+  	std	r12,SHADOW_SLB_BOLTED_STACK_ESID(r9) /* Clear ESID */
+	std	r7,SHADOW_SLB_BOLTED_STACK_VSID(r9)  /* Save VSID */
+ 	std	r0,SHADOW_SLB_BOLTED_STACK_ESID(r9)  /* Save ESID */
+
 	slbie	r6
 	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
 	slbmte	r7,r0
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index c68741fed14bb..55f1a25085cd8 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -17,6 +17,7 @@
 #include <asm/lppaca.h>
 #include <asm/iseries/it_lp_reg_save.h>
 #include <asm/paca.h>
+#include <asm/mmu.h>
 
 
 /* This symbol is provided by the linker - let it fill in the paca
@@ -45,6 +46,17 @@ struct lppaca lppaca[] = {
 	},
 };
 
+/*
+ * 3 persistent SLBs are registered here.  The buffer will be zero
+ * initially, hence will all be invaild until we actually write them.
+ */
+struct slb_shadow slb_shadow[] __cacheline_aligned = {
+	[0 ... (NR_CPUS-1)] = {
+		.persistent = SLB_NUM_BOLTED,
+		.buffer_length = sizeof(struct slb_shadow),
+	},
+};
+
 /* The Paca is an array with one entry per processor.  Each contains an
  * lppaca, which contains the information shared between the
  * hypervisor and Linux.
@@ -59,7 +71,8 @@ struct lppaca lppaca[] = {
 	.lock_token = 0x8000,						    \
 	.paca_index = (number),		/* Paca Index */		    \
 	.kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL,		    \
-	.hw_cpu_id = 0xffff,
+	.hw_cpu_id = 0xffff,						    \
+	.slb_shadow_ptr = &slb_shadow[number],
 
 #ifdef CONFIG_PPC_ISERIES
 #define PACA_INIT_ISERIES(number)					    \
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index de0c8842415cb..d3733912adb43 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -22,6 +22,8 @@
 #include <asm/paca.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <linux/compiler.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -50,9 +52,32 @@ static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags)
 	return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags;
 }
 
-static inline void create_slbe(unsigned long ea, unsigned long flags,
-			       unsigned long entry)
+static inline void slb_shadow_update(unsigned long esid, unsigned long vsid,
+				     unsigned long entry)
 {
+	/*
+	 * Clear the ESID first so the entry is not valid while we are
+	 * updating it.
+	 */
+	get_slb_shadow()->save_area[entry].esid = 0;
+	barrier();
+	get_slb_shadow()->save_area[entry].vsid = vsid;
+	barrier();
+	get_slb_shadow()->save_area[entry].esid = esid;
+
+}
+
+static inline void create_shadowed_slbe(unsigned long ea, unsigned long flags,
+					unsigned long entry)
+{
+	/*
+	 * Updating the shadow buffer before writing the SLB ensures
+	 * we don't get a stale entry here if we get preempted by PHYP
+	 * between these two statements.
+	 */
+	slb_shadow_update(mk_esid_data(ea, entry), mk_vsid_data(ea, flags),
+			  entry);
+
 	asm volatile("slbmte  %0,%1" :
 		     : "r" (mk_vsid_data(ea, flags)),
 		       "r" (mk_esid_data(ea, entry))
@@ -77,6 +102,10 @@ void slb_flush_and_rebolt(void)
 	if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET)
 		ksp_esid_data &= ~SLB_ESID_V;
 
+	/* Only third entry (stack) may change here so only resave that */
+	slb_shadow_update(ksp_esid_data,
+			  mk_vsid_data(ksp_esid_data, lflags), 2);
+
 	/* We need to do this all in asm, so we're sure we don't touch
 	 * the stack between the slbia and rebolting it. */
 	asm volatile("isync\n"
@@ -209,9 +238,9 @@ void slb_initialize(void)
 	asm volatile("isync":::"memory");
 	asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
 	asm volatile("isync; slbia; isync":::"memory");
-	create_slbe(PAGE_OFFSET, lflags, 0);
+	create_shadowed_slbe(PAGE_OFFSET, lflags, 0);
 
-	create_slbe(VMALLOC_START, vflags, 1);
+	create_shadowed_slbe(VMALLOC_START, vflags, 1);
 
 	/* We don't bolt the stack for the time being - we're in boot,
 	 * so the stack is in the bolted segment.  By the time it goes
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6cbf14266d5e2..1820a0b0a8c6e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -252,18 +252,34 @@ void __init find_udbg_vterm(void)
 void vpa_init(int cpu)
 {
 	int hwcpu = get_hard_smp_processor_id(cpu);
-	unsigned long vpa = __pa(&lppaca[cpu]);
+	unsigned long addr;
 	long ret;
 
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		lppaca[cpu].vmxregs_in_use = 1;
 
-	ret = register_vpa(hwcpu, vpa);
+	addr = __pa(&lppaca[cpu]);
+	ret = register_vpa(hwcpu, addr);
 
-	if (ret)
+	if (ret) {
 		printk(KERN_ERR "WARNING: vpa_init: VPA registration for "
 				"cpu %d (hw %d) of area %lx returns %ld\n",
-				cpu, hwcpu, vpa, ret);
+				cpu, hwcpu, addr, ret);
+		return;
+	}
+	/*
+	 * PAPR says this feature is SLB-Buffer but firmware never
+	 * reports that.  All SPLPAR support SLB shadow buffer.
+	 */
+	addr = __pa(&slb_shadow[cpu]);
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		ret = register_slb_shadow(hwcpu, addr);
+		if (ret)
+			printk(KERN_ERR
+			       "WARNING: vpa_init: SLB shadow buffer "
+			       "registration for cpu %d (hw %d) of area %lx "
+			       "returns %ld\n", cpu, hwcpu, addr, ret);
+	}
 }
 
 long pSeries_lpar_hpte_insert(unsigned long hpte_group,
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index ebd15de7597ea..3eb7b294d92f3 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -37,6 +37,16 @@ static inline long register_vpa(unsigned long cpu, unsigned long vpa)
 	return vpa_call(0x1, cpu, vpa);
 }
 
+static inline long unregister_slb_shadow(unsigned long cpu, unsigned long vpa)
+{
+	return vpa_call(0x7, cpu, vpa);
+}
+
+static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
+{
+	return vpa_call(0x3, cpu, vpa);
+}
+
 extern void vpa_init(int cpu);
 
 static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index de214d86ff440..6ebeecfd6bcb9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -234,9 +234,17 @@ static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
 {
 	/* Don't risk a hypervisor call if we're crashing */
 	if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
-		unsigned long vpa = __pa(get_lppaca());
+		unsigned long addr;
 
-		if (unregister_vpa(hard_smp_processor_id(), vpa)) {
+		addr = __pa(get_slb_shadow());
+		if (unregister_slb_shadow(hard_smp_processor_id(), addr))
+			printk("SLB shadow buffer deregistration of "
+			       "cpu %u (hw_cpu_id %d) failed\n",
+			       smp_processor_id(),
+			       hard_smp_processor_id());
+
+		addr = __pa(get_lppaca());
+		if (unregister_vpa(hard_smp_processor_id(), addr)) {
 			printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
 					"failed\n", smp_processor_id(),
 					hard_smp_processor_id());
diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h
index 4dc514aabfe7e..942bb450baff3 100644
--- a/include/asm-powerpc/lppaca.h
+++ b/include/asm-powerpc/lppaca.h
@@ -27,7 +27,9 @@
 //
 //
 //----------------------------------------------------------------------------
+#include <linux/cache.h>
 #include <asm/types.h>
+#include <asm/mmu.h>
 
 /* The Hypervisor barfs if the lppaca crosses a page boundary.  A 1k
  * alignment is sufficient to prevent this */
@@ -133,5 +135,22 @@ struct lppaca {
 
 extern struct lppaca lppaca[];
 
+/*
+ * SLB shadow buffer structure as defined in the PAPR.  The save_area
+ * contains adjacent ESID and VSID pairs for each shadowed SLB.  The
+ * ESID is stored in the lower 64bits, then the VSID.
+ */
+struct slb_shadow {
+	u32	persistent;		// Number of persistent SLBs	x00-x03
+	u32	buffer_length;		// Total shadow buffer length	x04-x07
+	u64	reserved;		// Alignment			x08-x0f
+	struct	{
+		u64     esid;
+		u64	vsid;
+	} save_area[SLB_NUM_BOLTED];	//				x10-x40
+} ____cacheline_aligned;
+
+extern struct slb_shadow slb_shadow[];
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h
index 2d4585f062099..7ffa2512524e2 100644
--- a/include/asm-powerpc/paca.h
+++ b/include/asm-powerpc/paca.h
@@ -23,6 +23,7 @@
 register struct paca_struct *local_paca asm("r13");
 #define get_paca()	local_paca
 #define get_lppaca()	(get_paca()->lppaca_ptr)
+#define get_slb_shadow()	(get_paca()->slb_shadow_ptr)
 
 struct task_struct;
 
@@ -98,6 +99,8 @@ struct paca_struct {
 	u64 user_time;			/* accumulated usermode TB ticks */
 	u64 system_time;		/* accumulated system TB ticks */
 	u64 startpurr;			/* PURR/TB value snapshot */
+
+	struct slb_shadow *slb_shadow_ptr;
 };
 
 extern struct paca_struct paca[];
-- 
GitLab


From 5cf13911b1e72707b6f0eb39b2d819ec6e343d76 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Mon, 7 Aug 2006 17:34:50 +1000
Subject: [PATCH 0086/1063] [POWERPC] Update lppaca offset comments

Update offset comments.  No functional change.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/lppaca.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h
index 942bb450baff3..821ea0c512b41 100644
--- a/include/asm-powerpc/lppaca.h
+++ b/include/asm-powerpc/lppaca.h
@@ -116,7 +116,7 @@ struct lppaca {
 
 
 //=============================================================================
-// CACHE_LINE_3 0x0100 - 0x007F: This line is shared with other processors
+// CACHE_LINE_3 0x0100 - 0x017F: This line is shared with other processors
 //=============================================================================
 	// This is the yield_count.  An "odd" value (low bit on) means that
 	// the processor is yielded (either because of an OS yield or a PLIC
@@ -128,7 +128,7 @@ struct lppaca {
 	u8	reserved6[124];		// Reserved                     x04-x7F
 
 //=============================================================================
-// CACHE_LINE_4-5 0x0100 - 0x01FF Contains PMC interrupt data
+// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
 //=============================================================================
 	u8	pmc_save_area[256];	// PMC interrupt Area           x00-xFF
 } __attribute__((__aligned__(0x400)));
-- 
GitLab


From cd878479792cc1e4bc9d62ed0ef2c4454743848c Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 11 Aug 2006 17:59:28 -0400
Subject: [PATCH 0087/1063] [CPUFREQ] Fix typo.

Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b3df613ae4ec8..d35a9f06ab7b6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -32,7 +32,7 @@
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg)
 
 /**
- * The "cpufreq driver" - the arch- or hardware-dependend low
+ * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
  * also protects the cpufreq_cpu_data array.
  */
-- 
GitLab


From 1ce28d6b19112a7c76af8e971e2de3109d19a943 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <alexey_y_starikovskiy@linux.intel.com>
Date: Mon, 31 Jul 2006 22:25:20 +0400
Subject: [PATCH 0088/1063] [CPUFREQ][1/2] ondemand: updated tune for hardware
 coordination

Try to make dbs_check_cpu() call on all CPUs at the same jiffy.
This will help when multiple cores share P-states via Hardware Coordination.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi at intel.com>
Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy at intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq_ondemand.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 52cf1f0218259..f507a869acbc0 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -305,6 +305,9 @@ static void do_dbs_timer(void *data)
 {
 	unsigned int cpu = smp_processor_id();
 	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	/* We want all CPUs to do sampling nearly on same jiffy */
+	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	delay -= jiffies % delay;
 
 	if (!dbs_info->enable)
 		return;
@@ -312,18 +315,18 @@ static void do_dbs_timer(void *data)
 	lock_cpu_hotplug();
 	dbs_check_cpu(dbs_info);
 	unlock_cpu_hotplug();
-	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_init(unsigned int cpu)
 {
 	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	/* We want all CPUs to do sampling nearly on same jiffy */
+	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	delay -= jiffies % delay;
 
 	INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
-	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
-	return;
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
-- 
GitLab


From 05ca0350e8caa91a5ec9961c585c98005b6934ea Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <alexey_y_starikovskiy@linux.intel.com>
Date: Mon, 31 Jul 2006 22:28:12 +0400
Subject: [PATCH 0089/1063] [CPUFREQ][2/2] ondemand: updated add powersave_bias
 tunable

ondemand selects the minimum frequency that can retire
a workload with negligible idle time -- ideally resulting in the highest
performance/power efficiency with negligible performance impact.

But on some systems and some workloads, this algorithm
is more performance biased than necessary, and
de-tuning it a bit to allow some performance impact
can save measurable power.

This patch adds a "powersave_bias" tunable to ondemand
to allow it to reduce its target frequency by a specified percent.

By default, the powersave_bias is 0 and has no effect.
powersave_bias is in units of 0.1%, so it has an effective range
of 1 through 1000, resulting in 0.1% to 100% impact.

In practice, users will not be able to detect a difference between
0.1% increments, but 1.0% increments turned out to be too large.
Also, the max value of 1000 (100%) would simply peg the system
in its deepest power saving P-state, unless the processor really has
a hardware P-state at 0Hz:-)

For example, if ondemand requests 2.0GHz based on utilization,
and powersave_bias=100, this code will knock 10% off the target
and seek a target of 1.8GHz instead of 2.0GHz until the
next sampling.  If 1.8 is an exact match with a hardware frequency
we use it, otherwise we average our time between the frequency
next higher than 1.8 and next lower than 1.8.

Note that a user or administrative program can change powersave_bias
at run-time depending on how they expect the system to be used.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi at intel.com>
Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy at intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq_ondemand.c | 157 +++++++++++++++++++++++++----
 1 file changed, 140 insertions(+), 17 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index f507a869acbc0..34874c2f18853 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -55,6 +55,10 @@ struct cpu_dbs_info_s {
 	struct cpufreq_policy *cur_policy;
  	struct work_struct work;
 	unsigned int enable;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int freq_lo;
+	unsigned int freq_lo_jiffies;
+	unsigned int freq_hi_jiffies;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
 
@@ -72,15 +76,15 @@ static DEFINE_MUTEX(dbs_mutex);
 
 static struct workqueue_struct	*kondemand_wq;
 
-struct dbs_tuners {
+static struct dbs_tuners {
 	unsigned int sampling_rate;
 	unsigned int up_threshold;
 	unsigned int ignore_nice;
-};
-
-static struct dbs_tuners dbs_tuners_ins = {
+	unsigned int powersave_bias;
+} dbs_tuners_ins = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
 	.ignore_nice = 0,
+	.powersave_bias = 0,
 };
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
@@ -96,6 +100,69 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 	return retval;
 }
 
+/*
+ * Find right freq to be set now with powersave_bias on.
+ * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
+ * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
+ */
+unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+		unsigned int freq_next, unsigned int relation)
+{
+	unsigned int freq_req, freq_reduc, freq_avg;
+	unsigned int freq_hi, freq_lo;
+	unsigned int index = 0;
+	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
+	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
+
+	if (!dbs_info->freq_table) {
+		dbs_info->freq_lo = 0;
+		dbs_info->freq_lo_jiffies = 0;
+		return freq_next;
+	}
+
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
+			relation, &index);
+	freq_req = dbs_info->freq_table[index].frequency;
+	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
+	freq_avg = freq_req - freq_reduc;
+
+	/* Find freq bounds for freq_avg in freq_table */
+	index = 0;
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+			CPUFREQ_RELATION_H, &index);
+	freq_lo = dbs_info->freq_table[index].frequency;
+	index = 0;
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+			CPUFREQ_RELATION_L, &index);
+	freq_hi = dbs_info->freq_table[index].frequency;
+
+	/* Find out how long we have to be in hi and lo freqs */
+	if (freq_hi == freq_lo) {
+		dbs_info->freq_lo = 0;
+		dbs_info->freq_lo_jiffies = 0;
+		return freq_lo;
+	}
+	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
+	jiffies_hi += ((freq_hi - freq_lo) / 2);
+	jiffies_hi /= (freq_hi - freq_lo);
+	jiffies_lo = jiffies_total - jiffies_hi;
+	dbs_info->freq_lo = freq_lo;
+	dbs_info->freq_lo_jiffies = jiffies_lo;
+	dbs_info->freq_hi_jiffies = jiffies_hi;
+	return freq_hi;
+}
+
+static void ondemand_powersave_bias_init(void)
+{
+	int i;
+	for_each_online_cpu(i) {
+		struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
+		dbs_info->freq_table = cpufreq_frequency_get_table(i);
+		dbs_info->freq_lo = 0;
+	}
+}
+
 /************************** sysfs interface ************************/
 static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
 {
@@ -124,6 +191,7 @@ static ssize_t show_##file_name						\
 show_one(sampling_rate, sampling_rate);
 show_one(up_threshold, up_threshold);
 show_one(ignore_nice_load, ignore_nice);
+show_one(powersave_bias, powersave_bias);
 
 static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
 		const char *buf, size_t count)
@@ -198,6 +266,27 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
 	return count;
 }
 
+static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
+		const char *buf, size_t count)	/* sysfs store handler for the powersave_bias tunable */
+{
+	unsigned int input;
+	int ret;
+	ret = sscanf(buf, "%u", &input);
+
+	if (ret != 1)	/* nothing parsed as an unsigned integer */
+		return -EINVAL;
+
+	if (input > 1000)	/* bias is applied as freq * bias / 1000, so 1000 is the ceiling */
+		input = 1000;
+
+	mutex_lock(&dbs_mutex);
+	dbs_tuners_ins.powersave_bias = input;
+	ondemand_powersave_bias_init();	/* re-fetch freq tables and clear freq_lo under the same lock */
+	mutex_unlock(&dbs_mutex);
+
+	return count;	/* report the whole write as consumed */
+}
+
 #define define_one_rw(_name) \
 static struct freq_attr _name = \
 __ATTR(_name, 0644, show_##_name, store_##_name)
@@ -205,6 +294,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 define_one_rw(sampling_rate);
 define_one_rw(up_threshold);
 define_one_rw(ignore_nice_load);
+define_one_rw(powersave_bias);
 
 static struct attribute * dbs_attributes[] = {
 	&sampling_rate_max.attr,
@@ -212,6 +302,7 @@ static struct attribute * dbs_attributes[] = {
 	&sampling_rate.attr,
 	&up_threshold.attr,
 	&ignore_nice_load.attr,
+	&powersave_bias.attr,
 	NULL
 };
 
@@ -234,6 +325,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 	if (!this_dbs_info->enable)
 		return;
 
+	this_dbs_info->freq_lo = 0;
 	policy = this_dbs_info->cur_policy;
 	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
 	total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
@@ -274,11 +366,18 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 	/* Check for frequency increase */
 	if (load > dbs_tuners_ins.up_threshold) {
 		/* if we are already at full speed then break out early */
-		if (policy->cur == policy->max)
-			return;
-
-		__cpufreq_driver_target(policy, policy->max,
-			CPUFREQ_RELATION_H);
+		if (!dbs_tuners_ins.powersave_bias) {
+			if (policy->cur == policy->max)
+				return;
+
+			__cpufreq_driver_target(policy, policy->max,
+				CPUFREQ_RELATION_H);
+		} else {
+			int freq = powersave_bias_target(policy, policy->max,
+					CPUFREQ_RELATION_H);
+			__cpufreq_driver_target(policy, freq,
+				CPUFREQ_RELATION_L);
+		}
 		return;
 	}
 
@@ -293,14 +392,23 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 	 * policy. To be safe, we focus 10 points under the threshold.
 	 */
 	if (load < (dbs_tuners_ins.up_threshold - 10)) {
-		unsigned int freq_next;
-		freq_next = (policy->cur * load) /
+		unsigned int freq_next = (policy->cur * load) /
 			(dbs_tuners_ins.up_threshold - 10);
-
-		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+		if (!dbs_tuners_ins.powersave_bias) {
+			__cpufreq_driver_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+		} else {
+			int freq = powersave_bias_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+			__cpufreq_driver_target(policy, freq,
+				CPUFREQ_RELATION_L);
+		}
 	}
 }
 
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
 static void do_dbs_timer(void *data)
 {
 	unsigned int cpu = smp_processor_id();
@@ -311,10 +419,24 @@ static void do_dbs_timer(void *data)
 
 	if (!dbs_info->enable)
 		return;
-
-	lock_cpu_hotplug();
-	dbs_check_cpu(dbs_info);
-	unlock_cpu_hotplug();
+	/* Common NORMAL_SAMPLE setup */
+	INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE);
+	if (!dbs_tuners_ins.powersave_bias ||
+	    (unsigned long) data == DBS_NORMAL_SAMPLE) {
+		lock_cpu_hotplug();
+		dbs_check_cpu(dbs_info);
+		unlock_cpu_hotplug();
+		if (dbs_info->freq_lo) {
+			/* Setup timer for SUB_SAMPLE */
+			INIT_WORK(&dbs_info->work, do_dbs_timer,
+					(void *)DBS_SUB_SAMPLE);
+			delay = dbs_info->freq_hi_jiffies;
+		}
+	} else {
+		__cpufreq_driver_target(dbs_info->cur_policy,
+	                        	dbs_info->freq_lo,
+	                        	CPUFREQ_RELATION_H);
+	}
 	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
@@ -325,6 +447,7 @@ static inline void dbs_timer_init(unsigned int cpu)
 	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
 	delay -= jiffies % delay;
 
+	ondemand_powersave_bias_init();
 	INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
 	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
-- 
GitLab


From 179da8e6e8903a8cdb19bb12672d50dc33f0fde6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=B3=20Bilski?= <rafalbilski@interia.pl>
Date: Tue, 8 Aug 2006 19:12:20 +0200
Subject: [PATCH 0090/1063] [CPUFREQ] Longhaul - Disable arbiter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ACPI C3 works for "Powersaver" processors, so use it only for them.

Older CPUs will change frequency on "halt" only. But we can protect the
transition in two ways:
- by ACPI PM2 register, there is "bus master arbiter disable" bit.
  This isn't tested because VIA mainboards don't have PM2 register,
- by PLE133 PCI/AGP arbiter disable register.
  There are two bits in this register. First is "PCI arbiter disable",
  second "AGP arbiter disable". This is working on VIA Epia 800 mainboards.

Testing bm_control is more appropriate because it is true
when the PM2 register exists.

Signed-off-by: Rafał Bilski <rafalbilski@interia.pl>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/i386/kernel/cpu/cpufreq/longhaul.c | 86 ++++++++++++++++++-------
 1 file changed, 64 insertions(+), 22 deletions(-)

diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 4f2c3aeef724c..83a8793f1db88 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -27,6 +27,7 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
@@ -60,6 +61,7 @@ static int can_scale_voltage;
 static int vrmrev;
 static struct acpi_processor *pr = NULL;
 static struct acpi_processor_cx *cx = NULL;
+static int port22_en = 0;
 
 /* Module parameters */
 static int dont_scale_voltage;
@@ -124,10 +126,9 @@ static int longhaul_get_cpu_mult(void)
 
 /* For processor with BCR2 MSR */
 
-static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
+static void do_longhaul1(unsigned int clock_ratio_index)
 {
 	union msr_bcr2 bcr2;
-	u32 t;
 
 	rdmsrl(MSR_VIA_BCR2, bcr2.val);
 	/* Enable software clock multiplier */
@@ -136,13 +137,11 @@ static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
 
 	/* Sync to timer tick */
 	safe_halt();
-	ACPI_FLUSH_CPU_CACHE();
 	/* Change frequency on next halt or sleep */
 	wrmsrl(MSR_VIA_BCR2, bcr2.val);
-	/* Invoke C3 */
-	inb(cx_address);
-	/* Dummy op - must do something useless after P_LVL3 read */
-	t = inl(acpi_fadt.xpm_tmr_blk.address);
+	/* Invoke transition */
+	ACPI_FLUSH_CPU_CACHE();
+	halt();
 
 	/* Disable software clock multiplier */
 	local_irq_disable();
@@ -166,9 +165,9 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
 
 	/* Sync to timer tick */
 	safe_halt();
-	ACPI_FLUSH_CPU_CACHE();
 	/* Change frequency on next halt or sleep */
 	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	ACPI_FLUSH_CPU_CACHE();
 	/* Invoke C3 */
 	inb(cx_address);
 	/* Dummy op - must do something useless after P_LVL3 read */
@@ -227,10 +226,13 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	outb(0xFF,0xA1);	/* Overkill */
 	outb(0xFE,0x21);	/* TMR0 only */
 
-	/* Disable bus master arbitration */
-	if (pr->flags.bm_check) {
+	if (pr->flags.bm_control) {
+ 		/* Disable bus master arbitration */
 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
 				  ACPI_MTX_DO_NOT_LOCK);
+	} else if (port22_en) {
+		/* Disable AGP and PCI arbiters */
+		outb(3, 0x22);
 	}
 
 	switch (longhaul_version) {
@@ -244,7 +246,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	 */
 	case TYPE_LONGHAUL_V1:
 	case TYPE_LONGHAUL_V2:
-		do_longhaul1(cx->address, clock_ratio_index);
+		do_longhaul1(clock_ratio_index);
 		break;
 
 	/*
@@ -259,14 +261,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	 * to work in practice.
 	 */
 	case TYPE_POWERSAVER:
+		/* Don't allow wakeup */
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0,
+				  ACPI_MTX_DO_NOT_LOCK);
 		do_powersaver(cx->address, clock_ratio_index);
 		break;
 	}
 
-	/* Enable bus master arbitration */
-	if (pr->flags.bm_check) {
+	if (pr->flags.bm_control) {
+		/* Enable bus master arbitration */
 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0,
 				  ACPI_MTX_DO_NOT_LOCK);
+	} else if (port22_en) {
+		/* Enable arbiters */
+		outb(0, 0x22);
 	}
 
 	outb(pic2_mask,0xA1);	/* restore mask */
@@ -540,21 +548,33 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
 	return 1;
 }
 
+/* VIA boards lack the PM2 register; the PLE133 bridge offers port 0x22 instead. Returns 1 if usable, else 0 */
+static int enable_arbiter_disable(void)
+{
+	struct pci_dev *dev;
+	u8 pci_cmd;
+
+	/* Find PLE133 host bridge */
+	dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL);
+	if (dev != NULL) {
+		/* Enable access to port 0x22 */
+		pci_read_config_byte(dev, 0x78, &pci_cmd);
+		if ( !(pci_cmd & 1<<7) ) {	/* bit 7 of config byte 0x78 not set yet */
+			pci_cmd |= 1<<7;
+			pci_write_config_byte(dev, 0x78, pci_cmd);
+		}
+		return 1;	/* bridge present: longhaul_setstate() may write port 0x22 */
+	}
+	return 0;	/* no VT8601 bridge found */
+}
+
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
 	struct cpuinfo_x86 *c = cpu_data;
 	char *cpuname=NULL;
 	int ret;
 
-	/* Check ACPI support for C3 state */
-	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
-			 &longhaul_walk_callback, NULL, (void *)&pr);
-	if (pr == NULL) goto err_acpi;
-
-	cx = &pr->power.states[ACPI_STATE_C3];
-	if (cx->address == 0 || cx->latency > 1000) goto err_acpi;
-
-	/* Now check what we have on this motherboard */
+	/* Check what we have on this motherboard */
 	switch (c->x86_model) {
 	case 6:
 		cpu_model = CPU_SAMUEL;
@@ -636,6 +656,28 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 		break;
 	};
 
+	/* Find ACPI data for processor */
+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+			    &longhaul_walk_callback, NULL, (void *)&pr);
+	if (pr == NULL)
+		goto err_acpi;
+
+	if (longhaul_version == TYPE_POWERSAVER) {
+		/* Check ACPI support for C3 state */
+		cx = &pr->power.states[ACPI_STATE_C3];
+		if (cx->address == 0 || cx->latency > 1000)
+			goto err_acpi;
+	} else {
+		/* Check ACPI support for bus master arbiter disable */
+		if (!pr->flags.bm_control) {
+			if (!enable_arbiter_disable()) {
+				printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n");
+				return -ENODEV;
+			} else
+				port22_en = 1;
+		}
+	}
+
 	ret = longhaul_get_ranges();
 	if (ret != 0)
 		return ret;
-- 
GitLab


From e7745d4e0299a3460128917ceb6b6a807fa7f9e8 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 11 Aug 2006 18:02:27 -0400
Subject: [PATCH 0091/1063] [AGPGART] Const'ify the agpgart driver version.

Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 509adc403250a..d59e037ddd123 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -44,7 +44,7 @@
  * past 0.99 at all due to some boolean logic error. */
 #define AGPGART_VERSION_MAJOR 0
 #define AGPGART_VERSION_MINOR 101
-static struct agp_version agp_current_version =
+static const struct agp_version agp_current_version =
 {
 	.major = AGPGART_VERSION_MAJOR,
 	.minor = AGPGART_VERSION_MINOR,
-- 
GitLab


From 71565619af84a15d0abef6f0d6704e6472cd87c1 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 12 Aug 2006 01:59:50 +0400
Subject: [PATCH 0092/1063] [AGPGART] CONFIG_PM=n slim:
 drivers/char/agp/efficeon-agp.c

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/efficeon-agp.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index b788b0a3bbf33..30f730ff81c17 100644
--- a/drivers/char/agp/efficeon-agp.c
+++ b/drivers/char/agp/efficeon-agp.c
@@ -337,13 +337,6 @@ static struct agp_bridge_driver efficeon_driver = {
 	.agp_destroy_page	= agp_generic_destroy_page,
 };
 
-
-static int agp_efficeon_resume(struct pci_dev *pdev)
-{
-	printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
-	return efficeon_configure();
-}
-
 static int __devinit agp_efficeon_probe(struct pci_dev *pdev,
 				     const struct pci_device_id *ent)
 {
@@ -414,11 +407,18 @@ static void __devexit agp_efficeon_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state)
 {
 	return 0;
 }
 
+static int agp_efficeon_resume(struct pci_dev *pdev)
+{
+	printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
+	return efficeon_configure();
+}
+#endif
 
 static struct pci_device_id agp_efficeon_pci_table[] = {
 	{
@@ -439,8 +439,10 @@ static struct pci_driver agp_efficeon_pci_driver = {
 	.id_table	= agp_efficeon_pci_table,
 	.probe		= agp_efficeon_probe,
 	.remove		= agp_efficeon_remove,
+#ifdef CONFIG_PM
 	.suspend	= agp_efficeon_suspend,
 	.resume		= agp_efficeon_resume,
+#endif
 };
 
 static int __init agp_efficeon_init(void)
-- 
GitLab


From 85be7d60595b4803731cec158b0023bc050fdd14 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 12 Aug 2006 02:02:02 +0400
Subject: [PATCH 0093/1063] [AGPGART] CONFIG_PM=n slim:
 drivers/char/agp/intel-agp.c

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/intel-agp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 61ac3809f997c..42a1cb871992a 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -1766,6 +1766,7 @@ static void __devexit agp_intel_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_intel_resume(struct pci_dev *pdev)
 {
 	struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
@@ -1789,6 +1790,7 @@ static int agp_intel_resume(struct pci_dev *pdev)
 
 	return 0;
 }
+#endif
 
 static struct pci_device_id agp_intel_pci_table[] = {
 #define ID(x)						\
@@ -1835,7 +1837,9 @@ static struct pci_driver agp_intel_pci_driver = {
 	.id_table	= agp_intel_pci_table,
 	.probe		= agp_intel_probe,
 	.remove		= __devexit_p(agp_intel_remove),
+#ifdef CONFIG_PM
 	.resume		= agp_intel_resume,
+#endif
 };
 
 static int __init agp_intel_init(void)
-- 
GitLab


From b53e674a707cf77e76339852abdc063696679261 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 11 Aug 2006 18:13:41 -0400
Subject: [PATCH 0094/1063] [AGPGART] const'ify VIA AGP PCI table.

Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/via-agp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index b8ec25d174787..c149ac9ce9a76 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -9,7 +9,7 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
-static struct pci_device_id agp_via_pci_table[];
+static const struct pci_device_id agp_via_pci_table[];
 
 #define VIA_GARTCTRL	0x80
 #define VIA_APSIZE	0x84
@@ -485,7 +485,7 @@ static int agp_via_resume(struct pci_dev *pdev)
 #endif /* CONFIG_PM */
 
 /* must be the same order as name table above */
-static struct pci_device_id agp_via_pci_table[] = {
+static const struct pci_device_id agp_via_pci_table[] = {
 #define ID(x) \
 	{						\
 	.class		= (PCI_CLASS_BRIDGE_HOST << 8),	\
-- 
GitLab


From 6595413fc9453a211f4b5d5cc42f0bbf3daa615b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=B3=20Bilski?= <rafalbilski@interia.pl>
Date: Sun, 13 Aug 2006 09:16:20 +0200
Subject: [PATCH 0095/1063] [CPUFREQ] Longhaul - Add ignore_latency option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some laptops with VIA C3 processor, CLE266 chipset and
AMI BIOS have incorrect latency values in FADT table. These
laptops seem to be C3 capable, but latency values are too
big: 101 for C2 and 1017 for C3. This option will allow
user to skip C3 latency test but not C3 address test. AMI
BIOS is setting C3 address to correct value in DSDT table.

Signed-off-by: Rafał Bilski <rafalbilski@interia.pl>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/i386/kernel/cpu/cpufreq/longhaul.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 83a8793f1db88..43bbf948d45d5 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -65,7 +65,7 @@ static int port22_en = 0;
 
 /* Module parameters */
 static int dont_scale_voltage;
-
+static int ignore_latency = 0;
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
@@ -665,8 +665,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 	if (longhaul_version == TYPE_POWERSAVER) {
 		/* Check ACPI support for C3 state */
 		cx = &pr->power.states[ACPI_STATE_C3];
-		if (cx->address == 0 || cx->latency > 1000)
+		if (cx->address == 0 ||
+		   (cx->latency > 1000 && ignore_latency == 0) )
 			goto err_acpi;
+
 	} else {
 		/* Check ACPI support for bus master arbiter disable */
 		if (!pr->flags.bm_control) {
@@ -773,6 +775,8 @@ static void __exit longhaul_exit(void)
 
 module_param (dont_scale_voltage, int, 0644);
 MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
+module_param(ignore_latency, int, 0644);
+MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
 
 MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
@@ -780,4 +784,3 @@ MODULE_LICENSE ("GPL");
 
 late_initcall(longhaul_init);
 module_exit(longhaul_exit);
-
-- 
GitLab


From b5ecf60fe6b18de0bc59d336d444835d4ef835ed Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sun, 13 Aug 2006 23:00:08 +0200
Subject: [PATCH 0096/1063] [CPUFREQ] make
 drivers/cpufreq/cpufreq_ondemand.c:powersave_bias_target() static

This patch makes the needlessly global powersave_bias_target() static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq_ondemand.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 34874c2f18853..5ca2fd5d1ed1c 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -105,8 +105,9 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
  * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
  * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
  */
-unsigned int powersave_bias_target(struct cpufreq_policy *policy,
-		unsigned int freq_next, unsigned int relation)
+static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+					  unsigned int freq_next,
+					  unsigned int relation)
 {
 	unsigned int freq_req, freq_reduc, freq_avg;
 	unsigned int freq_hi, freq_lo;
-- 
GitLab


From c4e6952ffd71b263a64d1a9d79812446130560a5 Mon Sep 17 00:00:00 2001
From: Takashi YOSHI <takasi-y@ops.dti.ne.jp>
Date: Mon, 14 Aug 2006 19:48:30 -0500
Subject: [PATCH 0097/1063] [PATCH] MTD: Add Macronix MX29F040 to JEDEC

Signed-off-by: Takashi YOSHII <takasi-y@ops.dti.ne.jp>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
---
 drivers/mtd/chips/jedec_probe.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index 8f39d0a314383..1154dac715aa6 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -111,6 +111,7 @@
 #define MX29LV040C	0x004F
 #define MX29LV160T	0x22C4
 #define MX29LV160B	0x2249
+#define MX29F040	0x00A4
 #define MX29F016	0x00AD
 #define MX29F002T	0x00B0
 #define MX29F004T	0x0045
@@ -1171,6 +1172,19 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,31)
 		}
 	}, {
+		.mfr_id		= MANUFACTURER_MACRONIX,
+		.dev_id		= MX29F040,
+		.name		= "Macronix MX29F040",
+		.uaddr		= {
+			[0] = MTD_UADDR_0x0555_0x02AA /* x8 */
+		},
+		.DevSize	= SIZE_512KiB,
+		.CmdSet		= P_ID_AMD_STD,
+		.NumEraseRegions= 1,
+		.regions	= {
+			ERASEINFO(0x10000,8),
+		}
+        }, {
 		.mfr_id		= MANUFACTURER_MACRONIX,
 		.dev_id		= MX29F016,
 		.name		= "Macronix MX29F016",
-- 
GitLab


From 79b9cd586f534f3f40ee66b6c27732149a5915ad Mon Sep 17 00:00:00 2001
From: Takashi YOSHII <takasi-y@ops.dti.ne.jp>
Date: Tue, 15 Aug 2006 07:26:32 -0500
Subject: [PATCH 0098/1063] [PATCH] [MTD] Maps: Add dependency on alternate
 probe methods to physmap

map/physmap.c tries to probe "cfi_probe", "jedec_probe" and "map_rom", but
map/Kconfig says it depends on MTD_CFI only.
This patch adds  MTD_JEDECPROBE and MTD_ROM to the dependency condition.

Signed-off-by: Takashi YOSHII <takasi-y@ops.dti.ne.jp>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
---
 drivers/mtd/maps/Kconfig | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 83d0b2a525277..64d1b6a6c920b 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -13,13 +13,13 @@ config MTD_COMPLEX_MAPPINGS
 
 config MTD_PHYSMAP
 	tristate "CFI Flash device in physical memory map"
-	depends on MTD_CFI
+	depends on MTD_CFI || MTD_JEDECPROBE || MTD_ROM
 	help
-	  This provides a 'mapping' driver which allows the CFI probe and
-	  command set driver code to communicate with flash chips which
-	  are mapped physically into the CPU's memory. You will need to
-	  configure the physical address and size of the flash chips on
-	  your particular board as well as the bus width, either statically
+	  This provides a 'mapping' driver which allows the NOR Flash and
+	  ROM driver code to communicate with chips which are mapped
+	  physically into the CPU's memory. You will need to configure
+	  the physical address and size of the flash chips on your
+	  particular board as well as the bus width, either statically
 	  with config options or at run-time.
 
 config MTD_PHYSMAP_START
-- 
GitLab


From 0b6c0bb3f9621b128011bcd5f65047c73afdde3b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 15 Aug 2006 02:42:25 +0200
Subject: [PATCH 0099/1063] fs/jffs2/xattr.c: remove dead code

This patch removes some obvious dead code spotted by the Coverity
checker.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
---
 fs/jffs2/xattr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 25bc1ae086484..4da09ce1d1f52 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -1215,7 +1215,6 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt
 	rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
 	if (rc) {
 		JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
-		rc = rc ? rc : -EBADFD;
 		goto out;
 	}
 	rc = save_xattr_datum(c, xd);
-- 
GitLab


From 5b0c5c2c0d04c29f85abb485378ba5476c7aeec2 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <hskinnemoen@atmel.com>
Date: Wed, 9 Aug 2006 10:54:44 +0200
Subject: [PATCH 0100/1063] MTD: Convert Atmel PRI information to AMD format

Atmel flash chips don't have PRI information in the same format as
AMD flash chips. This patch installs a fixup for all Atmel chips that
converts the relevant PRI fields into AMD format.

Only the fields that are actually used by the command set are
converted. The rest are initialized to zero (which should be safe).

Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 21 +++++++++++++++++++++
 include/linux/mtd/cfi.h             | 13 +++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 9885726a16e46..8901c4412daf1 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -161,6 +161,26 @@ static void fixup_use_write_buffers(struct mtd_info *mtd, void *param)
 	}
 }
 
+/* Atmel chips don't use the same PRI format as AMD chips: rewrite cmdset_priv in place as AMD PRI */
+static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
+{
+	struct map_info *map = mtd->priv;
+	struct cfi_private *cfi = map->fldrv_priv;
+	struct cfi_pri_amdstd *extp = cfi->cmdset_priv;
+	struct cfi_pri_atmel atmel_pri;
+
+	memcpy(&atmel_pri, extp, sizeof(atmel_pri));	/* save the raw Atmel layout before clobbering it */
+	memset((char *)extp + 5, 0, sizeof(*extp) - 5);	/* byte offset 5 (past pri[3] + versions), not 5 structs */
+
+	if (atmel_pri.Features & 0x02)	/* Atmel feature bit 0x02 maps to AMD EraseSuspend = 2 */
+		extp->EraseSuspend = 2;
+
+	if (atmel_pri.BottomBoot)	/* AMD TopBottom encoding: 2 when BottomBoot is set, 3 otherwise */
+		extp->TopBottom = 2;
+	else
+		extp->TopBottom = 3;
+}
+
 static void fixup_use_secsi(struct mtd_info *mtd, void *param)
 {
 	/* Setup for chips with a secsi area */
@@ -192,6 +212,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
 #if !FORCE_WORD_WRITE
 	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, },
 #endif
+	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
 	{ 0, 0, NULL, NULL }
 };
 static struct cfi_fixup jedec_fixup_table[] = {
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 09bfae6938b3e..123948b145478 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -199,6 +199,18 @@ struct cfi_pri_amdstd {
 	uint8_t  TopBottom;
 } __attribute__((packed));
 
+/* Vendor-Specific PRI for Atmel chips (command set 0x0002) */
+
+struct cfi_pri_atmel {
+	uint8_t pri[3];	/* presumably the PRI signature bytes - TODO confirm */
+	uint8_t MajorVersion;
+	uint8_t MinorVersion;
+	uint8_t Features;	/* fixup_convert_atmel_pri() tests bit 0x02 to set EraseSuspend */
+	uint8_t BottomBoot;	/* nonzero: fixup_convert_atmel_pri() sets TopBottom = 2, else 3 */
+	uint8_t BurstMode;
+	uint8_t PageMode;
+} __attribute__((packed));
+
 struct cfi_pri_query {
 	uint8_t  NumFields;
 	uint32_t ProtField[1]; /* Not host ordered */
@@ -464,6 +476,7 @@ struct cfi_fixup {
 #define CFI_ID_ANY  0xffff
 
 #define CFI_MFR_AMD 0x0001
+#define CFI_MFR_ATMEL 0x001F
 #define CFI_MFR_ST  0x0020 	/* STMicroelectronics */
 
 void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
-- 
GitLab


From 0165508c80a2b5d5268d9c5dfa9b30c534a33693 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <hskinnemoen@atmel.com>
Date: Wed, 9 Aug 2006 11:06:07 +0200
Subject: [PATCH 0101/1063] MTD: Add lock/unlock operations for Atmel
 AT49BV6416

The AT49BV6416 is locked by default, so we really need to provide
at least the unlock() operation for write and erase to work. This
patch implements both ->lock() and ->unlock() and provides a fixup
to install them when an AT49BV6416 chip is detected.

These functions are probably valid on more Atmel chips, but I believe
it's mostly obsolete ones. The AT49BV6416 is in fact obsolete, but
it's used on all current AT32STK1000 development boards.

Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 90 +++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 8901c4412daf1..ddc5bd7833546 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -45,9 +45,11 @@
 #define MAX_WORD_RETRIES 3
 
 #define MANUFACTURER_AMD	0x0001
+#define MANUFACTURER_ATMEL	0x001F
 #define MANUFACTURER_SST	0x00BF
 #define SST49LF004B	        0x0060
 #define SST49LF008A		0x005a
+#define AT49BV6416		0x00d6
 
 static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
 static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *);
@@ -68,6 +70,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr);
 #include "fwh_lock.h"
 
+static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len);
+static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len);
+
 static struct mtd_chip_driver cfi_amdstd_chipdrv = {
 	.probe		= NULL, /* Not usable directly */
 	.destroy	= cfi_amdstd_destroy,
@@ -199,6 +204,16 @@ static void fixup_use_erase_chip(struct mtd_info *mtd, void *param)
 
 }
 
+/*
+ * Some Atmel chips (e.g. the AT49BV6416) power up with all sectors locked
+ * by default, so point the mtd's lock/unlock hooks at the Atmel handlers.
+ */
+static void fixup_use_atmel_lock(struct mtd_info *mtd, void *param)
+{
+	mtd->lock = cfi_atmel_lock;
+	mtd->unlock = cfi_atmel_unlock;
+}
+
 static struct cfi_fixup cfi_fixup_table[] = {
 #ifdef AMD_BOOTLOC_BUG
 	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
@@ -228,6 +243,7 @@ static struct cfi_fixup fixup_table[] = {
 	 * we know that is the case.
 	 */
 	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip, NULL },
+	{ CFI_MFR_ATMEL, AT49BV6416, fixup_use_atmel_lock, NULL },
 	{ 0, 0, NULL, NULL }
 };
 
@@ -1628,6 +1644,80 @@ static int cfi_amdstd_erase_chip(struct mtd_info *mtd, struct erase_info *instr)
 	return 0;
 }
 
+static int do_atmel_lock(struct map_info *map, struct flchip *chip,
+			 unsigned long adr, int len, void *thunk)	/* thunk is not used; len only logged */
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	int ret;
+
+	spin_lock(chip->mutex);
+	ret = get_chip(map, chip, adr + chip->start, FL_LOCKING);	/* adr appears chip-relative: chip->start is added at each use */
+	if (ret)
+		goto out_unlock;	/* chip not acquired: skip put_chip, just drop the lock */
+	chip->state = FL_LOCKING;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): LOCK 0x%08lx len %d\n",
+	      __func__, adr, len);
+
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,	/* command cycles: AA, 55, 80, AA, 55 ... */
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	map_write(map, CMD(0x40), chip->start + adr);	/* ... then 0x40 written to the target address itself */
+
+	chip->state = FL_READY;
+	put_chip(map, chip, adr + chip->start);
+	ret = 0;
+
+out_unlock:
+	spin_unlock(chip->mutex);
+	return ret;	/* 0 on success, otherwise whatever get_chip() returned */
+}
+
+static int do_atmel_unlock(struct map_info *map, struct flchip *chip,
+			   unsigned long adr, int len, void *thunk)	/* thunk is not used; len only logged */
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	int ret;
+
+	spin_lock(chip->mutex);
+	ret = get_chip(map, chip, adr + chip->start, FL_UNLOCKING);
+	if (ret)
+		goto out_unlock;	/* chip not acquired: skip put_chip, just drop the lock */
+	chip->state = FL_UNLOCKING;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): UNLOCK 0x%08lx len %d\n",
+	      __func__, adr, len);
+
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	map_write(map, CMD(0x70), chip->start + adr);	/* same chip->start base as get_chip/put_chip and do_atmel_lock */
+
+	chip->state = FL_READY;
+	put_chip(map, chip, adr + chip->start);
+	ret = 0;
+
+out_unlock:
+	spin_unlock(chip->mutex);
+	return ret;	/* 0 on success, otherwise whatever get_chip() returned */
+}
+
+static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len)	/* installed as mtd->lock by fixup_use_atmel_lock() */
+{
+	return cfi_varsize_frob(mtd, do_atmel_lock, ofs, len, NULL);	/* NULL thunk: do_atmel_lock() ignores it */
+}
+
+static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len)	/* installed as mtd->unlock by fixup_use_atmel_lock() */
+{
+	return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL);	/* NULL thunk: do_atmel_unlock() ignores it */
+}
+
 
 static void cfi_amdstd_sync (struct mtd_info *mtd)
 {
-- 
GitLab


From 04846f25920d4b05d6040c531cc601049260db52 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <aherrman@de.ibm.com>
Date: Wed, 9 Aug 2006 17:31:16 +0200
Subject: [PATCH 0102/1063] [SCSI] limit recursion when flushing
 shost->starved_list

Attached is a patch that should limit a possible recursion that can
lead to a stack overflow like follows:

Kernel stack overflow.
CPU:    3    Not tainted
Process zfcperp0.0.d819
(pid: 13897, task: 000000003e0d8cc8, ksp: 000000003499dbb8)
Krnl PSW : 0404000180000000 000000000030f8b2 (get_device+0x12/0x48)
Krnl GPRS: 00000000135a1980 000000000030f758 000000003ed6c1e8 0000000000000005
           0000000000000000 000000000044a780 000000003dbf7000 0000000034e15800
           000000003621c048 070000003499c108 000000003499c1a0 000000003ed6c000
           0000000040895000 00000000408ab630 000000003499c0a0 000000003499c0a0
Krnl Code: a7 fb ff e8 a7 19 00 00 b9 02 00 22 e3 e0 f0 98 00 24 a7 84
Call Trace:
([<000000004089edc2>] scsi_request_fn+0x13e/0x650 [scsi_mod])
 [<00000000002c5ff4>] blk_run_queue+0xd4/0x1a4
 [<000000004089ff8c>] scsi_queue_insert+0x22c/0x2a4 [scsi_mod]
 [<000000004089779a>] scsi_dispatch_cmd+0x8a/0x3d0 [scsi_mod]
 [<000000004089f1ec>] scsi_request_fn+0x568/0x650 [scsi_mod]
...
 [<000000004089f1ec>] scsi_request_fn+0x568/0x650 [scsi_mod]
 [<00000000002c5ff4>] blk_run_queue+0xd4/0x1a4
 [<000000004089ff8c>] scsi_queue_insert+0x22c/0x2a4 [scsi_mod]
 [<000000004089779a>] scsi_dispatch_cmd+0x8a/0x3d0 [scsi_mod]
 [<000000004089f1ec>] scsi_request_fn+0x568/0x650 [scsi_mod]
 [<00000000002c5ff4>] blk_run_queue+0xd4/0x1a4
 [<000000004089fa9e>] scsi_run_host_queues+0x196/0x230 [scsi_mod]
 [<00000000409eba28>] zfcp_erp_thread+0x2638/0x3080 [zfcp]
 [<0000000000107462>] kernel_thread_starter+0x6/0xc
 [<000000000010745c>] kernel_thread_starter+0x0/0xc
<0>Kernel panic - not syncing: Corrupt kernel stack, can't continue.

This stack overflow occurred during tests on s390 using zfcp.
Recursion depth for this panic was 19.

Usually recursion between blk_run_queue and a request_fn is avoided
using QUEUE_FLAG_REENTER. But this does not help if the scsi stack
tries to flush the starved_list of a scsi_host.

Limit recursion depth when flushing the starved_list
of a scsi_host.

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_lib.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 077c1c6912102..d6743b959a72b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -551,7 +551,15 @@ static void scsi_run_queue(struct request_queue *q)
 		list_del_init(&sdev->starved_entry);
 		spin_unlock_irqrestore(shost->host_lock, flags);
 
-		blk_run_queue(sdev->request_queue);
+
+		if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
+		    !test_and_set_bit(QUEUE_FLAG_REENTER,
+				      &sdev->request_queue->queue_flags)) {
+			blk_run_queue(sdev->request_queue);
+			clear_bit(QUEUE_FLAG_REENTER,
+				  &sdev->request_queue->queue_flags);
+		} else
+			blk_run_queue(sdev->request_queue);
 
 		spin_lock_irqsave(shost->host_lock, flags);
 		if (unlikely(!list_empty(&sdev->starved_entry)))
-- 
GitLab


From c8f7b073e0e81499474a84ee2a90f77f7805c7f8 Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Thu, 3 Aug 2006 08:02:24 -0700
Subject: [PATCH 0103/1063] [SCSI] aacraid: interruptible ioctl

Received from Mark Salyzyn

This patch allows the FSACTL_SEND_LARGE_FIB, FSACTL_SENDFIB and
FSACTL_SEND_RAW_SRB ioctl calls into the aacraid driver to be
interruptible. Only necessary if the adapter and/or the management
software has gone into some sort of misbehavior and the system is being
rebooted, thus permitting the user management software applications to
be killed relatively cleanly. The FIB queue resource is held out of the
free queue until the adapter finally, if ever, completes the command.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/commctrl.c | 23 ++++++++++++++++++-----
 drivers/scsi/aacraid/commsup.c  | 15 ++++++++++++---
 drivers/scsi/aacraid/dpcsup.c   | 10 ++++++++--
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index 255421de9d1aa..14d7aa9b7df31 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -38,7 +38,7 @@
 #include <linux/completion.h>
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
+#include <linux/delay.h> /* ssleep prototype */
 #include <linux/kthread.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -140,7 +140,8 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
 		fibptr->hw_fib_pa = hw_fib_pa;
 		fibptr->hw_fib = hw_fib;
 	}
-	aac_fib_free(fibptr);
+	if (retval != -EINTR)
+		aac_fib_free(fibptr);
 	return retval;
 }
 
@@ -621,7 +622,13 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
 
 		actual_fibsize = sizeof (struct aac_srb) + (((user_srbcmd->sg.count & 0xff) - 1) * sizeof (struct sgentry));
 		if(actual_fibsize != fibsize){ // User made a mistake - should not continue
-			dprintk((KERN_DEBUG"aacraid: Bad Size specified in Raw SRB command\n"));
+			dprintk((KERN_DEBUG"aacraid: Bad Size specified in "
+			  "Raw SRB command calculated fibsize=%d "
+			  "user_srbcmd->sg.count=%d aac_srb=%d sgentry=%d "
+			  "issued fibsize=%d\n",
+			  actual_fibsize, user_srbcmd->sg.count,
+			  sizeof(struct aac_srb), sizeof(struct sgentry),
+			  fibsize));
 			rcode = -EINVAL;
 			goto cleanup;
 		}
@@ -663,6 +670,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
 		psg->count = cpu_to_le32(sg_indx+1);
 		status = aac_fib_send(ScsiPortCommand, srbfib, actual_fibsize, FsaNormal, 1, 1, NULL, NULL);
 	}
+	if (status == -EINTR) {
+		rcode = -EINTR;
+		goto cleanup;
+	}
 
 	if (status != 0){
 		dprintk((KERN_DEBUG"aacraid: Could not send raw srb fib to hba\n")); 
@@ -696,8 +707,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
 	for(i=0; i <= sg_indx; i++){
 		kfree(sg_list[i]);
 	}
-	aac_fib_complete(srbfib);
-	aac_fib_free(srbfib);
+	if (rcode != -EINTR) {
+		aac_fib_complete(srbfib);
+		aac_fib_free(srbfib);
+	}
 
 	return rcode;
 }
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 3f27419c66af6..c67da1321133e 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -464,6 +464,8 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
 	dprintk((KERN_DEBUG "  hw_fib pa being sent=%lx\n",(ulong)fibptr->hw_fib_pa));
 	dprintk((KERN_DEBUG "  fib being sent=%p\n",fibptr));
 
+	if (!dev->queues)
+		return -ENODEV;
 	q = &dev->queues->queue[AdapNormCmdQueue];
 
 	if(wait)
@@ -527,8 +529,15 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
 				}
 				udelay(5);
 			}
-		} else
-			down(&fibptr->event_wait);
+		} else if (down_interruptible(&fibptr->event_wait)) {
+			spin_lock_irqsave(&fibptr->event_lock, flags);
+			if (fibptr->done == 0) {
+				fibptr->done = 2; /* Tell interrupt we aborted */
+				spin_unlock_irqrestore(&fibptr->event_lock, flags);
+				return -EINTR;
+			}
+			spin_unlock_irqrestore(&fibptr->event_lock, flags);
+		}
 		BUG_ON(fibptr->done == 0);
 			
 		if((fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT)){
@@ -795,7 +804,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
 
 	/* Sniff for container changes */
 
-	if (!dev)
+	if (!dev || !dev->fsa_dev)
 		return;
 	container = (u32)-1;
 
diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c
index b2a5c7262f36b..8335f07b77205 100644
--- a/drivers/scsi/aacraid/dpcsup.c
+++ b/drivers/scsi/aacraid/dpcsup.c
@@ -124,10 +124,15 @@ unsigned int aac_response_normal(struct aac_queue * q)
 		} else {
 			unsigned long flagv;
 			spin_lock_irqsave(&fib->event_lock, flagv);
-			fib->done = 1;
+			if (!fib->done)
+				fib->done = 1;
 			up(&fib->event_wait);
 			spin_unlock_irqrestore(&fib->event_lock, flagv);
 			FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
+			if (fib->done == 2) {
+				aac_fib_complete(fib);
+				aac_fib_free(fib);
+			}
 		}
 		consumed++;
 		spin_lock_irqsave(q->lock, flags);
@@ -316,7 +321,8 @@ unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index)
 			unsigned long flagv;
 	  		dprintk((KERN_INFO "event_wait up\n"));
 			spin_lock_irqsave(&fib->event_lock, flagv);
-			fib->done = 1;
+			if (!fib->done)
+				fib->done = 1;
 			up(&fib->event_wait);
 			spin_unlock_irqrestore(&fib->event_lock, flagv);
 			FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
-- 
GitLab


From 8c23cd7457151fc8ace79ec700a8aeaa9fc5b3d9 Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Tue, 8 Aug 2006 08:52:14 -0700
Subject: [PATCH 0104/1063] [SCSI] aacraid: Restart adapter on firmware assert
 (Update 2)

Received from Mark Salyzyn

If the adapter should be in a blinkled (Firmware Assert) state when the
driver loads, we will perform a warm restart of the Adapter Firmware to
see if we can rescue the adapter. Possible causes of a blinkled
include some early release motherboard BIOSes, transitory PCI bus
problems on embedded systems or non-x86 based architectures, transitory
startup failures of early release drives or transitory hardware
failures; some of which can bite the adapter later at runtime. Future
enhancements will include recovery during runtime.

Fixed extra whitespace issue.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/aacraid.h |  1 +
 drivers/scsi/aacraid/rkt.c     | 29 ++++++++++++++++++++++-------
 drivers/scsi/aacraid/rx.c      | 29 ++++++++++++++++++++++-------
 3 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index d0eecd4bec837..05f80982efa53 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1670,6 +1670,7 @@ extern struct aac_common aac_config;
 #define RCV_TEMP_READINGS		0x00000025
 #define GET_COMM_PREFERRED_SETTINGS	0x00000026
 #define IOP_RESET			0x00001000
+#define IOP_RESET_ALWAYS		0x00001001
 #define RE_INIT_ADAPTER			0x000000ee
 
 /*
diff --git a/drivers/scsi/aacraid/rkt.c b/drivers/scsi/aacraid/rkt.c
index 458ea897fd72f..f850c3a7cce97 100644
--- a/drivers/scsi/aacraid/rkt.c
+++ b/drivers/scsi/aacraid/rkt.c
@@ -395,6 +395,25 @@ static int aac_rkt_send(struct fib * fib)
 	return 0;
 }
 
+static int aac_rkt_restart_adapter(struct aac_dev *dev)
+{
+	u32 var;
+
+	printk(KERN_ERR "%s%d: adapter kernel panic'd.\n",
+			dev->name, dev->id);
+
+	if (aac_rkt_check_health(dev) <= 0)
+		return 1;
+	if (rkt_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0,
+			&var, NULL, NULL, NULL, NULL))
+		return 1;
+	if (var != 0x00000001)
+		 return 1;
+	if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)
+		return 1;
+	return 0;
+}
+
 /**
  *	aac_rkt_init	-	initialize an i960 based AAC card
  *	@dev: device to configure
@@ -417,6 +436,9 @@ int aac_rkt_init(struct aac_dev *dev)
 	/*
 	 *	Check to see if the board panic'd while booting.
 	 */
+	if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)
+		if (aac_rkt_restart_adapter(dev))
+			goto error_iounmap;
 	/*
 	 *	Check to see if the board failed any self tests.
 	 */
@@ -431,13 +453,6 @@ int aac_rkt_init(struct aac_dev *dev)
 		printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance);
 		goto error_iounmap;
 	}
-	/*
-	 *	Check to see if the board panic'd while booting.
-	 */
-	if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) {
-		printk(KERN_ERR "%s%d: adapter kernel panic'd.\n", dev->name, instance);
-		goto error_iounmap;
-	}
 	start = jiffies;
 	/*
 	 *	Wait for the adapter to be up and running. Wait up to 3 minutes
diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index 035018db69b1b..c715c4b2442db 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c
@@ -394,6 +394,25 @@ static int aac_rx_send(struct fib * fib)
 	return 0;
 }
 
+static int aac_rx_restart_adapter(struct aac_dev *dev)
+{
+	u32 var;
+
+	printk(KERN_ERR "%s%d: adapter kernel panic'd.\n",
+			dev->name, dev->id);
+
+	if (aac_rx_check_health(dev) <= 0)
+		return 1;
+	if (rx_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0,
+			&var, NULL, NULL, NULL, NULL))
+		return 1;
+	if (var != 0x00000001)
+		 return 1;
+	if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)
+		return 1;
+	return 0;
+}
+
 /**
  *	aac_rx_init	-	initialize an i960 based AAC card
  *	@dev: device to configure
@@ -416,6 +435,9 @@ int aac_rx_init(struct aac_dev *dev)
 	/*
 	 *	Check to see if the board panic'd while booting.
 	 */
+	if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)
+		if (aac_rx_restart_adapter(dev))
+			goto error_iounmap;
 	/*
 	 *	Check to see if the board failed any self tests.
 	 */
@@ -423,13 +445,6 @@ int aac_rx_init(struct aac_dev *dev)
 		printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance);
 		goto error_iounmap;
 	}
-	/*
-	 *	Check to see if the board panic'd while booting.
-	 */
-	if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) {
-		printk(KERN_ERR "%s%d: adapter kernel panic.\n", dev->name, instance);
-		goto error_iounmap;
-	}
 	/*
 	 *	Check to see if the monitor panic'd while booting.
 	 */
-- 
GitLab


From 90ee346651524eb275405d410f5d3bb6765a2d93 Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Thu, 3 Aug 2006 08:03:07 -0700
Subject: [PATCH 0105/1063] [SCSI] aacraid: Check for unlikely errors

Received from Mark Salyzyn

The enclosed patch cleans up some code fragments and adds some paranoia
checks (for unproven causes of potential driver failures).

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/aachba.c   | 15 +++++++++++++--
 drivers/scsi/aacraid/comminit.c |  2 +-
 drivers/scsi/aacraid/linit.c    |  4 ++++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 83b5c7d085f23..699351c15cc9d 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -489,6 +489,8 @@ int aac_probe_container(struct aac_dev *dev, int cid)
 	unsigned instance;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -ENOMEM;
 	instance = dev->scsi_host_ptr->unique_id;
 
 	if (!(fibptr = aac_fib_alloc(dev)))
@@ -1392,6 +1394,7 @@ static int aac_synchronize(struct scsi_cmnd *scsicmd, int cid)
 	struct scsi_cmnd *cmd;
 	struct scsi_device *sdev = scsicmd->device;
 	int active = 0;
+	struct aac_dev *aac;
 	unsigned long flags;
 
 	/*
@@ -1413,11 +1416,11 @@ static int aac_synchronize(struct scsi_cmnd *scsicmd, int cid)
 	if (active)
 		return SCSI_MLQUEUE_DEVICE_BUSY;
 
+	aac = (struct aac_dev *)scsicmd->device->host->hostdata;
 	/*
 	 *	Allocate and initialize a Fib
 	 */
-	if (!(cmd_fibcontext = 
-	    aac_fib_alloc((struct aac_dev *)scsicmd->device->host->hostdata)))
+	if (!(cmd_fibcontext = aac_fib_alloc(aac)))
 		return SCSI_MLQUEUE_HOST_BUSY;
 
 	aac_fib_init(cmd_fibcontext);
@@ -1470,6 +1473,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
 	struct aac_dev *dev = (struct aac_dev *)host->hostdata;
 	struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev;
 	
+	if (fsa_dev_ptr == NULL)
+		return -1;
 	/*
 	 *	If the bus, id or lun is out of range, return fail
 	 *	Test does not apply to ID 16, the pseudo id for the controller
@@ -1782,6 +1787,8 @@ static int query_disk(struct aac_dev *dev, void __user *arg)
 	struct fsa_dev_info *fsa_dev_ptr;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -ENODEV;
 	if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
 		return -EFAULT;
 	if (qd.cnum == -1)
@@ -1843,6 +1850,10 @@ static int delete_disk(struct aac_dev *dev, void __user *arg)
 	struct fsa_dev_info *fsa_dev_ptr;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -ENODEV;
+	if (!fsa_dev_ptr)
+		return -ENODEV;
 
 	if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
 		return -EFAULT;
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 1cd3584ba7ff6..87a9550967615 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -180,7 +180,7 @@ int aac_send_shutdown(struct aac_dev * dev)
 			  -2 /* Timeout silently */, 1,
 			  NULL, NULL);
 
-	if (status == 0)
+	if (status >= 0)
 		aac_fib_complete(fibctx);
 	aac_fib_free(fibctx);
 	return status;
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index e42a479ce64a2..9d8b550a91cbf 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1013,6 +1013,10 @@ static void __devexit aac_remove_one(struct pci_dev *pdev)
 	list_del(&aac->entry);
 	scsi_host_put(shost);
 	pci_disable_device(pdev);
+	if (list_empty(&aac_devices)) {
+		unregister_chrdev(aac_cfg_major, "aac");
+		aac_cfg_major = -1;
+	}
 }
 
 static struct pci_driver aac_pci_driver = {
-- 
GitLab


From 8c867b257d159ca04602d7087fa29f846785f9ea Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Thu, 3 Aug 2006 08:03:30 -0700
Subject: [PATCH 0106/1063] [SCSI] aacraid: Reset adapter in recovery timeout

Received from Mark Salyzyn

If the adapter is in blinkled (Firmware Assert) when error recovery
timeout actions have been triggered, perform an adapter warm reset and
restart the initialization.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/aachba.c   |  39 +++--
 drivers/scsi/aacraid/aacraid.h  |   4 +-
 drivers/scsi/aacraid/commctrl.c |   2 +-
 drivers/scsi/aacraid/commsup.c  | 258 ++++++++++++++++++++++++++++++++
 drivers/scsi/aacraid/linit.c    |  14 +-
 5 files changed, 296 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 699351c15cc9d..37c55ddce2143 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -175,7 +175,7 @@ MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size.
  *
  *	Query config status, and commit the configuration if needed.
  */
-int aac_get_config_status(struct aac_dev *dev)
+int aac_get_config_status(struct aac_dev *dev, int commit_flag)
 {
 	int status = 0;
 	struct fib * fibptr;
@@ -219,7 +219,7 @@ int aac_get_config_status(struct aac_dev *dev)
 	aac_fib_complete(fibptr);
 	/* Send a CT_COMMIT_CONFIG to enable discovery of devices */
 	if (status >= 0) {
-		if (commit == 1) {
+		if ((commit == 1) || commit_flag) {
 			struct aac_commit_config * dinfo;
 			aac_fib_init(fibptr);
 			dinfo = (struct aac_commit_config *) fib_data(fibptr);
@@ -784,8 +784,9 @@ int aac_get_adapter_info(struct aac_dev* dev)
 		dev->maximum_num_channels = le32_to_cpu(bus_info->BusCount);
 	}
 
-	tmp = le32_to_cpu(dev->adapter_info.kernelrev);
-	printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n", 
+	if (!dev->in_reset) {
+		tmp = le32_to_cpu(dev->adapter_info.kernelrev);
+		printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n",
 			dev->name, 
 			dev->id,
 			tmp>>24,
@@ -794,20 +795,21 @@ int aac_get_adapter_info(struct aac_dev* dev)
 			le32_to_cpu(dev->adapter_info.kernelbuild),
 			(int)sizeof(dev->supplement_adapter_info.BuildDate),
 			dev->supplement_adapter_info.BuildDate);
-	tmp = le32_to_cpu(dev->adapter_info.monitorrev);
-	printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n", 
+		tmp = le32_to_cpu(dev->adapter_info.monitorrev);
+		printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n",
 			dev->name, dev->id,
 			tmp>>24,(tmp>>16)&0xff,tmp&0xff,
 			le32_to_cpu(dev->adapter_info.monitorbuild));
-	tmp = le32_to_cpu(dev->adapter_info.biosrev);
-	printk(KERN_INFO "%s%d: bios %d.%d-%d[%d]\n", 
+		tmp = le32_to_cpu(dev->adapter_info.biosrev);
+		printk(KERN_INFO "%s%d: bios %d.%d-%d[%d]\n",
 			dev->name, dev->id,
 			tmp>>24,(tmp>>16)&0xff,tmp&0xff,
 			le32_to_cpu(dev->adapter_info.biosbuild));
-	if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
-		printk(KERN_INFO "%s%d: serial %x\n",
-			dev->name, dev->id,
-			le32_to_cpu(dev->adapter_info.serial[0]));
+		if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
+			printk(KERN_INFO "%s%d: serial %x\n",
+				dev->name, dev->id,
+				le32_to_cpu(dev->adapter_info.serial[0]));
+	}
 
 	dev->nondasd_support = 0;
 	dev->raid_scsi_mode = 0;
@@ -1417,6 +1419,9 @@ static int aac_synchronize(struct scsi_cmnd *scsicmd, int cid)
 		return SCSI_MLQUEUE_DEVICE_BUSY;
 
 	aac = (struct aac_dev *)scsicmd->device->host->hostdata;
+	if (aac->in_reset)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
 	/*
 	 *	Allocate and initialize a Fib
 	 */
@@ -1504,6 +1509,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
 				case INQUIRY:
 				case READ_CAPACITY:
 				case TEST_UNIT_READY:
+					if (dev->in_reset)
+						return -1;
 					spin_unlock_irq(host->host_lock);
 					aac_probe_container(dev, cid);
 					if ((fsa_dev_ptr[cid].valid & 1) == 0)
@@ -1529,6 +1536,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
 			}
 		} else {  /* check for physical non-dasd devices */
 			if(dev->nondasd_support == 1){
+				if (dev->in_reset)
+					return -1;
 				return aac_send_srb_fib(scsicmd);
 			} else {
 				scsicmd->result = DID_NO_CONNECT << 16;
@@ -1584,6 +1593,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
 			scsicmd->scsi_done(scsicmd);
 			return 0;
 		}
+		if (dev->in_reset)
+			return -1;
 		setinqstr(dev, (void *) (inq_data.inqd_vid), fsa_dev_ptr[cid].type);
 		inq_data.inqd_pdt = INQD_PDT_DA;	/* Direct/random access device */
 		aac_internal_transfer(scsicmd, &inq_data, 0, sizeof(inq_data));
@@ -1739,6 +1750,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
 		case READ_10:
 		case READ_12:
 		case READ_16:
+			if (dev->in_reset)
+				return -1;
 			/*
 			 *	Hack to keep track of ordinal number of the device that
 			 *	corresponds to a container. Needed to convert
@@ -1757,6 +1770,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
 		case WRITE_10:
 		case WRITE_12:
 		case WRITE_16:
+			if (dev->in_reset)
+				return -1;
 			return aac_write(scsicmd, cid);
 
 		case SYNCHRONIZE_CACHE:
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 05f80982efa53..8924c183d9c3f 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1029,6 +1029,7 @@ struct aac_dev
 	  init->InitStructRevision==cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4)
 	u8			raw_io_64;
 	u8			printf_enabled;
+	u8			in_reset;
 };
 
 #define aac_adapter_interrupt(dev) \
@@ -1789,7 +1790,7 @@ void aac_consumer_free(struct aac_dev * dev, struct aac_queue * q, u32 qnum);
 int aac_fib_complete(struct fib * context);
 #define fib_data(fibctx) ((void *)(fibctx)->hw_fib->data)
 struct aac_dev *aac_init_adapter(struct aac_dev *dev);
-int aac_get_config_status(struct aac_dev *dev);
+int aac_get_config_status(struct aac_dev *dev, int commit_flag);
 int aac_get_containers(struct aac_dev *dev);
 int aac_scsi_cmd(struct scsi_cmnd *cmd);
 int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg);
@@ -1800,6 +1801,7 @@ int aac_sa_init(struct aac_dev *dev);
 unsigned int aac_response_normal(struct aac_queue * q);
 unsigned int aac_command_normal(struct aac_queue * q);
 unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
+int aac_check_health(struct aac_dev * dev);
 int aac_command_thread(void *data);
 int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
 int aac_fib_adapter_complete(struct fib * fibptr, unsigned short size);
diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index 14d7aa9b7df31..da1d3a9212f80 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -298,7 +298,7 @@ static int next_getadapter_fib(struct aac_dev * dev, void __user *arg)
 		spin_unlock_irqrestore(&dev->fib_lock, flags);
 		/* If someone killed the AIF aacraid thread, restart it */
 		status = !dev->aif_thread;
-		if (status && dev->queues && dev->fsa_dev) {
+		if (status && !dev->in_reset && dev->queues && dev->fsa_dev) {
 			/* Be paranoid, be very paranoid! */
 			kthread_stop(dev->thread);
 			ssleep(1);
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index c67da1321133e..53add53be0bde 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -40,8 +40,10 @@
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
 #include <asm/semaphore.h>
 
 #include "aacraid.h"
@@ -1054,6 +1056,262 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
 
 }
 
+static int _aac_reset_adapter(struct aac_dev *aac)
+{
+	int index, quirks;
+	u32 ret;
+	int retval;
+	struct Scsi_Host *host;
+	struct scsi_device *dev;
+	struct scsi_cmnd *command;
+	struct scsi_cmnd *command_list;
+
+	/*
+	 * Assumptions:
+	 *	- host is locked.
+	 *	- in_reset is asserted, so no new i/o is getting to the
+	 *	  card.
+	 *	- The card is dead.
+	 */
+	host = aac->scsi_host_ptr;
+	scsi_block_requests(host);
+	aac_adapter_disable_int(aac);
+	spin_unlock_irq(host->host_lock);
+	kthread_stop(aac->thread);
+
+	/*
+	 *	A positive health value means the adapter is in a known DEAD
+	 * PANIC state and could be reset to `try again'.
+	 */
+	retval = aac_adapter_check_health(aac);
+	if (retval == 0)
+		retval = aac_adapter_sync_cmd(aac, IOP_RESET_ALWAYS,
+		  0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL);
+	if (retval)
+		retval = aac_adapter_sync_cmd(aac, IOP_RESET,
+		  0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL);
+
+	if (retval)
+		goto out;
+	if (ret != 0x00000001) {
+		retval = -ENODEV;
+		goto out;
+	}
+
+	index = aac->cardtype;
+
+	/*
+	 * Re-initialize the adapter, first free resources, then carefully
+	 * apply the initialization sequence to come back again. Only risk
+	 * is a change in Firmware dropping cache, it is assumed the caller
+	 * will ensure that i/o is quiesced and the card is flushed in that
+	 * case.
+	 */
+	aac_fib_map_free(aac);
+	aac->hw_fib_va = NULL;
+	aac->hw_fib_pa = 0;
+	pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys);
+	aac->comm_addr = NULL;
+	aac->comm_phys = 0;
+	kfree(aac->queues);
+	aac->queues = NULL;
+	free_irq(aac->pdev->irq, aac);
+	kfree(aac->fsa_dev);
+	aac->fsa_dev = NULL;
+	if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) {
+		if (((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) ||
+		  ((retval = pci_set_consistent_dma_mask(aac->pdev, DMA_32BIT_MASK))))
+			goto out;
+	} else {
+		if (((retval = pci_set_dma_mask(aac->pdev, 0x7FFFFFFFULL))) ||
+		  ((retval = pci_set_consistent_dma_mask(aac->pdev, 0x7FFFFFFFULL))))
+			goto out;
+	}
+	if ((retval = (*(aac_get_driver_ident(index)->init))(aac)))
+		goto out;
+	if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT)
+		if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK)))
+			goto out;
+	aac->thread = kthread_run(aac_command_thread, aac, aac->name);
+	if (IS_ERR(aac->thread)) {
+		retval = PTR_ERR(aac->thread);
+		goto out;
+	}
+	(void)aac_get_adapter_info(aac);
+	quirks = aac_get_driver_ident(index)->quirks;
+	if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) {
+ 		host->sg_tablesize = 34;
+ 		host->max_sectors = (host->sg_tablesize * 8) + 112;
+ 	}
+ 	if ((quirks & AAC_QUIRK_17SG) && (host->sg_tablesize > 17)) {
+ 		host->sg_tablesize = 17;
+ 		host->max_sectors = (host->sg_tablesize * 8) + 112;
+ 	}
+	aac_get_config_status(aac, 1);
+	aac_get_containers(aac);
+	/*
+	 * This is where the assumption that the Adapter is quiesced
+	 * is important.
+	 */
+	command_list = NULL;
+	__shost_for_each_device(dev, host) {
+		unsigned long flags;
+		spin_lock_irqsave(&dev->list_lock, flags);
+		list_for_each_entry(command, &dev->cmd_list, list)
+			if (command->SCp.phase == AAC_OWNER_FIRMWARE) {
+				command->SCp.buffer = (struct scatterlist *)command_list;
+				command_list = command;
+			}
+		spin_unlock_irqrestore(&dev->list_lock, flags);
+	}
+	while ((command = command_list)) {
+		command_list = (struct scsi_cmnd *)command->SCp.buffer;
+		command->SCp.buffer = NULL;
+		command->result = DID_OK << 16
+		  | COMMAND_COMPLETE << 8
+		  | SAM_STAT_TASK_SET_FULL;
+		command->SCp.phase = AAC_OWNER_ERROR_HANDLER;
+		command->scsi_done(command);
+	}
+	retval = 0;
+
+out:
+	aac->in_reset = 0;
+	scsi_unblock_requests(host);
+	spin_lock_irq(host->host_lock);
+	return retval;
+}
+
+int aac_check_health(struct aac_dev * aac)
+{
+	int BlinkLED;
+	unsigned long time_now, flagv = 0;
+	struct list_head * entry;
+	struct Scsi_Host * host;
+
+	/* Extending the scope of fib_lock slightly to protect aac->in_reset */
+	if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0)
+		return 0;
+
+	if (aac->in_reset || !(BlinkLED = aac_adapter_check_health(aac))) {
+		spin_unlock_irqrestore(&aac->fib_lock, flagv);
+		return 0; /* OK */
+	}
+
+	aac->in_reset = 1;
+
+	/* Fake up an AIF:
+	 *	aac_aifcmd.command = AifCmdEventNotify = 1
+	 *	aac_aifcmd.seqnum = 0xFFFFFFFF
+	 *	aac_aifcmd.data[0] = AifEnExpEvent = 23
+	 *	aac_aifcmd.data[1] = AifExeFirmwarePanic = 3
+	 *	aac.aifcmd.data[2] = AifHighPriority = 3
+	 *	aac.aifcmd.data[3] = BlinkLED
+	 */
+
+	time_now = jiffies/HZ;
+	entry = aac->fib_list.next;
+
+	/*
+	 * For each Context that is on the
+	 * fibctxList, make a copy of the
+	 * fib, and then set the event to wake up the
+	 * thread that is waiting for it.
+	 */
+	while (entry != &aac->fib_list) {
+		/*
+		 * Extract the fibctx
+		 */
+		struct aac_fib_context *fibctx = list_entry(entry, struct aac_fib_context, next);
+		struct hw_fib * hw_fib;
+		struct fib * fib;
+		/*
+		 * Check if the queue is getting
+		 * backlogged
+		 */
+		if (fibctx->count > 20) {
+			/*
+			 * It's *not* jiffies folks,
+			 * but jiffies / HZ, so do not
+			 * panic ...
+			 */
+			u32 time_last = fibctx->jiffies;
+			/*
+			 * Has it been > 2 minutes
+			 * since the last read off
+			 * the queue?
+			 */
+			if ((time_now - time_last) > aif_timeout) {
+				entry = entry->next;
+				aac_close_fib_context(aac, fibctx);
+				continue;
+			}
+		}
+		/*
+		 * Warning: no sleep allowed while
+		 * holding spinlock
+		 */
+		hw_fib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC);
+		fib = kmalloc(sizeof(struct fib), GFP_ATOMIC);
+		if (fib && hw_fib) {
+			struct aac_aifcmd * aif;
+
+			memset(hw_fib, 0, sizeof(struct hw_fib));
+			memset(fib, 0, sizeof(struct fib));
+			fib->hw_fib = hw_fib;
+			fib->dev = aac;
+			aac_fib_init(fib);
+			fib->type = FSAFS_NTC_FIB_CONTEXT;
+			fib->size = sizeof (struct fib);
+			fib->data = hw_fib->data;
+			aif = (struct aac_aifcmd *)hw_fib->data;
+			aif->command = cpu_to_le32(AifCmdEventNotify);
+		 	aif->seqnum = cpu_to_le32(0xFFFFFFFF);
+		 	aif->data[0] = cpu_to_le32(AifEnExpEvent);
+			aif->data[1] = cpu_to_le32(AifExeFirmwarePanic);
+		 	aif->data[2] = cpu_to_le32(AifHighPriority);
+			aif->data[3] = cpu_to_le32(BlinkLED);
+
+			/*
+			 * Put the FIB onto the
+			 * fibctx's fibs
+			 */
+			list_add_tail(&fib->fiblink, &fibctx->fib_list);
+			fibctx->count++;
+			/*
+			 * Set the event to wake up the
+			 * thread that is waiting.
+			 */
+			up(&fibctx->wait_sem);
+		} else {
+			printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
+			kfree(fib);
+			kfree(hw_fib);
+		}
+		entry = entry->next;
+	}
+
+	spin_unlock_irqrestore(&aac->fib_lock, flagv);
+
+	if (BlinkLED < 0) {
+		printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED);
+		goto out;
+	}
+
+	printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED);
+
+	host = aac->scsi_host_ptr;
+	spin_lock_irqsave(host->host_lock, flagv);
+	BlinkLED = _aac_reset_adapter(aac);
+	spin_unlock_irqrestore(host->host_lock, flagv);
+	return BlinkLED;
+
+out:
+	aac->in_reset = 0;
+	return BlinkLED;
+}
+
+
 /**
  *	aac_command_thread	-	command processing thread
  *	@dev: Adapter to monitor
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 9d8b550a91cbf..d67058f80816b 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -454,17 +454,17 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
 	printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n", 
 					AAC_DRIVERNAME);
 	aac = (struct aac_dev *)host->hostdata;
-	if (aac_adapter_check_health(aac)) {
-		printk(KERN_ERR "%s: Host adapter appears dead\n", 
-				AAC_DRIVERNAME);
-		return -ENODEV;
-	}
+
+	if ((count = aac_check_health(aac)))
+		return count;
 	/*
 	 * Wait for all commands to complete to this specific
 	 * target (block maximum 60 seconds).
 	 */
 	for (count = 60; count; --count) {
-		int active = 0;
+		int active = aac->in_reset;
+
+		if (active == 0)
 		__shost_for_each_device(dev, host) {
 			spin_lock_irqsave(&dev->list_lock, flags);
 			list_for_each_entry(command, &dev->cmd_list, list) {
@@ -933,7 +933,7 @@ static int __devinit aac_probe_one(struct pci_dev *pdev,
 	else
 		shost->max_channel = 0;
 
-	aac_get_config_status(aac);
+	aac_get_config_status(aac, 0);
 	aac_get_containers(aac);
 	list_add(&aac->entry, insert);
 
-- 
GitLab


From 84961f28e9d13a4b193d0c8545f3c060c1890ff3 Mon Sep 17 00:00:00 2001
From: dave wysochanski <davidw@netapp.com>
Date: Wed, 9 Aug 2006 14:56:32 -0400
Subject: [PATCH 0107/1063] [SCSI] Don't add scsi_device for devices that
 return PQ=1, PDT=0x1f

Some targets may return slight variations of PQ and PDT to indicate
no LUN mapped.  USB UFI setting PDT=0x1f but having reserved bits for
PQ is one example, and NetApp targets returning PQ=1 and PDT=0x1f is
another.  Both instances seem like reasonable responses according to
SPC-3 and UFI specs.

The current scsi_probe_and_add_lun() code adds a scsi_device
for targets that return PQ=1 and PDT=0x1f.  This causes LUNs of type
"UNKNOWN" to show up in /proc/scsi/scsi when no LUNs are mapped.
In addition, subsequent rescans fail to recognize LUNs that may be
added on the target, unless preceded by a write to the delete attribute
of the "UNKNOWN" LUN.

This patch addresses this problem by skipping over the scsi_add_lun()
when PQ=1,PDT=0x1f is encountered, and just returns
SCSI_SCAN_TARGET_PRESENT.

Signed-off-by: Dave Wysochanski <davidw@netapp.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_scan.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 114e2067dce57..a24d3461fc788 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -893,11 +893,26 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
 	}
 
 	/*
-	 * Non-standard SCSI targets may set the PDT to 0x1f (unknown or
-	 * no device type) instead of using the Peripheral Qualifier to
-	 * indicate that no LUN is present.  For example, USB UFI does this.
+	 * Some targets may set slight variations of PQ and PDT to signal
+	 * that no LUN is present, so don't add sdev in these cases.
+	 * Two specific examples are:
+	 * 1) NetApp targets: return PQ=1, PDT=0x1f
+	 * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved"
+	 *    in the UFI 1.0 spec (we cannot rely on reserved bits).
+	 *
+	 * References:
+	 * 1) SCSI SPC-3, pp. 145-146
+	 * PQ=1: "A peripheral device having the specified peripheral
+	 * device type is not connected to this logical unit. However, the
+	 * device server is capable of supporting the specified peripheral
+	 * device type on this logical unit."
+	 * PDT=0x1f: "Unknown or no device type"
+	 * 2) USB UFI 1.0, p. 20
+	 * PDT=00h Direct-access device (floppy)
+	 * PDT=1Fh none (no FDD connected to the requested logical unit)
 	 */
-	if (starget->pdt_1f_for_no_lun && (result[0] & 0x1f) == 0x1f) {
+	if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) &&
+	     (result[0] & 0x1f) == 0x1f) {
 		SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
 					"scsi scan: peripheral device type"
 					" of 31, no device added\n"));
-- 
GitLab


From a2f5d4d94f0ab9560b9a99d73d5b86b377c7f201 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 10 Aug 2006 21:41:13 -0400
Subject: [PATCH 0108/1063] [SCSI] remove unnecessary includes of
 linux/config.h from drivers/scsi/

kbuild includes this automatically these days.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic7xxx_old.c | 2 --
 drivers/scsi/dpt_i2o.c     | 1 -
 drivers/scsi/hptiop.c      | 1 -
 drivers/scsi/libata-eh.c   | 1 -
 drivers/scsi/scsi.h        | 2 --
 5 files changed, 7 deletions(-)

diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index 3f85b5e978f1a..ba3bccafe1137 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -249,8 +249,6 @@
 #include <linux/stat.h>
 #include <linux/slab.h>        /* for kmalloc() */
 
-#include <linux/config.h>        /* for CONFIG_PCI */
-
 #define AIC7XXX_C_VERSION  "5.2.6"
 
 #define ALL_TARGETS -1
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index e1337339cacc2..45806336ce02e 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -46,7 +46,6 @@ MODULE_DESCRIPTION("Adaptec I2O RAID Driver");
 
 #include <linux/stat.h>
 #include <linux/slab.h>		/* for kmalloc() */
-#include <linux/config.h>	/* for CONFIG_PCI */
 #include <linux/pci.h>		/* for PCI support */
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index ab2f8b2679085..6b41c2ef6e21a 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -15,7 +15,6 @@
  *
  * For more information, visit http://www.highpoint-tech.com
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index 4b6aa30f4d685..b3095fd928637 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -32,7 +32,6 @@
  *
  */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
diff --git a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
index f51e466893e72..d5a55fae60e03 100644
--- a/drivers/scsi/scsi.h
+++ b/drivers/scsi/scsi.h
@@ -20,8 +20,6 @@
 #ifndef _SCSI_H
 #define _SCSI_H
 
-#include <linux/config.h>	    /* for CONFIG_SCSI_LOGGING */
-
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_eh.h>
-- 
GitLab


From 016131b8fffa1085b4ad165ab228116fdc278ebe Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Mon, 14 Aug 2006 08:20:25 -0400
Subject: [PATCH 0109/1063] [SCSI] fc transport: convert fc_host symbolic_name
 attribute to a dynamic attribute

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_fc.c | 4 ++--
 include/scsi/scsi_transport_fc.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index b03aa85108e5f..c1c5cdffca38e 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -815,7 +815,6 @@ fc_private_host_rd_attr_cast(node_name, "0x%llx\n", 20, unsigned long long);
 fc_private_host_rd_attr_cast(port_name, "0x%llx\n", 20, unsigned long long);
 fc_private_host_rd_attr_cast(permanent_port_name, "0x%llx\n", 20,
 			     unsigned long long);
-fc_private_host_rd_attr(symbolic_name, "%s\n", (FC_SYMBOLIC_NAME_SIZE +1));
 fc_private_host_rd_attr(maxframe_size, "%u bytes\n", 20);
 fc_private_host_rd_attr(serial_number, "%s\n", (FC_SERIAL_NUMBER_SIZE +1));
 
@@ -858,6 +857,7 @@ fc_host_rd_attr(port_id, "0x%06x\n", 20);
 fc_host_rd_enum_attr(port_type, FC_PORTTYPE_MAX_NAMELEN);
 fc_host_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN);
 fc_host_rd_attr_cast(fabric_name, "0x%llx\n", 20, unsigned long long);
+fc_host_rd_attr(symbolic_name, "%s\n", FC_SYMBOLIC_NAME_SIZE + 1);
 
 
 /* Private Host Attributes */
@@ -1223,7 +1223,6 @@ fc_attach_transport(struct fc_function_template *ft)
 	SETUP_HOST_ATTRIBUTE_RD(permanent_port_name);
 	SETUP_HOST_ATTRIBUTE_RD(supported_classes);
 	SETUP_HOST_ATTRIBUTE_RD(supported_fc4s);
-	SETUP_HOST_ATTRIBUTE_RD(symbolic_name);
 	SETUP_HOST_ATTRIBUTE_RD(supported_speeds);
 	SETUP_HOST_ATTRIBUTE_RD(maxframe_size);
 	SETUP_HOST_ATTRIBUTE_RD(serial_number);
@@ -1234,6 +1233,7 @@ fc_attach_transport(struct fc_function_template *ft)
 	SETUP_HOST_ATTRIBUTE_RD(active_fc4s);
 	SETUP_HOST_ATTRIBUTE_RD(speed);
 	SETUP_HOST_ATTRIBUTE_RD(fabric_name);
+	SETUP_HOST_ATTRIBUTE_RD(symbolic_name);
 
 	/* Transport-managed attributes */
 	SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type);
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 6d28b03176570..b7f62b85f0b35 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -409,6 +409,7 @@ struct fc_function_template {
 	void	(*get_host_active_fc4s)(struct Scsi_Host *);
 	void	(*get_host_speed)(struct Scsi_Host *);
 	void	(*get_host_fabric_name)(struct Scsi_Host *);
+	void	(*get_host_symbolic_name)(struct Scsi_Host *);
 
 	struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *);
 	void	(*reset_fc_host_stats)(struct Scsi_Host *);
@@ -445,7 +446,6 @@ struct fc_function_template {
 	unsigned long	show_host_permanent_port_name:1;
 	unsigned long	show_host_supported_classes:1;
 	unsigned long	show_host_supported_fc4s:1;
-	unsigned long	show_host_symbolic_name:1;
 	unsigned long	show_host_supported_speeds:1;
 	unsigned long	show_host_maxframe_size:1;
 	unsigned long	show_host_serial_number:1;
@@ -456,6 +456,7 @@ struct fc_function_template {
 	unsigned long	show_host_active_fc4s:1;
 	unsigned long	show_host_speed:1;
 	unsigned long	show_host_fabric_name:1;
+	unsigned long	show_host_symbolic_name:1;
 };
 
 
-- 
GitLab


From 2b6ee9b5295460017fc1bc3d60545512df280908 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 14 Aug 2006 23:09:23 -0700
Subject: [PATCH 0110/1063] [SCSI] aic7*: cleanup MODULE_PARM_DESC strings

Modify beginning string to be more readable.  Remove one trailing newline.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic7xxx/aic79xx_osm.c | 4 ++--
 drivers/scsi/aic7xxx/aic7xxx_osm.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 998999c0a9721..c7eeaced324af 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -321,7 +321,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(AIC79XX_DRIVER_VERSION);
 module_param(aic79xx, charp, 0444);
 MODULE_PARM_DESC(aic79xx,
-"period delimited, options string.\n"
+"period-delimited options string:\n"
 "	verbose			Enable verbose/diagnostic logging\n"
 "	allow_memio		Allow device registers to be memory mapped\n"
 "	debug			Bitmask of debug values to enable\n"
@@ -346,7 +346,7 @@ MODULE_PARM_DESC(aic79xx,
 "		Shorten the selection timeout to 128ms\n"
 "\n"
 "	options aic79xx 'aic79xx=verbose.tag_info:{{}.{}.{..10}}.seltime:1'\n"
-"\n");
+);
 
 static void ahd_linux_handle_scsi_status(struct ahd_softc *,
 					 struct scsi_device *,
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index aa4be8a31415e..e5bb4d87b3073 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -341,7 +341,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(AIC7XXX_DRIVER_VERSION);
 module_param(aic7xxx, charp, 0444);
 MODULE_PARM_DESC(aic7xxx,
-"period delimited, options string.\n"
+"period-delimited options string:\n"
 "	verbose			Enable verbose/diagnostic logging\n"
 "	allow_memio		Allow device registers to be memory mapped\n"
 "	debug			Bitmask of debug values to enable\n"
-- 
GitLab


From f3d7271c5ac9029d19fc0252a85bc045334382cc Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwidheim.de>
Date: Tue, 15 Aug 2006 11:17:21 +0200
Subject: [PATCH 0111/1063] [SCSI] convert to PCI_DEVICE() macro

Convert the pci_device_id-table of the megaraid_sas-driver to
the PCI_DEVICE-macro, to save some lines.

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
Acked-by: "Patro, Sumant" <Sumant.Patro@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/megaraid/megaraid_sas.c | 34 ++++++++--------------------
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index a8c9627a15c4d..b36307d2e283a 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -53,31 +53,15 @@ MODULE_DESCRIPTION("LSI Logic MegaRAID SAS Driver");
  */
 static struct pci_device_id megasas_pci_table[] = {
 
-	{
-	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_SAS1064R, /* xscale IOP */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	 },
-	{
-	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_SAS1078R, /* ppc IOP */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	},
-	{
-	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_VERDE_ZCR,	/* xscale IOP, vega */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	 },
-	{
-	 PCI_VENDOR_ID_DELL,
-	 PCI_DEVICE_ID_DELL_PERC5, /* xscale IOP */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	 },
-	{0}			/* Terminating entry */
+	{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064R)},
+	/* xscale IOP */
+	{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1078R)},
+	/* ppc IOP */
+	{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_VERDE_ZCR)},
+	/* xscale IOP, vega */
+	{PCI_DEVICE(PCI_VENDOR_ID_DELL, PCI_DEVICE_ID_DELL_PERC5)},
+	/* xscale IOP */
+	{}
 };
 
 MODULE_DEVICE_TABLE(pci, megasas_pci_table);
-- 
GitLab


From b8d08210126a7b769b857720a59721a453a57a1e Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Thu, 17 Aug 2006 08:00:43 -0400
Subject: [PATCH 0112/1063] [SCSI] fc transport: add fc_host system_hostname
 attribute and u64_to_wwn()

This patch updates the fc transport for the following:

- Addition of a new attribute "system_hostname" which can be
  used to set the fully qualified hostname that the fc_host
  is attached to. The fc_host can then register this string
  as the FDMI-based host name attribute.
  Note: for NPIV, a fc_host could be associated with a system which
    is not the local system.

- Add the inline function u64_to_wwn(), which is the inverse of the
  existing wwn_to_u64() function.

- Slight reorg, just to keep dynamic attributes with each other, etc

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_fc.c | 30 +++++++++++++++++++++++--
 include/scsi/scsi_transport_fc.h | 38 +++++++++++++++++++++++---------
 2 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index c1c5cdffca38e..79d31ca2b7416 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -301,8 +301,6 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev,
 	fc_host->supported_classes = FC_COS_UNSPECIFIED;
 	memset(fc_host->supported_fc4s, 0,
 		sizeof(fc_host->supported_fc4s));
-	memset(fc_host->symbolic_name, 0,
-		sizeof(fc_host->symbolic_name));
 	fc_host->supported_speeds = FC_PORTSPEED_UNKNOWN;
 	fc_host->maxframe_size = -1;
 	memset(fc_host->serial_number, 0,
@@ -315,6 +313,8 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev,
 		sizeof(fc_host->active_fc4s));
 	fc_host->speed = FC_PORTSPEED_UNKNOWN;
 	fc_host->fabric_name = -1;
+	memset(fc_host->symbolic_name, 0, sizeof(fc_host->symbolic_name));
+	memset(fc_host->system_hostname, 0, sizeof(fc_host->system_hostname));
 
 	fc_host->tgtid_bind_type = FC_TGTID_BIND_BY_WWPN;
 
@@ -688,6 +688,25 @@ store_fc_host_##field(struct class_device *cdev, const char *buf,	\
 	return count;							\
 }
 
+#define fc_host_store_str_function(field, slen)				\
+static ssize_t								\
+store_fc_host_##field(struct class_device *cdev, const char *buf,	\
+			   size_t count)				\
+{									\
+	struct Scsi_Host *shost = transport_class_to_shost(cdev);	\
+	struct fc_internal *i = to_fc_internal(shost->transportt);	\
+	unsigned int cnt=count;						\
+									\
+	/* count may include a LF at end of string */			\
+	if (buf[cnt-1] == '\n')						\
+		cnt--;							\
+	if (cnt > ((slen) - 1))						\
+		return -EINVAL;						\
+	memcpy(fc_host_##field(shost), buf, cnt);			\
+	i->f->set_host_##field(shost);					\
+	return count;							\
+}
+
 #define fc_host_rd_attr(field, format_string, sz)			\
 	fc_host_show_function(field, format_string, sz, )		\
 static FC_CLASS_DEVICE_ATTR(host, field, S_IRUGO,			\
@@ -859,6 +878,12 @@ fc_host_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN);
 fc_host_rd_attr_cast(fabric_name, "0x%llx\n", 20, unsigned long long);
 fc_host_rd_attr(symbolic_name, "%s\n", FC_SYMBOLIC_NAME_SIZE + 1);
 
+fc_private_host_show_function(system_hostname, "%s\n",
+		FC_SYMBOLIC_NAME_SIZE + 1, )
+fc_host_store_str_function(system_hostname, FC_SYMBOLIC_NAME_SIZE)
+static FC_CLASS_DEVICE_ATTR(host, system_hostname, S_IRUGO | S_IWUSR,
+		show_fc_host_system_hostname, store_fc_host_system_hostname);
+
 
 /* Private Host Attributes */
 
@@ -1234,6 +1259,7 @@ fc_attach_transport(struct fc_function_template *ft)
 	SETUP_HOST_ATTRIBUTE_RD(speed);
 	SETUP_HOST_ATTRIBUTE_RD(fabric_name);
 	SETUP_HOST_ATTRIBUTE_RD(symbolic_name);
+	SETUP_HOST_ATTRIBUTE_RW(system_hostname);
 
 	/* Transport-managed attributes */
 	SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type);
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index b7f62b85f0b35..c74be5dabfebb 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -312,7 +312,6 @@ struct fc_host_attrs {
 	u64 permanent_port_name;
 	u32 supported_classes;
 	u8  supported_fc4s[FC_FC4_LIST_SIZE];
-	char symbolic_name[FC_SYMBOLIC_NAME_SIZE];
 	u32 supported_speeds;
 	u32 maxframe_size;
 	char serial_number[FC_SERIAL_NUMBER_SIZE];
@@ -324,6 +323,8 @@ struct fc_host_attrs {
 	u8  active_fc4s[FC_FC4_LIST_SIZE];
 	u32 speed;
 	u64 fabric_name;
+	char symbolic_name[FC_SYMBOLIC_NAME_SIZE];
+	char system_hostname[FC_SYMBOLIC_NAME_SIZE];
 
 	/* Private (Transport-managed) Attributes */
 	enum fc_tgtid_binding_type  tgtid_bind_type;
@@ -354,8 +355,6 @@ struct fc_host_attrs {
 	(((struct fc_host_attrs *)(x)->shost_data)->supported_classes)
 #define fc_host_supported_fc4s(x)	\
 	(((struct fc_host_attrs *)(x)->shost_data)->supported_fc4s)
-#define fc_host_symbolic_name(x)	\
-	(((struct fc_host_attrs *)(x)->shost_data)->symbolic_name)
 #define fc_host_supported_speeds(x)	\
 	(((struct fc_host_attrs *)(x)->shost_data)->supported_speeds)
 #define fc_host_maxframe_size(x)	\
@@ -374,6 +373,10 @@ struct fc_host_attrs {
 	(((struct fc_host_attrs *)(x)->shost_data)->speed)
 #define fc_host_fabric_name(x)	\
 	(((struct fc_host_attrs *)(x)->shost_data)->fabric_name)
+#define fc_host_symbolic_name(x)	\
+	(((struct fc_host_attrs *)(x)->shost_data)->symbolic_name)
+#define fc_host_system_hostname(x)	\
+	(((struct fc_host_attrs *)(x)->shost_data)->system_hostname)
 #define fc_host_tgtid_bind_type(x) \
 	(((struct fc_host_attrs *)(x)->shost_data)->tgtid_bind_type)
 #define fc_host_rports(x) \
@@ -410,6 +413,7 @@ struct fc_function_template {
 	void	(*get_host_speed)(struct Scsi_Host *);
 	void	(*get_host_fabric_name)(struct Scsi_Host *);
 	void	(*get_host_symbolic_name)(struct Scsi_Host *);
+	void	(*set_host_system_hostname)(struct Scsi_Host *);
 
 	struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *);
 	void	(*reset_fc_host_stats)(struct Scsi_Host *);
@@ -457,6 +461,7 @@ struct fc_function_template {
 	unsigned long	show_host_speed:1;
 	unsigned long	show_host_fabric_name:1;
 	unsigned long	show_host_symbolic_name:1;
+	unsigned long	show_host_system_hostname:1;
 };
 
 
@@ -492,6 +497,25 @@ fc_remote_port_chkready(struct fc_rport *rport)
 	return result;
 }
 
+static inline u64 wwn_to_u64(u8 *wwn)
+{
+	return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 |
+	    (u64)wwn[2] << 40 | (u64)wwn[3] << 32 |
+	    (u64)wwn[4] << 24 | (u64)wwn[5] << 16 |
+	    (u64)wwn[6] <<  8 | (u64)wwn[7];
+}
+
+static inline void u64_to_wwn(u64 inm, u8 *wwn)
+{
+	wwn[0] = (inm >> 56) & 0xff;
+	wwn[1] = (inm >> 48) & 0xff;
+	wwn[2] = (inm >> 40) & 0xff;
+	wwn[3] = (inm >> 32) & 0xff;
+	wwn[4] = (inm >> 24) & 0xff;
+	wwn[5] = (inm >> 16) & 0xff;
+	wwn[6] = (inm >> 8) & 0xff;
+	wwn[7] = inm & 0xff;
+}
 
 struct scsi_transport_template *fc_attach_transport(
 			struct fc_function_template *);
@@ -503,12 +527,4 @@ void fc_remote_port_delete(struct fc_rport  *rport);
 void fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles);
 int scsi_is_fc_rport(const struct device *);
 
-static inline u64 wwn_to_u64(u8 *wwn)
-{
-	return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 |
-	    (u64)wwn[2] << 40 | (u64)wwn[3] << 32 |
-	    (u64)wwn[4] << 24 | (u64)wwn[5] << 16 |
-	    (u64)wwn[6] <<  8 | (u64)wwn[7];
-}
-
 #endif /* SCSI_TRANSPORT_FC_H */
-- 
GitLab


From 4041b9cd87d97a7c73a5bf5a9305dffee2599386 Mon Sep 17 00:00:00 2001
From: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Date: Thu, 17 Aug 2006 13:28:22 +0000
Subject: [PATCH 0113/1063] [SCSI] megaraid_sas: pci_module_init to
 pci_register_driver conversion

Signed-off-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Acked-by: "Patro, Sumant" <Sumant.Patro@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/megaraid/megaraid_sas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index b36307d2e283a..4cab5b534b259 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -2838,7 +2838,7 @@ static int __init megasas_init(void)
 	/*
 	 * Register ourselves as PCI hotplug module
 	 */
-	rval = pci_module_init(&megasas_pci_driver);
+	rval = pci_register_driver(&megasas_pci_driver);
 
 	if (rval) {
 		printk(KERN_DEBUG "megasas: PCI hotplug regisration failed \n");
-- 
GitLab


From d2afb3ae04e36dbc6e9eb2d8bd54406ff7b6b3bd Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Mon, 14 Aug 2006 23:09:23 -0700
Subject: [PATCH 0114/1063] [SCSI] BusLogic gcc 4.1 warning fixes

- Reworked all the very long lines in that block (this driver's full of
  them though)

- Returns an error in three places that it didn't before.

- Properly clean up after a scsi_add_host() failure.

Signed-off-by: Daniel Walker <dwalker@mvista.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/BusLogic.c | 46 +++++++++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 16a12a3b7b2bc..59d1adaed73ea 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -2176,6 +2176,7 @@ static int __init BusLogic_init(void)
 {
 	int BusLogicHostAdapterCount = 0, DriverOptionsIndex = 0, ProbeIndex;
 	struct BusLogic_HostAdapter *PrototypeHostAdapter;
+	int ret = 0;
 
 #ifdef MODULE
 	if (BusLogic)
@@ -2282,25 +2283,49 @@ static int __init BusLogic_init(void)
 		   perform Target Device Inquiry.
 		 */
 		if (BusLogic_ReadHostAdapterConfiguration(HostAdapter) &&
-		    BusLogic_ReportHostAdapterConfiguration(HostAdapter) && BusLogic_AcquireResources(HostAdapter) && BusLogic_CreateInitialCCBs(HostAdapter) && BusLogic_InitializeHostAdapter(HostAdapter) && BusLogic_TargetDeviceInquiry(HostAdapter)) {
+		    BusLogic_ReportHostAdapterConfiguration(HostAdapter) &&
+		    BusLogic_AcquireResources(HostAdapter) &&
+		    BusLogic_CreateInitialCCBs(HostAdapter) &&
+		    BusLogic_InitializeHostAdapter(HostAdapter) &&
+		    BusLogic_TargetDeviceInquiry(HostAdapter)) {
 			/*
 			   Initialization has been completed successfully.  Release and
 			   re-register usage of the I/O Address range so that the Model
 			   Name of the Host Adapter will appear, and initialize the SCSI
 			   Host structure.
 			 */
-			release_region(HostAdapter->IO_Address, HostAdapter->AddressCount);
-			if (!request_region(HostAdapter->IO_Address, HostAdapter->AddressCount, HostAdapter->FullModelName)) {
-				printk(KERN_WARNING "BusLogic: Release and re-register of " "port 0x%04lx failed \n", (unsigned long) HostAdapter->IO_Address);
+			release_region(HostAdapter->IO_Address,
+				       HostAdapter->AddressCount);
+			if (!request_region(HostAdapter->IO_Address,
+					    HostAdapter->AddressCount,
+					    HostAdapter->FullModelName)) {
+				printk(KERN_WARNING
+					"BusLogic: Release and re-register of "
+					"port 0x%04lx failed \n",
+					(unsigned long)HostAdapter->IO_Address);
 				BusLogic_DestroyCCBs(HostAdapter);
 				BusLogic_ReleaseResources(HostAdapter);
 				list_del(&HostAdapter->host_list);
 				scsi_host_put(Host);
+				ret = -ENOMEM;
 			} else {
-				BusLogic_InitializeHostStructure(HostAdapter, Host);
-				scsi_add_host(Host, HostAdapter->PCI_Device ? &HostAdapter->PCI_Device->dev : NULL);
-				scsi_scan_host(Host);
-				BusLogicHostAdapterCount++;
+				BusLogic_InitializeHostStructure(HostAdapter,
+								 Host);
+				if (scsi_add_host(Host, HostAdapter->PCI_Device
+						? &HostAdapter->PCI_Device->dev
+						  : NULL)) {
+					printk(KERN_WARNING
+					       "BusLogic: scsi_add_host()"
+					       "failed!\n");
+					BusLogic_DestroyCCBs(HostAdapter);
+					BusLogic_ReleaseResources(HostAdapter);
+					list_del(&HostAdapter->host_list);
+					scsi_host_put(Host);
+					ret = -ENODEV;
+				} else {
+					scsi_scan_host(Host);
+					BusLogicHostAdapterCount++;
+				}
 			}
 		} else {
 			/*
@@ -2315,12 +2340,13 @@ static int __init BusLogic_init(void)
 			BusLogic_ReleaseResources(HostAdapter);
 			list_del(&HostAdapter->host_list);
 			scsi_host_put(Host);
+			ret = -ENODEV;
 		}
 	}
 	kfree(PrototypeHostAdapter);
 	kfree(BusLogic_ProbeInfoList);
 	BusLogic_ProbeInfoList = NULL;
-	return 0;
+	return ret;
 }
 
 
@@ -2954,6 +2980,7 @@ static int BusLogic_QueueCommand(struct scsi_cmnd *Command, void (*CompletionRou
 }
 
 
+#if 0
 /*
   BusLogic_AbortCommand aborts Command if possible.
 */
@@ -3024,6 +3051,7 @@ static int BusLogic_AbortCommand(struct scsi_cmnd *Command)
 	return SUCCESS;
 }
 
+#endif
 /*
   BusLogic_ResetHostAdapter resets Host Adapter if possible, marking all
   currently executing SCSI Commands as having been Reset.
-- 
GitLab


From 11a27ad782fc7ae4b7d6ac8fefad4ceb415300d6 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Wed, 9 Aug 2006 17:00:30 +1000
Subject: [PATCH 0115/1063] [POWERPC] SLB shadow buffer cleanup

Cleanup some of the #define magic as suggested by Milton.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/asm-offsets.c |  5 +++++
 arch/powerpc/kernel/entry_64.S    | 13 ++++---------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 2ef7ea8603799..a2f95e467a75b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -42,6 +42,7 @@
 #include <asm/lppaca.h>
 #include <asm/cache.h>
 #include <asm/compat.h>
+#include <asm/mmu.h>
 #endif
 
 #define DEFINE(sym, val) \
@@ -137,6 +138,10 @@ int main(void)
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 	DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
 
+	DEFINE(SLBSHADOW_STACKVSID,
+	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
+	DEFINE(SLBSHADOW_STACKESID,
+	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
 	DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 5baea498ea641..2cd872b5283b7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -323,11 +323,6 @@ _GLOBAL(ret_from_fork)
  * The code which creates the new task context is in 'copy_thread'
  * in arch/powerpc/kernel/process.c 
  */
-#define SHADOW_SLB_BOLTED_STACK_ESID \
-		(SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1))
-#define SHADOW_SLB_BOLTED_STACK_VSID \
-		(SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1) + 8)
-
 	.align	7
 _GLOBAL(_switch)
 	mflr	r0
@@ -383,10 +378,10 @@ BEGIN_FTR_SECTION
 
 	/* Update the last bolted SLB */
 	ld	r9,PACA_SLBSHADOWPTR(r13)
- 	li	r12,0
-  	std	r12,SHADOW_SLB_BOLTED_STACK_ESID(r9) /* Clear ESID */
-	std	r7,SHADOW_SLB_BOLTED_STACK_VSID(r9)  /* Save VSID */
- 	std	r0,SHADOW_SLB_BOLTED_STACK_ESID(r9)  /* Save ESID */
+	li	r12,0
+	std	r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
+	std	r7,SLBSHADOW_STACKVSID(r9)  /* Save VSID */
+	std	r0,SLBSHADOW_STACKESID(r9)  /* Save ESID */
 
 	slbie	r6
 	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
-- 
GitLab


From 9e6ee340194e8bd8f463b55c6d028272c0e64155 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoffrey.levand@am.sony.com>
Date: Wed, 9 Aug 2006 15:28:13 -0700
Subject: [PATCH 0116/1063] [POWERPC] cell: interrupt.c whitespace clean up

Whitespace clean up for cell/interrupt.c.

Signed-off-by: Geoff Levand <geoffrey.levand@am.sony.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/interrupt.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 7813a58e0db4f..6b57a47c5d37f 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -89,17 +89,17 @@ static struct irq_chip iic_chip = {
 /* Get an IRQ number from the pending state register of the IIC */
 static unsigned int iic_get_irq(struct pt_regs *regs)
 {
-  	struct cbe_iic_pending_bits pending;
- 	struct iic *iic;
-
- 	iic = &__get_cpu_var(iic);
- 	*(unsigned long *) &pending =
- 		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
- 	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
- 	BUG_ON(iic->eoi_ptr > 15);
+	struct cbe_iic_pending_bits pending;
+	struct iic *iic;
+
+	iic = &__get_cpu_var(iic);
+	*(unsigned long *) &pending =
+		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+	BUG_ON(iic->eoi_ptr > 15);
 	if (pending.flags & CBE_IIC_IRQ_VALID)
 		return irq_linear_revmap(iic->host,
- 					 iic_pending_to_hwnum(pending));
+					 iic_pending_to_hwnum(pending));
 	return NO_IRQ;
 }
 
@@ -250,7 +250,7 @@ static int __init setup_iic(void)
 	struct resource r0, r1;
 	struct irq_host *host;
 	int found = 0;
- 	const u32 *np;
+	const u32 *np;
 
 	for (dn = NULL;
 	     (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
@@ -258,7 +258,7 @@ static int __init setup_iic(void)
 				     "IBM,CBEA-Internal-Interrupt-Controller"))
 			continue;
 		np = get_property(dn, "ibm,interrupt-server-ranges", NULL);
- 		if (np == NULL) {
+		if (np == NULL) {
 			printk(KERN_WARNING "IIC: CPU association not found\n");
 			of_node_put(dn);
 			return -ENODEV;
-- 
GitLab


From 2e97425197ecf85641a89e5a4868f8e147cc443f Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Fri, 11 Aug 2006 00:03:01 -0500
Subject: [PATCH 0117/1063] [POWERPC] Rename cpu_setup_power4.S to
 cpu_setup_ppc970.S

Rename cpu_setup_power4.S to cpu_setup_ppc970.S, since that's
really what it is.

No functional or other changes.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/Makefile                                   | 2 +-
 arch/powerpc/kernel/{cpu_setup_power4.S => cpu_setup_ppc970.S} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename arch/powerpc/kernel/{cpu_setup_power4.S => cpu_setup_ppc970.S} (100%)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 814f242aeb8cc..bcf50031a92ad 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -16,7 +16,7 @@ obj-y				:= semaphore.o cputable.o ptrace.o syscalls.o \
 obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o binfmt_elf32.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
-				   paca.o cpu_setup_power4.o \
+				   paca.o cpu_setup_ppc970.o \
 				   firmware.o sysfs.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o
diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
similarity index 100%
rename from arch/powerpc/kernel/cpu_setup_power4.S
rename to arch/powerpc/kernel/cpu_setup_ppc970.S
-- 
GitLab


From f39b7a55a84e34e3074b168e30dc73b66e85261d Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Fri, 11 Aug 2006 00:07:08 -0500
Subject: [PATCH 0118/1063] [POWERPC] Cleanup CPU inits

Cleanup CPU inits a bit more, Geoff Levand already did some earlier.

* Move CPU state save to cpu_setup, since cpu_setup is only ever done
  on cpu 0 on 64-bit and save is never done more than once.
* Rename __restore_cpu_setup to __restore_cpu_ppc970 and add
  function pointers to the cputable to use instead. Powermac always
  has 970 so no need to check there.
* Rename __970_cpu_preinit to __cpu_preinit_ppc970 and check PVR before
  calling it instead of in it, it's too early to use cputable.
* Rename pSeries_secondary_smp_init to generic_secondary_smp_init since
  everyone but powermac and iSeries use it.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/asm-offsets.c      |  1 +
 arch/powerpc/kernel/cpu_setup_ppc970.S | 99 +++++++-------------------
 arch/powerpc/kernel/cputable.c         |  6 ++
 arch/powerpc/kernel/head_64.S          | 52 +++++++++-----
 arch/powerpc/platforms/cell/smp.c      |  4 +-
 arch/powerpc/platforms/pseries/smp.c   |  4 +-
 include/asm-powerpc/cputable.h         |  3 +
 7 files changed, 71 insertions(+), 98 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a2f95e467a75b..c53acd2a6dfcd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -246,6 +246,7 @@ int main(void)
 	DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value));
 	DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
 	DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
+	DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore));
 
 #ifndef CONFIG_PPC64
 	DEFINE(pbe_address, offsetof(struct pbe, address));
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
index f69af2c5d7b30..f619932794e80 100644
--- a/arch/powerpc/kernel/cpu_setup_ppc970.S
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -16,27 +16,12 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 
-_GLOBAL(__970_cpu_preinit)
-	/*
-	 * Do nothing if not running in HV mode
-	 */
+_GLOBAL(__cpu_preinit_ppc970)
+	/* Do nothing if not running in HV mode */
 	mfmsr	r0
 	rldicl.	r0,r0,4,63
 	beqlr
 
-	/*
-	 * Deal only with PPC970 and PPC970FX.
-	 */
-	mfspr	r0,SPRN_PVR
-	srwi	r0,r0,16
-	cmpwi	r0,0x39
-	beq	1f
-	cmpwi	r0,0x3c
-	beq	1f
-	cmpwi	r0,0x44
-	bnelr
-1:
-
 	/* Make sure HID4:rm_ci is off before MMU is turned off, that large
 	 * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
 	 * HID5:DCBZ32_ill
@@ -72,21 +57,6 @@ _GLOBAL(__970_cpu_preinit)
 	isync
 	blr
 
-_GLOBAL(__setup_cpu_ppc970)
-	mfspr	r0,SPRN_HID0
-	li	r11,5			/* clear DOZE and SLEEP */
-	rldimi	r0,r11,52,8		/* set NAP and DPM */
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	sync
-	isync
-	blr
-
 /* Definitions for the table use to save CPU states */
 #define CS_HID0		0
 #define CS_HID1		8
@@ -101,33 +71,28 @@ cpu_state_storage:
 	.balign	L1_CACHE_BYTES,0
 	.text
 
-/* Called in normal context to backup CPU 0 state. This
- * does not include cache settings. This function is also
- * called for machine sleep. This does not include the MMU
- * setup, BATs, etc... but rather the "special" registers
- * like HID0, HID1, HID4, etc...
- */
-_GLOBAL(__save_cpu_setup)
-	/* Some CR fields are volatile, we back it up all */
-	mfcr	r7
-
-	/* Get storage ptr */
-	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
 
-	/* We only deal with 970 for now */
-	mfspr	r0,SPRN_PVR
-	srwi	r0,r0,16
-	cmpwi	r0,0x39
-	beq	1f
-	cmpwi	r0,0x3c
-	beq	1f
-	cmpwi	r0,0x44
-	bne	2f
-
-1:	/* skip if not running in HV mode */
+_GLOBAL(__setup_cpu_ppc970)
+	/* Do nothing if not running in HV mode */
 	mfmsr	r0
 	rldicl.	r0,r0,4,63
-	beq	2f
+	beqlr
+
+	mfspr	r0,SPRN_HID0
+	li	r11,5			/* clear DOZE and SLEEP */
+	rldimi	r0,r11,52,8		/* set NAP and DPM */
+	mtspr	SPRN_HID0,r0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	sync
+	isync
+
+	/* Save away cpu state */
+	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
 
 	/* Save HID0,1,4 and 5 */
 	mfspr	r3,SPRN_HID0
@@ -139,35 +104,19 @@ _GLOBAL(__save_cpu_setup)
 	mfspr	r3,SPRN_HID5
 	std	r3,CS_HID5(r5)
 
-2:
-	mtcr	r7
 	blr
 
 /* Called with no MMU context (typically MSR:IR/DR off) to
  * restore CPU state as backed up by the previous
  * function. This does not include cache setting
  */
-_GLOBAL(__restore_cpu_setup)
-	/* Get storage ptr (FIXME when using anton reloc as we
-	 * are running with translation disabled here
-	 */
-	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
-
-	/* We only deal with 970 for now */
-	mfspr	r0,SPRN_PVR
-	srwi	r0,r0,16
-	cmpwi	r0,0x39
-	beq	1f
-	cmpwi	r0,0x3c
-	beq	1f
-	cmpwi	r0,0x44
-	bnelr
-
-1:	/* skip if not running in HV mode */
+_GLOBAL(__restore_cpu_ppc970)
+	/* Do nothing if not running in HV mode */
 	mfmsr	r0
 	rldicl.	r0,r0,4,63
 	beqlr
 
+	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
 	/* Before accessing memory, we make sure rm_ci is clear */
 	li	r0,0
 	mfspr	r3,SPRN_HID4
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 272e43622fd63..306da4cd37a0f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -39,7 +39,10 @@ extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
 #endif /* CONFIG_PPC32 */
+#ifdef CONFIG_PPC64
 extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_ppc970(void);
+#endif /* CONFIG_PPC64 */
 
 /* This table only contains "desktop" CPUs, it need to be filled with embedded
  * ones as well...
@@ -184,6 +187,7 @@ struct cpu_spec	cpu_specs[] = {
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
 		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
 		.oprofile_cpu_type	= "ppc64/970",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.platform		= "ppc970",
@@ -199,6 +203,7 @@ struct cpu_spec	cpu_specs[] = {
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
 		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
 		.oprofile_cpu_type	= "ppc64/970",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.platform		= "ppc970",
@@ -214,6 +219,7 @@ struct cpu_spec	cpu_specs[] = {
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
 		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
 		.oprofile_cpu_type	= "ppc64/970",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.platform		= "ppc970",
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 6ff3cf506088b..e9963d9f335af 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -132,7 +132,7 @@ _GLOBAL(__secondary_hold)
 	bne	100b
 
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
-	LOAD_REG_IMMEDIATE(r4, .pSeries_secondary_smp_init)
+	LOAD_REG_IMMEDIATE(r4, .generic_secondary_smp_init)
 	mtctr	r4
 	mr	r3,r24
 	bctr
@@ -1484,19 +1484,17 @@ fwnmi_data_area:
         . = 0x8000
 
 /*
- * On pSeries, secondary processors spin in the following code.
+ * On pSeries and most other platforms, secondary processors spin
+ * in the following code.
  * At entry, r3 = this processor's number (physical cpu id)
  */
-_GLOBAL(pSeries_secondary_smp_init)
+_GLOBAL(generic_secondary_smp_init)
 	mr	r24,r3
 	
 	/* turn on 64-bit mode */
 	bl	.enable_64b_mode
 	isync
 
-	/* Copy some CPU settings from CPU 0 */
-	bl	.__restore_cpu_setup
-
 	/* Set up a paca value for this processor. Since we have the
 	 * physical cpu id in r24, we need to search the pacas to find
 	 * which logical id maps to our physical one.
@@ -1522,15 +1520,28 @@ _GLOBAL(pSeries_secondary_smp_init)
 					/* start.			 */
 	sync
 
-	/* Create a temp kernel stack for use before relocation is on.	*/
+#ifndef CONFIG_SMP
+	b	3b			/* Never go on non-SMP		 */
+#else
+	cmpwi	0,r23,0
+	beq	3b			/* Loop until told to go	 */
+
+	/* See if we need to call a cpu state restore handler */
+	LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
+	ld	r23,0(r23)
+	ld	r23,CPU_SPEC_RESTORE(r23)
+	cmpdi	0,r23,0
+	beq	4f
+	ld	r23,0(r23)
+	mtctr	r23
+	bctrl
+
+4:	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0
-#ifdef CONFIG_SMP
-	bne	.__secondary_start
+	b	.__secondary_start
 #endif
-	b 	3b			/* Loop until told to go	 */
 
 #ifdef CONFIG_PPC_ISERIES
 _STATIC(__start_initialization_iSeries)
@@ -1611,7 +1622,16 @@ _GLOBAL(__start_initialization_multiplatform)
 	bl	.enable_64b_mode
 
 	/* Setup some critical 970 SPRs before switching MMU off */
-	bl	.__970_cpu_preinit
+	mfspr	r0,SPRN_PVR
+	srwi	r0,r0,16
+	cmpwi	r0,0x39		/* 970 */
+	beq	1f
+	cmpwi	r0,0x3c		/* 970FX */
+	beq	1f
+	cmpwi	r0,0x44		/* 970MP */
+	bne	2f
+1:	bl	.__cpu_preinit_ppc970
+2:
 
 	/* Switch off MMU if not already */
 	LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
@@ -1782,7 +1802,7 @@ _GLOBAL(pmac_secondary_start)
 	isync
 
 	/* Copy some CPU settings from CPU 0 */
-	bl	.__restore_cpu_setup
+	bl	.__restore_cpu_ppc970
 
 	/* pSeries do that early though I don't think we really need it */
 	mfmsr	r3
@@ -1932,12 +1952,6 @@ _STATIC(start_here_multiplatform)
 	mr	r5,r26
 	bl	.identify_cpu
 
-	/* Save some low level config HIDs of CPU0 to be copied to
-	 * other CPUs later on, or used for suspend/resume
-	 */
-	bl	.__save_cpu_setup
-	sync
-
 	/* Do very early kernel initializations, including initial hash table,
 	 * stab and slb setup before we turn on relocation.	*/
 
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 46aef06407426..1c0acbad7425b 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -57,7 +57,7 @@
  */
 static cpumask_t of_spin_map;
 
-extern void pSeries_secondary_smp_init(unsigned long);
+extern void generic_secondary_smp_init(unsigned long);
 
 /**
  * smp_startup_cpu() - start the given cpu
@@ -74,7 +74,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 {
 	int status;
 	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       pSeries_secondary_smp_init));
+					       generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index f39dad8b99e0d..c6624b8a0e774 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -62,7 +62,7 @@
  */
 static cpumask_t of_spin_map;
 
-extern void pSeries_secondary_smp_init(unsigned long);
+extern void generic_secondary_smp_init(unsigned long);
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -270,7 +270,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 {
 	int status;
 	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       pSeries_secondary_smp_init));
+					       generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 1ba3c9983614d..748bc1805da90 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -36,6 +36,7 @@
 struct cpu_spec;
 
 typedef	void (*cpu_setup_t)(unsigned long offset, struct cpu_spec* spec);
+typedef	void (*cpu_restore_t)(void);
 
 enum powerpc_oprofile_type {
 	PPC_OPROFILE_INVALID = 0,
@@ -65,6 +66,8 @@ struct cpu_spec {
 	 * BHT, SPD, etc... from head.S before branching to identify_machine
 	 */
 	cpu_setup_t	cpu_setup;
+	/* Used to restore cpu setup on secondary processors and at resume */
+	cpu_restore_t	cpu_restore;
 
 	/* Used by oprofile userspace to select the right counters */
 	char		*oprofile_cpu_type;
-- 
GitLab


From 869d7f381e8c32de85ddfa9621125fb10a885f87 Mon Sep 17 00:00:00 2001
From: Jon Loeliger <jdl@freescale.com>
Date: Tue, 15 Aug 2006 16:19:02 -0500
Subject: [PATCH 0119/1063] [POWERPC] Allow MPC8641 HPCN to build with
 CONFIG_PCI disabled too.

Signed-off-by: Jon Loeliger <jdl@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 9 +++++++--
 arch/powerpc/platforms/86xx/pci.c          | 3 ++-
 include/asm-powerpc/mpc86xx.h              | 2 --
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 4a33d95e7ad75..496cc7c3a54c4 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -52,6 +52,7 @@ unsigned long pci_dram_offset = 0;
 #endif
 
 
+#ifdef CONFIG_PCI
 static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc,
 				 struct pt_regs *regs)
 {
@@ -60,14 +61,18 @@ static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc,
 		generic_handle_irq(cascade_irq, regs);
 	desc->chip->eoi(irq);
 }
+#endif	/* CONFIG_PCI */
 
 void __init
 mpc86xx_hpcn_init_irq(void)
 {
 	struct mpic *mpic1;
-	struct device_node *np, *cascade_node = NULL;
-	int cascade_irq;
+	struct device_node *np;
 	phys_addr_t openpic_paddr;
+#ifdef CONFIG_PCI
+	struct device_node *cascade_node = NULL;
+	int cascade_irq;
+#endif
 
 	np = of_find_node_by_type(NULL, "open-pic");
 	if (np == NULL)
diff --git a/arch/powerpc/platforms/86xx/pci.c b/arch/powerpc/platforms/86xx/pci.c
index d7050c1108ff1..481e18ed5be94 100644
--- a/arch/powerpc/platforms/86xx/pci.c
+++ b/arch/powerpc/platforms/86xx/pci.c
@@ -188,7 +188,8 @@ int __init add_bridge(struct device_node *dev)
 
 	printk(KERN_INFO "Found MPC86xx PCIE host bridge at 0x%08lx. "
 	       "Firmware bus number: %d->%d\n",
-		rsrc.start, hose->first_busno, hose->last_busno);
+	       (unsigned long) rsrc.start,
+	       hose->first_busno, hose->last_busno);
 
 	DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
 		hose, hose->cfg_addr, hose->cfg_data);
diff --git a/include/asm-powerpc/mpc86xx.h b/include/asm-powerpc/mpc86xx.h
index f260382739faa..2d6ad859df7f5 100644
--- a/include/asm-powerpc/mpc86xx.h
+++ b/include/asm-powerpc/mpc86xx.h
@@ -23,8 +23,6 @@
 #define _ISA_MEM_BASE   isa_mem_base
 #ifdef CONFIG_PCI
 #define PCI_DRAM_OFFSET pci_dram_offset
-#else
-#define PCI_DRAM_OFFSET 0
 #endif
 
 #define CPU0_BOOT_RELEASE 0x01000000
-- 
GitLab


From 9a2ded55c40ad17b8b12f87c592a40b2e8593c4d Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Wed, 16 Aug 2006 23:12:14 -0500
Subject: [PATCH 0120/1063] [POWERPC] powerpc: Make RTAS console init generic

The rtas console doesn't have to be Cell specific.  If we get both
RTAS tokens, we should just enable the console then and there.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig                | 3 ++-
 arch/powerpc/kernel/rtas.c          | 5 +++++
 arch/powerpc/platforms/cell/setup.c | 4 ----
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 13e583f16ede5..7f782b338e8e5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -426,7 +426,8 @@ config PPC_IBM_CELL_BLADE
 	select UDBG_RTAS_CONSOLE
 
 config UDBG_RTAS_CONSOLE
-	bool
+	bool "RTAS based debug console"
+	depends on PPC_RTAS
 	default n
 
 config XICS
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index bfd66d3a035c9..6b0699b82b41a 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -910,6 +910,11 @@ int __init early_init_dt_scan_rtas(unsigned long node,
 	basep = of_get_flat_dt_prop(node, "get-term-char", NULL);
 	if (basep)
 		rtas_getchar_token = *basep;
+
+	if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE &&
+	    rtas_getchar_token != RTAS_UNKNOWN_SERVICE)
+		udbg_init_rtas_console();
+
 #endif
 
 	/* break now */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 282987d6d4a21..22c228a49c337 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -150,10 +150,6 @@ static int __init cell_probe(void)
 	    !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
 		return 0;
 
-#ifdef CONFIG_UDBG_RTAS_CONSOLE
-	udbg_init_rtas_console();
-#endif
-
 	hpte_init_native();
 
 	return 1;
-- 
GitLab


From a0a428e30077fd64c39aadf5221cf2c7a14dc281 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 16 Aug 2006 15:24:28 +1000
Subject: [PATCH 0121/1063] [POWERPC] iseries: remove const warning

Just one bit of fallout from the constification of the get_property
return value.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/iseries/viopath.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index efeb6ae9df64c..9baa4ee82592e 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -117,6 +117,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
 	HvLpEvent_Rc hvrc;
 	DECLARE_MUTEX_LOCKED(Semaphore);
 	struct device_node *node;
+	const char *sysid;
 
 	buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -152,15 +153,15 @@ static int proc_viopath_show(struct seq_file *m, void *v)
 	seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
 
 	node = of_find_node_by_path("/");
-	buf = NULL;
+	sysid = NULL;
 	if (node != NULL)
-		buf = get_property(node, "system-id", NULL);
+		sysid = get_property(node, "system-id", NULL);
 
-	if (buf == NULL)
+	if (sysid == NULL)
 		seq_printf(m, "SRLNBR=<UNKNOWN>\n");
 	else
 		/* Skip "IBM," on front of serial number, see dt.c */
-		seq_printf(m, "SRLNBR=%s\n", buf + 4);
+		seq_printf(m, "SRLNBR=%s\n", sysid + 4);
 
 	of_node_put(node);
 
-- 
GitLab


From 6f3d5d3cc4b1447578ae8484166bbc34a64150c5 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 16 Aug 2006 22:04:14 +1000
Subject: [PATCH 0122/1063] [POWERPC] Add a helper for calculating RTAS
 "config_addr" parameters

Several RTAS calls take a "config_addr" parameter, which is a particular
way of specifying a PCI busno, devfn and register number into a 32-bit word.
Currently these are open-coded, and I'll be adding another soon, so replace
them with a helper that encapsulates the logic. Be more strict about masking
the busno too, just in case.

Booted on P5 LPAR.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/rtas_pci.c |  6 ++----
 include/asm-powerpc/rtas.h     | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 5a798ac6aecfb..b4a0de79c0600 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -81,8 +81,7 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 	if (!config_access_valid(pdn, where))
 		return PCIBIOS_BAD_REGISTER_NUMBER;
 
-	addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
-		(pdn->devfn << 8) | (where & 0xff);
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
 	buid = pdn->phb->buid;
 	if (buid) {
 		ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
@@ -134,8 +133,7 @@ int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
 	if (!config_access_valid(pdn, where))
 		return PCIBIOS_BAD_REGISTER_NUMBER;
 
-	addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
-		(pdn->devfn << 8) | (where & 0xff);
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
 	buid = pdn->phb->buid;
 	if (buid) {
 		ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
diff --git a/include/asm-powerpc/rtas.h b/include/asm-powerpc/rtas.h
index 82a27e9a041ff..d34f9e1f242c0 100644
--- a/include/asm-powerpc/rtas.h
+++ b/include/asm-powerpc/rtas.h
@@ -230,5 +230,21 @@ extern unsigned long rtas_rmo_buf;
 
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
+/**
+ * rtas_config_addr - Format a busno, devfn and reg for RTAS.
+ * @busno: The bus number.
+ * @devfn: The device and function number as encoded by PCI_DEVFN().
+ * @reg: The register number.
+ *
+ * This function encodes the given busno, devfn and register number as
+ * required for RTAS calls that take a "config_addr" parameter.
+ * See PAPR requirement 7.3.4-1 for more info.
+ */
+static inline u32 rtas_config_addr(int busno, int devfn, int reg)
+{
+	return ((reg & 0xf00) << 20) | ((busno & 0xff) << 16) |
+			(devfn << 8) | (reg & 0xff);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_RTAS_H */
-- 
GitLab


From e2bf2e26c0915d54208315fc8c9864f1d987217a Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 17 Aug 2006 16:28:28 +1000
Subject: [PATCH 0123/1063] [POWERPC] iseries: remove some gcc 4.1 warnings

gcc 4.1 produces some warnings that say it is ignoring the packed
attribute on some structure elements, so, since all the elements of
these structs are packed, pack the structs instead.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/iseries/main_store.h | 126 ++++++++++----------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/arch/powerpc/platforms/iseries/main_store.h b/arch/powerpc/platforms/iseries/main_store.h
index 74f6889f834f1..1a7a3f50e40b0 100644
--- a/arch/powerpc/platforms/iseries/main_store.h
+++ b/arch/powerpc/platforms/iseries/main_store.h
@@ -61,9 +61,9 @@ struct IoHriMainStoreSegment4 {
 };
 
 /* Main Store VPD for Power4 */
-struct IoHriMainStoreChipInfo1 {
-	u32	chipMfgID	__attribute((packed));
-	char	chipECLevel[4]	__attribute((packed));
+struct __attribute((packed)) IoHriMainStoreChipInfo1 {
+	u32	chipMfgID;
+	char	chipECLevel[4];
 };
 
 struct IoHriMainStoreVpdIdData {
@@ -73,72 +73,72 @@ struct IoHriMainStoreVpdIdData {
 	char	serialNumber[12];
 };
 
-struct IoHriMainStoreVpdFruData {
-	char	fruLabel[8]	__attribute((packed));
-	u8	numberOfSlots	__attribute((packed));
-	u8	pluggingType	__attribute((packed));
-	u16	slotMapIndex	__attribute((packed));
+struct	__attribute((packed)) IoHriMainStoreVpdFruData {
+	char	fruLabel[8];
+	u8	numberOfSlots;
+	u8	pluggingType;
+	u16	slotMapIndex;
 };
 
-struct IoHriMainStoreAdrRangeBlock {
-	void	*blockStart      __attribute((packed));
-	void	*blockEnd        __attribute((packed));
-	u32	blockProcChipId __attribute((packed));
+struct  __attribute((packed)) IoHriMainStoreAdrRangeBlock {
+	void	*blockStart;
+	void	*blockEnd;
+	u32	blockProcChipId;
 };
 
 #define MaxAreaAdrRangeBlocks 4
 
-struct IoHriMainStoreArea4 {
-	u32	msVpdFormat			__attribute((packed));
-	u8	containedVpdType		__attribute((packed));
-	u8	reserved1			__attribute((packed));
-	u16	reserved2			__attribute((packed));
-
-	u64	msExists			__attribute((packed));
-	u64	msFunctional			__attribute((packed));
-
-	u32	memorySize			__attribute((packed));
-	u32	procNodeId			__attribute((packed));
-
-	u32	numAdrRangeBlocks		__attribute((packed));
-	struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks]	__attribute((packed));
-
-	struct IoHriMainStoreChipInfo1	chipInfo0	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo1	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo2	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo3	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo4	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo5	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo6	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo7	__attribute((packed));
-
-	void	*msRamAreaArray			__attribute((packed));
-	u32	msRamAreaArrayNumEntries	__attribute((packed));
-	u32	msRamAreaArrayEntrySize		__attribute((packed));
-
-	u32	numaDimmExists			__attribute((packed));
-	u32	numaDimmFunctional		__attribute((packed));
-	void	*numaDimmArray			__attribute((packed));
-	u32	numaDimmArrayNumEntries		__attribute((packed));
-	u32	numaDimmArrayEntrySize		__attribute((packed));
-
-	struct IoHriMainStoreVpdIdData idData	__attribute((packed));
-
-	u64	powerData			__attribute((packed));
-	u64	cardAssemblyPartNum		__attribute((packed));
-	u64	chipSerialNum			__attribute((packed));
-
-	u64	reserved3			__attribute((packed));
-	char	reserved4[16]			__attribute((packed));
-
-	struct IoHriMainStoreVpdFruData fruData	__attribute((packed));
-
-	u8	vpdPortNum			__attribute((packed));
-	u8	reserved5			__attribute((packed));
-	u8	frameId				__attribute((packed));
-	u8	rackUnit			__attribute((packed));
-	char	asciiKeywordVpd[256]		__attribute((packed));
-	u32	reserved6			__attribute((packed));
+struct __attribute((packed)) IoHriMainStoreArea4 {
+	u32	msVpdFormat;
+	u8	containedVpdType;
+	u8	reserved1;
+	u16	reserved2;
+
+	u64	msExists;
+	u64	msFunctional;
+
+	u32	memorySize;
+	u32	procNodeId;
+
+	u32	numAdrRangeBlocks;
+	struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks];
+
+	struct IoHriMainStoreChipInfo1	chipInfo0;
+	struct IoHriMainStoreChipInfo1	chipInfo1;
+	struct IoHriMainStoreChipInfo1	chipInfo2;
+	struct IoHriMainStoreChipInfo1	chipInfo3;
+	struct IoHriMainStoreChipInfo1	chipInfo4;
+	struct IoHriMainStoreChipInfo1	chipInfo5;
+	struct IoHriMainStoreChipInfo1	chipInfo6;
+	struct IoHriMainStoreChipInfo1	chipInfo7;
+
+	void	*msRamAreaArray;
+	u32	msRamAreaArrayNumEntries;
+	u32	msRamAreaArrayEntrySize;
+
+	u32	numaDimmExists;
+	u32	numaDimmFunctional;
+	void	*numaDimmArray;
+	u32	numaDimmArrayNumEntries;
+	u32	numaDimmArrayEntrySize;
+
+	struct IoHriMainStoreVpdIdData idData;
+
+	u64	powerData;
+	u64	cardAssemblyPartNum;
+	u64	chipSerialNum;
+
+	u64	reserved3;
+	char	reserved4[16];
+
+	struct IoHriMainStoreVpdFruData fruData;
+
+	u8	vpdPortNum;
+	u8	reserved5;
+	u8	frameId;
+	u8	rackUnit;
+	char	asciiKeywordVpd[256];
+	u32	reserved6;
 };
 
 
-- 
GitLab


From 39ed2fe62c39ac46cda00b1759806a297f38743b Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Mon, 21 Aug 2006 18:11:32 +0200
Subject: [PATCH 0124/1063] [POWERPC] reboot when panic_timeout is set

Only call into RTAS when booted with panic=0 because the RTAS call
does not return.  The system has to be rebooted via the HMC or via the
management console right now.  This is cumbersome and not what the
default panic=180 is supposed to do.

Signed-off-by: Olaf Hering <olh@suse.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/rtas.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 6b0699b82b41a..6ef80d4e38d3c 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -628,6 +628,9 @@ void rtas_os_term(char *str)
 {
 	int status;
 
+	if (panic_timeout)
+		return;
+
 	if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term"))
 		return;
 
-- 
GitLab


From 271c511db9d37d6797745adb1f151a8bd2838c6f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 22 Aug 2006 16:57:05 +0200
Subject: [PATCH 0125/1063] [POWERPC] make checkstack work with ARCH=powerpc

This patch adds 'powerpc' architecture support to checkstack.pl.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 scripts/checkstack.pl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index b34924663ac1f..f7844f6aa487a 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -62,6 +62,8 @@ my (@stack, $re, $x, $xs);
 	} elsif ($arch eq 'ppc64') {
 		#XXX
 		$re = qr/.*stdu.*r1,-($x{1,8})\(r1\)/o;
+	} elsif ($arch eq 'powerpc') {
+		$re = qr/.*st[dw]u.*r1,-($x{1,8})\(r1\)/o;
 	} elsif ($arch =~ /^s390x?$/) {
 		#   11160:       a7 fb ff 60             aghi   %r15,-160
 		$re = qr/.*ag?hi.*\%r15,-(([0-9]{2}|[3-9])[0-9]{2})/o;
-- 
GitLab


From 2818c5dec5e28d65d52afbb7695bbbafe6377ee5 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 25 Aug 2006 15:08:21 +1000
Subject: [PATCH 0126/1063] [POWERPC] Only offer CONFIG_BRIQ_PANEL if
 CONFIG_PPC_CHRP is enabled

since only the briQ has a briQ front panel, and the briQ is a CHRP and
is only supported if CONFIG_PPC_CHRP is set.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/char/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 320ad7ba11d44..52ea94b891f59 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -497,7 +497,7 @@ config LEGACY_PTY_COUNT
 
 config BRIQ_PANEL
 	tristate 'Total Impact briQ front panel driver'
-	depends on PPC
+	depends on PPC_CHRP
 	---help---
 	  The briQ is a small footprint CHRP computer with a frontpanel VFD, a
 	  tristate led and two switches. It is the size of a CDROM drive.
-- 
GitLab


From f4ad7b5807385ad1fed0347d966e51a797cd1013 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Fri, 25 Aug 2006 13:48:18 -0500
Subject: [PATCH 0127/1063] [SCSI] scsi_transport_sas: remove local_attached
 flag

This flag denotes local attachment of the phy.  There are two problems
with it:

1) It's actually redundant ... you can get the same information simply
by seeing whether a host is the phy's parent
2) we condition a lot of phy parameters on it on the false assumption
that we can only control local phys.  I'm wiring up phy resets in the
aic94xx now, and it will be able to reset non-local phys as well.

I fixed 2) by moving the local check into the reset and stats function
of the mptsas, since that seems to be the only HBA that can't
(currently) control non-local phys.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/message/fusion/mptsas.c   | 11 ++++++++---
 drivers/scsi/scsi_transport_sas.c | 10 ++--------
 include/scsi/scsi_transport_sas.h |  5 ++---
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index dfdd1e4457682..b752a479f6dbb 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -852,6 +852,10 @@ static int mptsas_get_linkerrors(struct sas_phy *phy)
 	dma_addr_t dma_handle;
 	int error;
 
+	/* FIXME: only have link errors on local phys */
+	if (!scsi_is_sas_phy_local(phy))
+		return -EINVAL;
+
 	hdr.PageVersion = MPI_SASPHY1_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 1 /* page number 1*/;
@@ -924,6 +928,10 @@ static int mptsas_phy_reset(struct sas_phy *phy, int hard_reset)
 	unsigned long timeleft;
 	int error = -ERESTARTSYS;
 
+	/* FIXME: fusion doesn't allow non-local phy reset */
+	if (!scsi_is_sas_phy_local(phy))
+		return -EINVAL;
+
 	/* not implemented for expanders */
 	if (phy->identify.target_port_protocols & SAS_PROTOCOL_SMP)
 		return -ENXIO;
@@ -1570,9 +1578,6 @@ static int mptsas_probe_one_phy(struct device *dev,
 
 	if (!phy_info->phy) {
 
-		if (local)
-			phy->local_attached = 1;
-
 		error = sas_phy_add(phy);
 		if (error) {
 			sas_phy_free(phy);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 5a625c3fddaed..d518c1207fb43 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -266,9 +266,6 @@ show_sas_phy_##field(struct class_device *cdev, char *buf)		\
 	struct sas_internal *i = to_sas_internal(shost->transportt);	\
 	int error;							\
 									\
-	if (!phy->local_attached)					\
-		return -EINVAL;						\
-									\
 	error = i->f->get_linkerrors ? i->f->get_linkerrors(phy) : 0;	\
 	if (error)							\
 		return error;						\
@@ -299,9 +296,6 @@ static ssize_t do_sas_phy_reset(struct class_device *cdev,
 	struct sas_internal *i = to_sas_internal(shost->transportt);
 	int error;
 
-	if (!phy->local_attached)
-		return -EINVAL;
-
 	error = i->f->phy_reset(phy, hard_reset);
 	if (error)
 		return error;
@@ -849,7 +843,7 @@ show_sas_rphy_enclosure_identifier(struct class_device *cdev, char *buf)
 	 * Only devices behind an expander are supported, because the
 	 * enclosure identifier is a SMP feature.
 	 */
-	if (phy->local_attached)
+	if (scsi_is_sas_phy_local(phy))
 		return -EINVAL;
 
 	error = i->f->get_enclosure_identifier(rphy, &identifier);
@@ -870,7 +864,7 @@ show_sas_rphy_bay_identifier(struct class_device *cdev, char *buf)
 	struct sas_internal *i = to_sas_internal(shost->transportt);
 	int val;
 
-	if (phy->local_attached)
+	if (scsi_is_sas_phy_local(phy))
 		return -EINVAL;
 
 	val = i->f->get_bay_identifier(rphy);
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 6cc2314098cf7..eeb2200de8555 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -57,9 +57,6 @@ struct sas_phy {
 	enum sas_linkrate	maximum_linkrate_hw;
 	enum sas_linkrate	maximum_linkrate;
 
-	/* internal state */
-	unsigned int		local_attached : 1;
-
 	/* link error statistics */
 	u32			invalid_dword_count;
 	u32			running_disparity_error_count;
@@ -196,4 +193,6 @@ scsi_is_sas_expander_device(struct device *dev)
 		rphy->identify.device_type == SAS_EDGE_EXPANDER_DEVICE;
 }
 
+#define scsi_is_sas_phy_local(phy)	scsi_is_host_device((phy)->dev.parent)
+
 #endif /* SCSI_TRANSPORT_SAS_H */
-- 
GitLab


From 2908d778ab3e244900c310974e1fc1c69066e450 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@SteelEye.com>
Date: Tue, 29 Aug 2006 09:22:51 -0500
Subject: [PATCH 0128/1063] [SCSI] aic94xx: new driver

This is the end point of the separate aic94xx driver based on the
original driver and transport class from Luben Tuikov
<ltuikov@yahoo.com>

The log of the separate development is:

Alexis Bruemmer:
  o aic94xx: fix hotplug/unplug for expanderless systems
  o aic94xx: disable split completion timer/setting by default
  o aic94xx: wide port off expander support
  o aic94xx: remove various inline functions
  o aic94xx: use bitops
  o aic94xx: remove queue comment
  o aic94xx: remove sas_common.c
  o aic94xx: sas remove depot's
  o aic94xx: use available list_for_each_entry_safe_reverse()
  o aic94xx: sas header file merge

James Bottomley:
  o aic94xx: fix TF_TMF_NO_CTX processing
  o aic94xx: convert to request_firmware interface
  o aic94xx: fix hotplug/unplug
  o aic94xx: add link error counts to the expander phys
  o aic94xx: add transport class phy reset capability
  o aic94xx: remove local_attached flag
  o Remove README
  o Fixup Makefile variable for libsas rename
  o Rename sas->libsas
  o aic94xx: correct return code for sas_discover_event
  o aic94xx: use parent backlink port
  o aic94xx: remove channel abstraction
  o aic94xx: fix routing algorithms
  o aic94xx: add backlink port
  o aic94xx: fix cascaded expander properties
  o aic94xx: fix sleep under lock
  o aic94xx: fix panic on module removal in complex topology
  o aic94xx: make use of the new sas_port
  o rename sas_port to asd_sas_port
  o Fix for eh_strategy_handler move
  o aic94xx: move entirely over to correct transport class formulation
  o remove last vestages of sas_rphy_alloc()
  o update for eh_timed_out move
  o Preliminary expander support for aic94xx
  o sas: remove event thread
  o minor warning cleanups
  o remove last vestiges of id mapping arrays
  o Further updates
  o Convert aic94xx over entirely to the transport class end device and
  o update aic94xx/sas to use the new sas transport class end device
  o [PATCH] aic94xx: attaching to the sas transport class
  o Add missing completion removal from prior patch
  o [PATCH] aic94xx: attaching to the sas transport class
  o Build fixes from akpm

Jeff Garzik:
  o [scsi aic94xx] Remove ->owner from PCI info table

Luben Tuikov:
  o initial aic94xx driver

Mike Anderson:
  o aic94xx: fix panic on module insertion
  o aic94xx: stub out SATA_DEV case
  o aic94xx: compile warning cleanups
  o aic94xx: sas_alloc_task
  o aic94xx: ref count update
  o aic94xx nexus loss time value
  o [PATCH] aic94xx: driver assertion in non-x86 BIOS env

Randy Dunlap:
  o libsas: externs not needed

Robert Tarte:
  o aic94xx: sequence patch - fixes SATA support

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 Documentation/scsi/libsas.txt          |  484 +++++
 drivers/scsi/Kconfig                   |    5 +-
 drivers/scsi/Makefile                  |    2 +
 drivers/scsi/aic94xx/Kconfig           |   41 +
 drivers/scsi/aic94xx/Makefile          |   39 +
 drivers/scsi/aic94xx/aic94xx.h         |  114 ++
 drivers/scsi/aic94xx/aic94xx_dev.c     |  353 ++++
 drivers/scsi/aic94xx/aic94xx_dump.c    |  959 ++++++++++
 drivers/scsi/aic94xx/aic94xx_dump.h    |   52 +
 drivers/scsi/aic94xx/aic94xx_hwi.c     | 1376 ++++++++++++++
 drivers/scsi/aic94xx/aic94xx_hwi.h     |  397 ++++
 drivers/scsi/aic94xx/aic94xx_init.c    |  860 +++++++++
 drivers/scsi/aic94xx/aic94xx_reg.c     |  332 ++++
 drivers/scsi/aic94xx/aic94xx_reg.h     |  302 +++
 drivers/scsi/aic94xx/aic94xx_reg_def.h | 2398 ++++++++++++++++++++++++
 drivers/scsi/aic94xx/aic94xx_sas.h     |  785 ++++++++
 drivers/scsi/aic94xx/aic94xx_scb.c     |  732 ++++++++
 drivers/scsi/aic94xx/aic94xx_sds.c     | 1136 +++++++++++
 drivers/scsi/aic94xx/aic94xx_seq.c     | 1401 ++++++++++++++
 drivers/scsi/aic94xx/aic94xx_seq.h     |   70 +
 drivers/scsi/aic94xx/aic94xx_task.c    |  642 +++++++
 drivers/scsi/aic94xx/aic94xx_tmf.c     |  636 +++++++
 drivers/scsi/libsas/Kconfig            |   39 +
 drivers/scsi/libsas/Makefile           |   36 +
 drivers/scsi/libsas/sas_discover.c     |  749 ++++++++
 drivers/scsi/libsas/sas_dump.c         |   76 +
 drivers/scsi/libsas/sas_dump.h         |   42 +
 drivers/scsi/libsas/sas_event.c        |   75 +
 drivers/scsi/libsas/sas_expander.c     | 1862 ++++++++++++++++++
 drivers/scsi/libsas/sas_init.c         |  227 +++
 drivers/scsi/libsas/sas_internal.h     |  146 ++
 drivers/scsi/libsas/sas_phy.c          |  157 ++
 drivers/scsi/libsas/sas_port.c         |  279 +++
 drivers/scsi/libsas/sas_scsi_host.c    |  786 ++++++++
 include/scsi/libsas.h                  |  627 +++++++
 include/scsi/sas.h                     |  644 +++++++
 include/scsi/scsi.h                    |    6 +
 37 files changed, 18866 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/scsi/libsas.txt
 create mode 100644 drivers/scsi/aic94xx/Kconfig
 create mode 100644 drivers/scsi/aic94xx/Makefile
 create mode 100644 drivers/scsi/aic94xx/aic94xx.h
 create mode 100644 drivers/scsi/aic94xx/aic94xx_dev.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_dump.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_dump.h
 create mode 100644 drivers/scsi/aic94xx/aic94xx_hwi.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_hwi.h
 create mode 100644 drivers/scsi/aic94xx/aic94xx_init.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_reg.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_reg.h
 create mode 100644 drivers/scsi/aic94xx/aic94xx_reg_def.h
 create mode 100644 drivers/scsi/aic94xx/aic94xx_sas.h
 create mode 100644 drivers/scsi/aic94xx/aic94xx_scb.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_sds.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_seq.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_seq.h
 create mode 100644 drivers/scsi/aic94xx/aic94xx_task.c
 create mode 100644 drivers/scsi/aic94xx/aic94xx_tmf.c
 create mode 100644 drivers/scsi/libsas/Kconfig
 create mode 100644 drivers/scsi/libsas/Makefile
 create mode 100644 drivers/scsi/libsas/sas_discover.c
 create mode 100644 drivers/scsi/libsas/sas_dump.c
 create mode 100644 drivers/scsi/libsas/sas_dump.h
 create mode 100644 drivers/scsi/libsas/sas_event.c
 create mode 100644 drivers/scsi/libsas/sas_expander.c
 create mode 100644 drivers/scsi/libsas/sas_init.c
 create mode 100644 drivers/scsi/libsas/sas_internal.h
 create mode 100644 drivers/scsi/libsas/sas_phy.c
 create mode 100644 drivers/scsi/libsas/sas_port.c
 create mode 100644 drivers/scsi/libsas/sas_scsi_host.c
 create mode 100644 include/scsi/libsas.h
 create mode 100644 include/scsi/sas.h

diff --git a/Documentation/scsi/libsas.txt b/Documentation/scsi/libsas.txt
new file mode 100644
index 0000000000000..9e2078b2a615c
--- /dev/null
+++ b/Documentation/scsi/libsas.txt
@@ -0,0 +1,484 @@
+SAS Layer
+---------
+
+The SAS Layer is a management infrastructure which manages
+SAS LLDDs.  It sits between SCSI Core and SAS LLDDs.  The
+layout is as follows: while SCSI Core is concerned with
+SAM/SPC issues, and a SAS LLDD+sequencer is concerned with
+phy/OOB/link management, the SAS layer is concerned with:
+
+      * SAS Phy/Port/HA event management (LLDD generates,
+        SAS Layer processes),
+      * SAS Port management (creation/destruction),
+      * SAS Domain discovery and revalidation,
+      * SAS Domain device management,
+      * SCSI Host registration/unregistration,
+      * Device registration with SCSI Core (SAS) or libata
+        (SATA), and
+      * Expander management and exporting expander control
+        to user space.
+
+A SAS LLDD is a PCI device driver.  It is concerned with
+phy/OOB management, and vendor specific tasks and generates
+events to the SAS layer.
+
+The SAS Layer does most SAS tasks as outlined in the SAS 1.1
+spec.
+
+The sas_ha_struct describes the SAS LLDD to the SAS layer.
+Most of it is used by the SAS Layer but a few fields need to
+be initialized by the LLDDs.
+
+After initializing your hardware, from the probe() function
+you call sas_register_ha(). It will register your LLDD with
+the SCSI subsystem, creating a SCSI host and it will
+register your SAS driver with the sysfs SAS tree it creates.
+It will then return.  Then you enable your phys to actually
+start OOB (at which point your driver will start calling the
+notify_* event callbacks).
+
+Structure descriptions:
+
+struct sas_phy --------------------
+Normally this is statically embedded in your driver's
+phy structure:
+	struct my_phy {
+	       blah;
+	       struct sas_phy sas_phy;
+	       bleh;
+	};
+And then all the phys are an array of my_phy in your HA
+struct (shown below).
+
+Then as you go along and initialize your phys you also
+initialize the sas_phy struct, along with your own
+phy structure.
+
+In general, the phys are managed by the LLDD and the ports
+are managed by the SAS layer.  So the phys are initialized
+and updated by the LLDD and the ports are initialized and
+updated by the SAS layer.
+
+There is a scheme where the LLDD can RW certain fields,
+and the SAS layer can only read such ones, and vice versa.
+The idea is to avoid unnecessary locking.
+
+enabled -- must be set (0/1)
+id -- must be set [0,MAX_PHYS)
+class, proto, type, role, oob_mode, linkrate -- must be set
+oob_mode --  you set this when OOB has finished and then notify
+the SAS Layer.
+
+sas_addr -- this normally points to an array holding the sas
+address of the phy, possibly somewhere in your my_phy
+struct.
+
+attached_sas_addr -- set this when you (LLDD) receive an
+IDENTIFY frame or a FIS frame, _before_ notifying the SAS
+layer.  The idea is that sometimes the LLDD may want to fake
+or provide a different SAS address on that phy/port and this
+allows it to do this.  At best you should copy the sas
+address from the IDENTIFY frame or maybe generate a SAS
+address for SATA directly attached devices.  The Discover
+process may later change this.
+
+frame_rcvd -- this is where you copy the IDENTIFY/FIS frame
+when you get it; you lock, copy, set frame_rcvd_size and
+unlock the lock, and then call the event.  It is a pointer
+since there's no way to know your hw frame size _exactly_,
+so you define the actual array in your phy struct and let
+this pointer point to it.  You copy the frame from your
+DMAable memory to that area holding the lock.
+
+sas_prim -- this is where primitives go when they're
+received.  See sas.h. Grab the lock, set the primitive,
+release the lock, notify.
+
+port -- this points to the sas_port if the phy belongs
+to a port -- the LLDD only reads this. It points to the
+sas_port this phy is part of.  Set by the SAS Layer.
+
+ha -- may be set; the SAS layer sets it anyway.
+
+lldd_phy -- you should set this to point to your phy so you
+can find your way around faster when the SAS layer calls one
+of your callbacks and passes you a phy.  If the sas_phy is
+embedded you can also use container_of -- whatever you
+prefer.
+
+
+struct sas_port --------------------
+The LLDD doesn't set any fields of this struct -- it only
+reads them.  They should be self explanatory.
+
+phy_mask is 32 bit, this should be enough for now, as I
+haven't heard of a HA having more than 8 phys.
+
+lldd_port -- I haven't found use for that -- maybe other
+LLDD who wish to have internal port representation can make
+use of this.
+
+
+struct sas_ha_struct --------------------
+It normally is statically declared in your own LLDD
+structure describing your adapter:
+struct my_sas_ha {
+       blah;
+       struct sas_ha_struct sas_ha;
+       struct my_phy phys[MAX_PHYS];
+       struct sas_port sas_ports[MAX_PHYS]; /* (1) */
+       bleh;
+};
+
+(1) If your LLDD doesn't have its own port representation.
+
+What needs to be initialized (sample function given below).
+
+pcidev
+sas_addr -- since the SAS layer doesn't want to mess with
+	 memory allocation, etc, this points to statically
+	 allocated array somewhere (say in your host adapter
+	 structure) and holds the SAS address of the host
+	 adapter as given by you or the manufacturer, etc.
+sas_port
+sas_phy -- an array of pointers to structures. (see
+	note above on sas_addr).
+	These must be set.  See more notes below.
+num_phys -- the number of phys present in the sas_phy array,
+	 and the number of ports present in the sas_port
+	 array.  There can be a maximum of num_phys ports (one
+	 per phy) so we drop the num_ports, and only use
+	 num_phys.
+
+The event interface:
+
+	/* LLDD calls these to notify the class of an event. */
+	void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event);
+	void (*notify_port_event)(struct sas_phy *, enum port_event);
+	void (*notify_phy_event)(struct sas_phy *, enum phy_event);
+
+When sas_register_ha() returns, those are set and can be
+called by the LLDD to notify the SAS layer of such
+events.
+
+The port notification:
+
+	/* The class calls these to notify the LLDD of an event. */
+	void (*lldd_port_formed)(struct sas_phy *);
+	void (*lldd_port_deformed)(struct sas_phy *);
+
+If the LLDD wants notification when a port has been formed
+or deformed it sets those to a function satisfying the type.
+
+A SAS LLDD should also implement at least one of the Task
+Management Functions (TMFs) described in SAM:
+
+	/* Task Management Functions. Must be called from process context. */
+	int (*lldd_abort_task)(struct sas_task *);
+	int (*lldd_abort_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_aca)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_I_T_nexus_reset)(struct domain_device *);
+	int (*lldd_lu_reset)(struct domain_device *, u8 *lun);
+	int (*lldd_query_task)(struct sas_task *);
+
+For more information please read SAM from T10.org.
+
+Port and Adapter management:
+
+	/* Port and Adapter management */
+	int (*lldd_clear_nexus_port)(struct sas_port *);
+	int (*lldd_clear_nexus_ha)(struct sas_ha_struct *);
+
+A SAS LLDD should implement at least one of those.
+
+Phy management:
+
+	/* Phy management */
+	int (*lldd_control_phy)(struct sas_phy *, enum phy_func);
+
+lldd_ha -- set this to point to your HA struct. You can also
+use container_of if you embedded it as shown above.
+
+A sample initialization and registration function
+can look like this (called last thing from probe())
+*but* before you enable the phys to do OOB:
+
+static int register_sas_ha(struct my_sas_ha *my_ha)
+{
+	int i;
+	static struct sas_phy   *sas_phys[MAX_PHYS];
+	static struct sas_port  *sas_ports[MAX_PHYS];
+
+	my_ha->sas_ha.sas_addr = &my_ha->sas_addr[0];
+
+	for (i = 0; i < MAX_PHYS; i++) {
+		sas_phys[i] = &my_ha->phys[i].sas_phy;
+		sas_ports[i] = &my_ha->sas_ports[i];
+	}
+
+	my_ha->sas_ha.sas_phy  = sas_phys;
+	my_ha->sas_ha.sas_port = sas_ports;
+	my_ha->sas_ha.num_phys = MAX_PHYS;
+
+	my_ha->sas_ha.lldd_port_formed = my_port_formed;
+
+	my_ha->sas_ha.lldd_dev_found = my_dev_found;
+	my_ha->sas_ha.lldd_dev_gone = my_dev_gone;
+
+	my_ha->sas_ha.lldd_max_execute_num = lldd_max_execute_num; (1)
+
+	my_ha->sas_ha.lldd_queue_size = ha_can_queue;
+	my_ha->sas_ha.lldd_execute_task = my_execute_task;
+
+	my_ha->sas_ha.lldd_abort_task     = my_abort_task;
+	my_ha->sas_ha.lldd_abort_task_set = my_abort_task_set;
+	my_ha->sas_ha.lldd_clear_aca      = my_clear_aca;
+	my_ha->sas_ha.lldd_clear_task_set = my_clear_task_set;
+	my_ha->sas_ha.lldd_I_T_nexus_reset= NULL; (2)
+	my_ha->sas_ha.lldd_lu_reset       = my_lu_reset;
+	my_ha->sas_ha.lldd_query_task     = my_query_task;
+
+	my_ha->sas_ha.lldd_clear_nexus_port = my_clear_nexus_port;
+	my_ha->sas_ha.lldd_clear_nexus_ha = my_clear_nexus_ha;
+
+	my_ha->sas_ha.lldd_control_phy = my_control_phy;
+
+	return sas_register_ha(&my_ha->sas_ha);
+}
+
+(1) This is normally a LLDD parameter, something along the
+lines of a task collector.  What it tells the SAS Layer is
+whether the SAS layer should run in Direct Mode (default:
+value 0 or 1) or Task Collector Mode (value greater than 1).
+
+In Direct Mode, the SAS Layer calls Execute Task as soon as
+it has a command to send to the SDS, _and_ this is a single
+command, i.e. not linked.
+
+Some hardware (e.g. aic94xx) has the capability to DMA more
+than one task at a time (interrupt) from host memory.  Task
+Collector Mode is an optional feature for HAs which support
+this in their hardware.  (Again, it is completely optional
+even if your hardware supports it.)
+
+In Task Collector Mode, the SAS Layer would do _natural_
+coalescing of tasks and at the appropriate moment it would
+call your driver to DMA more than one task in a single HA
+interrupt. DBMS may want to use this by insmod/modprobe
+setting the lldd_max_execute_num to something greater than
+1.
+
+(2) SAS 1.1 does not define I_T Nexus Reset TMF.
+
+Events
+------
+
+Events are _the only way_ a SAS LLDD notifies the SAS layer
+of anything.  There is no other way for a LLDD to tell
+the SAS layer of anything happening internally or in the SAS
+domain.
+
+Phy events:
+	PHYE_LOSS_OF_SIGNAL, (C)
+	PHYE_OOB_DONE,
+	PHYE_OOB_ERROR,      (C)
+	PHYE_SPINUP_HOLD.
+
+Port events, passed on a _phy_:
+	PORTE_BYTES_DMAED,      (M)
+	PORTE_BROADCAST_RCVD,   (E)
+	PORTE_LINK_RESET_ERR,   (C)
+	PORTE_TIMER_EVENT,      (C)
+	PORTE_HARD_RESET.
+
+Host Adapter event:
+	HAE_RESET
+
+A SAS LLDD should be able to generate
+	- at least one event from group C (choice),
+	- events marked M (mandatory) are mandatory (only one),
+	- events marked E (expander) if it wants the SAS layer
+	  to handle domain revalidation (only one such).
+	- Unmarked events are optional.
+
+Meaning:
+
+HAE_RESET -- when your HA got internal error and was reset.
+
+PORTE_BYTES_DMAED -- on receiving an IDENTIFY/FIS frame
+PORTE_BROADCAST_RCVD -- on receiving a primitive
+PORTE_LINK_RESET_ERR -- timer expired, loss of signal, loss
+of DWS, etc. (*)
+PORTE_TIMER_EVENT -- DWS reset timeout timer expired (*)
+PORTE_HARD_RESET -- Hard Reset primitive received.
+
+PHYE_LOSS_OF_SIGNAL -- the device is gone (*)
+PHYE_OOB_DONE -- OOB went fine and oob_mode is valid
+PHYE_OOB_ERROR -- Error while doing OOB, the device probably
+got disconnected. (*)
+PHYE_SPINUP_HOLD -- SATA is present, COMWAKE not sent.
+
+(*) should set/clear the appropriate fields in the phy,
+    or alternatively call the inlined sas_phy_disconnected()
+    which is just a helper, from their tasklet.
+
+The Execute Command SCSI RPC:
+
+	int (*lldd_execute_task)(struct sas_task *, int num,
+				 unsigned long gfp_flags);
+
+Used to queue a task to the SAS LLDD.  @task is the task(s)
+to be executed.  @num should be the number of tasks being
+queued at this function call (they are linked via
+task::list), @gfp_flags should be the gfp mask defining the
+context of the caller.
+
+This function should implement the Execute Command SCSI RPC,
+or if you're sending a SCSI Task as linked commands, you
+should also use this function.
+
+That is, when lldd_execute_task() is called, the command(s)
+go out on the transport *immediately*.  There is *no*
+queuing of any sort and at any level in a SAS LLDD.
+
+The use of task::list is two-fold, one for linked commands,
+the other discussed below.
+
+It is possible to queue up more than one task at a time, by
+initializing the list element of struct sas_task, and
+passing the number of tasks enlisted in this manner in num.
+
+Returns: -SAS_QUEUE_FULL, -ENOMEM, nothing was queued;
+	 0, the task(s) were queued.
+
+If you want to pass num > 1, then either
+A) you're the only caller of this function and keep track
+   of what you've queued to the LLDD, or
+B) you know what you're doing and have a strategy of
+   retrying.
+
+As opposed to queuing one task at a time (function call),
+batch queuing of tasks, by having num > 1, greatly
+simplifies LLDD code, sequencer code, and _hardware design_,
+and has some performance advantages in certain situations
+(DBMS).
+
+The LLDD advertises if it can take more than one command at
+a time at lldd_execute_task(), by setting the
+lldd_max_execute_num parameter (controlled by "collector"
+module parameter in aic94xx SAS LLDD).
+
+You should leave this to the default 1, unless you know what
+you're doing.
+
+This is a function of the LLDD, which the SAS layer can
+cater to.
+
+int lldd_queue_size
+	The host adapter's queue size.  This is the maximum
+number of commands the lldd can have pending to domain
+devices on behalf of all upper layers submitting through
+lldd_execute_task().
+
+You really want to set this to something (much) larger than
+1.
+
+This _really_ has absolutely nothing to do with queuing.
+There is no queuing in SAS LLDDs.
+
+struct sas_task {
+	dev -- the device this task is destined to
+	list -- must be initialized (INIT_LIST_HEAD)
+	task_proto -- _one_ of enum sas_proto
+	scatter -- pointer to scatter gather list array
+	num_scatter -- number of elements in scatter
+	total_xfer_len -- total number of bytes expected to be transferred
+	data_dir -- PCI_DMA_...
+	task_done -- callback when the task has finished execution
+};
+
+When an external entity, i.e. an entity other than the LLDD
+or the SAS Layer, wants to work with a struct domain_device, it
+_must_ call kobject_get() when getting a handle on the
+device and kobject_put() when it is done with the device.
+
+This does two things:
+     A) implements proper kfree() for the device;
+     B) increments/decrements the kref for all players:
+     domain_device
+	all domain_device's ... (if past an expander)
+	    port
+		host adapter
+		     pci device
+			 and up the ladder, etc.
+
+DISCOVERY
+---------
+
+The sysfs tree has the following purposes:
+    a) It shows you the physical layout of the SAS domain at
+       the current time, i.e. how the domain looks in the
+       physical world right now.
+    b) Shows some device parameters _at_discovery_time_.
+
+This is a link to the tree(1) program, very useful in
+viewing the SAS domain:
+ftp://mama.indstate.edu/linux/tree/
+I expect user space applications to actually create a
+graphical interface of this.
+
+That is, the sysfs domain tree doesn't show or keep state if
+you e.g., change the meaning of the READY LED MEANING
+setting, but it does show you the current connection status
+of the domain device.
+
+Keeping internal device state changes is the responsibility of
+upper layers (Command set drivers) and user space.
+
+When a device or devices are unplugged from the domain, this
+is reflected in the sysfs tree immediately, and the device(s)
+removed from the system.
+
+The structure domain_device describes any device in the SAS
+domain.  It is completely managed by the SAS layer.  A task
+points to a domain device, this is how the SAS LLDD knows
+where to send the task(s) to.  A SAS LLDD only reads the
+contents of the domain_device structure, but it never creates
+or destroys one.
+
+Expander management from User Space
+-----------------------------------
+
+In each expander directory in sysfs, there is a file called
+"smp_portal".  It is a binary sysfs attribute file, which
+implements an SMP portal (Note: this is *NOT* an SMP port),
+to which user space applications can send SMP requests and
+receive SMP responses.
+
+Functionality is deceptively simple:
+
+1. Build the SMP frame you want to send. The format and layout
+   is described in the SAS spec.  Leave the CRC field equal 0.
+open(2)
+2. Open the expander's SMP portal sysfs file in RW mode.
+write(2)
+3. Write the frame you built in 1.
+read(2)
+4. Read the amount of data you expect to receive for the frame you built.
+   If you receive a different amount of data than you expected,
+   then there was some kind of error.
+close(2)
+All this process is shown in detail in the function do_smp_func()
+and its callers, in the file "expander_conf.c".
+
+The kernel functionality is implemented in the file
+"sas_expander.c".
+
+The program "expander_conf.c" implements this. It takes one
+argument, the sysfs file name of the SMP portal to the
+expander, and gives expander information, including routing
+tables.
+
+The SMP portal gives you complete control of the expander,
+so please be careful.
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index d61662c1a0ee9..7de5fdfdab677 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -209,7 +209,7 @@ config SCSI_LOGGING
 	  there should be no noticeable performance impact as long as you have
 	  logging turned off.
 
-menu "SCSI Transport Attributes"
+menu "SCSI Transports"
 	depends on SCSI
 
 config SCSI_SPI_ATTRS
@@ -242,6 +242,8 @@ config SCSI_SAS_ATTRS
 	  If you wish to export transport-specific information about
 	  each attached SAS device to sysfs, say Y.
 
+source "drivers/scsi/libsas/Kconfig"
+
 endmenu
 
 menu "SCSI low-level drivers"
@@ -431,6 +433,7 @@ config SCSI_AIC7XXX_OLD
 	  module will be called aic7xxx_old.
 
 source "drivers/scsi/aic7xxx/Kconfig.aic79xx"
+source "drivers/scsi/aic94xx/Kconfig"
 
 # All the I2O code and drivers do not seem to be 64bit safe.
 config SCSI_DPT_I2O
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index b2de9bfdfdcdf..83da70decdd15 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_SCSI_SPI_ATTRS)	+= scsi_transport_spi.o
 obj-$(CONFIG_SCSI_FC_ATTRS) 	+= scsi_transport_fc.o
 obj-$(CONFIG_SCSI_ISCSI_ATTRS)	+= scsi_transport_iscsi.o
 obj-$(CONFIG_SCSI_SAS_ATTRS)	+= scsi_transport_sas.o
+obj-$(CONFIG_SCSI_SAS_LIBSAS)	+= libsas/
 
 obj-$(CONFIG_ISCSI_TCP) 	+= libiscsi.o	iscsi_tcp.o
 obj-$(CONFIG_INFINIBAND_ISER) 	+= libiscsi.o
@@ -68,6 +69,7 @@ obj-$(CONFIG_SCSI_AIC7XXX)	+= aic7xxx/
 obj-$(CONFIG_SCSI_AIC79XX)	+= aic7xxx/
 obj-$(CONFIG_SCSI_AACRAID)	+= aacraid/
 obj-$(CONFIG_SCSI_AIC7XXX_OLD)	+= aic7xxx_old.o
+obj-$(CONFIG_SCSI_AIC94XX)	+= aic94xx/
 obj-$(CONFIG_SCSI_IPS)		+= ips.o
 obj-$(CONFIG_SCSI_FD_MCS)	+= fd_mcs.o
 obj-$(CONFIG_SCSI_FUTURE_DOMAIN)+= fdomain.o
diff --git a/drivers/scsi/aic94xx/Kconfig b/drivers/scsi/aic94xx/Kconfig
new file mode 100644
index 0000000000000..0ed391d8ee84b
--- /dev/null
+++ b/drivers/scsi/aic94xx/Kconfig
@@ -0,0 +1,41 @@
+#
+# Kernel configuration file for aic94xx SAS/SATA driver.
+#
+# Copyright (c) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (c) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This file is part of the aic94xx driver.
+#
+# The aic94xx driver is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# The aic94xx driver is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Aic94xx Driver; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+#
+
+config SCSI_AIC94XX
+	tristate "Adaptec AIC94xx SAS/SATA support"
+	depends on PCI
+	select SCSI_SAS_LIBSAS
+	help
+		This driver supports Adaptec's SAS/SATA 3Gb/s 64 bit PCI-X
+		AIC94xx chip based host adapters.
+
+config AIC94XX_DEBUG
+	bool "Compile in debug mode"
+	default y
+	depends on SCSI_AIC94XX
+	help
+		Compiles the aic94xx driver in debug mode.  In debug mode,
+		the driver prints some messages to the console.
diff --git a/drivers/scsi/aic94xx/Makefile b/drivers/scsi/aic94xx/Makefile
new file mode 100644
index 0000000000000..e6b70123940ce
--- /dev/null
+++ b/drivers/scsi/aic94xx/Makefile
@@ -0,0 +1,39 @@
+#
+# Makefile for Adaptec aic94xx SAS/SATA driver.
+#
+# Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This file is part of the aic94xx driver.
+#
+# The aic94xx driver is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# The aic94xx driver is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the aic94xx driver; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+ifeq ($(CONFIG_AIC94XX_DEBUG),y)
+	EXTRA_CFLAGS += -DASD_DEBUG -DASD_ENTER_EXIT
+endif
+
+obj-$(CONFIG_SCSI_AIC94XX) += aic94xx.o
+aic94xx-y += aic94xx_init.o \
+	     aic94xx_hwi.o  \
+	     aic94xx_reg.o  \
+	     aic94xx_sds.o  \
+	     aic94xx_seq.o  \
+	     aic94xx_dump.o \
+	     aic94xx_scb.o  \
+	     aic94xx_dev.o  \
+	     aic94xx_tmf.o  \
+	     aic94xx_task.o
diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h
new file mode 100644
index 0000000000000..cb7caf1c9ce15
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx.h
@@ -0,0 +1,114 @@
+/*
+ * Aic94xx SAS/SATA driver header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * $Id: //depot/aic94xx/aic94xx.h#31 $
+ */
+
+#ifndef _AIC94XX_H_
+#define _AIC94XX_H_
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <scsi/libsas.h>
+
+#define ASD_DRIVER_NAME		"aic94xx"
+#define ASD_DRIVER_DESCRIPTION	"Adaptec aic94xx SAS/SATA driver"
+
+#define asd_printk(fmt, ...)	printk(KERN_NOTICE ASD_DRIVER_NAME ": " fmt, ## __VA_ARGS__)
+
+#ifdef ASD_ENTER_EXIT
+#define ENTER  printk(KERN_NOTICE "%s: ENTER %s\n", ASD_DRIVER_NAME, \
+		__FUNCTION__)
+#define EXIT   printk(KERN_NOTICE "%s: --EXIT %s\n", ASD_DRIVER_NAME, \
+		__FUNCTION__)
+#else
+#define ENTER
+#define EXIT
+#endif
+
+#ifdef ASD_DEBUG
+#define ASD_DPRINTK asd_printk
+#else
+#define ASD_DPRINTK(fmt, ...)
+#endif
+
+/* 2*ITNL timeout + 1 second */
+#define AIC94XX_SCB_TIMEOUT  (5*HZ)
+
+extern kmem_cache_t *asd_dma_token_cache;
+extern kmem_cache_t *asd_ascb_cache;
+extern char sas_addr_str[2*SAS_ADDR_SIZE + 1];
+
+static inline void asd_stringify_sas_addr(char *p, const u8 *sas_addr)
+{
+	int i;
+	for (i = 0; i < SAS_ADDR_SIZE; i++, p += 2)
+		snprintf(p, 3, "%02X", sas_addr[i]);
+	*p = '\0';
+}
+
+static inline void asd_destringify_sas_addr(u8 *sas_addr, const char *p)
+{
+	int i;
+	for (i = 0; i < SAS_ADDR_SIZE; i++) {
+		u8 h, l;
+		if (!*p)
+			break;
+		h = isdigit(*p) ? *p-'0' : *p-'A'+10;
+		p++;
+		l = isdigit(*p) ? *p-'0' : *p-'A'+10;
+		p++;
+		sas_addr[i] = (h<<4) | l;
+	}
+}
+
+struct asd_ha_struct;
+struct asd_ascb;
+
+int  asd_read_ocm(struct asd_ha_struct *asd_ha);
+int  asd_read_flash(struct asd_ha_struct *asd_ha);
+
+int  asd_dev_found(struct domain_device *dev);
+void asd_dev_gone(struct domain_device *dev);
+
+void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id);
+
+int  asd_execute_task(struct sas_task *, int num, unsigned long gfp_flags);
+
+/* ---------- TMFs ---------- */
+int  asd_abort_task(struct sas_task *);
+int  asd_abort_task_set(struct domain_device *, u8 *lun);
+int  asd_clear_aca(struct domain_device *, u8 *lun);
+int  asd_clear_task_set(struct domain_device *, u8 *lun);
+int  asd_lu_reset(struct domain_device *, u8 *lun);
+int  asd_query_task(struct sas_task *);
+
+/* ---------- Adapter and Port management ---------- */
+int  asd_clear_nexus_port(struct asd_sas_port *port);
+int  asd_clear_nexus_ha(struct sas_ha_struct *sas_ha);
+
+/* ---------- Phy Management ---------- */
+int  asd_control_phy(struct asd_sas_phy *phy, enum phy_func func);
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_dev.c b/drivers/scsi/aic94xx/aic94xx_dev.c
new file mode 100644
index 0000000000000..6f8901b748f73
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_dev.c
@@ -0,0 +1,353 @@
+/*
+ * Aic94xx SAS/SATA DDB management
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * $Id: //depot/aic94xx/aic94xx_dev.c#21 $
+ */
+
+#include "aic94xx.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_sas.h"
+
+#define FIND_FREE_DDB(_ha) find_first_zero_bit((_ha)->hw_prof.ddb_bitmap, \
+					       (_ha)->hw_prof.max_ddbs)
+#define SET_DDB(_ddb, _ha) set_bit(_ddb, (_ha)->hw_prof.ddb_bitmap)
+#define CLEAR_DDB(_ddb, _ha) clear_bit(_ddb, (_ha)->hw_prof.ddb_bitmap)
+
+/* Claim the first free DDB site and zero it; returns its index or -ENOMEM. */
+static inline int asd_get_ddb(struct asd_ha_struct *asd_ha)
+{
+	unsigned long irq_flags;
+	int site, i;
+
+	spin_lock_irqsave(&asd_ha->hw_prof.ddb_lock, irq_flags);
+	site = FIND_FREE_DDB(asd_ha);
+	if (site >= asd_ha->hw_prof.max_ddbs) {
+		spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, irq_flags);
+		return -ENOMEM;
+	}
+	SET_DDB(site, asd_ha);
+	spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, irq_flags);
+
+	/* Zero the site one dword at a time, after dropping the lock. */
+	for (i = 0; i < sizeof(struct asd_ddb_ssp_smp_target_port); i += 4)
+		asd_ddbsite_write_dword(asd_ha, site, i, 0);
+	return site;
+}
+
+#define INIT_CONN_TAG   offsetof(struct asd_ddb_ssp_smp_target_port, init_conn_tag)
+#define DEST_SAS_ADDR   offsetof(struct asd_ddb_ssp_smp_target_port, dest_sas_addr)
+#define SEND_QUEUE_HEAD offsetof(struct asd_ddb_ssp_smp_target_port, send_queue_head)
+#define DDB_TYPE        offsetof(struct asd_ddb_ssp_smp_target_port, ddb_type)
+#define CONN_MASK       offsetof(struct asd_ddb_ssp_smp_target_port, conn_mask)
+#define DDB_TARG_FLAGS  offsetof(struct asd_ddb_ssp_smp_target_port, flags)
+#define DDB_TARG_FLAGS2 offsetof(struct asd_ddb_stp_sata_target_port, flags2)
+#define EXEC_QUEUE_TAIL offsetof(struct asd_ddb_ssp_smp_target_port, exec_queue_tail)
+#define SEND_QUEUE_TAIL offsetof(struct asd_ddb_ssp_smp_target_port, send_queue_tail)
+#define SISTER_DDB      offsetof(struct asd_ddb_ssp_smp_target_port, sister_ddb)
+#define MAX_CCONN       offsetof(struct asd_ddb_ssp_smp_target_port, max_concurrent_conn)
+#define NUM_CTX         offsetof(struct asd_ddb_ssp_smp_target_port, num_contexts)
+#define ATA_CMD_SCBPTR  offsetof(struct asd_ddb_stp_sata_target_port, ata_cmd_scbptr)
+#define SATA_TAG_ALLOC_MASK offsetof(struct asd_ddb_stp_sata_target_port, sata_tag_alloc_mask)
+#define NUM_SATA_TAGS   offsetof(struct asd_ddb_stp_sata_target_port, num_sata_tags)
+#define SATA_STATUS     offsetof(struct asd_ddb_stp_sata_target_port, sata_status)
+#define NCQ_DATA_SCB_PTR offsetof(struct asd_ddb_stp_sata_target_port, ncq_data_scb_ptr)
+#define ITNL_TIMEOUT    offsetof(struct asd_ddb_ssp_smp_target_port, itnl_timeout)
+
+static inline void asd_free_ddb(struct asd_ha_struct *asd_ha, int ddb)
+{
+	unsigned long irq_flags;
+
+	if (ddb != 0 && ddb < 0xFFFF) {	/* 0 and >= 0xFFFF: nothing to free */
+		asd_ddbsite_write_byte(asd_ha, ddb, DDB_TYPE, DDB_TYPE_UNUSED);
+		spin_lock_irqsave(&asd_ha->hw_prof.ddb_lock, irq_flags);
+		CLEAR_DDB(ddb, asd_ha);
+		spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, irq_flags);
+	}
+}
+
+static inline void asd_set_ddb_type(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int site = (int) (unsigned long) dev->lldd_dev;
+	int type = DDB_TYPE_INITIATOR;	/* used when dev->tproto is zero */
+
+	if (dev->dev_type == SATA_PM_PORT)
+		type = DDB_TYPE_PM_PORT;
+	else if (dev->tproto)
+		type = DDB_TYPE_TARGET;
+	asd_ddbsite_write_byte(asd_ha, site, DDB_TYPE, type);
+}
+
+/* Allocate a SATA tag DDB, fill it with 0xFFFF, link it as dev's sister DDB. */
+static int asd_init_sata_tag_ddb(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int owner_ddb = (int) (unsigned long) dev->lldd_dev;
+	int tag_ddb = asd_get_ddb(asd_ha);
+	int offs;
+
+	if (tag_ddb < 0)
+		return tag_ddb;
+
+	for (offs = 0; offs < sizeof(struct asd_ddb_sata_tag); offs += 2)
+		asd_ddbsite_write_word(asd_ha, tag_ddb, offs, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, owner_ddb, SISTER_DDB, tag_ddb);
+	return 0;
+}
+
+/* Program the SATA fields of dev's DDB; add a tag DDB when qdepth > 0. */
+static inline int asd_init_sata(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int ddb = (int) (unsigned long) dev->lldd_dev;
+	u32 qdepth = 0;
+
+	asd_ddbsite_write_word(asd_ha, ddb, ATA_CMD_SCBPTR, 0xFFFF);
+	if ((dev->dev_type == SATA_DEV || dev->dev_type == SATA_PM_PORT) &&
+	    dev->sata_dev.identify_device &&
+	    dev->sata_dev.identify_device[10] != 0) {
+		u16 w75 = le16_to_cpu(dev->sata_dev.identify_device[75]);
+		u16 w76 = le16_to_cpu(dev->sata_dev.identify_device[76]);
+
+		if (w76 & 0x100) /* NCQ? */
+			qdepth = (w75 & 0x1F) + 1;
+		/* qdepth can be 32; 1<<32 is undefined for a 32-bit int */
+		asd_ddbsite_write_dword(asd_ha, ddb, SATA_TAG_ALLOC_MASK,
+					(u32)((1ULL << qdepth) - 1));
+		asd_ddbsite_write_byte(asd_ha, ddb, NUM_SATA_TAGS, qdepth);
+	}
+	if (dev->dev_type == SATA_DEV || dev->dev_type == SATA_PM ||
+	    dev->dev_type == SATA_PM_PORT) {
+		struct dev_to_host_fis *fis = (struct dev_to_host_fis *)
+			dev->frame_rcvd;
+		asd_ddbsite_write_byte(asd_ha, ddb, SATA_STATUS, fis->status);
+	}
+	asd_ddbsite_write_word(asd_ha, ddb, NCQ_DATA_SCB_PTR, 0xFFFF);
+	/* qdepth stays 0 unless the NCQ bit was seen above */
+	return qdepth > 0 ? asd_init_sata_tag_ddb(dev) : 0;
+}
+
+static int asd_init_target_ddb(struct domain_device *dev)
+{
+	int ddb, i;
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	u8 flags = 0;
+	/* Claim a DDB site for dev and program it as a target port DDB. */
+	ddb = asd_get_ddb(asd_ha);
+	if (ddb < 0)
+		return ddb;
+
+	dev->lldd_dev = (void *) (unsigned long) ddb;	/* read back by helpers */
+
+	asd_ddbsite_write_byte(asd_ha, ddb, 0, DDB_TP_CONN_TYPE);
+	asd_ddbsite_write_byte(asd_ha, ddb, 1, 0);
+	asd_ddbsite_write_word(asd_ha, ddb, INIT_CONN_TAG, 0xFFFF);
+	for (i = 0; i < SAS_ADDR_SIZE; i++)
+		asd_ddbsite_write_byte(asd_ha, ddb, DEST_SAS_ADDR+i,
+				       dev->sas_addr[i]);
+	asd_ddbsite_write_word(asd_ha, ddb, SEND_QUEUE_HEAD, 0xFFFF);
+	asd_set_ddb_type(dev);
+	asd_ddbsite_write_byte(asd_ha, ddb, CONN_MASK, dev->port->phy_mask);
+	if (dev->port->oob_mode != SATA_OOB_MODE) {
+		flags |= OPEN_REQUIRED;
+		if ((dev->dev_type == SATA_DEV) ||
+		    (dev->tproto & SAS_PROTO_STP)) {
+			struct smp_resp *rps_resp = &dev->sata_dev.rps_resp;
+			if (rps_resp->frame_type == SMP_RESPONSE &&
+			    rps_resp->function == SMP_REPORT_PHY_SATA &&
+			    rps_resp->result == SMP_RESP_FUNC_ACC) {
+				if (rps_resp->rps.affil_valid)
+					flags |= STP_AFFIL_POL;
+				if (rps_resp->rps.affil_supp)
+					flags |= SUPPORTS_AFFIL;
+			}
+		} else {
+			flags |= CONCURRENT_CONN_SUPP;
+			if (!dev->parent &&
+			    (dev->dev_type == EDGE_DEV ||
+			     dev->dev_type == FANOUT_DEV))
+				asd_ddbsite_write_byte(asd_ha, ddb, MAX_CCONN,
+						       4);
+			else
+				asd_ddbsite_write_byte(asd_ha, ddb, MAX_CCONN,
+						       dev->pathways);
+			asd_ddbsite_write_byte(asd_ha, ddb, NUM_CTX, 1);
+		}
+	}
+	if (dev->dev_type == SATA_PM)
+		flags |= SATA_MULTIPORT;
+	asd_ddbsite_write_byte(asd_ha, ddb, DDB_TARG_FLAGS, flags);
+	/* flags is reused from here on for the separate flags2 byte. */
+	flags = 0;
+	if (dev->tproto & SAS_PROTO_STP)
+		flags |= STP_CL_POL_NO_TX;
+	asd_ddbsite_write_byte(asd_ha, ddb, DDB_TARG_FLAGS2, flags);
+
+	asd_ddbsite_write_word(asd_ha, ddb, EXEC_QUEUE_TAIL, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, ddb, SEND_QUEUE_TAIL, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, ddb, SISTER_DDB, 0xFFFF);
+
+	if (dev->dev_type == SATA_DEV || (dev->tproto & SAS_PROTO_STP)) {
+		i = asd_init_sata(dev);
+		if (i < 0) { /* NOTE(review): dev->lldd_dev is not reset */
+			asd_free_ddb(asd_ha, ddb);
+			return i;
+		}
+	}
+	/* End devices: I_T nexus loss timeout, capped at ITNL_TIMEOUT_CONST. */
+	if (dev->dev_type == SAS_END_DEV) {
+		struct sas_end_device *rdev = rphy_to_end_device(dev->rphy);
+		if (rdev->I_T_nexus_loss_timeout > 0)
+			asd_ddbsite_write_word(asd_ha, ddb, ITNL_TIMEOUT,
+					       min(rdev->I_T_nexus_loss_timeout,
+						   (u16)ITNL_TIMEOUT_CONST));
+		else
+			asd_ddbsite_write_word(asd_ha, ddb, ITNL_TIMEOUT,
+					       (u16)ITNL_TIMEOUT_CONST);
+	}
+	return 0;
+}
+
+/* Allocate a PM table DDB, set its first 32 bytes to 0xFF, link as sister. */
+static int asd_init_sata_pm_table_ddb(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int pm_ddb = (int) (unsigned long) dev->lldd_dev;
+	int table_ddb = asd_get_ddb(asd_ha);
+	int word;
+
+	if (table_ddb < 0)
+		return table_ddb;
+
+	/* 16 word writes of 0xFFFF at byte offsets 0, 2, ..., 30. */
+	for (word = 0; word < 16; word++)
+		asd_ddbsite_write_word(asd_ha, table_ddb, 2 * word, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, pm_ddb, SISTER_DDB, table_ddb);
+	return 0;
+}
+
+#define PM_PORT_FLAGS offsetof(struct asd_ddb_sata_pm_port, pm_port_flags)
+#define PARENT_DDB    offsetof(struct asd_ddb_sata_pm_port, parent_ddb)
+
+/**
+ * asd_init_sata_pm_port_ddb -- SATA Port Multiplier Port
+ * @dev: pointer to domain device
+ *
+ * For SATA Port Multiplier Ports we need to allocate one SATA Port
+ * Multiplier Port DDB and depending on whether the target on it
+ * supports SATA II NCQ, one SATA Tag DDB.
+ */
+static int asd_init_sata_pm_port_ddb(struct domain_device *dev)
+{
+	int ddb, i, parent_ddb, pmtable_ddb;
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	u8  flags;
+
+	ddb = asd_get_ddb(asd_ha);
+	if (ddb < 0)
+		return ddb;
+	/* NOTE(review): dev->lldd_dev is never set to ddb in this function. */
+	asd_set_ddb_type(dev);
+	flags = (dev->sata_dev.port_no << 4) | PM_PORT_SET;
+	asd_ddbsite_write_byte(asd_ha, ddb, PM_PORT_FLAGS, flags);
+	asd_ddbsite_write_word(asd_ha, ddb, SISTER_DDB, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, ddb, ATA_CMD_SCBPTR, 0xFFFF);
+	asd_init_sata(dev);
+	/* NOTE(review): return value of asd_init_sata() is ignored above. */
+	parent_ddb = (int) (unsigned long) dev->parent->lldd_dev;
+	asd_ddbsite_write_word(asd_ha, ddb, PARENT_DDB, parent_ddb);
+	pmtable_ddb = asd_ddbsite_read_word(asd_ha, parent_ddb, SISTER_DDB);
+	asd_ddbsite_write_word(asd_ha, pmtable_ddb, dev->sata_dev.port_no,ddb);
+	/* NOTE(review): asd_init_sata() may already have added a tag DDB. */
+	if (asd_ddbsite_read_byte(asd_ha, ddb, NUM_SATA_TAGS) > 0) {
+		i = asd_init_sata_tag_ddb(dev);
+		if (i < 0) {
+			asd_free_ddb(asd_ha, ddb);
+			return i;
+		}
+	}
+	return 0;
+}
+
+static int asd_init_initiator_ddb(struct domain_device *dev)
+{
+	return -ENODEV;	/* stub: no DDB setup is done for this case */
+}
+
+/**
+ * asd_init_sata_pm_ddb -- set up the DDBs of a SATA Port Multiplier
+ * @dev: pointer to domain device
+ *
+ * Creates the target port DDB first and then the SATA PM table DDB.
+ * If the table DDB cannot be set up, the target port DDB is freed again.
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int asd_init_sata_pm_ddb(struct domain_device *dev)
+{
+	int err = asd_init_target_ddb(dev);
+
+	if (err)
+		return err;
+
+	err = asd_init_sata_pm_table_ddb(dev);
+	if (err)
+		asd_free_ddb(dev->port->ha->lldd_ha,
+			     (int) (unsigned long) dev->lldd_dev);
+	return err;
+}
+
+/*
+ * asd_dev_found -- set up the DDB(s) for dev.
+ * Dispatches on dev->dev_type, then on dev->tproto, and returns the
+ * result of the chosen init routine.
+ */
+int asd_dev_found(struct domain_device *dev)
+{
+	if (dev->dev_type == SATA_PM)
+		return asd_init_sata_pm_ddb(dev);
+
+	if (dev->dev_type == SATA_PM_PORT)
+		return asd_init_sata_pm_port_ddb(dev);
+
+	/* Every other device type: target if tproto is set, else initiator. */
+	if (dev->tproto)
+		return asd_init_target_ddb(dev);
+
+	return asd_init_initiator_ddb(dev);
+}
+
+/* Free dev's DDB and its sister DDB (if one is linked); clear dev->lldd_dev. */
+void asd_dev_gone(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int own = (int) (unsigned long) dev->lldd_dev;
+	int sister = asd_ddbsite_read_word(asd_ha, own, SISTER_DDB);
+
+	/* A sister field of 0xFFFF is treated as "no sister DDB". */
+	if (sister != 0xFFFF)
+		asd_free_ddb(asd_ha, sister);
+	asd_free_ddb(asd_ha, own);
+	dev->lldd_dev = NULL;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_dump.c b/drivers/scsi/aic94xx/aic94xx_dump.c
new file mode 100644
index 0000000000000..e6ade5996d959
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_dump.c
@@ -0,0 +1,959 @@
+/*
+ * Aic94xx SAS/SATA driver dump interface.
+ *
+ * Copyright (C) 2004 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2004 David Chaw <david_chaw@adaptec.com>
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * 2005/07/14/LT  Complete overhaul of this file.  Update pages, register
+ * locations, names, etc.  Make use of macros.  Print more information.
+ * Print all cseq and lseq mip and mdp.
+ *
+ */
+
+#include "linux/pci.h"
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_reg_def.h"
+#include "aic94xx_sas.h"
+
+#include "aic94xx_dump.h"
+
+#ifdef ASD_DEBUG
+
+#define MD(x)	    (1U << (x))	/* mask bit for sequencer mode x */
+#define MODE_COMMON (1U << 31)	/* unsigned: 1 << 31 overflows a signed int */
+#define MODE_0_7    (0xFF)	/* same value as MD(0)|...|MD(7) */
+
+static const struct lseq_cio_regs {
+	char	*name;	/* register name as printed in the dump */
+	u32	offs;	/* offset added to the per-mode CIO base address */
+	u8	width;	/* access width in bits: 8, 16 or 32 */
+	u32	mode;	/* MODE_COMMON, MODE_0_7, or a mask of MD(n) bits */
+} LSEQmCIOREGS[] = {
+	{"LmMnSCBPTR",    0x20, 16, MD(0)|MD(1)|MD(2)|MD(3)|MD(4) },
+	{"LmMnDDBPTR",    0x22, 16, MD(0)|MD(1)|MD(2)|MD(3)|MD(4) },
+	{"LmREQMBX",      0x30, 32, MODE_COMMON },
+	{"LmRSPMBX",      0x34, 32, MODE_COMMON },
+	{"LmMnINT",       0x38, 32, MODE_0_7 },
+	{"LmMnINTEN",     0x3C, 32, MODE_0_7 },
+	{"LmXMTPRIMD",    0x40, 32, MODE_COMMON },
+	{"LmXMTPRIMCS",   0x44,  8, MODE_COMMON },
+	{"LmCONSTAT",     0x45,  8, MODE_COMMON },
+	{"LmMnDMAERRS",   0x46,  8, MD(0)|MD(1) },
+	{"LmMnSGDMAERRS", 0x47,  8, MD(0)|MD(1) },
+	{"LmMnEXPHDRP",   0x48,  8, MD(0) },
+	{"LmMnSASAALIGN", 0x48,  8, MD(1) },
+	{"LmMnMSKHDRP",   0x49,  8, MD(0) },
+	{"LmMnSTPALIGN",  0x49,  8, MD(1) },
+	{"LmMnRCVHDRP",   0x4A,  8, MD(0) },
+	{"LmMnXMTHDRP",   0x4A,  8, MD(1) },
+	{"LmALIGNMODE",   0x4B,  8, MD(1) },
+	{"LmMnEXPRCVCNT", 0x4C, 32, MD(0) },
+	{"LmMnXMTCNT",    0x4C, 32, MD(1) },
+	{"LmMnCURRTAG",   0x54, 16, MD(0) },
+	{"LmMnPREVTAG",   0x56, 16, MD(0) },
+	{"LmMnACKOFS",    0x58,  8, MD(1) },
+	{"LmMnXFRLVL",    0x59,  8, MD(0)|MD(1) },
+	{"LmMnSGDMACTL",  0x5A,  8, MD(0)|MD(1) },
+	{"LmMnSGDMASTAT", 0x5B,  8, MD(0)|MD(1) },
+	{"LmMnDDMACTL",   0x5C,  8, MD(0)|MD(1) },
+	{"LmMnDDMASTAT",  0x5D,  8, MD(0)|MD(1) },
+	{"LmMnDDMAMODE",  0x5E, 16, MD(0)|MD(1) },
+	{"LmMnPIPECTL",   0x61,  8, MD(0)|MD(1) },
+	{"LmMnACTSCB",    0x62, 16, MD(0)|MD(1) },
+	{"LmMnSGBHADR",   0x64,  8, MD(0)|MD(1) },
+	{"LmMnSGBADR",    0x65,  8, MD(0)|MD(1) },
+	{"LmMnSGDCNT",    0x66,  8, MD(0)|MD(1) },
+	{"LmMnSGDMADR",   0x68, 32, MD(0)|MD(1) },
+	{"LmMnSGDMADR",   0x6C, 32, MD(0)|MD(1) },
+	{"LmMnXFRCNT",    0x70, 32, MD(0)|MD(1) },
+	{"LmMnXMTCRC",    0x74, 32, MD(1) },
+	{"LmCURRTAG",     0x74, 16, MD(0) },
+	{"LmPREVTAG",     0x76, 16, MD(0) },
+	{"LmMnDPSEL",     0x7B,  8, MD(0)|MD(1) },
+	{"LmDPTHSTAT",    0x7C,  8, MODE_COMMON },
+	{"LmMnHOLDLVL",   0x7D,  8, MD(0) },
+	{"LmMnSATAFS",    0x7E,  8, MD(1) },
+	{"LmMnCMPLTSTAT", 0x7F,  8, MD(0)|MD(1) },
+	{"LmPRMSTAT0",    0x80, 32, MODE_COMMON },
+	{"LmPRMSTAT1",    0x84, 32, MODE_COMMON },
+	{"LmGPRMINT",     0x88,  8, MODE_COMMON },
+        {"LmMnCURRSCB",   0x8A, 16, MD(0) },
+	{"LmPRMICODE",    0x8C, 32, MODE_COMMON },
+	{"LmMnRCVCNT",    0x90, 16, MD(0) },
+	{"LmMnBUFSTAT",   0x92, 16, MD(0) },
+	{"LmMnXMTHDRSIZE",0x92,  8, MD(1) },
+	{"LmMnXMTSIZE",   0x93,  8, MD(1) },
+	{"LmMnTGTXFRCNT", 0x94, 32, MD(0) },
+	{"LmMnEXPROFS",   0x98, 32, MD(0) },
+	{"LmMnXMTROFS",   0x98, 32, MD(1) },
+	{"LmMnRCVROFS",   0x9C, 32, MD(0) },
+	{"LmCONCTL",      0xA0, 16, MODE_COMMON },
+	{"LmBITLTIMER",   0xA2, 16, MODE_COMMON },
+	{"LmWWNLOW",      0xA8, 32, MODE_COMMON },
+	{"LmWWNHIGH",     0xAC, 32, MODE_COMMON },
+	{"LmMnFRMERR",    0xB0, 32, MD(0) },
+	{"LmMnFRMERREN",  0xB4, 32, MD(0) },
+	{"LmAWTIMER",     0xB8, 16, MODE_COMMON },
+	{"LmAWTCTL",      0xBA,  8, MODE_COMMON },
+	{"LmMnHDRCMPS",   0xC0, 32, MD(0) },
+	{"LmMnXMTSTAT",   0xC4,  8, MD(1) },
+	{"LmHWTSTATEN",   0xC5,  8, MODE_COMMON },
+	{"LmMnRRDYRC",    0xC6,  8, MD(0) },
+        {"LmMnRRDYTC",    0xC6,  8, MD(1) },
+	{"LmHWTSTAT",     0xC7,  8, MODE_COMMON },
+	{"LmMnDATABUFADR",0xC8, 16, MD(0)|MD(1) },
+	{"LmDWSSTATUS",   0xCB,  8, MODE_COMMON },
+	{"LmMnACTSTAT",   0xCE, 16, MD(0)|MD(1) },
+	{"LmMnREQSCB",    0xD2, 16, MD(0)|MD(1) },
+	{"LmXXXPRIM",     0xD4, 32, MODE_COMMON },
+	{"LmRCVASTAT",    0xD9,  8, MODE_COMMON },
+	{"LmINTDIS1",     0xDA,  8, MODE_COMMON },
+	{"LmPSTORESEL",   0xDB,  8, MODE_COMMON },
+	{"LmPSTORE",      0xDC, 32, MODE_COMMON },
+	{"LmPRIMSTAT0EN", 0xE0, 32, MODE_COMMON },
+	{"LmPRIMSTAT1EN", 0xE4, 32, MODE_COMMON },
+	{"LmDONETCTL",    0xF2, 16, MODE_COMMON },
+	{NULL, 0, 0, 0 }	/* terminator: table walks stop at a NULL name */
+};
+/*
+static struct lseq_cio_regs LSEQmOOBREGS[] = {
+   {"OOB_BFLTR"        ,0x100, 8, MD(5)},
+   {"OOB_INIT_MIN"     ,0x102,16, MD(5)},
+   {"OOB_INIT_MAX"     ,0x104,16, MD(5)},
+   {"OOB_INIT_NEG"     ,0x106,16, MD(5)},
+   {"OOB_SAS_MIN"      ,0x108,16, MD(5)},
+   {"OOB_SAS_MAX"      ,0x10A,16, MD(5)},
+   {"OOB_SAS_NEG"      ,0x10C,16, MD(5)},
+   {"OOB_WAKE_MIN"     ,0x10E,16, MD(5)},
+   {"OOB_WAKE_MAX"     ,0x110,16, MD(5)},
+   {"OOB_WAKE_NEG"     ,0x112,16, MD(5)},
+   {"OOB_IDLE_MAX"     ,0x114,16, MD(5)},
+   {"OOB_BURST_MAX"    ,0x116,16, MD(5)},
+   {"OOB_XMIT_BURST"   ,0x118, 8, MD(5)},
+   {"OOB_SEND_PAIRS"   ,0x119, 8, MD(5)},
+   {"OOB_INIT_IDLE"    ,0x11A, 8, MD(5)},
+   {"OOB_INIT_NEGO"    ,0x11C, 8, MD(5)},
+   {"OOB_SAS_IDLE"     ,0x11E, 8, MD(5)},
+   {"OOB_SAS_NEGO"     ,0x120, 8, MD(5)},
+   {"OOB_WAKE_IDLE"    ,0x122, 8, MD(5)},
+   {"OOB_WAKE_NEGO"    ,0x124, 8, MD(5)},
+   {"OOB_DATA_KBITS"   ,0x126, 8, MD(5)},
+   {"OOB_BURST_DATA"   ,0x128,32, MD(5)},
+   {"OOB_ALIGN_0_DATA" ,0x12C,32, MD(5)},
+   {"OOB_ALIGN_1_DATA" ,0x130,32, MD(5)},
+   {"OOB_SYNC_DATA"    ,0x134,32, MD(5)},
+   {"OOB_D10_2_DATA"   ,0x138,32, MD(5)},
+   {"OOB_PHY_RST_CNT"  ,0x13C,32, MD(5)},
+   {"OOB_SIG_GEN"      ,0x140, 8, MD(5)},
+   {"OOB_XMIT"         ,0x141, 8, MD(5)},
+   {"FUNCTION_MAKS"    ,0x142, 8, MD(5)},
+   {"OOB_MODE"         ,0x143, 8, MD(5)},
+   {"CURRENT_STATUS"   ,0x144, 8, MD(5)},
+   {"SPEED_MASK"       ,0x145, 8, MD(5)},
+   {"PRIM_COUNT"       ,0x146, 8, MD(5)},
+   {"OOB_SIGNALS"      ,0x148, 8, MD(5)},
+   {"OOB_DATA_DET"     ,0x149, 8, MD(5)},
+   {"OOB_TIME_OUT"     ,0x14C, 8, MD(5)},
+   {"OOB_TIMER_ENABLE" ,0x14D, 8, MD(5)},
+   {"OOB_STATUS"       ,0x14E, 8, MD(5)},
+   {"HOT_PLUG_DELAY"   ,0x150, 8, MD(5)},
+   {"RCD_DELAY"        ,0x151, 8, MD(5)},
+   {"COMSAS_TIMER"     ,0x152, 8, MD(5)},
+   {"SNTT_DELAY"       ,0x153, 8, MD(5)},
+   {"SPD_CHNG_DELAY"   ,0x154, 8, MD(5)},
+   {"SNLT_DELAY"       ,0x155, 8, MD(5)},
+   {"SNWT_DELAY"       ,0x156, 8, MD(5)},
+   {"ALIGN_DELAY"      ,0x157, 8, MD(5)},
+   {"INT_ENABLE_0"     ,0x158, 8, MD(5)},
+   {"INT_ENABLE_1"     ,0x159, 8, MD(5)},
+   {"INT_ENABLE_2"     ,0x15A, 8, MD(5)},
+   {"INT_ENABLE_3"     ,0x15B, 8, MD(5)},
+   {"OOB_TEST_REG"     ,0x15C, 8, MD(5)},
+   {"PHY_CONTROL_0"    ,0x160, 8, MD(5)},
+   {"PHY_CONTROL_1"    ,0x161, 8, MD(5)},
+   {"PHY_CONTROL_2"    ,0x162, 8, MD(5)},
+   {"PHY_CONTROL_3"    ,0x163, 8, MD(5)},
+   {"PHY_OOB_CAL_TX"   ,0x164, 8, MD(5)},
+   {"PHY_OOB_CAL_RX"   ,0x165, 8, MD(5)},
+   {"OOB_PHY_CAL_TX"   ,0x166, 8, MD(5)},
+   {"OOB_PHY_CAL_RX"   ,0x167, 8, MD(5)},
+   {"PHY_CONTROL_4"    ,0x168, 8, MD(5)},
+   {"PHY_TEST"         ,0x169, 8, MD(5)},
+   {"PHY_PWR_CTL"      ,0x16A, 8, MD(5)},
+   {"PHY_PWR_DELAY"    ,0x16B, 8, MD(5)},
+   {"OOB_SM_CON"       ,0x16C, 8, MD(5)},
+   {"ADDR_TRAP_1"      ,0x16D, 8, MD(5)},
+   {"ADDR_NEXT_1"      ,0x16E, 8, MD(5)},
+   {"NEXT_ST_1"        ,0x16F, 8, MD(5)},
+   {"OOB_SM_STATE"     ,0x170, 8, MD(5)},
+   {"ADDR_TRAP_2"      ,0x171, 8, MD(5)},
+   {"ADDR_NEXT_2"      ,0x172, 8, MD(5)},
+   {"NEXT_ST_2"        ,0x173, 8, MD(5)},
+   {NULL, 0, 0, 0 }
+};
+*/
+#define STR_8BIT   "   %30s[0x%04x]:0x%02x\n"
+#define STR_16BIT  "   %30s[0x%04x]:0x%04x\n"
+#define STR_32BIT  "   %30s[0x%04x]:0x%08x\n"
+#define STR_64BIT  "   %30s[0x%04x]:0x%llx\n"
+
+#define PRINT_REG_8bit(_ha, _n, _r) asd_printk(STR_8BIT, #_n, _n,      \
+					     asd_read_reg_byte(_ha, _r))
+#define PRINT_REG_16bit(_ha, _n, _r) asd_printk(STR_16BIT, #_n, _n,     \
+					      asd_read_reg_word(_ha, _r))
+#define PRINT_REG_32bit(_ha, _n, _r) asd_printk(STR_32BIT, #_n, _n,      \
+					      asd_read_reg_dword(_ha, _r))
+
+#define PRINT_CREG_8bit(_ha, _n) asd_printk(STR_8BIT, #_n, _n,      \
+					     asd_read_reg_byte(_ha, C##_n))
+#define PRINT_CREG_16bit(_ha, _n) asd_printk(STR_16BIT, #_n, _n,     \
+					      asd_read_reg_word(_ha, C##_n))
+#define PRINT_CREG_32bit(_ha, _n) asd_printk(STR_32BIT, #_n, _n,      \
+					      asd_read_reg_dword(_ha, C##_n))
+
+#define MSTR_8BIT   "   Mode:%02d %30s[0x%04x]:0x%02x\n"
+#define MSTR_16BIT  "   Mode:%02d %30s[0x%04x]:0x%04x\n"
+#define MSTR_32BIT  "   Mode:%02d %30s[0x%04x]:0x%08x\n"
+
+#define PRINT_MREG_8bit(_ha, _m, _n, _r) asd_printk(MSTR_8BIT, _m, #_n, _n,   \
+					     asd_read_reg_byte(_ha, _r))
+#define PRINT_MREG_16bit(_ha, _m, _n, _r) asd_printk(MSTR_16BIT, _m, #_n, _n, \
+					      asd_read_reg_word(_ha, _r))
+#define PRINT_MREG_32bit(_ha, _m, _n, _r) asd_printk(MSTR_32BIT, _m, #_n, _n, \
+					      asd_read_reg_dword(_ha, _r))
+
+/* can also be used for MD when the register is mode aware already */
+#define PRINT_MIS_byte(_ha, _n) asd_printk(STR_8BIT, #_n,CSEQ_##_n-CMAPPEDSCR,\
+                                           asd_read_reg_byte(_ha, CSEQ_##_n))
+#define PRINT_MIS_word(_ha, _n) asd_printk(STR_16BIT,#_n,CSEQ_##_n-CMAPPEDSCR,\
+                                           asd_read_reg_word(_ha, CSEQ_##_n))
+#define PRINT_MIS_dword(_ha, _n)                      \
+        asd_printk(STR_32BIT,#_n,CSEQ_##_n-CMAPPEDSCR,\
+                   asd_read_reg_dword(_ha, CSEQ_##_n))
+#define PRINT_MIS_qword(_ha, _n)                                       \
+        asd_printk(STR_64BIT, #_n,CSEQ_##_n-CMAPPEDSCR,                \
+                   (unsigned long long)(((u64)asd_read_reg_dword(_ha, CSEQ_##_n))     \
+                 | (((u64)asd_read_reg_dword(_ha, (CSEQ_##_n)+4))<<32)))
+
+#define CMDP_REG(_n, _m) (_m*(CSEQ_PAGE_SIZE*2)+CSEQ_##_n)
+#define PRINT_CMDP_word(_ha, _n) \
+asd_printk("%20s 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", \
+	#_n, \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 0)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 1)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 2)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 3)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 4)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 5)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 6)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 7)))
+
+#define PRINT_CMDP_byte(_ha, _n) \
+asd_printk("%20s 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", \
+	#_n, \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 0)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 1)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 2)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 3)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 4)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 5)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 6)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 7)))
+
+static void asd_dump_cseq_state(struct asd_ha_struct *asd_ha)
+{
+	int mode;
+	/* Dump CSEQ registers and scratch memory, section by section. */
+	asd_printk("CSEQ STATE\n");
+
+	asd_printk("ARP2 REGISTERS\n");
+
+	PRINT_CREG_32bit(asd_ha, ARP2CTL);
+	PRINT_CREG_32bit(asd_ha, ARP2INT);
+	PRINT_CREG_32bit(asd_ha, ARP2INTEN);
+	PRINT_CREG_8bit(asd_ha, MODEPTR);
+	PRINT_CREG_8bit(asd_ha, ALTMODE);
+	PRINT_CREG_8bit(asd_ha, FLAG);
+	PRINT_CREG_8bit(asd_ha, ARP2INTCTL);
+	PRINT_CREG_16bit(asd_ha, STACK);
+	PRINT_CREG_16bit(asd_ha, PRGMCNT);
+	PRINT_CREG_16bit(asd_ha, ACCUM);
+	PRINT_CREG_16bit(asd_ha, SINDEX);
+	PRINT_CREG_16bit(asd_ha, DINDEX);
+	PRINT_CREG_8bit(asd_ha, SINDIR);
+	PRINT_CREG_8bit(asd_ha, DINDIR);
+	PRINT_CREG_8bit(asd_ha, JUMLDIR);
+	PRINT_CREG_8bit(asd_ha, ARP2HALTCODE);
+	PRINT_CREG_16bit(asd_ha, CURRADDR);
+	PRINT_CREG_16bit(asd_ha, LASTADDR);
+	PRINT_CREG_16bit(asd_ha, NXTLADDR);
+
+	asd_printk("IOP REGISTERS\n");
+
+	PRINT_REG_32bit(asd_ha, BISTCTL1, CBISTCTL);
+	PRINT_CREG_32bit(asd_ha, MAPPEDSCR);
+
+	asd_printk("CIO REGISTERS\n");
+	/* SCBPTR/DDBPTR cover modes 0-8 and 15; the other loops modes 0-7. */
+	for (mode = 0; mode < 9; mode++)
+		PRINT_MREG_16bit(asd_ha, mode, MnSCBPTR, CMnSCBPTR(mode));
+	PRINT_MREG_16bit(asd_ha, 15, MnSCBPTR, CMnSCBPTR(15));
+
+	for (mode = 0; mode < 9; mode++)
+		PRINT_MREG_16bit(asd_ha, mode, MnDDBPTR, CMnDDBPTR(mode));
+	PRINT_MREG_16bit(asd_ha, 15, MnDDBPTR, CMnDDBPTR(15));
+
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnREQMBX, CMnREQMBX(mode));
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnRSPMBX, CMnRSPMBX(mode));
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnINT, CMnINT(mode));
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnINTEN, CMnINTEN(mode));
+
+	PRINT_CREG_8bit(asd_ha, SCRATCHPAGE);
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_8bit(asd_ha, mode, MnSCRATCHPAGE,
+				CMnSCRATCHPAGE(mode));
+
+	PRINT_REG_32bit(asd_ha, CLINKCON, CLINKCON);
+	PRINT_REG_8bit(asd_ha, CCONMSK, CCONMSK);
+	PRINT_REG_8bit(asd_ha, CCONEXIST, CCONEXIST);
+	PRINT_REG_16bit(asd_ha, CCONMODE, CCONMODE);
+	PRINT_REG_32bit(asd_ha, CTIMERCALC, CTIMERCALC);
+	PRINT_REG_8bit(asd_ha, CINTDIS, CINTDIS);
+
+	asd_printk("SCRATCH MEMORY\n");
+
+	asd_printk("MIP 4 >>>>>\n");
+	PRINT_MIS_word(asd_ha, Q_EXE_HEAD);
+	PRINT_MIS_word(asd_ha, Q_EXE_TAIL);
+	PRINT_MIS_word(asd_ha, Q_DONE_HEAD);
+	PRINT_MIS_word(asd_ha, Q_DONE_TAIL);
+	PRINT_MIS_word(asd_ha, Q_SEND_HEAD);
+	PRINT_MIS_word(asd_ha, Q_SEND_TAIL);
+	PRINT_MIS_word(asd_ha, Q_DMA2CHIM_HEAD);
+	PRINT_MIS_word(asd_ha, Q_DMA2CHIM_TAIL);
+	PRINT_MIS_word(asd_ha, Q_COPY_HEAD);
+	PRINT_MIS_word(asd_ha, Q_COPY_TAIL);
+	PRINT_MIS_word(asd_ha, REG0);
+	PRINT_MIS_word(asd_ha, REG1);
+	PRINT_MIS_dword(asd_ha, REG2);
+	PRINT_MIS_byte(asd_ha, LINK_CTL_Q_MAP);
+	PRINT_MIS_byte(asd_ha, MAX_CSEQ_MODE);
+	PRINT_MIS_byte(asd_ha, FREE_LIST_HACK_COUNT);
+
+	asd_printk("MIP 5 >>>>\n");
+	PRINT_MIS_qword(asd_ha, EST_NEXUS_REQ_QUEUE);
+	PRINT_MIS_qword(asd_ha, EST_NEXUS_REQ_COUNT);
+	PRINT_MIS_word(asd_ha, Q_EST_NEXUS_HEAD);
+	PRINT_MIS_word(asd_ha, Q_EST_NEXUS_TAIL);
+	PRINT_MIS_word(asd_ha, NEED_EST_NEXUS_SCB);
+	PRINT_MIS_byte(asd_ha, EST_NEXUS_REQ_HEAD);
+	PRINT_MIS_byte(asd_ha, EST_NEXUS_REQ_TAIL);
+	PRINT_MIS_byte(asd_ha, EST_NEXUS_SCB_OFFSET);
+
+	asd_printk("MIP 6 >>>>\n");
+	PRINT_MIS_word(asd_ha, INT_ROUT_RET_ADDR0);
+	PRINT_MIS_word(asd_ha, INT_ROUT_RET_ADDR1);
+	PRINT_MIS_word(asd_ha, INT_ROUT_SCBPTR);
+	PRINT_MIS_byte(asd_ha, INT_ROUT_MODE);
+	PRINT_MIS_byte(asd_ha, ISR_SCRATCH_FLAGS);
+	PRINT_MIS_word(asd_ha, ISR_SAVE_SINDEX);
+	PRINT_MIS_word(asd_ha, ISR_SAVE_DINDEX);
+	PRINT_MIS_word(asd_ha, Q_MONIRTT_HEAD);
+	PRINT_MIS_word(asd_ha, Q_MONIRTT_TAIL);
+	PRINT_MIS_byte(asd_ha, FREE_SCB_MASK);
+	PRINT_MIS_word(asd_ha, BUILTIN_FREE_SCB_HEAD);
+	PRINT_MIS_word(asd_ha, BUILTIN_FREE_SCB_TAIL);
+	PRINT_MIS_word(asd_ha, EXTENDED_FREE_SCB_HEAD);
+	PRINT_MIS_word(asd_ha, EXTENDED_FREE_SCB_TAIL);
+
+	asd_printk("MIP 7 >>>>\n");
+	PRINT_MIS_qword(asd_ha, EMPTY_REQ_QUEUE);
+	PRINT_MIS_qword(asd_ha, EMPTY_REQ_COUNT);
+	PRINT_MIS_word(asd_ha, Q_EMPTY_HEAD);
+	PRINT_MIS_word(asd_ha, Q_EMPTY_TAIL);
+	PRINT_MIS_word(asd_ha, NEED_EMPTY_SCB);
+	PRINT_MIS_byte(asd_ha, EMPTY_REQ_HEAD);
+	PRINT_MIS_byte(asd_ha, EMPTY_REQ_TAIL);
+	PRINT_MIS_byte(asd_ha, EMPTY_SCB_OFFSET);
+	PRINT_MIS_word(asd_ha, PRIMITIVE_DATA);
+	PRINT_MIS_dword(asd_ha, TIMEOUT_CONST);
+	/* PRINT_CMDP_* print one row holding the value for each of modes 0-7. */
+	asd_printk("MDP 0 >>>>\n");
+	asd_printk("%-20s %6s %6s %6s %6s %6s %6s %6s %6s\n",
+		   "Mode: ", "0", "1", "2", "3", "4", "5", "6", "7");
+	PRINT_CMDP_word(asd_ha, LRM_SAVE_SINDEX);
+	PRINT_CMDP_word(asd_ha, LRM_SAVE_SCBPTR);
+	PRINT_CMDP_word(asd_ha, Q_LINK_HEAD);
+	PRINT_CMDP_word(asd_ha, Q_LINK_TAIL);
+	PRINT_CMDP_byte(asd_ha, LRM_SAVE_SCRPAGE);
+
+	asd_printk("MDP 0 Mode 8 >>>>\n");
+	PRINT_MIS_word(asd_ha, RET_ADDR);
+	PRINT_MIS_word(asd_ha, RET_SCBPTR);
+	PRINT_MIS_word(asd_ha, SAVE_SCBPTR);
+	PRINT_MIS_word(asd_ha, EMPTY_TRANS_CTX);
+	PRINT_MIS_word(asd_ha, RESP_LEN);
+	PRINT_MIS_word(asd_ha, TMF_SCBPTR);
+	PRINT_MIS_word(asd_ha, GLOBAL_PREV_SCB);
+	PRINT_MIS_word(asd_ha, GLOBAL_HEAD);
+	PRINT_MIS_word(asd_ha, CLEAR_LU_HEAD);
+	PRINT_MIS_byte(asd_ha, TMF_OPCODE);
+	PRINT_MIS_byte(asd_ha, SCRATCH_FLAGS);
+	PRINT_MIS_word(asd_ha, HSB_SITE);
+	PRINT_MIS_word(asd_ha, FIRST_INV_SCB_SITE);
+	PRINT_MIS_word(asd_ha, FIRST_INV_DDB_SITE);
+
+	asd_printk("MDP 1 Mode 8 >>>>\n");
+	PRINT_MIS_qword(asd_ha, LUN_TO_CLEAR);
+	PRINT_MIS_qword(asd_ha, LUN_TO_CHECK);
+
+	asd_printk("MDP 2 Mode 8 >>>>\n");
+	PRINT_MIS_qword(asd_ha, HQ_NEW_POINTER);
+	PRINT_MIS_qword(asd_ha, HQ_DONE_BASE);
+	PRINT_MIS_dword(asd_ha, HQ_DONE_POINTER);
+	PRINT_MIS_byte(asd_ha, HQ_DONE_PASS);
+}
+
+#define PRINT_LREG_8bit(_h, _lseq, _n) \
+        asd_printk(STR_8BIT, #_n, _n, asd_read_reg_byte(_h, Lm##_n(_lseq)))
+#define PRINT_LREG_16bit(_h, _lseq, _n) \
+        asd_printk(STR_16BIT, #_n, _n, asd_read_reg_word(_h, Lm##_n(_lseq)))
+#define PRINT_LREG_32bit(_h, _lseq, _n) \
+        asd_printk(STR_32BIT, #_n, _n, asd_read_reg_dword(_h, Lm##_n(_lseq)))
+
+#define PRINT_LMIP_byte(_h, _lseq, _n)                              \
+	asd_printk(STR_8BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		   asd_read_reg_byte(_h, LmSEQ_##_n(_lseq)))
+#define PRINT_LMIP_word(_h, _lseq, _n)                              \
+	asd_printk(STR_16BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		   asd_read_reg_word(_h, LmSEQ_##_n(_lseq)))
+#define PRINT_LMIP_dword(_h, _lseq, _n)                             \
+	asd_printk(STR_32BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		   asd_read_reg_dword(_h, LmSEQ_##_n(_lseq)))
+#define PRINT_LMIP_qword(_h, _lseq, _n)                                \
+	asd_printk(STR_64BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		 (unsigned long long)(((unsigned long long) \
+		 asd_read_reg_dword(_h, LmSEQ_##_n(_lseq))) \
+	          | (((unsigned long long) \
+		 asd_read_reg_dword(_h, LmSEQ_##_n(_lseq)+4))<<32)))
+
+/*
+ * Print entry i of LSEQmCIOREGS: its name, its table offset and the value
+ * read at lseq_cio_addr + offset.  The read is 8, 16 or 32 bits wide as
+ * given by the entry's width; any other width prints nothing.
+ */
+static void asd_print_lseq_cio_reg(struct asd_ha_struct *asd_ha,
+				   u32 lseq_cio_addr, int i)
+{
+	const struct lseq_cio_regs *reg = &LSEQmCIOREGS[i];
+	u32 addr = lseq_cio_addr + reg->offs;
+
+	if (reg->width == 8) {
+		asd_printk("%20s[0x%x]: 0x%02x\n", reg->name, reg->offs,
+			   asd_read_reg_byte(asd_ha, addr));
+		return;
+	}
+	if (reg->width == 16) {
+		asd_printk("%20s[0x%x]: 0x%04x\n", reg->name, reg->offs,
+			   asd_read_reg_word(asd_ha, addr));
+		return;
+	}
+	if (reg->width == 32) {
+		asd_printk("%20s[0x%x]: 0x%08x\n", reg->name, reg->offs,
+			   asd_read_reg_dword(asd_ha, addr));
+	}
+}
+
+static void asd_dump_lseq_state(struct asd_ha_struct *asd_ha, int lseq)
+{
+	u32 moffs;	/* offset added to the MDP 0 register addresses below */
+	int mode;
+	/* Dump LSEQ @lseq: ARP2, IOP, CIO registers, then scratch memory. */
+	asd_printk("LSEQ %d STATE\n", lseq);
+
+	asd_printk("LSEQ%d: ARP2 REGISTERS\n", lseq);
+	PRINT_LREG_32bit(asd_ha, lseq, ARP2CTL);
+	PRINT_LREG_32bit(asd_ha, lseq, ARP2INT);
+	PRINT_LREG_32bit(asd_ha, lseq, ARP2INTEN);
+	PRINT_LREG_8bit(asd_ha, lseq, MODEPTR);
+	PRINT_LREG_8bit(asd_ha, lseq, ALTMODE);
+	PRINT_LREG_8bit(asd_ha, lseq, FLAG);
+	PRINT_LREG_8bit(asd_ha, lseq, ARP2INTCTL);
+	PRINT_LREG_16bit(asd_ha, lseq, STACK);
+	PRINT_LREG_16bit(asd_ha, lseq, PRGMCNT);
+	PRINT_LREG_16bit(asd_ha, lseq, ACCUM);
+	PRINT_LREG_16bit(asd_ha, lseq, SINDEX);
+	PRINT_LREG_16bit(asd_ha, lseq, DINDEX);
+	PRINT_LREG_8bit(asd_ha, lseq, SINDIR);
+	PRINT_LREG_8bit(asd_ha, lseq, DINDIR);
+	PRINT_LREG_8bit(asd_ha, lseq, JUMLDIR);
+	PRINT_LREG_8bit(asd_ha, lseq, ARP2HALTCODE);
+	PRINT_LREG_16bit(asd_ha, lseq, CURRADDR);
+	PRINT_LREG_16bit(asd_ha, lseq, LASTADDR);
+	PRINT_LREG_16bit(asd_ha, lseq, NXTLADDR);
+
+	asd_printk("LSEQ%d: IOP REGISTERS\n", lseq);
+
+	PRINT_LREG_32bit(asd_ha, lseq, MODECTL);
+	PRINT_LREG_32bit(asd_ha, lseq, DBGMODE);
+	PRINT_LREG_32bit(asd_ha, lseq, CONTROL);
+	PRINT_REG_32bit(asd_ha, BISTCTL0, LmBISTCTL0(lseq));
+	PRINT_REG_32bit(asd_ha, BISTCTL1, LmBISTCTL1(lseq));
+
+	asd_printk("LSEQ%d: CIO REGISTERS\n", lseq);
+	asd_printk("Mode common:\n");
+	/* MODE_COMMON table entries, read at each of the 8 mode bases. */
+	for (mode = 0; mode < 8; mode++) {
+		u32 lseq_cio_addr = LmSEQ_PHY_BASE(mode, lseq);
+		int i;
+
+		for (i = 0; LSEQmCIOREGS[i].name; i++)
+			if (LSEQmCIOREGS[i].mode == MODE_COMMON)
+				asd_print_lseq_cio_reg(asd_ha,lseq_cio_addr,i);
+	}
+	/* Entries whose mode mask contains bit (1 << mode), per mode. */
+	asd_printk("Mode unique:\n");
+	for (mode = 0; mode < 8; mode++) {
+		u32 lseq_cio_addr = LmSEQ_PHY_BASE(mode, lseq);
+		int i;
+
+		asd_printk("Mode %d\n", mode);
+		for  (i = 0; LSEQmCIOREGS[i].name; i++) {
+			if (!(LSEQmCIOREGS[i].mode & (1 << mode)))
+				continue;
+			asd_print_lseq_cio_reg(asd_ha, lseq_cio_addr, i);
+		}
+	}
+
+	asd_printk("SCRATCH MEMORY\n");
+
+	asd_printk("LSEQ%d MIP 0 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, Q_TGTXFR_HEAD);
+	PRINT_LMIP_word(asd_ha, lseq, Q_TGTXFR_TAIL);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_NUMBER);
+	PRINT_LMIP_byte(asd_ha, lseq, SCRATCH_FLAGS);
+	PRINT_LMIP_qword(asd_ha, lseq, CONNECTION_STATE);
+	PRINT_LMIP_word(asd_ha, lseq, CONCTL);
+	PRINT_LMIP_byte(asd_ha, lseq, CONSTAT);
+	PRINT_LMIP_byte(asd_ha, lseq, CONNECTION_MODES);
+	PRINT_LMIP_word(asd_ha, lseq, REG1_ISR);
+	PRINT_LMIP_word(asd_ha, lseq, REG2_ISR);
+	PRINT_LMIP_word(asd_ha, lseq, REG3_ISR);
+	PRINT_LMIP_qword(asd_ha, lseq,REG0_ISR);
+
+	asd_printk("LSEQ%d MIP 1 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR0);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR1);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR2);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR3);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE0);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE1);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE2);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE3);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_HEAD);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_TAIL);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_BUF_AVAIL);
+	PRINT_LMIP_dword(asd_ha, lseq, TIMEOUT_CONST);
+	PRINT_LMIP_word(asd_ha, lseq, ISR_SAVE_SINDEX);
+	PRINT_LMIP_word(asd_ha, lseq, ISR_SAVE_DINDEX);
+
+	asd_printk("LSEQ%d MIP 2 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR0);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR1);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR2);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR3);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD0);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD1);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD2);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD3);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_HEAD);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_TAIL);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_BUFS_AVAIL);
+
+	asd_printk("LSEQ%d MIP 3 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, DEV_PRES_TMR_TOUT_CONST);
+	PRINT_LMIP_dword(asd_ha, lseq, SATA_INTERLOCK_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, SRST_ASSERT_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, RCV_FIS_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, ONE_MILLISEC_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, TEN_MS_COMINIT_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, SMP_RCV_TIMEOUT);
+	/* Modes 0-2: same registers, moffs = mode * LSEQ_MODE_SCRATCH_SIZE. */
+	for (mode = 0; mode < 3; mode++) {
+		asd_printk("LSEQ%d MDP 0 MODE %d >>>>\n", lseq, mode);
+		moffs = mode * LSEQ_MODE_SCRATCH_SIZE;
+
+		asd_printk(STR_16BIT, "RET_ADDR", 0,
+			   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "REG0_MODE", 2,
+			   asd_read_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "MODE_FLAGS", 4,
+			   asd_read_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "RET_ADDR2", 0x6,
+			   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "RET_ADDR1", 0x8,
+			   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq)
+					     + moffs));
+		asd_printk(STR_8BIT, "OPCODE_TO_CSEQ", 0xB,
+			   asd_read_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "DATA_TO_CSEQ", 0xC,
+			   asd_read_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq)
+					     + moffs));
+	}
+	/* Mode 5: same registers again, at LSEQ_MODE5_PAGE0_OFFSET. */
+	asd_printk("LSEQ%d MDP 0 MODE 5 >>>>\n", lseq);
+	moffs = LSEQ_MODE5_PAGE0_OFFSET;
+	asd_printk(STR_16BIT, "RET_ADDR", 0,
+		   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq) + moffs));
+	asd_printk(STR_16BIT, "REG0_MODE", 2,
+		   asd_read_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq) + moffs));
+	asd_printk(STR_16BIT, "MODE_FLAGS", 4,
+		   asd_read_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq) + moffs));
+	asd_printk(STR_16BIT, "RET_ADDR2", 0x6,
+		   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq) + moffs));
+	asd_printk(STR_16BIT, "RET_ADDR1", 0x8,
+		   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq) + moffs));
+	asd_printk(STR_8BIT, "OPCODE_TO_CSEQ", 0xB,
+	   asd_read_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq) + moffs));
+	asd_printk(STR_16BIT, "DATA_TO_CSEQ", 0xC,
+	   asd_read_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq) + moffs));
+
+	asd_printk("LSEQ%d MDP 0 MODE 0 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, FIRST_INV_DDB_SITE);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_TRANS_CTX);
+	PRINT_LMIP_word(asd_ha, lseq, RESP_LEN);
+	PRINT_LMIP_word(asd_ha, lseq, FIRST_INV_SCB_SITE);
+	PRINT_LMIP_dword(asd_ha, lseq, INTEN_SAVE);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_FRM_LEN);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_PROTOCOL);
+	PRINT_LMIP_byte(asd_ha, lseq, RESP_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, LAST_LOADED_SGE);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVE_SCBPTR);
+
+	asd_printk("LSEQ%d MDP 0 MODE 1 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, Q_XMIT_HEAD);
+	PRINT_LMIP_word(asd_ha, lseq, M1_EMPTY_TRANS_CTX);
+	PRINT_LMIP_word(asd_ha, lseq, INI_CONN_TAG);
+	PRINT_LMIP_byte(asd_ha, lseq, FAILED_OPEN_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, XMIT_REQUEST_TYPE);
+	PRINT_LMIP_byte(asd_ha, lseq, M1_RESP_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, M1_LAST_LOADED_SGE);
+	PRINT_LMIP_word(asd_ha, lseq, M1_SAVE_SCBPTR);
+
+	asd_printk("LSEQ%d MDP 0 MODE 2 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, PORT_COUNTER);
+	PRINT_LMIP_word(asd_ha, lseq, PM_TABLE_PTR);
+	PRINT_LMIP_word(asd_ha, lseq, SATA_INTERLOCK_TMR_SAVE);
+	PRINT_LMIP_word(asd_ha, lseq, IP_BITL);
+	PRINT_LMIP_word(asd_ha, lseq, COPY_SMP_CONN_TAG);
+	PRINT_LMIP_byte(asd_ha, lseq, P0M2_OFFS1AH);
+
+	asd_printk("LSEQ%d MDP 0 MODE 4/5 >>>>\n", lseq);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_MODE);
+	PRINT_LMIP_word(asd_ha, lseq, Q_LINK_HEAD);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_ERR);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_SIGNALS);
+	PRINT_LMIP_byte(asd_ha, lseq, SAS_RESET_MODE);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RESET_RETRY_COUNT);
+	PRINT_LMIP_byte(asd_ha, lseq, NUM_LINK_RESET_RETRIES);
+	PRINT_LMIP_word(asd_ha, lseq, OOB_INT_ENABLES);
+	PRINT_LMIP_word(asd_ha, lseq, NOTIFY_TIMER_TIMEOUT);
+	PRINT_LMIP_word(asd_ha, lseq, NOTIFY_TIMER_DOWN_COUNT);
+
+	asd_printk("LSEQ%d MDP 1 MODE 0 >>>>\n", lseq);
+	PRINT_LMIP_qword(asd_ha, lseq, SG_LIST_PTR_ADDR0);
+	PRINT_LMIP_qword(asd_ha, lseq, SG_LIST_PTR_ADDR1);
+
+	asd_printk("LSEQ%d MDP 1 MODE 1 >>>>\n", lseq);
+	PRINT_LMIP_qword(asd_ha, lseq, M1_SG_LIST_PTR_ADDR0);
+	PRINT_LMIP_qword(asd_ha, lseq, M1_SG_LIST_PTR_ADDR1);
+
+	asd_printk("LSEQ%d MDP 1 MODE 2 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, INVALID_DWORD_COUNT);
+	PRINT_LMIP_dword(asd_ha, lseq, DISPARITY_ERROR_COUNT);
+	PRINT_LMIP_dword(asd_ha, lseq, LOSS_OF_SYNC_COUNT);
+
+	asd_printk("LSEQ%d MDP 1 MODE 4/5 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, FRAME_TYPE_MASK);
+	PRINT_LMIP_dword(asd_ha, lseq, HASHED_SRC_ADDR_MASK_PRINT);
+	PRINT_LMIP_byte(asd_ha, lseq, NUM_FILL_BYTES_MASK);
+	PRINT_LMIP_word(asd_ha, lseq, TAG_MASK);
+	PRINT_LMIP_word(asd_ha, lseq, TARGET_PORT_XFER_TAG);
+	PRINT_LMIP_dword(asd_ha, lseq, DATA_OFFSET);
+
+	asd_printk("LSEQ%d MDP 2 MODE 0 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, SMP_RCV_TIMER_TERM_TS);
+	PRINT_LMIP_byte(asd_ha, lseq, DEVICE_BITS);
+	PRINT_LMIP_word(asd_ha, lseq, SDB_DDB);
+	PRINT_LMIP_word(asd_ha, lseq, SDB_NUM_TAGS);
+	PRINT_LMIP_word(asd_ha, lseq, SDB_CURR_TAG);
+
+	asd_printk("LSEQ%d MDP 2 MODE 1 >>>>\n", lseq);
+	PRINT_LMIP_qword(asd_ha, lseq, TX_ID_ADDR_FRAME);
+	PRINT_LMIP_dword(asd_ha, lseq, OPEN_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, SRST_AS_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, LAST_LOADED_SG_EL);
+
+	asd_printk("LSEQ%d MDP 2 MODE 2 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, CLOSE_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, BREAK_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, DWS_RESET_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, SATA_INTERLOCK_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, MCTL_TIMER_TERM_TS);
+
+	asd_printk("LSEQ%d MDP 2 MODE 4/5 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, COMINIT_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, RCV_ID_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, RCV_FIS_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, DEV_PRES_TIMER_TERM_TS);
+}
+
+/**
+ * asd_dump_target_ddb -- dump a CSEQ DDB site
+ * @asd_ha: pointer to host adapter structure
+ * @site_no: site number of interest; nothing is printed if >= max_ddbs
+ */
+void asd_dump_target_ddb(struct asd_ha_struct *asd_ha, u16 site_no)
+{
+	if (site_no >= asd_ha->hw_prof.max_ddbs)
+		return;
+	/* Field readers; offsets come from the target port DDB structs. */
+#define DDB_FIELDB(__name)                                        \
+	asd_ddbsite_read_byte(asd_ha, site_no,                    \
+			      offsetof(struct asd_ddb_ssp_smp_target_port, __name))
+#define DDB2_FIELDB(__name)                                       \
+	asd_ddbsite_read_byte(asd_ha, site_no,                    \
+			      offsetof(struct asd_ddb_stp_sata_target_port, __name))
+#define DDB_FIELDW(__name)                                        \
+	asd_ddbsite_read_word(asd_ha, site_no,                    \
+			      offsetof(struct asd_ddb_ssp_smp_target_port, __name))
+
+#define DDB_FIELDD(__name)                                         \
+	asd_ddbsite_read_dword(asd_ha, site_no,                    \
+			       offsetof(struct asd_ddb_ssp_smp_target_port, __name))
+
+	asd_printk("DDB: 0x%02x\n", site_no);
+	asd_printk("conn_type: 0x%02x\n", DDB_FIELDB(conn_type));
+	asd_printk("conn_rate: 0x%02x\n", DDB_FIELDB(conn_rate));
+	asd_printk("init_conn_tag: 0x%04x\n", be16_to_cpu(DDB_FIELDW(init_conn_tag)));
+	asd_printk("send_queue_head: 0x%04x\n", be16_to_cpu(DDB_FIELDW(send_queue_head)));
+	asd_printk("sq_suspended: 0x%02x\n", DDB_FIELDB(sq_suspended));
+	asd_printk("DDB Type: 0x%02x\n", DDB_FIELDB(ddb_type));
+	asd_printk("AWT Default: 0x%04x\n", DDB_FIELDW(awt_def));
+	asd_printk("compat_features: 0x%02x\n", DDB_FIELDB(compat_features));
+	asd_printk("Pathway Blocked Count: 0x%02x\n",
+		   DDB_FIELDB(pathway_blocked_count));
+	asd_printk("arb_wait_time: 0x%04x\n", DDB_FIELDW(arb_wait_time));
+	asd_printk("more_compat_features: 0x%08x\n",
+		   DDB_FIELDD(more_compat_features));
+	asd_printk("Conn Mask: 0x%02x\n", DDB_FIELDB(conn_mask));
+	asd_printk("flags: 0x%02x\n", DDB_FIELDB(flags));
+	asd_printk("flags2: 0x%02x\n", DDB2_FIELDB(flags2));
+	asd_printk("ExecQ Tail: 0x%04x\n",DDB_FIELDW(exec_queue_tail));
+	asd_printk("SendQ Tail: 0x%04x\n",DDB_FIELDW(send_queue_tail));
+	asd_printk("Active Task Count: 0x%04x\n",
+		   DDB_FIELDW(active_task_count));
+	asd_printk("ITNL Reason: 0x%02x\n", DDB_FIELDB(itnl_reason));
+	asd_printk("ITNL Timeout Const: 0x%04x\n", DDB_FIELDW(itnl_timeout));
+	asd_printk("ITNL timestamp: 0x%08x\n", DDB_FIELDD(itnl_timestamp));
+}
+
+void asd_dump_ddb_0(struct asd_ha_struct *asd_ha)
+{
+#define DDB0_FIELDB(__name)                                  \
+	asd_ddbsite_read_byte(asd_ha, 0,                     \
+			      offsetof(struct asd_ddb_seq_shared, __name))
+#define DDB0_FIELDW(__name)                                  \
+	asd_ddbsite_read_word(asd_ha, 0,                     \
+			      offsetof(struct asd_ddb_seq_shared, __name))
+
+#define DDB0_FIELDD(__name)                                  \
+	asd_ddbsite_read_dword(asd_ha,0 ,                    \
+			       offsetof(struct asd_ddb_seq_shared, __name))
+/* Reads the byte _o bytes past the start of member __name. */
+#define DDB0_FIELDA(__name, _o)                              \
+	asd_ddbsite_read_byte(asd_ha, 0,                     \
+			      offsetof(struct asd_ddb_seq_shared, __name)+_o)
+	/* Everything below is read from DDB site 0 (asd_ddb_seq_shared). */
+
+	asd_printk("DDB: 0\n");
+	asd_printk("q_free_ddb_head:%04x\n", DDB0_FIELDW(q_free_ddb_head));
+	asd_printk("q_free_ddb_tail:%04x\n", DDB0_FIELDW(q_free_ddb_tail));
+	asd_printk("q_free_ddb_cnt:%04x\n",  DDB0_FIELDW(q_free_ddb_cnt));
+	asd_printk("q_used_ddb_head:%04x\n", DDB0_FIELDW(q_used_ddb_head));
+	asd_printk("q_used_ddb_tail:%04x\n", DDB0_FIELDW(q_used_ddb_tail));
+	asd_printk("shared_mem_lock:%04x\n", DDB0_FIELDW(shared_mem_lock));
+	asd_printk("smp_conn_tag:%04x\n",    DDB0_FIELDW(smp_conn_tag));
+	asd_printk("est_nexus_buf_cnt:%04x\n", DDB0_FIELDW(est_nexus_buf_cnt));
+	asd_printk("est_nexus_buf_thresh:%04x\n",
+		   DDB0_FIELDW(est_nexus_buf_thresh));
+	asd_printk("conn_not_active:%02x\n", DDB0_FIELDB(conn_not_active));
+	asd_printk("phy_is_up:%02x\n",       DDB0_FIELDB(phy_is_up));
+	asd_printk("port_map_by_links:%02x %02x %02x %02x "
+		   "%02x %02x %02x %02x\n",
+		   DDB0_FIELDA(port_map_by_links, 0),
+		   DDB0_FIELDA(port_map_by_links, 1),
+		   DDB0_FIELDA(port_map_by_links, 2),
+		   DDB0_FIELDA(port_map_by_links, 3),
+		   DDB0_FIELDA(port_map_by_links, 4),
+		   DDB0_FIELDA(port_map_by_links, 5),
+		   DDB0_FIELDA(port_map_by_links, 6),
+		   DDB0_FIELDA(port_map_by_links, 7));
+}
+
+static void asd_dump_scb_site(struct asd_ha_struct *asd_ha, u16 site_no)
+{
+	/* Print selected initiate_ssp_task fields of SCB site @site_no. */
+#define SCB_FIELDB(__name)                                                 \
+	asd_scbsite_read_byte(asd_ha, site_no, sizeof(struct scb_header)   \
+			      + offsetof(struct initiate_ssp_task, __name))
+#define SCB_FIELDW(__name)                                                 \
+	asd_scbsite_read_word(asd_ha, site_no, sizeof(struct scb_header)   \
+			      + offsetof(struct initiate_ssp_task, __name))
+#define SCB_FIELDD(__name)                                                 \
+	asd_scbsite_read_dword(asd_ha, site_no, sizeof(struct scb_header)  \
+			       + offsetof(struct initiate_ssp_task, __name))
+	/* Reader width must match the field: data_offs is 32 bits wide. */
+	asd_printk("Total Xfer Len: 0x%08x.\n", SCB_FIELDD(total_xfer_len));
+	asd_printk("Frame Type: 0x%02x.\n", SCB_FIELDB(ssp_frame.frame_type));
+	asd_printk("Tag: 0x%04x.\n", SCB_FIELDW(ssp_frame.tag));
+	asd_printk("Target Port Xfer Tag: 0x%04x.\n",
+		   SCB_FIELDW(ssp_frame.tptt));
+	asd_printk("Data Offset: 0x%08x.\n", SCB_FIELDD(ssp_frame.data_offs));
+	asd_printk("Retry Count: 0x%02x.\n", SCB_FIELDB(retry_count));
+}
+
+/**
+ * asd_dump_scb_sites -- dump currently used CSEQ SCB sites
+ * @asd_ha: pointer to host adapter struct
+ */
+void asd_dump_scb_sites(struct asd_ha_struct *asd_ha)
+{
+	u16	site_no;
+	/* NOTE(review): u16 index wraps if max_scbs > 0xFFFF -- confirm range. */
+	for (site_no = 0; site_no < asd_ha->hw_prof.max_scbs; site_no++) {
+		u8 opcode;
+
+		if (!SCB_SITE_VALID(site_no))
+			continue;
+
+		/* We are only interested in SCB sites currently used;
+		 * a header opcode of 0xFF is treated as "not in use". */
+		opcode = asd_scbsite_read_byte(asd_ha, site_no,
+					       offsetof(struct scb_header,
+							opcode));
+		if (opcode == 0xFF)
+			continue;
+
+		asd_printk("\nSCB: 0x%x\n", site_no);
+		asd_dump_scb_site(asd_ha, site_no);
+	}
+}
+
+/**
+ * asd_dump_seq_state -- dump CSEQ and LSEQ states
+ * @asd_ha: pointer to host adapter structure
+ * @lseq_mask: mask of LSEQs of interest; the CSEQ is dumped regardless
+ */
+void asd_dump_seq_state(struct asd_ha_struct *asd_ha, u8 lseq_mask)
+{
+	int lseq;
+
+	asd_dump_cseq_state(asd_ha);
+
+	if (lseq_mask != 0)
+		for_each_sequencer(lseq_mask, lseq_mask, lseq)
+			asd_dump_lseq_state(asd_ha, lseq);
+}
+
+void asd_dump_frame_rcvd(struct asd_phy *phy,
+			 struct done_list_struct *dl)
+{
+	unsigned long irq_flags;
+	int off;
+
+	/* Only STP gets its own banner; everything else is shown as SAS. */
+	if (((dl->status_block[1] & 0x70) >> 3) == SAS_PROTO_STP) {
+		ASD_DPRINTK("STP proto device-to-host FIS:\n");
+	} else {
+		ASD_DPRINTK("SAS proto IDENTIFY:\n");
+	}
+
+	/* Hex dump, four bytes per line, under the frame_rcvd lock. */
+	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, irq_flags);
+	for (off = 0; off < phy->sas_phy.frame_rcvd_size; off += 4) {
+		ASD_DPRINTK("%02x: %02x %02x %02x %02x\n",
+			    off,
+			    phy->frame_rcvd[off + 0],
+			    phy->frame_rcvd[off + 1],
+			    phy->frame_rcvd[off + 2],
+			    phy->frame_rcvd[off + 3]);
+	}
+	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, irq_flags);
+}
+
+static inline void asd_dump_scb(struct asd_ascb *ascb, int ind)
+{
+	/* @ind only labels the line; next_scb and index are little-endian. */
+	asd_printk("scb%d: vaddr: 0x%p, dma_handle: 0x%llx, next: 0x%llx, "
+		   "index:%d, opcode:0x%02x\n",
+		   ind, ascb->dma_scb.vaddr,
+		   (unsigned long long) ascb->dma_scb.dma_handle,
+		   (unsigned long long)le64_to_cpu(ascb->scb->header.next_scb),
+		   le16_to_cpu(ascb->scb->header.index),
+		   ascb->scb->header.opcode);
+}
+
+void asd_dump_scb_list(struct asd_ascb *ascb, int num)
+{
+	struct asd_ascb *cur;
+	int idx = 0;
+
+	asd_printk("dumping %d scbs:\n", num);
+
+	/* The SCB passed in is always dumped, whatever @num says. */
+	asd_dump_scb(ascb, idx++);
+	if (--num <= 0 || list_empty(&ascb->list))
+		return;
+
+	/* @ascb's own list node acts as the head: walk the SCBs chained
+	 * after it until @num have been printed in total.
+	 */
+	list_for_each_entry(cur, &ascb->list, list) {
+		asd_dump_scb(cur, idx++);
+		if (--num <= 0)
+			break;
+	}
+}
+
+#endif /* ASD_DEBUG */
diff --git a/drivers/scsi/aic94xx/aic94xx_dump.h b/drivers/scsi/aic94xx/aic94xx_dump.h
new file mode 100644
index 0000000000000..0c388e7da6bbc
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_dump.h
@@ -0,0 +1,52 @@
+/*
+ * Aic94xx SAS/SATA driver dump header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_DUMP_H_
+#define _AIC94XX_DUMP_H_
+
+#ifdef ASD_DEBUG
+/* Debug builds: the dump routines are real, out-of-line functions. */
+void asd_dump_ddb_0(struct asd_ha_struct *asd_ha);
+void asd_dump_target_ddb(struct asd_ha_struct *asd_ha, u16 site_no);
+void asd_dump_scb_sites(struct asd_ha_struct *asd_ha);
+void asd_dump_seq_state(struct asd_ha_struct *asd_ha, u8 lseq_mask);
+void asd_dump_frame_rcvd(struct asd_phy *phy,
+			 struct done_list_struct *dl);
+void asd_dump_scb_list(struct asd_ascb *ascb, int num);
+#else /* ASD_DEBUG */
+/* Otherwise every dump call resolves to an empty inline stub. */
+static inline void asd_dump_ddb_0(struct asd_ha_struct *asd_ha) { }
+static inline void asd_dump_target_ddb(struct asd_ha_struct *asd_ha,
+				     u16 site_no) { }
+static inline void asd_dump_scb_sites(struct asd_ha_struct *asd_ha) { }
+static inline void asd_dump_seq_state(struct asd_ha_struct *asd_ha,
+				      u8 lseq_mask) { }
+static inline void asd_dump_frame_rcvd(struct asd_phy *phy,
+				       struct done_list_struct *dl) { }
+static inline void asd_dump_scb_list(struct asd_ascb *ascb, int num) { }
+#endif /* ASD_DEBUG */
+
+#endif /* _AIC94XX_DUMP_H_ */
diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c
new file mode 100644
index 0000000000000..075cea85b56b6
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.c
@@ -0,0 +1,1376 @@
+/*
+ * Aic94xx SAS/SATA driver hardware interface.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_seq.h"
+#include "aic94xx_dump.h"
+
+u32 MBAR0_SWB_SIZE;	/* set by asd_init_sw(): io_handle[0].len - 0x80 */
+
+/* ---------- Initialization ---------- */
+
+static void asd_get_user_sas_addr(struct asd_ha_struct *asd_ha)
+{
+	extern char sas_addr_str[];
+	/* A user-specified WWN (non-empty sas_addr_str) overrides other
+	 * settings; else a set adapter address is stringified into it. */
+	if (sas_addr_str[0] != '\0')
+		asd_destringify_sas_addr(asd_ha->hw_prof.sas_addr,
+					 sas_addr_str);
+	else if (asd_ha->hw_prof.sas_addr[0] != 0)
+		asd_stringify_sas_addr(sas_addr_str, asd_ha->hw_prof.sas_addr);
+}
+
+static void asd_propagate_sas_addr(struct asd_ha_struct *asd_ha)
+{
+	int i;
+
+	for (i = 0; i < ASD_MAX_PHYS; i++) {
+		if (asd_ha->hw_prof.phy_desc[i].sas_addr[0] == 0)
+			continue;
+		/* Phys whose address has a zero first byte were skipped
+		 * above; every other phy is given the adapter's address. */
+		ASD_DPRINTK("setting phy%d addr to %llx\n", i,
+			    SAS_ADDR(asd_ha->hw_prof.sas_addr));
+		memcpy(asd_ha->hw_prof.phy_desc[i].sas_addr,
+		       asd_ha->hw_prof.sas_addr, SAS_ADDR_SIZE);
+	}
+}
+
+/* ---------- PHY initialization ---------- */
+
+static void asd_init_phy_identify(struct asd_phy *phy)
+{
+	phy->identify_frame = phy->id_frm_tok->vaddr;
+	/* Build the frame in place in the id_frm_tok buffer, from zeroes. */
+	memset(phy->identify_frame, 0, sizeof(*phy->identify_frame));
+
+	phy->identify_frame->dev_type = SAS_END_DEV;
+	if (phy->sas_phy.role & PHY_ROLE_INITIATOR)
+		phy->identify_frame->initiator_bits = phy->sas_phy.iproto;
+	if (phy->sas_phy.role & PHY_ROLE_TARGET)
+		phy->identify_frame->target_bits = phy->sas_phy.tproto;
+	memcpy(phy->identify_frame->sas_addr, phy->phy_desc->sas_addr,
+	       SAS_ADDR_SIZE);
+	phy->identify_frame->phy_id = phy->sas_phy.id;
+}
+
+static int asd_init_phy(struct asd_phy *phy)
+{
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+	struct asd_ha_struct *asd_ha = sas_phy->ha->lldd_ha;
+
+	/* Defaults: an enabled, not yet connected SAS initiator phy. */
+	sas_phy->enabled = 1;
+	sas_phy->class = SAS;
+	sas_phy->type = PHY_TYPE_PHYSICAL;
+	sas_phy->role = PHY_ROLE_INITIATOR;
+	sas_phy->iproto = SAS_PROTO_ALL;
+	sas_phy->tproto = 0;
+	sas_phy->oob_mode = OOB_NOT_CONNECTED;
+	sas_phy->linkrate = PHY_LINKRATE_NONE;
+
+	phy->id_frm_tok = asd_alloc_coherent(asd_ha,
+					     sizeof(*phy->identify_frame),
+					     GFP_KERNEL);
+	if (!phy->id_frm_tok) {
+		asd_printk("no mem for IDENTIFY for phy%d\n", sas_phy->id);
+		return -ENOMEM;
+	}
+	asd_init_phy_identify(phy);
+	memset(phy->frame_rcvd, 0, sizeof(phy->frame_rcvd));
+
+	return 0;
+}
+
+static int asd_init_phys(struct asd_ha_struct *asd_ha)
+{
+	u8 enabled = asd_ha->hw_prof.enabled_phys;
+	u8 i;
+
+	for (i = 0; i < ASD_MAX_PHYS; i++) {	/* all phys, enabled or not */
+		struct asd_phy *phy = &asd_ha->phys[i];
+		struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+		phy->phy_desc = &asd_ha->hw_prof.phy_desc[i];
+		sas_phy->lldd_phy = phy;
+		sas_phy->ha = &asd_ha->sas_ha;
+		sas_phy->id = i;
+		sas_phy->enabled = 0;
+		sas_phy->sas_addr = &phy->phy_desc->sas_addr[0];
+		sas_phy->frame_rcvd = &phy->frame_rcvd[0];
+	}
+
+	/* Now enable and initialize only the enabled phys. */
+	for_each_phy(enabled, enabled, i) {
+		int err = asd_init_phy(&asd_ha->phys[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* ---------- Sliding windows ---------- */
+
+static int asd_init_sw(struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pcidev = asd_ha->pcidev;
+	int err;
+	u32 v;
+
+	/* Unlock MBARs: read the key and, if it is non-zero, write it back. */
+	err = pci_read_config_dword(pcidev, PCI_CONF_MBAR_KEY, &v);
+	if (err) {
+		asd_printk("couldn't access conf. space of %s\n",
+			   pci_name(pcidev));
+		goto Err;
+	}
+	if (v)
+		err = pci_write_config_dword(pcidev, PCI_CONF_MBAR_KEY, v);
+	if (err) {
+		asd_printk("couldn't write to MBAR_KEY of %s\n",
+			   pci_name(pcidev));
+		goto Err;
+	}
+
+	/* Set sliding windows A, B and C to point to proper internal
+	 * memory regions.  The results of these writes are not checked.
+	 */
+	pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWA, REG_BASE_ADDR);
+	pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWB,
+			       REG_BASE_ADDR_CSEQCIO);
+	pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWC, REG_BASE_ADDR_EXSI);
+	asd_ha->io_handle[0].swa_base = REG_BASE_ADDR;
+	asd_ha->io_handle[0].swb_base = REG_BASE_ADDR_CSEQCIO;
+	asd_ha->io_handle[0].swc_base = REG_BASE_ADDR_EXSI;
+	MBAR0_SWB_SIZE = asd_ha->io_handle[0].len - 0x80;
+	if (!asd_ha->iospace) {
+		/* MBAR1 will point to OCM (On Chip Memory) */
+		pci_write_config_dword(pcidev, PCI_CONF_MBAR1, OCM_BASE_ADDR);
+		asd_ha->io_handle[1].swa_base = OCM_BASE_ADDR;
+	}
+	spin_lock_init(&asd_ha->iolock);
+Err:
+	return err;
+}
+
+/* ---------- SCB initialization ---------- */
+
+/**
+ * asd_init_scbs - allocate the tc index array, its bitmap and the first SCB.
+ * @asd_ha: pointer to host adapter structure
+ *
+ * The first SCB's bus address is written to CSEQ_Q_NEW_POINTER, mode page
+ * 2, mode 8.  The bus address of the _next_ SCB to DMA is read from the
+ * last SCB DMA-ed to the host adapter, so we always keep one SCB allocated
+ * ahead of the sequencer.  Returns 0, or -ENOMEM with nothing left allocated.
+ */
+static int asd_init_scbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int bitmap_bytes;
+
+	/* allocate the index array and bitmap */
+	seq->tc_index_bitmap_bits = asd_ha->hw_prof.max_scbs;
+	seq->tc_index_array = kzalloc(seq->tc_index_bitmap_bits *
+				      sizeof(void *), GFP_KERNEL);
+	if (!seq->tc_index_array)
+		return -ENOMEM;
+
+	bitmap_bytes = (seq->tc_index_bitmap_bits+7)/8;
+	bitmap_bytes = BITS_TO_LONGS(bitmap_bytes*8)*sizeof(unsigned long);
+	seq->tc_index_bitmap = kzalloc(bitmap_bytes, GFP_KERNEL);
+	if (!seq->tc_index_bitmap)
+		goto Err_free_array;	/* do not leak the index array */
+
+	spin_lock_init(&seq->tc_index_lock);
+
+	seq->next_scb.size = sizeof(struct scb);
+	seq->next_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool, GFP_KERNEL,
+					     &seq->next_scb.dma_handle);
+	if (!seq->next_scb.vaddr)
+		goto Err_free_bitmap;
+
+	seq->pending = 0;
+	spin_lock_init(&seq->pend_q_lock);
+	INIT_LIST_HEAD(&seq->pend_q);
+
+	return 0;
+Err_free_bitmap:
+	kfree(seq->tc_index_bitmap);
+	seq->tc_index_bitmap = NULL;
+Err_free_array:
+	kfree(seq->tc_index_array);
+	seq->tc_index_array = NULL;
+	return -ENOMEM;
+}
+
+static inline void asd_get_max_scb_ddb(struct asd_ha_struct *asd_ha)
+{
+	/* Counts = asd_get_{cmd,dev}ctx_size() / size of one SCB or DDB. */
+	asd_ha->hw_prof.max_scbs = asd_get_cmdctx_size(asd_ha) / ASD_SCB_SIZE;
+	asd_ha->hw_prof.max_ddbs = asd_get_devctx_size(asd_ha) / ASD_DDB_SIZE;
+	ASD_DPRINTK("max_scbs:%d, max_ddbs:%d\n",
+		    asd_ha->hw_prof.max_scbs, asd_ha->hw_prof.max_ddbs);
+}
+
+/* ---------- Done List initialization ---------- */
+
+static void asd_dl_tasklet_handler(unsigned long);
+
+static int asd_init_dl(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+
+	/* ASD_DL_SIZE entries; asd_dl_tasklet_handler() gets asd_ha as data. */
+	seq->actual_dl = asd_alloc_coherent(asd_ha,
+		ASD_DL_SIZE * sizeof(struct done_list_struct), GFP_KERNEL);
+	if (!seq->actual_dl)
+		return -ENOMEM;
+	seq->dl = seq->actual_dl->vaddr;
+	seq->dl_next = 0;
+	seq->dl_toggle = ASD_DEF_DL_TOGGLE;
+	tasklet_init(&seq->dl_tasklet, asd_dl_tasklet_handler,
+		     (unsigned long) asd_ha);
+	return 0;
+}
+
+/* ---------- EDB and ESCB init ---------- */
+
+static int asd_alloc_edbs(struct asd_ha_struct *asd_ha, unsigned int gfp_flags)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int n;
+
+	seq->edb_arr = kmalloc(seq->num_edbs*sizeof(*seq->edb_arr), gfp_flags);
+	if (!seq->edb_arr)
+		return -ENOMEM;
+
+	for (n = 0; n < seq->num_edbs; n++) {	/* one zeroed buffer per slot */
+		struct asd_dma_tok *edb = asd_alloc_coherent(asd_ha,
+						ASD_EDB_SIZE, gfp_flags);
+
+		seq->edb_arr[n] = edb;
+		if (!edb)
+			goto Err_unroll;
+		memset(edb->vaddr, 0, ASD_EDB_SIZE);
+	}
+	ASD_DPRINTK("num_edbs:%d\n", seq->num_edbs);
+
+	return 0;
+
+Err_unroll:
+	while (--n >= 0)	/* slots n-1 .. 0 were allocated */
+		asd_free_coherent(asd_ha, seq->edb_arr[n]);
+	kfree(seq->edb_arr);
+	seq->edb_arr = NULL;
+	return -ENOMEM;
+}
+
+static int asd_alloc_escbs(struct asd_ha_struct *asd_ha,
+			   unsigned int gfp_flags)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	struct asd_ascb *cur;
+	int n, missing;
+
+	seq->escb_arr = kmalloc(seq->num_escbs*sizeof(*seq->escb_arr),
+				gfp_flags);
+	if (!seq->escb_arr)
+		return -ENOMEM;
+
+	/* Ask for num_escbs ascbs.  asd_ascb_alloc_list() leaves in
+	 * @missing the number it could not allocate.
+	 */
+	missing = seq->num_escbs;
+	cur = asd_ascb_alloc_list(asd_ha, &missing, gfp_flags);
+	if (!cur) {
+		asd_printk("couldn't allocate list of escbs\n");
+		kfree(seq->escb_arr);
+		seq->escb_arr = NULL;
+		return -ENOMEM;
+	}
+	seq->num_escbs -= missing;  /* subtract what was not allocated */
+	ASD_DPRINTK("num_escbs:%d\n", seq->num_escbs);
+
+	/* Record each ascb of the returned list and mark it EMPTY_SCB. */
+	for (n = 0; n < seq->num_escbs; n++) {
+		seq->escb_arr[n] = cur;
+		cur->scb->header.opcode = EMPTY_SCB;
+		cur = list_entry(cur->list.next, struct asd_ascb, list);
+	}
+
+	return 0;
+}
+
+static void asd_assign_edbs2escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int s, slot, next_edb = 0;
+
+	/* Hand out the edbs in order, ASD_EDBS_PER_SCB to each escb. */
+	for (s = 0; s < seq->num_escbs; s++) {
+		struct asd_ascb *ascb = seq->escb_arr[s];
+		struct empty_scb *escb = &ascb->scb->escb;
+
+		ascb->edb_index = next_edb;	/* index of its first edb */
+		escb->num_valid = ASD_EDBS_PER_SCB;
+
+		for (slot = 0; slot < ASD_EDBS_PER_SCB; slot++, next_edb++) {
+			struct asd_dma_tok *edb = seq->edb_arr[next_edb];
+			struct sg_el *eb = &escb->eb[slot];
+
+			memset(eb, 0, sizeof(*eb));
+			eb->size = cpu_to_le32(((u32) edb->size));
+			eb->bus_addr = cpu_to_le64(((u64) edb->dma_handle));
+		}
+	}
+}
+
+/**
+ * asd_init_escbs -- set up the empty SCBs and their data buffers
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Every empty SCB carries ASD_EDBS_PER_SCB (7) sg elements, each pointing
+ * at one empty data buffer; they transport sense data, etc.
+ */
+static int asd_init_escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	/* Allocate two empty data buffers (edb) per sequencer. */
+	int wanted = 2*(1+asd_ha->hw_prof.num_phys);
+	int rc;
+
+	/* Round up to whole escbs, then fill every escb completely. */
+	seq->num_escbs = (wanted+ASD_EDBS_PER_SCB-1)/ASD_EDBS_PER_SCB;
+	seq->num_edbs = seq->num_escbs * ASD_EDBS_PER_SCB;
+
+	rc = asd_alloc_edbs(asd_ha, GFP_KERNEL);
+	if (rc) {
+		asd_printk("couldn't allocate edbs\n");
+		return rc;
+	}
+
+	rc = asd_alloc_escbs(asd_ha, GFP_KERNEL);
+	if (rc) {
+		asd_printk("couldn't allocate escbs\n");
+		return rc;
+	}
+
+	asd_assign_edbs2escbs(asd_ha);
+	/* Normal SCBs must not overfill sequencer memory and leave no
+	 * room for the escbs (a halting condition), so pending starts
+	 * out at the number of escbs even though escbs themselves are
+	 * never pending.
+	 */
+	seq->pending   = seq->num_escbs;
+	seq->can_queue = 1 + (asd_ha->hw_prof.max_scbs - seq->pending)/2;
+
+	return 0;
+}
+
+/* ---------- HW initialization ---------- */
+
+/**
+ * asd_chip_hardrst -- request a hard reset and wait for it to be seen
+ * @asd_ha: pointer to host adapter structure
+ *
+ * The reset takes 16 cycles of the 200 MHz CFCLK, i.e. at most 80 ns.
+ * Returns 0 once HARDRSTDET shows up in CHIMINT, -ENODEV after 100 polls.
+ */
+int asd_chip_hardrst(struct asd_ha_struct *asd_ha)
+{
+	int n, tries;
+
+	for (n = 0; n < 4; n++) {
+		asd_write_reg_dword(asd_ha, COMBIST, HARDRST);
+	}
+
+	/* Poll CHIMINT, 1us apart; write the detect bits back when seen. */
+	for (tries = 100; tries > 0; tries--) {
+		u32 chimint;
+
+		udelay(1);
+		chimint = asd_read_reg_dword(asd_ha, CHIMINT);
+		if (!(chimint & HARDRSTDET))
+			continue;
+		asd_write_reg_dword(asd_ha, CHIMINT, HARDRSTDET|PORRSTDET);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/**
+ * asd_init_chip -- initialize the chip
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Hard resets the chip, disables HA interrupts, downloads the sequencer
+ * microcode and starts the sequencers.  The caller has to explicitly
+ * enable HA interrupts with asd_enable_ints(asd_ha).  Returns 0 on
+ * success or the error code of the first step that failed.
+ */
+static int asd_init_chip(struct asd_ha_struct *asd_ha)
+{
+	int rc;
+
+	rc = asd_chip_hardrst(asd_ha);
+	if (rc) {
+		asd_printk("couldn't hard reset %s\n",
+			    pci_name(asd_ha->pcidev));
+		return rc;
+	}
+
+	/* Stay quiet while the sequencers are being brought up. */
+	asd_disable_ints(asd_ha);
+
+	rc = asd_init_seqs(asd_ha);
+	if (rc) {
+		asd_printk("couldn't init seqs for %s\n",
+			   pci_name(asd_ha->pcidev));
+		return rc;
+	}
+
+	rc = asd_start_seqs(asd_ha);
+	if (rc)
+		asd_printk("coudln't start seqs for %s\n",
+			   pci_name(asd_ha->pcidev));
+
+	return rc;
+}
+
+#define MAX_DEVS ((OCM_MAX_SIZE) / (ASD_DDB_SIZE))	/* presumably the DDBs fitting in OCM */
+
+static int max_devs = 0;	/* rewritten by asd_extend_devctx() to the count in effect */
+module_param_named(max_devs, max_devs, int, S_IRUGO);
+MODULE_PARM_DESC(max_devs, "\n"
+	"\tMaximum number of SAS devices to support (not LUs).\n"
+	"\tDefault: 2176, Maximum: 65663.\n");	/* NOTE(review): asd_extend_devctx() ignores values > 0xFFFF */
+
+static int max_cmnds = 0;	/* rewritten by asd_extend_cmdctx() to the count in effect */
+module_param_named(max_cmnds, max_cmnds, int, S_IRUGO);
+MODULE_PARM_DESC(max_cmnds, "\n"
+	"\tMaximum number of commands queuable.\n"
+	"\tDefault: 512, Maximum: 66047.\n");	/* NOTE(review): asd_extend_cmdctx() ignores values > 0xFFFF */
+
+/* Base the DDB context in OCM (CTXDOMAIN bit 2 set); adds MAX_DEVS DDBs. */
+static void asd_extend_devctx_ocm(struct asd_ha_struct *asd_ha)
+{
+	unsigned long base = OCM_BASE_ADDR;
+	u32 ctxdomain;
+
+	base -= asd_ha->hw_prof.max_ddbs * ASD_DDB_SIZE;
+	asd_write_reg_addr(asd_ha, DEVCTXBASE, (dma_addr_t) base);
+	ctxdomain = asd_read_reg_dword(asd_ha, CTXDOMAIN) | 4;
+	asd_write_reg_dword(asd_ha, CTXDOMAIN, ctxdomain);
+	asd_ha->hw_prof.max_ddbs += MAX_DEVS;
+}
+
+/* Extend the device context (DDBs): first into OCM, then, if the
+ * max_devs module parameter asks for more, into host memory.  On return
+ * max_devs equals hw_prof.max_ddbs.  Returns 0, or -ENOMEM if the host
+ * memory could not be allocated. */
+static int asd_extend_devctx(struct asd_ha_struct *asd_ha)
+{
+	unsigned long base;
+	u32 ctxdomain;
+	int bytes;
+
+	asd_extend_devctx_ocm(asd_ha);
+	asd_ha->hw_prof.ddb_ext = NULL;
+
+	/* Only a request above the current count, up to 0xFFFF, counts. */
+	if (max_devs > 0xFFFF || max_devs <= asd_ha->hw_prof.max_ddbs) {
+		max_devs = asd_ha->hw_prof.max_ddbs;
+		return 0;
+	}
+
+	bytes = ASD_DDB_SIZE * (max_devs - asd_ha->hw_prof.max_ddbs + 1);
+	asd_ha->hw_prof.ddb_ext = asd_alloc_coherent(asd_ha, bytes, GFP_KERNEL);
+	if (asd_ha->hw_prof.ddb_ext == NULL) {
+		asd_printk("couldn't allocate memory for %d devices\n",
+			   max_devs);
+		max_devs = asd_ha->hw_prof.max_ddbs;
+		return -ENOMEM;
+	}
+	/* base + max_ddbs * ASD_DDB_SIZE is the aligned start of ddb_ext. */
+	base = ALIGN((unsigned long) asd_ha->hw_prof.ddb_ext->dma_handle,
+		     ASD_DDB_SIZE);
+	base -= asd_ha->hw_prof.max_ddbs * ASD_DDB_SIZE;
+	asd_write_reg_addr(asd_ha, DEVCTXBASE, (dma_addr_t) base);
+	ctxdomain = asd_read_reg_dword(asd_ha, CTXDOMAIN) & ~4;
+	asd_write_reg_dword(asd_ha, CTXDOMAIN, ctxdomain);
+	asd_ha->hw_prof.max_ddbs = max_devs;
+	return 0;
+}
+
+/* Extend the command context (SCBs) into host memory if the max_cmnds
+ * module parameter asks for more SCBs than hw_prof.max_scbs (and no
+ * more than 0xFFFF).  On return max_cmnds equals hw_prof.max_scbs.
+ * Returns 0, or -ENOMEM if the host memory could not be allocated. */
+static int asd_extend_cmdctx(struct asd_ha_struct *asd_ha)
+{
+	unsigned long base;
+	u32 ctxdomain;
+	int bytes;
+
+	asd_ha->hw_prof.scb_ext = NULL;
+	if (max_cmnds > 0xFFFF || max_cmnds <= asd_ha->hw_prof.max_scbs) {
+		max_cmnds = asd_ha->hw_prof.max_scbs;
+		return 0;
+	}
+
+	bytes = ASD_SCB_SIZE * (max_cmnds - asd_ha->hw_prof.max_scbs + 1);
+	asd_ha->hw_prof.scb_ext = asd_alloc_coherent(asd_ha, bytes, GFP_KERNEL);
+	if (asd_ha->hw_prof.scb_ext == NULL) {
+		asd_printk("couldn't allocate memory for %d commands\n",
+			   max_cmnds);
+		max_cmnds = asd_ha->hw_prof.max_scbs;
+		return -ENOMEM;
+	}
+	/* base + max_scbs * ASD_SCB_SIZE is the aligned start of scb_ext. */
+	base = ALIGN((unsigned long) asd_ha->hw_prof.scb_ext->dma_handle,
+		     ASD_SCB_SIZE);
+	base -= asd_ha->hw_prof.max_scbs * ASD_SCB_SIZE;
+	asd_write_reg_addr(asd_ha, CMDCTXBASE, (dma_addr_t) base);
+	ctxdomain = asd_read_reg_dword(asd_ha, CTXDOMAIN) & ~1;
+	asd_write_reg_dword(asd_ha, CTXDOMAIN, ctxdomain);
+
+	asd_ha->hw_prof.max_scbs = max_cmnds;
+	return 0;
+}
+
+/**
+ * asd_init_ctxmem -- initialize context memory
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Sets the maximum number of SCBs and DDBs usable by the sequencer,
+ * normally 512 and 128 respectively.  If more are required, CMDCTXBASE,
+ * DEVCTXBASE and CTXDOMAIN are initialized to extend context memory into
+ * host memory, so only host memory limits the number of SCBs and DDBs.
+ * Also allocates the DDB bitmap and initializes ddb_lock.
+ * Returns 0, or -ENOMEM if the bitmap could not be allocated.
+ */
+static int asd_init_ctxmem(struct asd_ha_struct *asd_ha)
+{
+	int nbytes;
+
+	asd_get_max_scb_ddb(asd_ha);
+	/* The results of the two extensions are not checked. */
+	asd_extend_devctx(asd_ha);
+	asd_extend_cmdctx(asd_ha);
+
+	/* One bit per DDB, rounded up to whole bytes and then to whole
+	 * unsigned longs, which is what the kernel wants of bitmaps. */
+	nbytes = (asd_ha->hw_prof.max_ddbs + 7) / 8;
+	nbytes = sizeof(unsigned long) * BITS_TO_LONGS(nbytes * 8);
+	asd_ha->hw_prof.ddb_bitmap = kzalloc(nbytes, GFP_KERNEL);
+	if (asd_ha->hw_prof.ddb_bitmap == NULL)
+		return -ENOMEM;
+	spin_lock_init(&asd_ha->hw_prof.ddb_lock);
+	return 0;
+}
+
+/**
+ * asd_init_hw -- bring up the host adapter hardware
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Sets up, in order: software state, PCI-X control, context memory, SAS
+ * address, phys, SCBs, done list, escbs, chip.  Returns 0 or an error.
+ */
+int asd_init_hw(struct asd_ha_struct *asd_ha)
+{
+	int err;
+	u32 v;
+
+	err = asd_init_sw(asd_ha);
+	if (err)
+		return err;
+
+	err = pci_read_config_dword(asd_ha->pcidev, PCIC_HSTPCIX_CNTRL, &v);
+	if (err) {
+		asd_printk("couldn't read PCIC_HSTPCIX_CNTRL of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+	/* Test the result of the write itself, not that of the read. */
+	err = pci_write_config_dword(asd_ha->pcidev, PCIC_HSTPCIX_CNTRL,
+				     v | SC_TMR_DIS);
+	if (err) {
+		asd_printk("couldn't disable split completion timer of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	/* Failing to read OCM or FLASH is suspicious, but not an error. */
+	err = asd_read_ocm(asd_ha);
+	if (err)
+		asd_printk("couldn't read ocm(%d)\n", err);
+
+	err = asd_read_flash(asd_ha);
+	if (err)
+		asd_printk("couldn't read flash(%d)\n", err);
+
+	err = asd_init_ctxmem(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize context memory\n");
+		return err;
+	}
+
+	asd_get_user_sas_addr(asd_ha);
+	if (!asd_ha->hw_prof.sas_addr[0]) {
+		asd_printk("No SAS Address provided for %s\n",
+			   pci_name(asd_ha->pcidev));
+		return -ENODEV;
+	}
+	asd_propagate_sas_addr(asd_ha);
+
+	err = asd_init_phys(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize phys for %s\n",
+			    pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	err = asd_init_scbs(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize scbs for %s\n",
+			    pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	err = asd_init_dl(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize the done list:%d\n", err);
+		return err;
+	}
+
+	err = asd_init_escbs(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize escbs\n");
+		return err;
+	}
+
+	err = asd_init_chip(asd_ha);
+	if (err)
+		asd_printk("couldn't init the chip\n");
+	return err;
+}
+
+/* ---------- Chip reset ---------- */
+
+/**
+ * asd_chip_reset -- reset the host adapter, etc
+ * @asd_ha: pointer to host adapter structure of interest
+ *
+ * Called from the ISR.  Hard resets the chip and reports HAE_RESET
+ * through notify_ha_event() of the SAS HA.  Let everything time out;
+ * this should be no different than hot-unplugging the host adapter.
+ * Once everything times out we'll init the chip with a call to
+ * asd_init_chip() and enable interrupts with asd_enable_ints().
+ * XXX finish.
+ */
+static void asd_chip_reset(struct asd_ha_struct *asd_ha)
+{
+	ASD_DPRINTK("chip reset for %s\n", pci_name(asd_ha->pcidev));
+
+	asd_chip_hardrst(asd_ha);	/* the result is not checked */
+	asd_ha->sas_ha.notify_ha_event(&asd_ha->sas_ha, HAE_RESET);
+}
+
+/* ---------- Done List Routines ---------- */
+/* Tasklet: walk the done list and complete the ascb each entry names. */
+static void asd_dl_tasklet_handler(unsigned long data)
+{
+	struct asd_ha_struct *asd_ha = (struct asd_ha_struct *) data;
+	struct asd_seq_data *seq = &asd_ha->seq;
+	unsigned long flags;
+
+	while (1) {	/* one done list entry per iteration */
+		struct done_list_struct *dl = &seq->dl[seq->dl_next];
+		struct asd_ascb *ascb;
+
+		if ((dl->toggle & DL_TOGGLE_MASK) != seq->dl_toggle)
+			break;	/* toggle mismatch: nothing more to process */
+
+		/* find the aSCB by the index carried in the entry */
+		spin_lock_irqsave(&seq->tc_index_lock, flags);
+		ascb = asd_tc_index_find(seq, (int)le16_to_cpu(dl->index));
+		spin_unlock_irqrestore(&seq->tc_index_lock, flags);
+		if (unlikely(!ascb)) {
+			ASD_DPRINTK("BUG:sequencer:dl:no ascb?!\n");
+			goto next_1;	/* skip the entry */
+		} else if (ascb->scb->header.opcode == EMPTY_SCB) {
+			goto out;	/* empty SCB: no pending list bookkeeping */
+		} else if (!ascb->uldd_timer && !del_timer(&ascb->timer)) {
+			goto next_1;	/* timer no longer pending; presumably the timeout path owns it */
+		}
+		spin_lock_irqsave(&seq->pend_q_lock, flags);
+		list_del_init(&ascb->list);
+		seq->pending--;
+		spin_unlock_irqrestore(&seq->pend_q_lock, flags);
+	out:
+		ascb->tasklet_complete(ascb, dl);
+
+	next_1:
+		seq->dl_next = (seq->dl_next + 1) & (ASD_DL_SIZE-1);	/* wraps; assumes a power of 2 size */
+		if (!seq->dl_next)
+			seq->dl_toggle ^= DL_TOGGLE_MASK;	/* wrapped: expect the other toggle */
+	}
+}
+
+/* ---------- Interrupt Service Routines ---------- */
+
+/**
+ * asd_process_donelist_isr -- schedule processing of done list entries
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_process_donelist_isr(struct asd_ha_struct *asd_ha)
+{
+	tasklet_schedule(&asd_ha->seq.dl_tasklet);	/* presumably runs asd_dl_tasklet_handler() */
+}
+
+/**
+ * asd_com_sas_isr -- process device communication interrupt (COMINT)
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_com_sas_isr(struct asd_ha_struct *asd_ha)
+{
+	u32 comstat = asd_read_reg_dword(asd_ha, COMSTAT);
+
+	/* clear COMSTAT int; the value read above is what gets decoded */
+	asd_write_reg_dword(asd_ha, COMSTAT, 0xFFFFFFFF);
+
+	if (comstat & CSBUFPERR) {	/* only the first matching cause is logged */
+		asd_printk("%s: command/status buffer dma parity error\n",
+			   pci_name(asd_ha->pcidev));
+	} else if (comstat & CSERR) {
+		int i;
+		u32 dmaerr = asd_read_reg_dword(asd_ha, DMAERR);
+		dmaerr &= 0xFF;	/* low byte is reported for command/status errors */
+		asd_printk("%s: command/status dma error, DMAERR: 0x%02x, "
+			   "CSDMAADR: 0x%04x, CSDMAADR+4: 0x%04x\n",
+			   pci_name(asd_ha->pcidev),
+			   dmaerr,
+			   asd_read_reg_dword(asd_ha, CSDMAADR),
+			   asd_read_reg_dword(asd_ha, CSDMAADR+4));
+		asd_printk("CSBUFFER:\n");
+		for (i = 0; i < 8; i++) {	/* NOTE(review): every pass reads the same 4 dwords; offset by 16*i intended? */
+			asd_printk("%08x %08x %08x %08x\n",
+				   asd_read_reg_dword(asd_ha, CSBUFFER),
+				   asd_read_reg_dword(asd_ha, CSBUFFER+4),
+				   asd_read_reg_dword(asd_ha, CSBUFFER+8),
+				   asd_read_reg_dword(asd_ha, CSBUFFER+12));
+		}
+		asd_dump_seq_state(asd_ha, 0);
+	} else if (comstat & OVLYERR) {
+		u32 dmaerr = asd_read_reg_dword(asd_ha, DMAERR);
+		dmaerr = (dmaerr >> 8) & 0xFF;	/* second byte is reported for overlay errors */
+		asd_printk("%s: overlay dma error:0x%x\n",
+			   pci_name(asd_ha->pcidev),
+			   dmaerr);
+	}
+	asd_chip_reset(asd_ha);	/* always, even if no cause was decoded */
+}
+
+static inline void asd_arp2_err(struct asd_ha_struct *asd_ha, u32 dchstatus)
+{
+	static const char *halt_code[256] = {	/* indexed by bits 23:16 of ARP2INT */
+		"UNEXPECTED_INTERRUPT0",
+		"UNEXPECTED_INTERRUPT1",
+		"UNEXPECTED_INTERRUPT2",
+		"UNEXPECTED_INTERRUPT3",
+		"UNEXPECTED_INTERRUPT4",
+		"UNEXPECTED_INTERRUPT5",
+		"UNEXPECTED_INTERRUPT6",
+		"UNEXPECTED_INTERRUPT7",
+		"UNEXPECTED_INTERRUPT8",
+		"UNEXPECTED_INTERRUPT9",
+		"UNEXPECTED_INTERRUPT10",
+		[11 ... 19] = "unknown[11,19]",
+		"NO_FREE_SCB_AVAILABLE",
+		"INVALID_SCB_OPCODE",
+		"INVALID_MBX_OPCODE",
+		"INVALID_ATA_STATE",
+		"ATA_QUEUE_FULL",
+		"ATA_TAG_TABLE_FAULT",
+		"ATA_TAG_MASK_FAULT",
+		"BAD_LINK_QUEUE_STATE",
+		"DMA2CHIM_QUEUE_ERROR",
+		"EMPTY_SCB_LIST_FULL",
+		"unknown[30]",
+		"IN_USE_SCB_ON_FREE_LIST",
+		"BAD_OPEN_WAIT_STATE",
+		"INVALID_STP_AFFILIATION",
+		"unknown[34]",
+		"EXEC_QUEUE_ERROR",
+		"TOO_MANY_EMPTIES_NEEDED",
+		"EMPTY_REQ_QUEUE_ERROR",
+		"Q_MONIRTT_MGMT_ERROR",
+		"TARGET_MODE_FLOW_ERROR",
+		"DEVICE_QUEUE_NOT_FOUND",
+		"START_IRTT_TIMER_ERROR",
+		"ABORT_TASK_ILLEGAL_REQ",
+		[43 ... 255] = "unknown[43,255]"
+	};
+	/* Log why CSEQ and/or the LSEQs flagged in @dchstatus interrupted. */
+	if (dchstatus & CSEQINT) {
+		u32 arp2int = asd_read_reg_dword(asd_ha, CARP2INT);
+
+		if (arp2int & (ARP2WAITTO|ARP2ILLOPC|ARP2PERR|ARP2CIOPERR)) {
+			asd_printk("%s: CSEQ arp2int:0x%x\n",
+				   pci_name(asd_ha->pcidev),
+				   arp2int);
+		} else if (arp2int & ARP2HALTC)
+			asd_printk("%s: CSEQ halted: %s\n",
+				   pci_name(asd_ha->pcidev),
+				   halt_code[(arp2int>>16)&0xFF]);
+		else
+			asd_printk("%s: CARP2INT:0x%x\n",
+				   pci_name(asd_ha->pcidev),
+				   arp2int);
+	}
+	if (dchstatus & LSEQINT_MASK) {
+		int lseq;
+		u8  lseq_mask = dchstatus & LSEQINT_MASK;	/* one bit per LSEQ -- TODO confirm mask width */
+
+		for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+			u32 arp2int = asd_read_reg_dword(asd_ha,
+							 LmARP2INT(lseq));
+			if (arp2int & (ARP2WAITTO | ARP2ILLOPC | ARP2PERR
+				       | ARP2CIOPERR)) {
+				asd_printk("%s: LSEQ%d arp2int:0x%x\n",
+					   pci_name(asd_ha->pcidev),
+					   lseq, arp2int);
+				/* XXX we should only do lseq reset */
+			} else if (arp2int & ARP2HALTC)
+				asd_printk("%s: LSEQ%d halted: %s\n",
+					   pci_name(asd_ha->pcidev),
+					   lseq,halt_code[(arp2int>>16)&0xFF]);
+			else
+				asd_printk("%s: LSEQ%d ARP2INT:0x%x\n",
+					   pci_name(asd_ha->pcidev), lseq,
+					   arp2int);
+		}
+	}
+	asd_chip_reset(asd_ha);	/* always, whatever was logged above */
+}
+
+/**
+ * asd_dch_sas_isr -- process device channel interrupt (DEVINT)
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_dch_sas_isr(struct asd_ha_struct *asd_ha)
+{
+	const u32 status = asd_read_reg_dword(asd_ha, DCHSTATUS);
+
+	if (!(status & CFIFTOERR))
+		asd_arp2_err(asd_ha, status);	/* resets the chip itself */
+	else {
+		asd_printk("%s: CFIFTOERR\n", pci_name(asd_ha->pcidev));
+		asd_chip_reset(asd_ha);
+	}
+}
+
+/**
+ * asd_rbi_exsi_isr -- process external system interface interrupt (INITERR)
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_rbi_exsi_isr(struct asd_ha_struct *asd_ha)
+{
+	const u32 stat0r = asd_read_reg_dword(asd_ha, ASISTAT0R);
+
+	if ((stat0r & ASIERR) == 0) {
+		asd_printk("hmm, EXSI interrupted but no error?\n");
+		return;
+	}
+
+	if (stat0r & ASIFMTERR) {
+		asd_printk("ASI SEEPROM format error for %s\n",
+			   pci_name(asd_ha->pcidev));
+	} else if (stat0r & ASISEECHKERR) {
+		const u32 stat1r = asd_read_reg_dword(asd_ha, ASISTAT1R);
+		asd_printk("ASI SEEPROM checksum 0x%x error for %s\n",
+			   stat1r & CHECKSUM_MASK,
+			   pci_name(asd_ha->pcidev));
+	} else {
+		const u32 statr = asd_read_reg_dword(asd_ha, ASIERRSTATR);
+		u32 addr, data;
+
+		if ((statr & CPI2ASIMSTERR_MASK) == 0) {
+			ASD_DPRINTK("hmm, ASIERR?\n");
+			return;	/* no chip reset in this case */
+		}
+		/* Log the details of the CPI2 transfer error. */
+		addr = asd_read_reg_dword(asd_ha, ASIERRADDR);
+		data = asd_read_reg_dword(asd_ha, ASIERRDATAR);
+		asd_printk("%s: CPI2 xfer err: addr: 0x%x, wdata: 0x%x, "
+			   "count: 0x%x, byteen: 0x%x, targerr: 0x%x "
+			   "master id: 0x%x, master err: 0x%x\n",
+			   pci_name(asd_ha->pcidev),
+			   addr, data,
+			   (statr & CPI2ASIBYTECNT_MASK) >> 16,
+			   (statr & CPI2ASIBYTEEN_MASK) >> 12,
+			   (statr & CPI2ASITARGERR_MASK) >> 8,
+			   (statr & CPI2ASITARGMID_MASK) >> 4,
+			   (statr & CPI2ASIMSTERR_MASK));
+	}
+	asd_chip_reset(asd_ha);
+}
+
+/**
+ * asd_hst_pcix_isr -- process host interface interrupts
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Asserted on PCIX errors: target abort, etc.  Logs the first match only.
+ */
+static inline void asd_hst_pcix_isr(struct asd_ha_struct *asd_ha)
+{
+	u16 status;
+	u32 pcix_status;
+	u32 ecc_status;
+
+	pci_read_config_word(asd_ha->pcidev, PCI_STATUS, &status);	/* read results are not checked */
+	pci_read_config_dword(asd_ha->pcidev, PCIX_STATUS, &pcix_status);
+	pci_read_config_dword(asd_ha->pcidev, ECC_CTRL_STAT, &ecc_status);
+
+	if (status & PCI_STATUS_DETECTED_PARITY)
+		asd_printk("parity error for %s\n", pci_name(asd_ha->pcidev));
+	else if (status & PCI_STATUS_REC_MASTER_ABORT)
+		asd_printk("master abort for %s\n", pci_name(asd_ha->pcidev));
+	else if (status & PCI_STATUS_REC_TARGET_ABORT)
+		asd_printk("target abort for %s\n", pci_name(asd_ha->pcidev));
+	else if (status & PCI_STATUS_PARITY)
+		asd_printk("data parity for %s\n", pci_name(asd_ha->pcidev));
+	else if (pcix_status & RCV_SCE) {
+		asd_printk("received split completion error for %s\n",
+			   pci_name(asd_ha->pcidev));
+		pci_write_config_dword(asd_ha->pcidev,PCIX_STATUS,pcix_status);
+		/* XXX: Abort task?  For now: no chip reset for this one. */
+		return;
+	} else if (pcix_status & UNEXP_SC) {
+		asd_printk("unexpected split completion for %s\n",
+			   pci_name(asd_ha->pcidev));
+		pci_write_config_dword(asd_ha->pcidev,PCIX_STATUS,pcix_status);
+		/* ignore: status written back, no chip reset */
+		return;
+	} else if (pcix_status & SC_DISCARD)
+		asd_printk("split completion discarded for %s\n",
+			   pci_name(asd_ha->pcidev));
+	else if (ecc_status & UNCOR_ECCERR)
+		asd_printk("uncorrectable ECC error for %s\n",
+			   pci_name(asd_ha->pcidev));
+	asd_chip_reset(asd_ha);	/* also reached when nothing above matched */
+}
+
+/**
+ * asd_hw_isr -- host adapter interrupt service routine
+ * @irq: ignored
+ * @dev_id: pointer to host adapter structure
+ * @regs: ignored
+ *
+ * The ISR schedules done list processing and does level 3 error handling.
+ */
+irqreturn_t asd_hw_isr(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct asd_ha_struct *asd_ha = dev_id;
+	const u32 cause = asd_read_reg_dword(asd_ha, CHIMINT);
+
+	if (cause == 0)
+		return IRQ_NONE;	/* nothing flagged in CHIMINT */
+
+	asd_write_reg_dword(asd_ha, CHIMINT, cause);	/* write the bits back */
+	(void) asd_read_reg_dword(asd_ha, CHIMINT);	/* then read CHIMINT again */
+
+	if (cause & DLAVAIL)
+		asd_process_donelist_isr(asd_ha);
+	if (cause & COMINT)
+		asd_com_sas_isr(asd_ha);
+	if (cause & DEVINT)
+		asd_dch_sas_isr(asd_ha);
+	if (cause & INITERR)
+		asd_rbi_exsi_isr(asd_ha);
+	if (cause & HOSTERR)
+		asd_hst_pcix_isr(asd_ha);
+
+	return IRQ_HANDLED;
+}
+
+/* ---------- SCB handling ---------- */
+
+/* Allocate one aSCB together with its DMA-able SCB and a tc_index.
+ * Returns NULL if any of the three cannot be had. */
+static inline struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha,
+					      unsigned int gfp_flags)
+{
+	extern kmem_cache_t *asd_ascb_cache;
+	struct asd_seq_data *seq = &asd_ha->seq;
+	struct asd_dma_tok *tok;
+	struct asd_ascb *ascb;
+	unsigned long flags;
+
+	ascb = kmem_cache_alloc(asd_ascb_cache, gfp_flags);
+	if (!ascb)
+		return NULL;
+	memset(ascb, 0, sizeof(*ascb));
+	tok = &ascb->dma_scb;
+	tok->size = sizeof(struct scb);
+	tok->vaddr = dma_pool_alloc(asd_ha->scb_pool, gfp_flags,
+				    &tok->dma_handle);
+	if (!tok->vaddr) {
+		kmem_cache_free(asd_ascb_cache, ascb);
+		return NULL;
+	}
+	memset(tok->vaddr, 0, sizeof(struct scb));
+	asd_init_ascb(asd_ha, ascb);
+
+	spin_lock_irqsave(&seq->tc_index_lock, flags);
+	ascb->tc_index = asd_tc_index_get(seq, ascb);
+	spin_unlock_irqrestore(&seq->tc_index_lock, flags);
+	if (ascb->tc_index == -1)
+		goto undo;
+
+	ascb->scb->header.index = cpu_to_le16((u16)ascb->tc_index);
+	return ascb;
+undo:
+	dma_pool_free(asd_ha->scb_pool, tok->vaddr, tok->dma_handle);
+	kmem_cache_free(asd_ascb_cache, ascb);
+	ASD_DPRINTK("no index for ascb\n");
+	return NULL;
+}
+
+/**
+ * asd_ascb_alloc_list -- allocate a list of aSCBs
+ * @asd_ha: pointer to host adapter structure
+ * @num: in: number of aSCBs wanted; out: number that could not be had
+ * @gfp_flags: GFP_ flags.
+ *
+ * This is the only function used to allocate aSCBs, be it one or
+ * many.  Several aSCBs are linked in two ways: through the list field
+ * of the ascb struct and through the next_scb field of the scb_header,
+ * which holds the DMA address of the following SCB.
+ *
+ * Returns a pointer to the first aSCB, or NULL if not even one could
+ * be allocated.  A short list is possible: check *@num, which is 0
+ * only if all requested aSCBs were allocated.
+ */
+struct asd_ascb *asd_ascb_alloc_list(struct asd_ha_struct
+				     *asd_ha, int *num,
+				     unsigned int gfp_flags)
+{
+	struct asd_ascb *head = NULL;
+
+	while (*num > 0) {
+		struct asd_ascb *fresh = asd_ascb_alloc(asd_ha, gfp_flags);
+		struct asd_ascb *tail;
+
+		if (fresh == NULL)
+			break;
+		--*num;
+		if (head == NULL) {
+			head = fresh;
+			continue;
+		}
+		/* Chain it behind the tail: list-wise and by DMA address. */
+		tail = list_entry(head->list.prev, struct asd_ascb, list);
+		list_add_tail(&fresh->list, &head->list);
+		tail->scb->header.next_scb =
+			cpu_to_le64(((u64)fresh->dma_scb.dma_handle));
+	}
+
+	return head;
+}
+
+/**
+ * asd_swap_head_scb -- swap the head scb
+ * @asd_ha: pointer to host adapter structure
+ * @ascb: pointer to the head of an ascb list
+ *
+ * The sequencer knows the DMA address of the next SCB to be DMAed to
+ * the host adapter, from initialization or from the last list DMAed.
+ * seq->next_scb keeps the address of this SCB.  The sequencer will
+ * DMA to the host adapter this list of SCBs.  But the head (first
+ * element) of this list is not known to the sequencer.  Here we swap
+ * the head of the list with the known SCB (memcpy()).
+ * Only one memcpy() is required per list so it is in our interest
+ * to keep the list of SCB as long as possible so that the ratio
+ * of number of memcpy calls to the number of SCB DMA-ed is as small
+ * as possible.
+ *
+ * LOCKING: called with the pending list lock held.
+ */
+static inline void asd_swap_head_scb(struct asd_ha_struct *asd_ha,
+				     struct asd_ascb *ascb)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	struct asd_ascb *last = list_entry(ascb->list.prev,
+					   struct asd_ascb,
+					   list);	/* the tail; @ascb itself for a list of one */
+	struct asd_dma_tok t = ascb->dma_scb;	/* the head's own SCB buffer */
+
+	memcpy(seq->next_scb.vaddr, ascb->scb, sizeof(*ascb->scb));
+	ascb->dma_scb = seq->next_scb;	/* the head now lives in the known SCB */
+	ascb->scb = ascb->dma_scb.vaddr;
+	seq->next_scb = t;	/* its old buffer becomes the next known SCB */
+	last->scb->header.next_scb =	/* done last, so a lone head links its new copy */
+		cpu_to_le64(((u64)seq->next_scb.dma_handle));
+}
+
+/**
+ * asd_start_scb_timers -- (add and) start timers of SCBs
+ * @list: pointer to struct list_head of the scbs
+ *
+ * Every SCB in the @list which does not have uldd_timer set gets the
+ * default timeout function, asd_ascb_timedout(), and has its timer
+ * started to expire AIC94XX_SCB_TIMEOUT jiffies from now.  This
+ * function is intended to be called from asd_post_ascb_list(), just
+ * prior to posting the SCBs to the sequencer.
+ */
+static inline void asd_start_scb_timers(struct list_head *list)
+{
+	struct asd_ascb *cur;
+	list_for_each_entry(cur, list, list) {
+		if (cur->uldd_timer)
+			continue;
+		cur->timer.function = asd_ascb_timedout;
+		cur->timer.data = (unsigned long) cur;
+		cur->timer.expires = jiffies + AIC94XX_SCB_TIMEOUT;
+		add_timer(&cur->timer);
+	}
+}
+
+/**
+ * asd_post_ascb_list -- post a list of 1 or more aSCBs to the host adapter
+ * @asd_ha: pointer to a host adapter structure
+ * @ascb: pointer to the first aSCB in the list
+ * @num: number of aSCBs in the list (to be posted)
+ *
+ * See the queueing comment in asd_post_escb_list().
+ *
+ * Additional note on queueing: to keep the ratio of memcpy() calls to
+ * ascbs sent low, we try to batch-send as many ascbs as possible in
+ * one go.  With "room" being max_scbs less the pending count, either
+ *    A) room >= num: the whole batch is sent at once.  "pending" is
+ *       incremented right where room is checked, under the pending
+ *       queue lock, to eliminate races between concurrent callers; or
+ *    B) room < num: nothing is sent.  This should never happen if the
+ *       managing layer considers lldd_queue_size.
+ *
+ * Returns 0 if the list was posted, -SAS_QUEUE_FULL if there was no
+ * room for it.  On success the aSCBs have their timers started and sit
+ * at the tail of the pending queue.
+ */
+int asd_post_ascb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+		       int num)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	unsigned long flags;
+	LIST_HEAD(list);
+	int room;
+
+	spin_lock_irqsave(&seq->pend_q_lock, flags);
+	room = asd_ha->hw_prof.max_scbs - seq->pending;
+	if (num <= room)
+		seq->pending += num;	/* reserve the slots while locked */
+	if (num > room || room == 0) {
+		spin_unlock_irqrestore(&seq->pend_q_lock, flags);
+		asd_printk("%s: scb queue full\n", pci_name(asd_ha->pcidev));
+		return -SAS_QUEUE_FULL;
+	}
+
+	asd_swap_head_scb(asd_ha, ascb);
+
+	/* Hook a real list head into the ascb chain, so that the chain
+	 * can be walked for the timers and spliced onto the pending queue. */
+	__list_add(&list, ascb->list.prev, &ascb->list);
+	asd_start_scb_timers(&list);
+
+	seq->scbpro += num;
+	list_splice_init(&list, seq->pend_q.prev);
+	asd_write_reg_dword(asd_ha, SCBPRO, (u32)seq->scbpro);
+	spin_unlock_irqrestore(&seq->pend_q_lock, flags);
+
+	return 0;
+}
+
+/**
+ * asd_post_escb_list -- post a list of 1 or more empty scb
+ * @asd_ha: pointer to a host adapter structure
+ * @ascb: pointer to the first empty SCB in the list
+ * @num: number of aSCBs in the list (to be posted)
+ *
+ * Essentially asd_post_ascb_list() without incrementing pending,
+ * adding to the pending list or getting indexes; always returns 0.
+ * See asd_init_escbs() and asd_init_post_escbs().
+ *
+ * Sending a list of ascbs is a superset of sending a single ascb, so
+ * only the list form exists.  More specifically, a list costs only a
+ * _single_ memcpy(), at swap head, whereas sending the ascbs one by
+ * one costs a memcpy() each; we want to minimize the ratio of memcpy()
+ * operations to the number of ascbs sent.  The same logic applies to
+ * asd_post_ascb_list().
+ */
+int asd_post_escb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+		       int num)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&seq->pend_q_lock, flags);
+	asd_swap_head_scb(asd_ha, ascb);
+	seq->scbpro += num;
+	asd_write_reg_dword(asd_ha, SCBPRO, (u32)seq->scbpro);
+	spin_unlock_irqrestore(&seq->pend_q_lock, flags);
+
+	return 0;
+}
+
+/* ---------- LED ---------- */
+
+/**
+ * asd_turn_led -- turn on/off an LED
+ * @asd_ha: pointer to host adapter structure
+ * @phy_id: the PHY id whose LED we want to manipulate; others are ignored
+ * @op: nonzero to turn on, 0 to turn off
+ */
+void asd_turn_led(struct asd_ha_struct *asd_ha, int phy_id, int op)
+{
+	u32 v;
+
+	/* phy_id is signed: refuse negative ids as well as too large ones. */
+	if (phy_id < 0 || phy_id >= ASD_MAX_PHYS)
+		return;
+	v = asd_read_reg_dword(asd_ha, LmCONTROL(phy_id));
+	v = op ? (v | LEDPOL) : (v & ~LEDPOL);
+	asd_write_reg_dword(asd_ha, LmCONTROL(phy_id), v);
+}
+
+/**
+ * asd_control_led -- enable/disable an LED on the board
+ * @asd_ha: pointer to host adapter structure
+ * @phy_id: integer, the phy id; ids outside 0..ASD_MAX_PHYS-1 are ignored
+ * @op: integer, nonzero to enable, 0 to disable the LED
+ *
+ * First we output enable the LED (GPIOOER), then we set the source
+ * to be an external module (GPIOCNFGR).
+ */
+void asd_control_led(struct asd_ha_struct *asd_ha, int phy_id, int op)
+{
+	u32 bit, v;
+
+	/* A negative id would make the shift below undefined. */
+	if (phy_id < 0 || phy_id >= ASD_MAX_PHYS)
+		return;
+
+	bit = 1U << phy_id;
+
+	/* Output enable first ... */
+	v = asd_read_reg_dword(asd_ha, GPIOOER);
+	v = op ? (v | bit) : (v & ~bit);
+	asd_write_reg_dword(asd_ha, GPIOOER, v);
+
+	/* ... then the source selection. */
+	v = asd_read_reg_dword(asd_ha, GPIOCNFGR);
+	v = op ? (v | bit) : (v & ~bit);
+	asd_write_reg_dword(asd_ha, GPIOCNFGR, v);
+}
+
+/* ---------- PHY enable ---------- */
+/* Program the OOB registers, ID frame address and LED of phy @phy_id. */
+static int asd_enable_phy(struct asd_ha_struct *asd_ha, int phy_id)
+{
+	struct asd_phy *phy = &asd_ha->phys[phy_id];	/* phy_id is not range checked here */
+
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, INT_ENABLE_2), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, HOT_PLUG_DELAY),
+			   HOTPLUG_DELAY_TIMEOUT);
+
+	/* Get the four PHY_CONTROL defaults from the manuf. sector (MS) */
+	/* XXX we need defaults for those in case MS is broken. */
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_0),
+			   phy->phy_desc->phy_control_0);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_1),
+			   phy->phy_desc->phy_control_1);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_2),
+			   phy->phy_desc->phy_control_2);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_3),
+			   phy->phy_desc->phy_control_3);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_TEN_MS_COMINIT_TIMEOUT(phy_id),
+			    ASD_COMINIT_TIMEOUT);
+
+	asd_write_reg_addr(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(phy_id),
+			   phy->id_frm_tok->dma_handle);	/* DMA address of the phy's id frame token */
+
+	asd_control_led(asd_ha, phy_id, 1);	/* enable the LED of this phy */
+
+	return 0;	/* cannot fail as written */
+}
+
+/* Program the phys in @phy_mask, then post one ENABLE_PHY control phy SCB
+ * for each.  Returns 0, -ENOMEM or the error of asd_post_ascb_list(). */
+int asd_enable_phys(struct asd_ha_struct *asd_ha, const u8 phy_mask)
+{
+	struct asd_ascb *ascb, *ascb_list;
+	int num = 0, k;
+	u8 phy_m, i;
+
+	if (!phy_mask) {
+		asd_printk("%s called with phy_mask of 0!?\n", __FUNCTION__);
+		return 0;
+	}
+
+	for_each_phy(phy_mask, phy_m, i) {
+		num++;
+		asd_enable_phy(asd_ha, i);
+	}
+
+	/* A short list (k != 0) would make the build loop below wrap around. */
+	k = num;
+	ascb_list = asd_ascb_alloc_list(asd_ha, &k, GFP_KERNEL);
+	if (!ascb_list || k) {
+		if (ascb_list)
+			asd_ascb_free_list(ascb_list);
+		asd_printk("no memory for control phy ascb list\n");
+		return -ENOMEM;
+	}
+	ascb = ascb_list;
+	for_each_phy(phy_mask, phy_m, i) {
+		asd_build_control_phy(ascb, i, ENABLE_PHY);
+		ascb = list_entry(ascb->list.next, struct asd_ascb, list);
+	}
+	ASD_DPRINTK("posting %d control phy scbs\n", num);
+	k = asd_post_ascb_list(asd_ha, ascb_list, num);
+	if (k)
+		asd_ascb_free_list(ascb_list);
+	return k;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.h b/drivers/scsi/aic94xx/aic94xx_hwi.h
new file mode 100644
index 0000000000000..c7d505388fed2
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.h
@@ -0,0 +1,397 @@
+/*
+ * Aic94xx SAS/SATA driver hardware interface header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_HWI_H_
+#define _AIC94XX_HWI_H_
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+
+#include <scsi/libsas.h>
+
+#include "aic94xx.h"
+#include "aic94xx_sas.h"
+
+/* Define ASD_MAX_PHYS to the maximum phys ever. Currently 8. */
+#define ASD_MAX_PHYS       8
+#define ASD_PCBA_SN_SIZE   12
+
+/* These IDs still need proper names (the "RAZORx" part); once named,
+ * they should be moved to include/linux/pci_ids.h.
+ */
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR10 0x410
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR12 0x412
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR1E 0x41E
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR30 0x430
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR32 0x432
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR3E 0x43E
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR3F 0x43F
+
+/* One register window of the host adapter: the PCI resource that backs
+ * it and the address the driver uses to reach it.
+ */
+struct asd_ha_addrspace {
+	void __iomem  *addr;        /* ioremap()ed address, or I/O port base */
+	unsigned long  start;       /* pci resource start */
+	unsigned long  len;         /* pci resource len */
+	unsigned long  flags;       /* pci resource flags */
+
+	/* addresses internal to the host adapter */
+	u32 swa_base; /* mmspace 1 (MBAR1) uses this only */
+	u32 swb_base;
+	u32 swc_base;
+};
+
+/* What is known about the adapter BIOS. */
+struct bios_struct {
+	int    present;		  /* non-zero if a BIOS was found */
+	u8     maj;		  /* presumably major version -- TODO confirm */
+	u8     min;		  /* presumably minor version -- TODO confirm */
+	u32    bld;		  /* build number, exported as "bios_build" */
+};
+
+/* Unit element table.  NOTE(review): num/size look like element count
+ * and per-element size of the buffer at @area -- confirm where filled.
+ */
+struct unit_element_struct {
+	u16    num;
+	u16    size;
+	void   *area;		  /* kfree()d when the HA caches are destroyed */
+};
+
+/* What is known about the adapter's flash part. */
+struct flash_struct {
+	u32    bar;
+	int    present;		  /* non-zero if flash was detected */
+	int    wide;
+	u8     manuf;		  /* presumably manufacturer id -- TODO confirm */
+	u8     dev_id;
+	u8     sec_prot;
+
+	u32    dir_offs;	  /* presumably offset of the flash directory */
+};
+
+/* Per-phy settings: SAS address, link rate limits, flags and the four
+ * PHY_CONTROL register values that get programmed when a phy is enabled.
+ */
+struct asd_phy_desc {
+	/* From CTRL-A settings, then set to what is appropriate */
+	u8     sas_addr[SAS_ADDR_SIZE];
+	u8     max_sas_lrate;
+	u8     min_sas_lrate;
+	u8     max_sata_lrate;
+	u8     min_sata_lrate;
+	u8     flags;		  /* ASD_CRC_DIS and/or ASD_SATA_SPINUP_HOLD */
+#define ASD_CRC_DIS  1
+#define ASD_SATA_SPINUP_HOLD 2
+
+	u8     phy_control_0; /* mode 5 reg 0x160 */
+	u8     phy_control_1; /* mode 5 reg 0x161 */
+	u8     phy_control_2; /* mode 5 reg 0x162 */
+	u8     phy_control_3; /* mode 5 reg 0x163 */
+};
+
+/* DMA token: describes one DMA-able buffer by its CPU address, its bus
+ * address and its size, i.e. everything needed to free it again.
+ */
+struct asd_dma_tok {
+	void *vaddr;		  /* CPU (kernel virtual) address */
+	dma_addr_t dma_handle;	  /* bus address of the same buffer */
+	size_t size;		  /* size in bytes */
+};
+
+/* Hardware profile: capabilities and configuration of one adapter.
+ * asd_common_setup() and the chip-specific setup routines provide
+ * defaults for several of these fields.
+ */
+struct hw_profile {
+	struct bios_struct bios;
+	struct unit_element_struct ue;
+	struct flash_struct flash;
+
+	u8     sas_addr[SAS_ADDR_SIZE];	  /* adapter SAS address */
+	char   pcba_sn[ASD_PCBA_SN_SIZE+1]; /* serial number, NUL terminated */
+
+	u8     enabled_phys;	  /* mask of enabled phys */
+	struct asd_phy_desc phy_desc[ASD_MAX_PHYS];
+	u32    max_scbs;	  /* absolute sequencer scb queue size */
+	struct asd_dma_tok *scb_ext;
+	u32    max_ddbs;
+	struct asd_dma_tok *ddb_ext;
+
+	spinlock_t ddb_lock;
+	void  *ddb_bitmap;
+
+	int    num_phys;	  /* ENABLEABLE */
+	int    max_phys;	  /* REPORTED + ENABLEABLE */
+
+	unsigned addr_range;	  /* max # of addrs; max # of possible ports */
+	unsigned port_name_base;
+	unsigned dev_name_base;
+	unsigned sata_name_base;
+};
+
+/* aSCB: the driver-side wrapper around one hardware SCB.  It carries the
+ * list linkage, the DMA memory of the SCB itself and the completion
+ * bookkeeping.
+ */
+struct asd_ascb {
+	struct list_head list;	  /* linkage on ascb lists/queues */
+	struct asd_ha_struct *ha; /* owning host adapter */
+
+	struct scb *scb;	  /* equals dma_scb->vaddr */
+	struct asd_dma_tok dma_scb;
+	struct asd_dma_tok *sg_arr;
+
+	/* completion callback */
+	void (*tasklet_complete)(struct asd_ascb *, struct done_list_struct *);
+	u8     uldd_timer:1;
+
+	/* internally generated command */
+	struct timer_list timer;
+	struct completion completion;
+	u8        tag_valid:1;
+	__be16    tag;		  /* error recovery only */
+
+	/* If this is an Empty SCB, index of first edb in seq->edb_arr. */
+	int    edb_index;
+
+	/* Used by the timer timeout function. */
+	int    tc_index;	  /* slot in seq->tc_index_array, -1 if none */
+
+	void   *uldd_task;
+};
+
+/* Done list sizing.  ASD_DL_SIZE evaluates to 1 << (2 + 8) = 1024.
+ * NOTE(review): whether that counts entries or bytes is not visible
+ * here -- confirm at the point of use.
+ */
+#define ASD_DL_SIZE_BITS   0x8
+#define ASD_DL_SIZE        (1<<(2+ASD_DL_SIZE_BITS))
+#define ASD_DEF_DL_TOGGLE  0x01
+
+/* Sequencer state of one adapter: the pending queue, the tc_index
+ * lookup table, the done list and the empty data buffers/SCBs.
+ */
+struct asd_seq_data {
+	spinlock_t pend_q_lock;	  /* protects pending and pend_q */
+	u16    scbpro;
+	int    pending;
+	struct list_head pend_q;
+	int    can_queue;	  /* per adapter */
+	struct asd_dma_tok next_scb; /* next scb to be delivered to CSEQ */
+
+	spinlock_t tc_index_lock; /* protects the three fields below */
+	void **tc_index_array;
+	void *tc_index_bitmap;
+	int   tc_index_bitmap_bits;
+
+	struct tasklet_struct dl_tasklet;
+	struct done_list_struct *dl; /* array of done list entries, equals */
+	struct asd_dma_tok *actual_dl; /* actual_dl->vaddr */
+	int    dl_toggle;
+	int    dl_next;
+
+	int    num_edbs;
+	struct asd_dma_tok **edb_arr;
+	int    num_escbs;
+	struct asd_ascb **escb_arr; /* array of pointers to escbs */
+};
+
+/* This is the Host Adapter structure.  It describes the hardware
+ * SAS adapter.  One instance is allocated per PCI device in the probe
+ * routine and stored as the PCI driver data.
+ */
+struct asd_ha_struct {
+	struct pci_dev   *pcidev;
+	const char       *name;	  /* human readable adapter name */
+
+	struct sas_ha_struct sas_ha; /* what the SAS layer sees */
+
+	u8                revision_id; /* PCI revision id */
+
+	int               iospace;  /* 1: I/O port access, 0: memory mapped */
+	spinlock_t        iolock;
+	struct asd_ha_addrspace io_handle[2];
+
+	struct hw_profile hw_prof;
+
+	struct asd_phy    phys[ASD_MAX_PHYS];
+	struct asd_sas_port   ports[ASD_MAX_PHYS];
+
+	struct dma_pool  *scb_pool; /* SCBs are allocated from this pool */
+
+	struct asd_seq_data  seq; /* sequencer related */
+};
+
+/* ---------- Common macros ---------- */
+
+/* Low and high 32 bits of a bus address.  The high half is 0 when
+ * dma_addr_t is not 64 bits wide.
+ */
+#define ASD_BUSADDR_LO(__dma_handle) ((u32)(__dma_handle))
+#define ASD_BUSADDR_HI(__dma_handle) (((sizeof(dma_addr_t))==8)     \
+                                    ? ((u32)((__dma_handle) >> 32)) \
+                                    : ((u32)0))
+
+/* Host adapter structure of a struct device embedded in a PCI device. */
+#define dev_to_asd_ha(__dev)  pci_get_drvdata(to_pci_dev(__dev))
+/* True if, after masking with 0xF0FF, the site number is above 0x001F
+ * and is not 0x00FF.
+ */
+#define SCB_SITE_VALID(__site_no) (((__site_no) & 0xF0FF) != 0x00FF   \
+				 && ((__site_no) & 0xF0FF) > 0x001F)
+/* For each bit set in __lseq_mask, set __lseq to equal the bit
+ * position of the set bit and execute the statement following.
+ * __mc is the temporary mask, used as a mask "counter".
+ *
+ * Both macros expand to a "for" followed by an unbraced "if", so an
+ * "else" placed right after the loop body binds to that hidden "if".
+ */
+#define for_each_sequencer(__lseq_mask, __mc, __lseq)                        \
+	for ((__mc)=(__lseq_mask),(__lseq)=0;(__mc)!=0;(__lseq++),(__mc)>>=1)\
+		if (((__mc) & 1))
+#define for_each_phy(__lseq_mask, __mc, __lseq)                              \
+	for ((__mc)=(__lseq_mask),(__lseq)=0;(__mc)!=0;(__lseq++),(__mc)>>=1)\
+		if (((__mc) & 1))
+
+/* Non-zero if phy _I is set in the adapter's enabled_phys mask. */
+#define PHY_ENABLED(_HA, _I) ((_HA)->hw_prof.enabled_phys & (1<<(_I)))
+
+/* ---------- DMA allocs ---------- */
+
+/* Get an (uninitialized) DMA token from the driver-wide token cache.
+ * Returns whatever kmem_cache_alloc() returns for @flags.
+ */
+static inline struct asd_dma_tok *asd_dmatok_alloc(unsigned int flags)
+{
+	struct asd_dma_tok *tok;
+
+	tok = kmem_cache_alloc(asd_dma_token_cache, flags);
+
+	return tok;
+}
+
+/* Give a DMA token back to the driver-wide token cache.  The buffer the
+ * token describes is not touched.
+ */
+static inline void asd_dmatok_free(struct asd_dma_tok *token)
+{
+	struct asd_dma_tok *tok = token;
+
+	kmem_cache_free(asd_dma_token_cache, tok);
+}
+
+/**
+ * asd_alloc_coherent -- allocate a coherent DMA buffer and its token
+ * @asd_ha: pointer to host adapter structure
+ * @size: size of the buffer in bytes
+ * @flags: allocation flags, used for both the token and the buffer
+ *
+ * Returns a token describing the new buffer, or NULL if either the
+ * token or the buffer could not be allocated.
+ */
+static inline struct asd_dma_tok *asd_alloc_coherent(struct asd_ha_struct *
+						     asd_ha, size_t size,
+						     unsigned int flags)
+{
+	struct asd_dma_tok *tok;
+
+	tok = asd_dmatok_alloc(flags);
+	if (!tok)
+		return NULL;
+
+	tok->size = size;
+	tok->vaddr = dma_alloc_coherent(&asd_ha->pcidev->dev, tok->size,
+					&tok->dma_handle, flags);
+	if (tok->vaddr)
+		return tok;
+
+	/* no buffer, so the token is of no use either */
+	asd_dmatok_free(tok);
+	return NULL;
+}
+
+/**
+ * asd_free_coherent -- free a coherent DMA buffer and its token
+ * @asd_ha: pointer to host adapter structure
+ * @token: token returned by asd_alloc_coherent(); NULL is a no-op
+ */
+static inline void asd_free_coherent(struct asd_ha_struct *asd_ha,
+				     struct asd_dma_tok *token)
+{
+	if (!token)
+		return;
+
+	dma_free_coherent(&asd_ha->pcidev->dev, token->size,
+			  token->vaddr, token->dma_handle);
+	asd_dmatok_free(token);
+}
+
+/**
+ * asd_init_ascb -- bring a freshly allocated aSCB into a known state
+ * @asd_ha: host adapter the aSCB belongs to
+ * @ascb: the aSCB; its dma_scb must already be filled in
+ *
+ * After this the aSCB is on no list, has no timer function, no
+ * tc_index (-1) and an initialized completion.
+ */
+static inline void asd_init_ascb(struct asd_ha_struct *asd_ha,
+				 struct asd_ascb *ascb)
+{
+	ascb->ha = asd_ha;
+	ascb->scb = ascb->dma_scb.vaddr;
+	ascb->tc_index = -1;
+	INIT_LIST_HEAD(&ascb->list);
+
+	ascb->timer.function = NULL;
+	init_timer(&ascb->timer);
+
+	init_completion(&ascb->completion);
+}
+
+/* Release slot @index: mark it free in the bitmap and forget the pointer
+ * stored for it.
+ *
+ * Must be called with the tc_index_lock held!
+ */
+static inline void asd_tc_index_release(struct asd_seq_data *seq, int index)
+{
+	void **slot = &seq->tc_index_array[index];
+
+	*slot = NULL;
+	clear_bit(index, seq->tc_index_bitmap);
+}
+
+/* Claim the first free slot, store @ptr in it and return its index.
+ * Returns -1 if every slot is taken.
+ *
+ * Must be called with the tc_index_lock held!
+ */
+static inline int asd_tc_index_get(struct asd_seq_data *seq, void *ptr)
+{
+	const int nbits = seq->tc_index_bitmap_bits;
+	int slot = find_first_zero_bit(seq->tc_index_bitmap, nbits);
+
+	if (slot != nbits) {
+		seq->tc_index_array[slot] = ptr;
+		set_bit(slot, seq->tc_index_bitmap);
+		return slot;
+	}
+
+	return -1;
+}
+
+/* Look up the pointer stored in slot @index.  No range check is done.
+ *
+ * Must be called with the tc_index_lock held!
+ */
+static inline void *asd_tc_index_find(struct asd_seq_data *seq, int index)
+{
+	void **table = seq->tc_index_array;
+
+	return table[index];
+}
+
+/**
+ * asd_ascb_free -- free a single aSCB after it has completed
+ * @ascb: pointer to the aSCB of interest; NULL is a no-op
+ *
+ * This frees an aSCB after it has been executed/completed by
+ * the sequencer: its tc_index is released, the SCB goes back to the
+ * adapter's SCB pool and the aSCB itself to the aSCB cache.  The aSCB
+ * must not be on any list.
+ */
+static inline void asd_ascb_free(struct asd_ascb *ascb)
+{
+	struct asd_ha_struct *asd_ha;
+	struct asd_seq_data *seq;
+	unsigned long flags;
+
+	if (!ascb)
+		return;
+
+	asd_ha = ascb->ha;
+	seq = &asd_ha->seq;
+
+	BUG_ON(!list_empty(&ascb->list));
+
+	spin_lock_irqsave(&seq->tc_index_lock, flags);
+	asd_tc_index_release(seq, ascb->tc_index);
+	spin_unlock_irqrestore(&seq->tc_index_lock, flags);
+
+	dma_pool_free(asd_ha->scb_pool, ascb->dma_scb.vaddr,
+		      ascb->dma_scb.dma_handle);
+	kmem_cache_free(asd_ascb_cache, ascb);
+}
+
+/**
+ * asd_ascb_free_list -- free a list of ascbs
+ * @ascb_list: a list of ascbs
+ *
+ * This function will free a list of ascbs allocated by asd_ascb_alloc_list.
+ * It is used when say the scb queueing function returned QUEUE_FULL,
+ * and we do not need the ascbs any more.
+ */
+static inline void asd_ascb_free_list(struct asd_ascb *ascb_list)
+{
+	LIST_HEAD(head);
+
+	/* The ascbs form a ring without a head; splice a local head in
+	 * just before the first one so the ring can be walked as a list.
+	 */
+	__list_add(&head, ascb_list->list.prev, &ascb_list->list);
+
+	while (!list_empty(&head)) {
+		struct list_head *entry = head.next;
+
+		list_del_init(entry);
+		asd_ascb_free(list_entry(entry, struct asd_ascb, list));
+	}
+}
+
+/* ---------- Function declarations ---------- */
+
+int  asd_init_hw(struct asd_ha_struct *asd_ha);
+/* Interrupt handler; the probe routine installs it with request_irq(). */
+irqreturn_t asd_hw_isr(int irq, void *dev_id, struct pt_regs *regs);
+
+
+/* Allocate a list of aSCBs.  NOTE(review): *num is in/out; callers
+ * subtract the value left in it from their request -- confirm meaning.
+ */
+struct asd_ascb *asd_ascb_alloc_list(struct asd_ha_struct
+				     *asd_ha, int *num,
+				     unsigned int gfp_mask);
+
+int  asd_post_ascb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+			int num);
+int  asd_post_escb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+			int num);
+
+int  asd_init_post_escbs(struct asd_ha_struct *asd_ha);
+void asd_build_control_phy(struct asd_ascb *ascb, int phy_id, u8 subfunc);
+void asd_control_led(struct asd_ha_struct *asd_ha, int phy_id, int op);
+void asd_turn_led(struct asd_ha_struct *asd_ha, int phy_id, int op);
+/* Enable every phy whose bit is set in phy_mask. */
+int  asd_enable_phys(struct asd_ha_struct *asd_ha, const u8 phy_mask);
+void asd_build_initiate_link_adm_task(struct asd_ascb *ascb, int phy_id,
+				      u8 subfunc);
+
+/* Timer callback signature (unsigned long data). */
+void asd_ascb_timedout(unsigned long data);
+int  asd_chip_hardrst(struct asd_ha_struct *asd_ha);
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
new file mode 100644
index 0000000000000..3ec2e46f80c61
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -0,0 +1,860 @@
+/*
+ * Aic94xx SAS/SATA driver initialization.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include <scsi/scsi_host.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_seq.h"
+
+/* The format is "version.release.patchlevel" */
+#define ASD_DRIVER_VERSION "1.0.2"
+
+/* Module parameter "use_msi": read-only in sysfs, tested at probe and
+ * remove time to enable/disable PCI MSI.
+ */
+static int use_msi = 0;
+module_param_named(use_msi, use_msi, int, S_IRUGO);
+MODULE_PARM_DESC(use_msi, "\n"
+	"\tEnable(1) or disable(0) using PCI MSI.\n"
+	"\tDefault: 0");
+
+/* Module parameter "collector", stored in lldd_max_execute_num. */
+static int lldd_max_execute_num = 0;
+module_param_named(collector, lldd_max_execute_num, int, S_IRUGO);
+MODULE_PARM_DESC(collector, "\n"
+	"\tIf greater than one, tells the SAS Layer to run in Task Collector\n"
+	"\tMode.  If 1 or 0, tells the SAS Layer to run in Direct Mode.\n"
+	"\tThe aic94xx SAS LLDD supports both modes.\n"
+	"\tDefault: 0 (Direct Mode).\n");
+
+/* Room for a SAS address as hex digits plus NUL; not static, so
+ * presumably referenced from another file of the driver.
+ */
+char sas_addr_str[2*SAS_ADDR_SIZE + 1] = "";
+
+static struct scsi_transport_template *aic94xx_transport_template;
+
+/* SCSI host template.  All entry points are the generic ones provided
+ * by the SAS layer; the probe routine passes this template to
+ * scsi_host_alloc().
+ */
+static struct scsi_host_template aic94xx_sht = {
+	.module			= THIS_MODULE,
+	/* host template name */
+	.name			= "aic94xx",
+	.queuecommand		= sas_queuecommand,
+	.target_alloc		= sas_target_alloc,
+	.slave_configure	= sas_slave_configure,
+	.slave_destroy		= sas_slave_destroy,
+	.change_queue_depth	= sas_change_queue_depth,
+	.change_queue_type	= sas_change_queue_type,
+	.bios_param		= sas_bios_param,
+	.can_queue		= 1,
+	.cmd_per_lun		= 1,
+	.this_id		= -1,
+	.sg_tablesize		= SG_ALL,
+	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
+	.use_clustering		= ENABLE_CLUSTERING,
+};
+
+/**
+ * asd_map_memio -- reserve and map the memory BARs of the adapter
+ * @asd_ha: pointer to host adapter structure
+ *
+ * PCI BAR 0 and BAR 2 are reserved and ioremap()ed into io_handle[0]
+ * and io_handle[1] respectively.  If anything fails for BAR 2, BAR 0 is
+ * unmapped and released again, so on error nothing stays mapped.
+ *
+ * Returns 0 on success, -ENODEV if a BAR has no start or length,
+ * -ENOMEM if a BAR could not be mapped, or the pci_request_region()
+ * error.
+ */
+static int __devinit asd_map_memio(struct asd_ha_struct *asd_ha)
+{
+	int err, i;
+	struct asd_ha_addrspace *io_handle;
+
+	asd_ha->iospace = 0;
+	/* i takes the values 0 and 2: the two memory BARs */
+	for (i = 0; i < 3; i += 2) {
+		io_handle = &asd_ha->io_handle[i==0?0:1];
+		io_handle->start = pci_resource_start(asd_ha->pcidev, i);
+		io_handle->len   = pci_resource_len(asd_ha->pcidev, i);
+		io_handle->flags = pci_resource_flags(asd_ha->pcidev, i);
+		err = -ENODEV;
+		if (!io_handle->start || !io_handle->len) {
+			asd_printk("MBAR%d start or length for %s is 0.\n",
+				   i==0?0:1, pci_name(asd_ha->pcidev));
+			goto Err;
+		}
+		err = pci_request_region(asd_ha->pcidev, i, ASD_DRIVER_NAME);
+		if (err) {
+			asd_printk("couldn't reserve memory region for %s\n",
+				   pci_name(asd_ha->pcidev));
+			goto Err;
+		}
+		if (io_handle->flags & IORESOURCE_CACHEABLE)
+			io_handle->addr = ioremap(io_handle->start,
+						  io_handle->len);
+		else
+			io_handle->addr = ioremap_nocache(io_handle->start,
+							  io_handle->len);
+		if (!io_handle->addr) {
+			asd_printk("couldn't map MBAR%d of %s\n", i==0?0:1,
+				   pci_name(asd_ha->pcidev));
+			/* err is still 0 from pci_request_region(); without
+			 * this the caller would be told the mapping worked.
+			 */
+			err = -ENOMEM;
+			goto Err_unreq;
+		}
+	}
+
+	return 0;
+Err_unreq:
+	pci_release_region(asd_ha->pcidev, i);
+Err:
+	/* failing on the second BAR: undo the first one as well */
+	if (i > 0) {
+		io_handle = &asd_ha->io_handle[0];
+		iounmap(io_handle->addr);
+		pci_release_region(asd_ha->pcidev, 0);
+	}
+	return err;
+}
+
+/* Undo asd_map_memio(): unmap and release BAR 2 (io_handle[1]) first,
+ * then BAR 0 (io_handle[0]).
+ */
+static void __devexit asd_unmap_memio(struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pdev = asd_ha->pcidev;
+
+	iounmap(asd_ha->io_handle[1].addr);
+	pci_release_region(pdev, 2);
+
+	iounmap(asd_ha->io_handle[0].addr);
+	pci_release_region(pdev, 0);
+}
+
+/**
+ * asd_map_ioport -- reserve the I/O port BAR of the adapter
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Used when the device cannot be accessed through memory.  The port
+ * base is stored in io_handle[0].addr and iospace is set to 1.
+ *
+ * Returns 0 on success, -ENODEV if the BAR has no start or length, or
+ * the pci_request_region() error.
+ */
+static int __devinit asd_map_ioport(struct asd_ha_struct *asd_ha)
+{
+	struct asd_ha_addrspace *io = &asd_ha->io_handle[0];
+	struct pci_dev *pdev = asd_ha->pcidev;
+	const int bar = PCI_IOBAR_OFFSET;
+	int rc;
+
+	asd_ha->iospace = 1;
+
+	io->start = pci_resource_start(pdev, bar);
+	io->len   = pci_resource_len(pdev, bar);
+	io->flags = pci_resource_flags(pdev, bar);
+	io->addr  = (void __iomem *) io->start;
+
+	if (!io->start || !io->len) {
+		asd_printk("couldn't get IO ports for %s\n",
+			   pci_name(asd_ha->pcidev));
+		return -ENODEV;
+	}
+
+	rc = pci_request_region(pdev, bar, ASD_DRIVER_NAME);
+	if (rc)
+		asd_printk("couldn't reserve io space for %s\n",
+			   pci_name(asd_ha->pcidev));
+
+	return rc;
+}
+
+/* Undo asd_map_ioport(): give the I/O port BAR back. */
+static void __devexit asd_unmap_ioport(struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pdev = asd_ha->pcidev;
+
+	pci_release_region(pdev, PCI_IOBAR_OFFSET);
+}
+
+/**
+ * asd_map_ha -- make the adapter registers accessible
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Reads the PCI command register and maps the device through memory if
+ * memory access is enabled, otherwise through I/O ports if those are
+ * enabled.
+ *
+ * Returns 0 on success, -ENODEV if neither access method is enabled, or
+ * the error of the failing step.
+ */
+static int __devinit asd_map_ha(struct asd_ha_struct *asd_ha)
+{
+	u16 cmd_reg;
+	int err;
+
+	err = pci_read_config_word(asd_ha->pcidev, PCI_COMMAND, &cmd_reg);
+	if (err) {
+		asd_printk("couldn't read command register of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	if (cmd_reg & PCI_COMMAND_MEMORY)
+		return asd_map_memio(asd_ha);
+
+	if (cmd_reg & PCI_COMMAND_IO) {
+		err = asd_map_ioport(asd_ha);
+		if (err)
+			return err;
+		asd_printk("%s ioport mapped -- upgrade your hardware\n",
+			   pci_name(asd_ha->pcidev));
+		return 0;
+	}
+
+	asd_printk("no proper device access to %s\n",
+		   pci_name(asd_ha->pcidev));
+	return -ENODEV;
+}
+
+/* Undo asd_map_ha() using whichever access method was set up. */
+static void __devexit asd_unmap_ha(struct asd_ha_struct *asd_ha)
+{
+	if (!asd_ha->iospace) {
+		asd_unmap_memio(asd_ha);
+		return;
+	}
+
+	asd_unmap_ioport(asd_ha);
+}
+
+/* Revision names, indexed by PCI revision id.  Only ids 0, 1 and 8 have
+ * a name; every other entry of the 30 is NULL.
+ */
+static const char *asd_dev_rev[30] = {
+	[0] = "A0",
+	[1] = "A1",
+	[8] = "B0",
+};
+
+/**
+ * asd_common_setup -- setup shared by all supported chips
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Reads the PCI revision id, rejects revisions older than B0 and fills
+ * the hardware profile with default values.
+ *
+ * Returns 0 on success, -ENODEV for an unsupported revision, or the
+ * pci_read_config_byte() error.
+ */
+static int __devinit asd_common_setup(struct asd_ha_struct *asd_ha)
+{
+	struct hw_profile *prof = &asd_ha->hw_prof;
+	struct asd_phy_desc *pd;
+	int rc;
+
+	rc = pci_read_config_byte(asd_ha->pcidev, PCI_REVISION_ID,
+				  &asd_ha->revision_id);
+	if (rc) {
+		asd_printk("couldn't read REVISION ID register of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return rc;
+	}
+
+	if (asd_ha->revision_id < AIC9410_DEV_REV_B0) {
+		asd_printk("%s is revision %s (%X), which is not supported\n",
+			   pci_name(asd_ha->pcidev),
+			   asd_dev_rev[asd_ha->revision_id],
+			   asd_ha->revision_id);
+		return -ENODEV;
+	}
+
+	/* Provide some sane default values. */
+	prof->max_scbs = 512;
+	prof->max_ddbs = 128;
+	prof->num_phys = ASD_MAX_PHYS;
+	/* All phys are enabled, by default. */
+	prof->enabled_phys = 0xFF;
+
+	for (pd = prof->phy_desc; pd < prof->phy_desc + ASD_MAX_PHYS; pd++) {
+		pd->max_sas_lrate  = PHY_LINKRATE_3;
+		pd->min_sas_lrate  = PHY_LINKRATE_1_5;
+		pd->max_sata_lrate = PHY_LINKRATE_1_5;
+		pd->min_sata_lrate = PHY_LINKRATE_1_5;
+	}
+
+	return 0;
+}
+
+/* Setup for the AIC-9410: common setup plus an address range of 8 and
+ * the matching name bases.  Returns 0 or the asd_common_setup() error.
+ */
+static int __devinit asd_aic9410_setup(struct asd_ha_struct *asd_ha)
+{
+	struct hw_profile *prof = &asd_ha->hw_prof;
+	int err;
+
+	err = asd_common_setup(asd_ha);
+	if (!err) {
+		prof->addr_range = 8;
+		prof->port_name_base = 0;
+		prof->dev_name_base = 8;
+		prof->sata_name_base = 16;
+	}
+
+	return err;
+}
+
+/* Setup for the AIC-9405: common setup plus an address range of 4 and
+ * the matching name bases.  Returns 0 or the asd_common_setup() error.
+ */
+static int __devinit asd_aic9405_setup(struct asd_ha_struct *asd_ha)
+{
+	struct hw_profile *prof = &asd_ha->hw_prof;
+	int err;
+
+	err = asd_common_setup(asd_ha);
+	if (!err) {
+		prof->addr_range = 4;
+		prof->port_name_base = 0;
+		prof->dev_name_base = 4;
+		prof->sata_name_base = 8;
+	}
+
+	return err;
+}
+
+/* sysfs "revision" attribute: the revision name of the adapter.
+ *
+ * revision_id is a u8 read from PCI config space, while asd_dev_rev[]
+ * has only 30 entries, most of them unnamed.  The index is therefore
+ * range checked, and "unknown" is shown for any revision without a
+ * name instead of reading past the table.
+ */
+static ssize_t asd_show_dev_rev(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct asd_ha_struct *asd_ha = dev_to_asd_ha(dev);
+	const char *rev = NULL;
+
+	if (asd_ha->revision_id < ARRAY_SIZE(asd_dev_rev))
+		rev = asd_dev_rev[asd_ha->revision_id];
+	if (!rev)
+		rev = "unknown";
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", rev);
+}
+static DEVICE_ATTR(revision, S_IRUGO, asd_show_dev_rev, NULL);
+
+/* sysfs "bios_build" attribute: the BIOS build number. */
+static ssize_t asd_show_dev_bios_build(struct device *dev,
+				       struct device_attribute *attr,char *buf)
+{
+	struct asd_ha_struct *asd_ha;
+
+	asd_ha = dev_to_asd_ha(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", asd_ha->hw_prof.bios.bld);
+}
+static DEVICE_ATTR(bios_build, S_IRUGO, asd_show_dev_bios_build, NULL);
+
+/* sysfs "pcba_sn" attribute: the serial number string of the board. */
+static ssize_t asd_show_dev_pcba_sn(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct asd_ha_struct *asd_ha;
+
+	asd_ha = dev_to_asd_ha(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", asd_ha->hw_prof.pcba_sn);
+}
+static DEVICE_ATTR(pcba_sn, S_IRUGO, asd_show_dev_pcba_sn, NULL);
+
+/* Create the per-device sysfs attributes (revision, bios_build,
+ * pcba_sn).  Creation errors are not reported.
+ */
+static void asd_create_dev_attrs(struct asd_ha_struct *asd_ha)
+{
+	struct device *dev = &asd_ha->pcidev->dev;
+
+	device_create_file(dev, &dev_attr_revision);
+	device_create_file(dev, &dev_attr_bios_build);
+	device_create_file(dev, &dev_attr_pcba_sn);
+}
+
+/* Remove the sysfs attributes created by asd_create_dev_attrs(). */
+static void asd_remove_dev_attrs(struct asd_ha_struct *asd_ha)
+{
+	struct device *dev = &asd_ha->pcidev->dev;
+
+	device_remove_file(dev, &dev_attr_revision);
+	device_remove_file(dev, &dev_attr_bios_build);
+	device_remove_file(dev, &dev_attr_pcba_sn);
+}
+
+/* Per-chip data, indexed by the driver_data value of the matching PCI
+ * table entry.  The first entry, 0, is used for dynamic ids, the rest
+ * for devices we know about.
+ */
+static struct asd_pcidev_struct {
+	const char * name;	/* adapter name, reported at probe time */
+	int (*setup)(struct asd_ha_struct *asd_ha); /* chip specific setup */
+} asd_pcidev_data[] = {
+	/* Id 0 is used for dynamic ids. */
+	{ .name  = "Adaptec AIC-94xx SAS/SATA Host Adapter",
+	  .setup = asd_aic9410_setup
+	},
+	{ .name  = "Adaptec AIC-9410W SAS/SATA Host Adapter",
+	  .setup = asd_aic9410_setup
+	},
+	{ .name  = "Adaptec AIC-9405W SAS/SATA Host Adapter",
+	  .setup = asd_aic9405_setup
+	},
+};
+
+/* Create the per-adapter DMA pool that SCBs are allocated from.
+ * Returns 0 on success, -ENOMEM if the pool could not be created.
+ */
+static inline int asd_create_ha_caches(struct asd_ha_struct *asd_ha)
+{
+	struct dma_pool *pool;
+
+	pool = dma_pool_create(ASD_DRIVER_NAME "_scb_pool",
+			       &asd_ha->pcidev->dev,
+			       sizeof(struct scb),
+			       8, 0);
+	asd_ha->scb_pool = pool;
+	if (pool)
+		return 0;
+
+	asd_printk("couldn't create scb pool\n");
+	return -ENOMEM;
+}
+
+/**
+ * asd_free_edbs -- free empty data buffers
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Frees every buffer in seq.edb_arr and then the array itself.
+ */
+static inline void asd_free_edbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int n = 0;
+
+	while (n < seq->num_edbs) {
+		asd_free_coherent(asd_ha, seq->edb_arr[n]);
+		n++;
+	}
+
+	kfree(seq->edb_arr);
+	seq->edb_arr = NULL;
+}
+
+/**
+ * asd_free_escbs -- free the empty SCBs
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Takes every empty SCB off whatever list it is on, frees it, and then
+ * frees the seq.escb_arr pointer array.
+ */
+static inline void asd_free_escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int n;
+
+	for (n = 0; n < seq->num_escbs; n++) {
+		struct asd_ascb *escb = seq->escb_arr[n];
+
+		/* asd_ascb_free() insists on an unlinked aSCB */
+		if (!list_empty(&escb->list))
+			list_del_init(&escb->list);
+
+		asd_ascb_free(escb);
+	}
+
+	kfree(seq->escb_arr);
+	seq->escb_arr = NULL;
+}
+
+/**
+ * asd_destroy_ha_caches -- free all per-adapter memory
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Frees the DDB/SCB extension areas, the DDB bitmap, the identify frame
+ * of every phy, the empty SCBs and data buffers, the unit element area,
+ * the tc_index tables, the done list and the "next SCB", and finally
+ * destroys the SCB pool.  Pieces that were never allocated are skipped.
+ */
+static inline void asd_destroy_ha_caches(struct asd_ha_struct *asd_ha)
+{
+	int i;
+
+	if (asd_ha->hw_prof.ddb_ext)
+		asd_free_coherent(asd_ha, asd_ha->hw_prof.ddb_ext);
+	if (asd_ha->hw_prof.scb_ext)
+		asd_free_coherent(asd_ha, asd_ha->hw_prof.scb_ext);
+
+	if (asd_ha->hw_prof.ddb_bitmap)
+		kfree(asd_ha->hw_prof.ddb_bitmap);
+	asd_ha->hw_prof.ddb_bitmap = NULL;
+
+	/* asd_free_coherent() ignores NULL, so phys without an identify
+	 * frame are fine here.
+	 */
+	for (i = 0; i < ASD_MAX_PHYS; i++) {
+		struct asd_phy *phy = &asd_ha->phys[i];
+
+		asd_free_coherent(asd_ha, phy->id_frm_tok);
+	}
+	if (asd_ha->seq.escb_arr)
+		asd_free_escbs(asd_ha);
+	if (asd_ha->seq.edb_arr)
+		asd_free_edbs(asd_ha);
+	if (asd_ha->hw_prof.ue.area) {
+		kfree(asd_ha->hw_prof.ue.area);
+		asd_ha->hw_prof.ue.area = NULL;
+	}
+	if (asd_ha->seq.tc_index_array) {
+		kfree(asd_ha->seq.tc_index_array);
+		kfree(asd_ha->seq.tc_index_bitmap);
+		asd_ha->seq.tc_index_array = NULL;
+		asd_ha->seq.tc_index_bitmap = NULL;
+	}
+	if (asd_ha->seq.actual_dl) {
+			asd_free_coherent(asd_ha, asd_ha->seq.actual_dl);
+			asd_ha->seq.actual_dl = NULL;
+			asd_ha->seq.dl = NULL;
+	}
+	if (asd_ha->seq.next_scb.vaddr) {
+		dma_pool_free(asd_ha->scb_pool, asd_ha->seq.next_scb.vaddr,
+			      asd_ha->seq.next_scb.dma_handle);
+		asd_ha->seq.next_scb.vaddr = NULL;
+	}
+	/* every SCB has been returned by now, so the pool can go */
+	dma_pool_destroy(asd_ha->scb_pool);
+	asd_ha->scb_pool = NULL;
+}
+
+kmem_cache_t *asd_dma_token_cache;
+kmem_cache_t *asd_ascb_cache;
+
+/* Create the two driver-wide slab caches (DMA tokens and aSCBs) unless
+ * they exist already.  If the aSCB cache cannot be created the DMA
+ * token cache is destroyed again.
+ *
+ * Returns 0 on success, -ENOMEM otherwise.
+ */
+static int asd_create_global_caches(void)
+{
+	if (asd_dma_token_cache == NULL) {
+		asd_dma_token_cache
+			= kmem_cache_create(ASD_DRIVER_NAME "_dma_token",
+					    sizeof(struct asd_dma_tok),
+					    0,
+					    SLAB_HWCACHE_ALIGN,
+					    NULL, NULL);
+		if (asd_dma_token_cache == NULL) {
+			asd_printk("couldn't create dma token cache\n");
+			return -ENOMEM;
+		}
+	}
+
+	if (asd_ascb_cache != NULL)
+		return 0;
+
+	asd_ascb_cache = kmem_cache_create(ASD_DRIVER_NAME "_ascb",
+					   sizeof(struct asd_ascb),
+					   0,
+					   SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (asd_ascb_cache != NULL)
+		return 0;
+
+	asd_printk("couldn't create ascb cache\n");
+	kmem_cache_destroy(asd_dma_token_cache);
+	asd_dma_token_cache = NULL;
+	return -ENOMEM;
+}
+
+/* Destroy whichever of the two driver-wide slab caches exist and reset
+ * both pointers to NULL.
+ */
+static void asd_destroy_global_caches(void)
+{
+	if (asd_dma_token_cache != NULL) {
+		kmem_cache_destroy(asd_dma_token_cache);
+		asd_dma_token_cache = NULL;
+	}
+
+	if (asd_ascb_cache != NULL) {
+		kmem_cache_destroy(asd_ascb_cache);
+		asd_ascb_cache = NULL;
+	}
+}
+
+/**
+ * asd_register_sas_ha -- register the adapter with the SAS layer
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Builds the arrays of phy and port pointers the SAS layer expects,
+ * fills in the remaining sas_ha fields and calls sas_register_ha().
+ * On success the two arrays are owned by sas_ha and are freed by
+ * asd_unregister_sas_ha(); on failure they are freed here.
+ *
+ * Returns 0 on success, -ENOMEM if the arrays could not be allocated,
+ * or the sas_register_ha() error.
+ */
+static int asd_register_sas_ha(struct asd_ha_struct *asd_ha)
+{
+	int i, err;
+	/* arrays of pointers: size them by the pointer, not the struct */
+	struct asd_sas_phy   **sas_phys =
+		kmalloc(ASD_MAX_PHYS * sizeof(*sas_phys), GFP_KERNEL);
+	struct asd_sas_port  **sas_ports =
+		kmalloc(ASD_MAX_PHYS * sizeof(*sas_ports), GFP_KERNEL);
+
+	if (!sas_phys || !sas_ports) {
+		kfree(sas_phys);
+		kfree(sas_ports);
+		return -ENOMEM;
+	}
+
+	asd_ha->sas_ha.sas_ha_name = (char *) asd_ha->name;
+	asd_ha->sas_ha.lldd_module = THIS_MODULE;
+	asd_ha->sas_ha.sas_addr = &asd_ha->hw_prof.sas_addr[0];
+
+	for (i = 0; i < ASD_MAX_PHYS; i++) {
+		sas_phys[i] = &asd_ha->phys[i].sas_phy;
+		sas_ports[i] = &asd_ha->ports[i];
+	}
+
+	asd_ha->sas_ha.sas_phy = sas_phys;
+	asd_ha->sas_ha.sas_port= sas_ports;
+	asd_ha->sas_ha.num_phys= ASD_MAX_PHYS;
+
+	asd_ha->sas_ha.lldd_queue_size = asd_ha->seq.can_queue;
+
+	err = sas_register_ha(&asd_ha->sas_ha);
+	if (err) {
+		/* nobody will call asd_unregister_sas_ha() for us */
+		asd_ha->sas_ha.sas_phy = NULL;
+		asd_ha->sas_ha.sas_port = NULL;
+		kfree(sas_phys);
+		kfree(sas_ports);
+	}
+
+	return err;
+}
+
+/**
+ * asd_unregister_sas_ha -- undo asd_register_sas_ha()
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Unregisters from the SAS layer, removes the SCSI host and drops the
+ * reference to it, then frees the phy and port pointer arrays.
+ *
+ * Returns the result of sas_unregister_ha().
+ */
+static int asd_unregister_sas_ha(struct asd_ha_struct *asd_ha)
+{
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+	int rc = sas_unregister_ha(sas_ha);
+
+	sas_remove_host(sas_ha->core.shost);
+	scsi_remove_host(sas_ha->core.shost);
+	scsi_host_put(sas_ha->core.shost);
+
+	kfree(sas_ha->sas_phy);
+	kfree(sas_ha->sas_port);
+
+	return rc;
+}
+
+/**
+ * asd_pci_probe -- bind the driver to one aic94xx PCI device
+ * @dev: PCI device being probed
+ * @id: matching entry of aic94xx_pci_table; its driver_data indexes
+ *      asd_pcidev_data[]
+ *
+ * Enables the device, allocates the Scsi_Host and the host adapter
+ * structure, maps the registers, initializes the hardware, installs
+ * the interrupt handler, posts the empty SCBs, registers with the SAS
+ * layer and finally enables the phys.
+ *
+ * On failure everything set up so far is undone in reverse order, and
+ * the reference to the Scsi_Host taken by scsi_host_alloc() is dropped
+ * exactly once on every error path.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __devinit asd_pci_probe(struct pci_dev *dev,
+				   const struct pci_device_id *id)
+{
+	struct asd_pcidev_struct *asd_dev;
+	unsigned asd_id = (unsigned) id->driver_data;
+	struct asd_ha_struct *asd_ha;
+	struct Scsi_Host *shost;
+	int err;
+
+	if (asd_id >= ARRAY_SIZE(asd_pcidev_data)) {
+		asd_printk("wrong driver_data in PCI table\n");
+		return -ENODEV;
+	}
+
+	if ((err = pci_enable_device(dev))) {
+		asd_printk("couldn't enable device %s\n", pci_name(dev));
+		return err;
+	}
+
+	pci_set_master(dev);
+
+	err = -ENOMEM;
+
+	shost = scsi_host_alloc(&aic94xx_sht, sizeof(void *));
+	if (!shost)
+		goto Err;
+
+	asd_dev = &asd_pcidev_data[asd_id];
+
+	asd_ha = kzalloc(sizeof(*asd_ha), GFP_KERNEL);
+	if (!asd_ha) {
+		asd_printk("out of memory\n");
+		goto Err_put;
+	}
+	asd_ha->pcidev = dev;
+	asd_ha->sas_ha.pcidev = asd_ha->pcidev;
+	asd_ha->sas_ha.lldd_ha = asd_ha;
+
+	asd_ha->name = asd_dev->name;
+	asd_printk("found %s, device %s\n", asd_ha->name, pci_name(dev));
+
+	SHOST_TO_SAS_HA(shost) = &asd_ha->sas_ha;
+	asd_ha->sas_ha.core.shost = shost;
+	shost->transportt = aic94xx_transport_template;
+	shost->max_id = ~0;
+	shost->max_lun = ~0;
+	shost->max_cmd_len = 16;
+
+	/* The host was never added, so it must only be put, not removed. */
+	err = scsi_add_host(shost, &dev->dev);
+	if (err)
+		goto Err_free_ha;
+
+	err = asd_dev->setup(asd_ha);
+	if (err)
+		goto Err_free;
+
+	/* Prefer 64 bit DMA, fall back to 32 bit. */
+	err = -ENODEV;
+	if (!pci_set_dma_mask(dev, DMA_64BIT_MASK)
+	    && !pci_set_consistent_dma_mask(dev, DMA_64BIT_MASK))
+		;
+	else if (!pci_set_dma_mask(dev, DMA_32BIT_MASK)
+		 && !pci_set_consistent_dma_mask(dev, DMA_32BIT_MASK))
+		;
+	else {
+		asd_printk("no suitable DMA mask for %s\n", pci_name(dev));
+		goto Err_free;
+	}
+
+	pci_set_drvdata(dev, asd_ha);
+
+	err = asd_map_ha(asd_ha);
+	if (err)
+		goto Err_free;
+
+	err = asd_create_ha_caches(asd_ha);
+	if (err)
+		goto Err_unmap;
+
+	err = asd_init_hw(asd_ha);
+	if (err)
+		goto Err_free_cache;
+
+	asd_printk("device %s: SAS addr %llx, PCBA SN %s, %d phys, %d enabled "
+		   "phys, flash %s, BIOS %s%d\n",
+		   pci_name(dev), SAS_ADDR(asd_ha->hw_prof.sas_addr),
+		   asd_ha->hw_prof.pcba_sn, asd_ha->hw_prof.max_phys,
+		   asd_ha->hw_prof.num_phys,
+		   asd_ha->hw_prof.flash.present ? "present" : "not present",
+		   asd_ha->hw_prof.bios.present ? "build " : "not present",
+		   asd_ha->hw_prof.bios.bld);
+
+	if (use_msi)
+		pci_enable_msi(asd_ha->pcidev);
+
+	err = request_irq(asd_ha->pcidev->irq, asd_hw_isr, SA_SHIRQ,
+			  ASD_DRIVER_NAME, asd_ha);
+	if (err) {
+		asd_printk("couldn't get irq %d for %s\n",
+			   asd_ha->pcidev->irq, pci_name(asd_ha->pcidev));
+		goto Err_irq;
+	}
+	asd_enable_ints(asd_ha);
+
+	err = asd_init_post_escbs(asd_ha);
+	if (err) {
+		asd_printk("couldn't post escbs for %s\n",
+			   pci_name(asd_ha->pcidev));
+		goto Err_escbs;
+	}
+	ASD_DPRINTK("escbs posted\n");
+
+	asd_create_dev_attrs(asd_ha);
+
+	err = asd_register_sas_ha(asd_ha);
+	if (err)
+		goto Err_reg_sas;
+
+	err = asd_enable_phys(asd_ha, asd_ha->hw_prof.enabled_phys);
+	if (err) {
+		asd_printk("coudln't enable phys, err:%d\n", err);
+		goto Err_en_phys;
+	}
+	ASD_DPRINTK("enabled phys\n");
+	/* give the phy enabling interrupt event time to come in (1s
+	 * is empirically about all it takes) */
+	ssleep(1);
+	/* Wait for discovery to finish */
+	scsi_flush_work(asd_ha->sas_ha.core.shost);
+
+	return 0;
+Err_en_phys:
+	/* asd_unregister_sas_ha() also removes the Scsi_Host and drops
+	 * our reference to it; forget the pointer so that the labels
+	 * below do not remove and put the host a second time.
+	 */
+	asd_unregister_sas_ha(asd_ha);
+	shost = NULL;
+Err_reg_sas:
+	asd_remove_dev_attrs(asd_ha);
+Err_escbs:
+	asd_disable_ints(asd_ha);
+	free_irq(dev->irq, asd_ha);
+Err_irq:
+	if (use_msi)
+		pci_disable_msi(dev);
+	asd_chip_hardrst(asd_ha);
+Err_free_cache:
+	asd_destroy_ha_caches(asd_ha);
+Err_unmap:
+	asd_unmap_ha(asd_ha);
+Err_free:
+	/* the host points into asd_ha, so remove it before freeing that */
+	if (shost)
+		scsi_remove_host(shost);
+Err_free_ha:
+	kfree(asd_ha);
+Err_put:
+	if (shost)
+		scsi_host_put(shost);
+Err:
+	pci_disable_device(dev);
+	return err;
+}
+
+/**
+ * asd_free_queues -- free whatever is left on the pending queue
+ * @asd_ha: pointer to host adapter structure
+ *
+ * The pending queue is emptied under pend_q_lock and its aSCBs are
+ * then freed one by one outside the lock.
+ */
+static void asd_free_queues(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	unsigned long flags;
+	LIST_HEAD(leftover);
+
+	spin_lock_irqsave(&seq->pend_q_lock, flags);
+	seq->pending = 0;
+	list_splice_init(&seq->pend_q, &leftover);
+	spin_unlock_irqrestore(&seq->pend_q_lock, flags);
+
+	if (!list_empty(&leftover))
+		ASD_DPRINTK("Uh-oh! Pending is not empty!\n");
+
+	while (!list_empty(&leftover)) {
+		struct asd_ascb *ascb = list_entry(leftover.next,
+						   struct asd_ascb, list);
+
+		list_del_init(&ascb->list);
+		ASD_DPRINTK("freeing from pending\n");
+		asd_ascb_free(ascb);
+	}
+}
+
+/* Switch off the LED of every enabled phy and call asd_control_led()
+ * with op 0 for it.
+ */
+static void asd_turn_off_leds(struct asd_ha_struct *asd_ha)
+{
+	const u8 enabled = asd_ha->hw_prof.enabled_phys;
+	u8 mask;
+	u8 phy;
+
+	for_each_phy(enabled, mask, phy) {
+		asd_turn_led(asd_ha, phy, 0);
+		asd_control_led(asd_ha, phy, 0);
+	}
+}
+
+/**
+ * asd_pci_remove -- unbind the driver from a PCI device
+ * @dev: PCI device being removed
+ *
+ * Unregisters from the SAS layer, quiesces and resets the chip, and
+ * frees everything the probe routine set up.  Does nothing if no host
+ * adapter structure is attached to @dev.
+ */
+static void __devexit asd_pci_remove(struct pci_dev *dev)
+{
+	struct asd_ha_struct *asd_ha;
+
+	asd_ha = pci_get_drvdata(dev);
+	if (asd_ha == NULL)
+		return;
+
+	asd_unregister_sas_ha(asd_ha);
+
+	asd_disable_ints(asd_ha);
+
+	asd_remove_dev_attrs(asd_ha);
+
+	/* XXX more here as needed */
+
+	free_irq(dev->irq, asd_ha);
+	if (use_msi)
+		pci_disable_msi(asd_ha->pcidev);
+
+	asd_turn_off_leds(asd_ha);
+	asd_chip_hardrst(asd_ha);
+
+	asd_free_queues(asd_ha);
+	asd_destroy_ha_caches(asd_ha);
+	asd_unmap_ha(asd_ha);
+	kfree(asd_ha);
+
+	pci_disable_device(dev);
+}
+
+/* Driver-level sysfs "version" attribute: the driver version string. */
+static ssize_t asd_version_show(struct device_driver *driver, char *buf)
+{
+	ssize_t len;
+
+	len = snprintf(buf, PAGE_SIZE, "%s\n", ASD_DRIVER_VERSION);
+
+	return len;
+}
+static DRIVER_ATTR(version, S_IRUGO, asd_version_show, NULL);
+
+/* Create the driver-level "version" attribute.  A creation error is
+ * not reported.
+ */
+static void asd_create_driver_attrs(struct device_driver *driver)
+{
+	driver_create_file(driver, &driver_attr_version);
+}
+
+/* Remove the driver-level "version" attribute again. */
+static void asd_remove_driver_attrs(struct device_driver *driver)
+{
+	driver_remove_file(driver, &driver_attr_version);
+}
+
+/* Entry points this driver offers to the SAS layer; handed to
+ * sas_domain_attach_transport() when the module is loaded.
+ */
+static struct sas_domain_function_template aic94xx_transport_functions = {
+	.lldd_port_formed	= asd_update_port_links,
+
+	.lldd_dev_found		= asd_dev_found,
+	.lldd_dev_gone		= asd_dev_gone,
+
+	.lldd_execute_task	= asd_execute_task,
+
+	/* task management */
+	.lldd_abort_task	= asd_abort_task,
+	.lldd_abort_task_set	= asd_abort_task_set,
+	.lldd_clear_aca		= asd_clear_aca,
+	.lldd_clear_task_set	= asd_clear_task_set,
+	.lldd_I_T_nexus_reset	= NULL,	/* not provided */
+	.lldd_lu_reset		= asd_lu_reset,
+	.lldd_query_task	= asd_query_task,
+
+	.lldd_clear_nexus_port	= asd_clear_nexus_port,
+	.lldd_clear_nexus_ha	= asd_clear_nexus_ha,
+
+	.lldd_control_phy	= asd_control_phy,
+};
+
+/* PCI ids this driver binds to.  The last field of each entry is the
+ * driver_data, used by the probe routine as index into
+ * asd_pcidev_data[]: 1 selects the AIC-9410W entry, 2 the AIC-9405W one.
+ */
+static const struct pci_device_id aic94xx_pci_table[] __devinitdata = {
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR10),
+	 0, 0, 1},
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR12),
+	 0, 0, 1},
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR1E),
+	 0, 0, 1},
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR30),
+	 0, 0, 2},
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR32),
+	 0, 0, 2},
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR3E),
+	 0, 0, 2},
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR3F),
+	 0, 0, 2},
+	{}	/* terminating all-zero entry */
+};
+
+MODULE_DEVICE_TABLE(pci, aic94xx_pci_table);
+
+/* The PCI driver itself; registered when the module is loaded. */
+static struct pci_driver aic94xx_pci_driver = {
+	.name		= ASD_DRIVER_NAME,
+	.id_table	= aic94xx_pci_table,
+	.probe		= asd_pci_probe,
+	.remove		= __devexit_p(asd_pci_remove),
+};
+
+/**
+ * aic94xx_init -- module entry point
+ *
+ * Creates the driver-wide slab caches, attaches the SAS transport
+ * template, registers the PCI driver and creates the driver-level
+ * sysfs attributes.  Each step is undone if a later one fails.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init aic94xx_init(void)
+{
+	int err;
+
+
+	asd_printk("%s version %s loaded\n", ASD_DRIVER_DESCRIPTION,
+		   ASD_DRIVER_VERSION);
+
+	err = asd_create_global_caches();
+	if (err)
+		return err;
+
+	/* The result is a pointer; err is still 0 here and says nothing
+	 * about whether attaching the transport worked.
+	 */
+	aic94xx_transport_template =
+		sas_domain_attach_transport(&aic94xx_transport_functions);
+	if (!aic94xx_transport_template) {
+		err = -ENOMEM;
+		goto out_destroy_caches;
+	}
+
+	err = pci_register_driver(&aic94xx_pci_driver);
+	if (err)
+		goto out_release_transport;
+
+	asd_create_driver_attrs(&aic94xx_pci_driver.driver);
+
+	return err;
+
+ out_release_transport:
+	sas_release_transport(aic94xx_transport_template);
+ out_destroy_caches:
+	asd_destroy_global_caches();
+
+	return err;
+}
+
+/* Module exit point: undoes aic94xx_init() in reverse order -- sysfs
+ * attributes, PCI driver, SAS transport template, slab caches.
+ */
+static void __exit aic94xx_exit(void)
+{
+	asd_remove_driver_attrs(&aic94xx_pci_driver.driver);
+	pci_unregister_driver(&aic94xx_pci_driver);
+	sas_release_transport(aic94xx_transport_template);
+	asd_destroy_global_caches();
+	asd_printk("%s version %s unloaded\n", ASD_DRIVER_DESCRIPTION,
+		   ASD_DRIVER_VERSION);
+}
+
+module_init(aic94xx_init);
+module_exit(aic94xx_exit);
+
+MODULE_AUTHOR("Luben Tuikov <luben_tuikov@adaptec.com>");
+MODULE_DESCRIPTION(ASD_DRIVER_DESCRIPTION);
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(ASD_DRIVER_VERSION);
diff --git a/drivers/scsi/aic94xx/aic94xx_reg.c b/drivers/scsi/aic94xx/aic94xx_reg.c
new file mode 100644
index 0000000000000..f210dac3203df
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_reg.c
@@ -0,0 +1,332 @@
+/*
+ * Aic94xx SAS/SATA driver register access.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include "aic94xx_reg.h"
+#include "aic94xx.h"
+
+/* Writing to device address space.
+ * Offset comes before value to remind that the operation of
+ * this function is *offs = val.
+ */
+static inline void asd_write_byte(struct asd_ha_struct *asd_ha,
+				  unsigned long offs, u8 val)
+{
+	if (likely(!asd_ha->iospace))
+		writeb(val, asd_ha->io_handle[0].addr + offs);
+	else	/* port I/O: only the low 8 bits of offs are used */
+		outb(val,
+		     (unsigned long)asd_ha->io_handle[0].addr + (offs & 0xFF));
+	wmb();
+}
+
+static inline void asd_write_word(struct asd_ha_struct *asd_ha,
+				  unsigned long offs, u16 val)
+{
+	if (likely(!asd_ha->iospace))
+		writew(val, asd_ha->io_handle[0].addr + offs);
+	else	/* port I/O: only the low 8 bits of offs are used */
+		outw(val,
+		     (unsigned long)asd_ha->io_handle[0].addr + (offs & 0xFF));
+	wmb();
+}
+
+static inline void asd_write_dword(struct asd_ha_struct *asd_ha,
+				   unsigned long offs, u32 val)
+{
+	if (likely(!asd_ha->iospace))
+		writel(val, asd_ha->io_handle[0].addr + offs);
+	else	/* port I/O: only the low 8 bits of offs are used */
+		outl(val,
+		     (unsigned long)asd_ha->io_handle[0].addr + (offs & 0xFF));
+	wmb();
+}
+
+/* Reading from device address space.
+ */
+static inline u8 asd_read_byte(struct asd_ha_struct *asd_ha,
+			       unsigned long offs)
+{
+	u8 data;
+	if (likely(!asd_ha->iospace))
+		data = readb(asd_ha->io_handle[0].addr + offs);
+	else	/* port I/O: only the low 8 bits of offs are used */
+		data = inb((unsigned long)asd_ha->io_handle[0].addr
+			   + (offs & 0xFF));
+	rmb();
+	return data;
+}
+
+static inline u16 asd_read_word(struct asd_ha_struct *asd_ha,
+				unsigned long offs)
+{
+	u16 data;
+	if (likely(!asd_ha->iospace))
+		data = readw(asd_ha->io_handle[0].addr + offs);
+	else	/* port I/O: only the low 8 bits of offs are used */
+		data = inw((unsigned long)asd_ha->io_handle[0].addr
+			   + (offs & 0xFF));
+	rmb();
+	return data;
+}
+
+static inline u32 asd_read_dword(struct asd_ha_struct *asd_ha,
+				 unsigned long offs)
+{
+	u32 data;
+	if (likely(!asd_ha->iospace))
+		data = readl(asd_ha->io_handle[0].addr + offs);
+	else	/* port I/O: only the low 8 bits of offs are used */
+		data = inl((unsigned long)asd_ha->io_handle[0].addr
+			   + (offs & 0xFF));
+	rmb();
+	return data;
+}
+
+static inline u32 asd_mem_offs_swa(void)
+{
+	return 0;	/* SWA is the first window, at offset 0 of MBAR0 */
+}
+
+static inline u32 asd_mem_offs_swc(void)
+{
+	return asd_mem_offs_swa() + MBAR0_SWA_SIZE;	/* right after SWA */
+}
+
+static inline u32 asd_mem_offs_swb(void)
+{	/* 0x20 skips the copy of PCI config space 0x60-0x7F after SWC */
+	return asd_mem_offs_swc() + MBAR0_SWC_SIZE + 0x20;
+}
+
+/* We know that the register wanted is in the range
+ * of the sliding window.
+ */
+#define ASD_READ_SW(ww, type, ord)                                     \
+static inline type asd_read_##ww##_##ord (struct asd_ha_struct *asd_ha,\
+					  u32 reg)                     \
+{                                                                      \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[0];    \
+	u32 map_offs=(reg - io_handle-> ww##_base )+asd_mem_offs_##ww ();\
+	return asd_read_##ord (asd_ha, (unsigned long) map_offs);      \
+}
+
+#define ASD_WRITE_SW(ww, type, ord)                                    \
+static inline void asd_write_##ww##_##ord (struct asd_ha_struct *asd_ha,\
+				  u32 reg, type val)                   \
+{                                                                      \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[0];    \
+	u32 map_offs=(reg - io_handle-> ww##_base )+asd_mem_offs_##ww ();\
+	asd_write_##ord (asd_ha, (unsigned long) map_offs, val);       \
+}
+
+ASD_READ_SW(swa, u8,  byte);
+ASD_READ_SW(swa, u16, word);
+ASD_READ_SW(swa, u32, dword);
+
+ASD_READ_SW(swb, u8,  byte);
+ASD_READ_SW(swb, u16, word);
+ASD_READ_SW(swb, u32, dword);
+
+ASD_READ_SW(swc, u8,  byte);
+ASD_READ_SW(swc, u16, word);
+ASD_READ_SW(swc, u32, dword);
+
+ASD_WRITE_SW(swa, u8,  byte);
+ASD_WRITE_SW(swa, u16, word);
+ASD_WRITE_SW(swa, u32, dword);
+
+ASD_WRITE_SW(swb, u8,  byte);
+ASD_WRITE_SW(swb, u16, word);
+ASD_WRITE_SW(swb, u32, dword);
+
+ASD_WRITE_SW(swc, u8,  byte);
+ASD_WRITE_SW(swc, u16, word);
+ASD_WRITE_SW(swc, u32, dword);
+
+/*
+ * A word about sliding windows:
+ * MBAR0 is divided into sliding windows A, C and B, in that order.
+ * SWA starts at offset 0 of MBAR0, up to 0x57, with size 0x58 bytes.
+ * SWC starts at offset 0x58 of MBAR0, up to 0x5F, with size 0x8 bytes.
+ * From 0x60 to 0x7F, we have a copy of PCI config space 0x60-0x7F.
+ * SWB starts at offset 0x80 of MBAR0 and extends to the end of MBAR0.
+ * See asd_init_sw() in aic94xx_hwi.c
+ *
+ * We map the most common registers we'd access of the internal 4GB
+ * host adapter memory space.  If a register/internal memory location
+ * is wanted which is not mapped, we slide SWB, by paging it,
+ * see asd_move_swb() below.
+ */
+
+/**
+ * asd_move_swb -- move sliding window B
+ * @asd_ha: pointer to host adapter structure
+ * @reg: register desired to be within range of the new window
+ */
+static inline void asd_move_swb(struct asd_ha_struct *asd_ha, u32 reg)
+{
+	const u32 win = reg & ~(MBAR0_SWB_SIZE - 1);	/* assumes 2^n size */
+	pci_write_config_dword(asd_ha->pcidev, PCI_CONF_MBAR0_SWB, win);
+	asd_ha->io_handle[0].swb_base = win;
+}
+
+/* Unlocked byte write (asd_write_reg_string() calls this holding iolock):
+ * use the window that currently maps @reg, sliding SWB onto it if none does.
+ */
+static void __asd_write_reg_byte(struct asd_ha_struct *asd_ha, u32 reg, u8 val)
+{
+	struct asd_ha_addrspace *win = &asd_ha->io_handle[0];
+
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);
+	if (win->swa_base <= reg && reg < win->swa_base + MBAR0_SWA_SIZE)
+		asd_write_swa_byte(asd_ha, reg, val);
+	else if (win->swb_base <= reg && reg < win->swb_base + MBAR0_SWB_SIZE)
+		asd_write_swb_byte(asd_ha, reg, val);
+	else if (win->swc_base <= reg && reg < win->swc_base + MBAR0_SWC_SIZE)
+		asd_write_swc_byte(asd_ha, reg, val);
+	else {
+		asd_move_swb(asd_ha, reg);	/* no window maps reg: slide SWB */
+		asd_write_swb_byte(asd_ha, reg, val);
+	}
+}
+
+#define ASD_WRITE_REG(type, ord)                                  \
+void asd_write_reg_##ord (struct asd_ha_struct *asd_ha, u32 reg, type val)\
+{                                                                 \
+	struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; \
+	unsigned long flags;                                      \
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);         \
+	spin_lock_irqsave(&asd_ha->iolock, flags);                \
+	if (io_handle->swa_base <= reg                            \
+	    && reg < io_handle->swa_base + MBAR0_SWA_SIZE)        \
+		asd_write_swa_##ord (asd_ha, reg,val);            \
+	else if (io_handle->swb_base <= reg                       \
+		 && reg < io_handle->swb_base + MBAR0_SWB_SIZE)   \
+		asd_write_swb_##ord (asd_ha, reg, val);           \
+	else if (io_handle->swc_base <= reg                       \
+		 && reg < io_handle->swc_base + MBAR0_SWC_SIZE)   \
+		asd_write_swc_##ord (asd_ha, reg, val);           \
+	else {                                                    \
+		/* Ok, we have to move SWB */                     \
+		asd_move_swb(asd_ha, reg);                        \
+		asd_write_swb_##ord (asd_ha, reg, val);           \
+	}                                                         \
+	spin_unlock_irqrestore(&asd_ha->iolock, flags);           \
+}
+
+ASD_WRITE_REG(u8, byte);
+ASD_WRITE_REG(u16,word);
+ASD_WRITE_REG(u32,dword);
+
+/* Unlocked byte read (asd_read_reg_string() calls this holding iolock):
+ * use the window that currently maps @reg, sliding SWB onto it if none does.
+ */
+static u8 __asd_read_reg_byte(struct asd_ha_struct *asd_ha, u32 reg)
+{
+	struct asd_ha_addrspace *win = &asd_ha->io_handle[0];
+
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);
+
+	/* Windows are tried in the order A, B, C. */
+	if (win->swa_base <= reg && reg < win->swa_base + MBAR0_SWA_SIZE)
+		return asd_read_swa_byte(asd_ha, reg);
+	if (win->swb_base <= reg && reg < win->swb_base + MBAR0_SWB_SIZE)
+		return asd_read_swb_byte(asd_ha, reg);
+	if (win->swc_base <= reg && reg < win->swc_base + MBAR0_SWC_SIZE)
+		return asd_read_swc_byte(asd_ha, reg);
+
+	/* No window maps reg: slide SWB over it, then read through SWB. */
+	asd_move_swb(asd_ha, reg);
+	return asd_read_swb_byte(asd_ha, reg);
+}
+
+#define ASD_READ_REG(type, ord)                                   \
+type asd_read_reg_##ord (struct asd_ha_struct *asd_ha, u32 reg)   \
+{                                                                 \
+	struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; \
+	type val;                                                 \
+	unsigned long flags;                                      \
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);         \
+	spin_lock_irqsave(&asd_ha->iolock, flags);                \
+	if (io_handle->swa_base <= reg                            \
+	    && reg < io_handle->swa_base + MBAR0_SWA_SIZE)        \
+		val = asd_read_swa_##ord (asd_ha, reg);           \
+	else if (io_handle->swb_base <= reg                       \
+		 && reg < io_handle->swb_base + MBAR0_SWB_SIZE)   \
+		val = asd_read_swb_##ord (asd_ha, reg);           \
+	else if (io_handle->swc_base <= reg                       \
+		 && reg < io_handle->swc_base + MBAR0_SWC_SIZE)   \
+		val = asd_read_swc_##ord (asd_ha, reg);           \
+	else {                                                    \
+		/* Ok, we have to move SWB */                     \
+		asd_move_swb(asd_ha, reg);                        \
+		val = asd_read_swb_##ord (asd_ha, reg);           \
+	}                                                         \
+	spin_unlock_irqrestore(&asd_ha->iolock, flags);           \
+	return val;                                               \
+}
+
+ASD_READ_REG(u8, byte);
+ASD_READ_REG(u16,word);
+ASD_READ_REG(u32,dword);
+
+/**
+ * asd_read_reg_string -- read a string of bytes from io space memory
+ * @asd_ha: pointer to host adapter structure
+ * @dst: pointer to a destination buffer where data will be written to
+ * @offs: start offset (register) to read from
+ * @count: number of bytes to read
+ */
+void asd_read_reg_string(struct asd_ha_struct *asd_ha, void *dst,
+			 u32 offs, int count)
+{
+	unsigned long irq_flags;
+	u8 *out = dst;
+
+	spin_lock_irqsave(&asd_ha->iolock, irq_flags);
+	for ( ; count > 0; count--)	/* whole string under one lock */
+		*out++ = __asd_read_reg_byte(asd_ha, offs++);
+	spin_unlock_irqrestore(&asd_ha->iolock, irq_flags);
+}
+
+/**
+ * asd_write_reg_string -- write a string of bytes to io space memory
+ * @asd_ha: pointer to host adapter structure
+ * @src: pointer to source buffer where data will be read from
+ * @offs: start offset (register) to write to
+ * @count: number of bytes to write
+ */
+void asd_write_reg_string(struct asd_ha_struct *asd_ha, void *src,
+			  u32 offs, int count)
+{
+	unsigned long irq_flags;
+	u8 *cur = src;
+
+	spin_lock_irqsave(&asd_ha->iolock, irq_flags);
+	for ( ; count > 0; count--)	/* whole string under one lock */
+		__asd_write_reg_byte(asd_ha, offs++, *cur++);
+	spin_unlock_irqrestore(&asd_ha->iolock, irq_flags);
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_reg.h b/drivers/scsi/aic94xx/aic94xx_reg.h
new file mode 100644
index 0000000000000..2279307fd27ed
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_reg.h
@@ -0,0 +1,302 @@
+/*
+ * Aic94xx SAS/SATA driver hardware registers definitions.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_REG_H_
+#define _AIC94XX_REG_H_
+
+#include <asm/io.h>
+#include "aic94xx_hwi.h"
+
+/* Values */
+#define AIC9410_DEV_REV_B0            0x8
+
+/* MBAR0, SWA, SWB, SWC, internal memory space addresses */
+#define REG_BASE_ADDR                 0xB8000000
+#define REG_BASE_ADDR_CSEQCIO         0xB8002000
+#define REG_BASE_ADDR_EXSI            0xB8042800
+
+#define MBAR0_SWA_SIZE                0x58
+extern  u32    MBAR0_SWB_SIZE;
+#define MBAR0_SWC_SIZE                0x8
+
+/* MBAR1, points to On Chip Memory */
+#define OCM_BASE_ADDR                 0xA0000000
+#define OCM_MAX_SIZE                  0x20000
+
+/* Smallest address possible to reference */
+#define ALL_BASE_ADDR                 OCM_BASE_ADDR
+
+/* PCI configuration space registers */
+#define PCI_IOBAR_OFFSET              4
+
+#define PCI_CONF_MBAR1                0x6C
+#define PCI_CONF_MBAR0_SWA            0x70
+#define PCI_CONF_MBAR0_SWB            0x74
+#define PCI_CONF_MBAR0_SWC            0x78
+#define PCI_CONF_MBAR_KEY             0x7C
+#define PCI_CONF_FLSH_BAR             0xB8
+
+#include "aic94xx_reg_def.h"
+
+u8  asd_read_reg_byte(struct asd_ha_struct *asd_ha, u32 reg);
+u16 asd_read_reg_word(struct asd_ha_struct *asd_ha, u32 reg);
+u32 asd_read_reg_dword(struct asd_ha_struct *asd_ha, u32 reg);
+
+void asd_write_reg_byte(struct asd_ha_struct *asd_ha, u32 reg, u8 val);
+void asd_write_reg_word(struct asd_ha_struct *asd_ha, u32 reg, u16 val);
+void asd_write_reg_dword(struct asd_ha_struct *asd_ha, u32 reg, u32 val);
+
+void asd_read_reg_string(struct asd_ha_struct *asd_ha, void *dst,
+			 u32 offs, int count);
+void asd_write_reg_string(struct asd_ha_struct *asd_ha, void *src,
+			  u32 offs, int count);
+
+#define ASD_READ_OCM(type, ord, S)                                    \
+static inline type asd_read_ocm_##ord (struct asd_ha_struct *asd_ha,  \
+					 u32 offs)                    \
+{                                                                     \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[1];   \
+	type val = read##S (io_handle->addr + (unsigned long) offs);  \
+	rmb();                                                        \
+	return val;                                                   \
+}
+
+ASD_READ_OCM(u8, byte, b);
+ASD_READ_OCM(u16,word, w);
+ASD_READ_OCM(u32,dword,l);
+
+#define ASD_WRITE_OCM(type, ord, S)                                    \
+static inline void asd_write_ocm_##ord (struct asd_ha_struct *asd_ha,  \
+					 u32 offs, type val)          \
+{                                                                     \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[1];   \
+	write##S (val, io_handle->addr + (unsigned long) offs);       \
+	return;                                                       \
+}
+
+ASD_WRITE_OCM(u8, byte, b);
+ASD_WRITE_OCM(u16,word, w);
+ASD_WRITE_OCM(u32,dword,l);
+
+#define ASD_DDBSITE_READ(type, ord)                                        \
+static inline type asd_ddbsite_read_##ord (struct asd_ha_struct *asd_ha,   \
+					   u16 ddb_site_no,                \
+					   u16 offs)                       \
+{                                                                          \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnDDB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ADDBPTR, ddb_site_no);                  \
+	return asd_read_reg_##ord (asd_ha, CTXACCESS);                     \
+}
+
+ASD_DDBSITE_READ(u32, dword);
+ASD_DDBSITE_READ(u16, word);
+
+/* Read one byte of a DDB site: fetch the 16-bit word containing @offs and
+ * return its high byte for an odd @offs, its low byte for an even one.
+ */
+static inline u8 asd_ddbsite_read_byte(struct asd_ha_struct *asd_ha,
+				       u16 ddb_site_no,
+				       u16 offs)
+{
+	u16 word = asd_ddbsite_read_word(asd_ha, ddb_site_no, offs & ~1);
+
+	return (offs & 1) ? word >> 8 : word & 0xFF;
+}
+
+
+#define ASD_DDBSITE_WRITE(type, ord)                                       \
+static inline void asd_ddbsite_write_##ord (struct asd_ha_struct *asd_ha,  \
+					u16 ddb_site_no,                   \
+					u16 offs, type val)                \
+{                                                                          \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnDDB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ADDBPTR, ddb_site_no);                  \
+	asd_write_reg_##ord (asd_ha, CTXACCESS, val);                      \
+}
+
+ASD_DDBSITE_WRITE(u32, dword);
+ASD_DDBSITE_WRITE(u16, word);
+
+static inline void asd_ddbsite_write_byte(struct asd_ha_struct *asd_ha,
+					  u16 ddb_site_no,
+					  u16 offs, u8 val)
+{
+	const u16 word_offs = offs & ~1;
+	u16 word = asd_ddbsite_read_word(asd_ha, ddb_site_no, word_offs);
+
+	/* Read-modify-write: odd @offs is the high byte, even the low one. */
+	word = (offs & 1) ? (val << 8) | (word & 0xFF)
+			  : (word & 0xFF00) | val;
+	asd_ddbsite_write_word(asd_ha, ddb_site_no, word_offs, word);
+}
+
+
+#define ASD_SCBSITE_READ(type, ord)                                        \
+static inline type asd_scbsite_read_##ord (struct asd_ha_struct *asd_ha,   \
+					   u16 scb_site_no,                \
+					   u16 offs)                       \
+{                                                                          \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnSCB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ASCBPTR, scb_site_no);                  \
+	return asd_read_reg_##ord (asd_ha, CTXACCESS);                     \
+}
+
+ASD_SCBSITE_READ(u32, dword);
+ASD_SCBSITE_READ(u16, word);
+
+/* Read one byte of an SCB site: fetch the 16-bit word containing @offs and
+ * return its high byte for an odd @offs, its low byte for an even one.
+ */
+static inline u8 asd_scbsite_read_byte(struct asd_ha_struct *asd_ha,
+				       u16 scb_site_no,
+				       u16 offs)
+{
+	u16 word = asd_scbsite_read_word(asd_ha, scb_site_no, offs & ~1);
+
+	return (offs & 1) ? word >> 8 : word & 0xFF;
+}
+
+
+#define ASD_SCBSITE_WRITE(type, ord)                                       \
+static inline void asd_scbsite_write_##ord (struct asd_ha_struct *asd_ha,  \
+					u16 scb_site_no,                   \
+					u16 offs, type val)                \
+{                                                                          \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnSCB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ASCBPTR, scb_site_no);                  \
+	asd_write_reg_##ord (asd_ha, CTXACCESS, val);                      \
+}
+
+ASD_SCBSITE_WRITE(u32, dword);
+ASD_SCBSITE_WRITE(u16, word);
+
+static inline void asd_scbsite_write_byte(struct asd_ha_struct *asd_ha,
+					  u16 scb_site_no,
+					  u16 offs, u8 val)
+{
+	const u16 word_offs = offs & ~1;
+	u16 word = asd_scbsite_read_word(asd_ha, scb_site_no, word_offs);
+
+	/* Read-modify-write: odd @offs is the high byte, even the low one. */
+	word = (offs & 1) ? (val << 8) | (word & 0xFF)
+			  : (word & 0xFF00) | val;
+	asd_scbsite_write_word(asd_ha, scb_site_no, word_offs, word);
+}
+
+/**
+ * asd_ddbsite_update_word -- atomically update a word in a ddb site
+ * @asd_ha: pointer to host adapter structure
+ * @ddb_site_no: the DDB site number
+ * @offs: the offset into the DDB
+ * @oldval: old value found in that offset
+ * @newval: the new value to replace it
+ *
+ * This function is used when the sequencers are running and we need to
+ * update a DDB site atomically without expensive pausing and unpausing
+ * of the sequencers and accessing the DDB site through the CIO bus.
+ *
+ * Return 0 on success; -EFAULT on parity error; -EAGAIN if the old value
+ * is different than the current value at that offset.
+ */
+static inline int asd_ddbsite_update_word(struct asd_ha_struct *asd_ha,
+					  u16 ddb_site_no, u16 offs,
+					  u16 oldval, u16 newval)
+{
+	u8  done;	/* last ATOMICSTATCTL value read */
+	u16 oval = asd_ddbsite_read_word(asd_ha, ddb_site_no, offs);
+	if (oval != oldval)	/* mismatch already visible: skip the update */
+		return -EAGAIN;
+	asd_write_reg_word(asd_ha, AOLDDATA, oldval);
+	asd_write_reg_word(asd_ha, ANEWDATA, newval);
+	do {
+		done = asd_read_reg_byte(asd_ha, ATOMICSTATCTL);
+	} while (!(done & ATOMICDONE));	/* NOTE(review): no timeout */
+	if (done & ATOMICERR)
+		return -EFAULT;	  /* parity error */
+	else if (done & ATOMICWIN)
+		return 0;	  /* success */
+	else
+		return -EAGAIN;	  /* oldval different than current value */
+}
+
+/*
+ * Byte-wide variant of asd_ddbsite_update_word(): the byte at @offs is
+ * updated as part of its enclosing 16-bit word.  Returns -EAGAIN if the byte
+ * read back differs from @_oldval, else asd_ddbsite_update_word()'s result.
+ */
+static inline int asd_ddbsite_update_byte(struct asd_ha_struct *asd_ha,
+					  u16 ddb_site_no, u16 offs,
+					  u8 _oldval, u8 _newval)
+{
+	const int shift = (offs & 1) ? 8 : 0;	/* odd offset: high byte */
+	u16 word_offs = offs & ~1;
+	u16 cur = asd_ddbsite_read_word(asd_ha, ddb_site_no, word_offs);
+	u16 keep = cur & (0xFF00 >> shift);	/* the byte left untouched */
+
+	if (((cur >> shift) & 0xFF) != _oldval)
+		return -EAGAIN;
+	return asd_ddbsite_update_word(asd_ha, ddb_site_no, word_offs,
+				       keep | (_oldval << shift),
+				       keep | (_newval << shift));
+}
+
+static inline void asd_write_reg_addr(struct asd_ha_struct *asd_ha, u32 reg,
+				      dma_addr_t dma_handle)
+{
+	asd_write_reg_dword(asd_ha, reg,   ASD_BUSADDR_LO(dma_handle));
+	asd_write_reg_dword(asd_ha, reg+4, ASD_BUSADDR_HI(dma_handle));
+}
+
+static inline u32 asd_get_cmdctx_size(struct asd_ha_struct *asd_ha)
+{
+	/* DCHREVISION returns 0, possibly broken */
+	u32 ctxmemsize = asd_read_reg_dword(asd_ha, LmMnINT(0,0)) & CTXMEMSIZE;
+	return ctxmemsize ? 65536 : 32768;
+}
+
+static inline u32 asd_get_devctx_size(struct asd_ha_struct *asd_ha)
+{
+	u32 ctxmemsize = asd_read_reg_dword(asd_ha, LmMnINT(0,0)) & CTXMEMSIZE;
+	return ctxmemsize ? 8192 : 4096;
+}
+
+static inline void asd_disable_ints(struct asd_ha_struct *asd_ha)
+{
+	asd_write_reg_dword(asd_ha, CHIMINTEN, RST_CHIMINTEN);
+}
+
+static inline void asd_enable_ints(struct asd_ha_struct *asd_ha)
+{
+	/* Enable COM SAS interrupt on errors, COMSTAT */
+	asd_write_reg_dword(asd_ha, COMSTATEN,
+			    EN_CSBUFPERR | EN_CSERR | EN_OVLYERR);
+	/* Enable DCH SAS CFIFTOERR */
+	asd_write_reg_dword(asd_ha, DCHSTATUS, EN_CFIFTOERR);
+	/* Enable Host Device interrupts */
+	asd_write_reg_dword(asd_ha, CHIMINTEN, SET_CHIMINTEN);
+}
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_reg_def.h b/drivers/scsi/aic94xx/aic94xx_reg_def.h
new file mode 100644
index 0000000000000..b79f45f3ad47d
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_reg_def.h
@@ -0,0 +1,2398 @@
+/*
+ * Aic94xx SAS/SATA driver hardware registers definitions.
+ *
+ * Copyright (C) 2004 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2004 David Chaw <david_chaw@adaptec.com>
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * Luben Tuikov: Some register value updates to make it work with the window
+ * agnostic register r/w functions.  Some register corrections, sizes,
+ * etc.
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * $Id: //depot/aic94xx/aic94xx_reg_def.h#27 $
+ *
+ */
+
+#ifndef _ADP94XX_REG_DEF_H_
+#define _ADP94XX_REG_DEF_H_
+
+/*
+ * Common definitions.
+ */
+#define CSEQ_MODE_PAGE_SIZE	0x200		/* CSEQ mode page size */
+#define LmSEQ_MODE_PAGE_SIZE	0x200		/* LmSEQ mode page size */
+#define LmSEQ_HOST_REG_SIZE   	0x4000		/* LmSEQ Host Register size */
+
+/********************* COM_SAS registers definition *************************/
+
+/* The base is REG_BASE_ADDR, defined in aic94xx_reg.h.
+ */
+
+/*
+ * CHIM Registers, Address Range : (0x00-0xFF)
+ */
+#define COMBIST		(REG_BASE_ADDR + 0x00)
+
+/* bits 31:24: LnBLKRST, one per phy id; see LmBLKRST_COMBIST() */
+#define		L7BLKRST		0x80000000
+#define		L6BLKRST		0x40000000
+#define		L5BLKRST		0x20000000
+#define		L4BLKRST		0x10000000
+#define		L3BLKRST		0x08000000
+#define		L2BLKRST		0x04000000
+#define		L1BLKRST		0x02000000
+#define		L0BLKRST		0x01000000
+#define		LmBLKRST		0xFF000000
+#define LmBLKRST_COMBIST(phyid)		(1U << (24 + (phyid)))
+
+#define		OCMBLKRST		0x00400000
+#define		CTXMEMBLKRST		0x00200000
+#define		CSEQBLKRST		0x00100000
+#define		EXSIBLKRST		0x00040000
+#define		DPIBLKRST		0x00020000
+#define		DFIFBLKRST		0x00010000
+#define		HARDRST			0x00000200
+#define		COMBLKRST		0x00000100
+#define		FRCDFPERR		0x00000080
+#define		FRCCIOPERR		0x00000020
+#define		FRCBISTERR		0x00000010
+#define		COMBISTEN		0x00000004
+#define		COMBISTDONE		0x00000002	/* ro */
+#define 	COMBISTFAIL		0x00000001	/* ro */
+
+#define COMSTAT		(REG_BASE_ADDR + 0x04)
+
+#define		REQMBXREAD		0x00000040
+#define 	RSPMBXAVAIL		0x00000020
+#define 	CSBUFPERR		0x00000008
+#define		OVLYERR			0x00000004
+#define 	CSERR			0x00000002
+#define		OVLYDMADONE		0x00000001
+
+#define		COMSTAT_MASK		(REQMBXREAD | RSPMBXAVAIL | \
+					 CSBUFPERR | OVLYERR | CSERR |\
+					 OVLYDMADONE)
+
+#define COMSTATEN	(REG_BASE_ADDR + 0x08)
+
+#define		EN_REQMBXREAD		0x00000040
+#define		EN_RSPMBXAVAIL		0x00000020
+#define		EN_CSBUFPERR		0x00000008
+#define		EN_OVLYERR		0x00000004
+#define		EN_CSERR		0x00000002
+#define		EN_OVLYDONE		0x00000001
+
+#define SCBPRO		(REG_BASE_ADDR + 0x0C)
+
+#define		SCBCONS_MASK		0xFFFF0000
+#define		SCBPRO_MASK		0x0000FFFF
+
+#define CHIMREQMBX	(REG_BASE_ADDR + 0x10)
+
+#define CHIMRSPMBX	(REG_BASE_ADDR + 0x14)
+
+#define CHIMINT		(REG_BASE_ADDR + 0x18)
+
+#define		EXT_INT0		0x00000800
+#define		EXT_INT1		0x00000400
+#define		PORRSTDET		0x00000200
+#define		HARDRSTDET		0x00000100
+#define		DLAVAILQ		0x00000080	/* ro */
+#define		HOSTERR			0x00000040
+#define		INITERR			0x00000020
+#define		DEVINT			0x00000010
+#define		COMINT			0x00000008
+#define		DEVTIMER2		0x00000004
+#define		DEVTIMER1		0x00000002
+#define		DLAVAIL			0x00000001
+
+#define		CHIMINT_MASK		(HOSTERR | INITERR | DEVINT | COMINT |\
+					 DEVTIMER2 | DEVTIMER1 | DLAVAIL)
+
+#define 	DEVEXCEPT_MASK		(HOSTERR | INITERR | DEVINT | COMINT)
+
+#define CHIMINTEN	(REG_BASE_ADDR + 0x1C)
+
+#define		RST_EN_EXT_INT1		0x01000000
+#define		RST_EN_EXT_INT0		0x00800000
+#define		RST_EN_HOSTERR		0x00400000
+#define		RST_EN_INITERR		0x00200000
+#define		RST_EN_DEVINT		0x00100000
+#define		RST_EN_COMINT		0x00080000
+#define		RST_EN_DEVTIMER2	0x00040000
+#define		RST_EN_DEVTIMER1	0x00020000
+#define		RST_EN_DLAVAIL		0x00010000
+#define		SET_EN_EXT_INT1		0x00000100
+#define		SET_EN_EXT_INT0		0x00000080
+#define		SET_EN_HOSTERR		0x00000040
+#define		SET_EN_INITERR		0x00000020
+#define		SET_EN_DEVINT		0x00000010
+#define		SET_EN_COMINT		0x00000008
+#define		SET_EN_DEVTIMER2	0x00000004
+#define		SET_EN_DEVTIMER1	0x00000002
+#define		SET_EN_DLAVAIL		0x00000001
+
+#define		RST_CHIMINTEN		(RST_EN_HOSTERR | RST_EN_INITERR | \
+					 RST_EN_DEVINT | RST_EN_COMINT | \
+					 RST_EN_DEVTIMER2 | RST_EN_DEVTIMER1 |\
+					 RST_EN_DLAVAIL)
+
+#define		SET_CHIMINTEN		(SET_EN_HOSTERR | SET_EN_INITERR |\
+					 SET_EN_DEVINT | SET_EN_COMINT |\
+					 SET_EN_DLAVAIL)
+
+#define OVLYDMACTL	(REG_BASE_ADDR + 0x20)
+
+#define		OVLYADR_MASK		0x07FF0000
+#define		OVLYLSEQ_MASK		0x0000FF00
+#define		OVLYCSEQ		0x00000080
+#define		OVLYHALTERR		0x00000040
+#define		PIOCMODE		0x00000020
+#define		RESETOVLYDMA		0x00000008	/* wo */
+#define		STARTOVLYDMA		0x00000004
+#define		STOPOVLYDMA		0x00000002	/* wo */
+#define		OVLYDMAACT		0x00000001	/* ro */
+
+#define OVLYDMACNT	(REG_BASE_ADDR + 0x24)
+
+#define		OVLYDOMAIN1		0x20000000	/* ro */
+#define		OVLYDOMAIN0		0x10000000
+#define		OVLYBUFADR_MASK		0x007F0000
+#define		OVLYDMACNT_MASK		0x00003FFF
+
+#define OVLYDMAADR	(REG_BASE_ADDR + 0x28)
+
+#define DMAERR		(REG_BASE_ADDR + 0x30)
+
+#define		OVLYERRSTAT_MASK	0x0000FF00	/* ro */
+#define		CSERRSTAT_MASK		0x000000FF	/* ro */
+
+#define SPIODATA	(REG_BASE_ADDR + 0x34)
+
+/* 0x38 - 0x3C are reserved  */
+
+#define T1CNTRLR	(REG_BASE_ADDR + 0x40)
+
+#define		T1DONE			0x00010000	/* ro */
+#define		TIMER64			0x00000400
+#define		T1ENABLE		0x00000200
+#define		T1RELOAD		0x00000100
+#define		T1PRESCALER_MASK	0x00000003
+
+#define	T1CMPR		(REG_BASE_ADDR + 0x44)
+
+#define T1CNTR		(REG_BASE_ADDR + 0x48)
+
+#define T2CNTRLR	(REG_BASE_ADDR + 0x4C)
+
+#define		T2DONE			0x00010000	/* ro */
+#define		T2ENABLE		0x00000200
+#define		T2RELOAD		0x00000100
+#define		T2PRESCALER_MASK	0x00000003
+
+#define	T2CMPR		(REG_BASE_ADDR + 0x50)
+
+#define T2CNTR		(REG_BASE_ADDR + 0x54)
+
+/* 0x58h - 0xFCh are reserved */
+
+/*
+ * DCH_SAS Registers, Address Range : (0x800-0xFFF)
+ */
+#define CMDCTXBASE	(REG_BASE_ADDR + 0x800)
+
+#define DEVCTXBASE	(REG_BASE_ADDR + 0x808)
+
+#define CTXDOMAIN	(REG_BASE_ADDR + 0x810)
+
+#define		DEVCTXDOMAIN1		0x00000008	/* ro */
+#define		DEVCTXDOMAIN0		0x00000004
+#define		CMDCTXDOMAIN1		0x00000002	/* ro */
+#define		CMDCTXDOMAIN0		0x00000001
+
+#define DCHCTL		(REG_BASE_ADDR + 0x814)
+
+#define		OCMBISTREPAIR		0x00080000
+#define		OCMBISTEN		0x00040000
+#define		OCMBISTDN		0x00020000	/* ro */
+#define		OCMBISTFAIL		0x00010000	/* ro */
+#define		DDBBISTEN		0x00004000
+#define		DDBBISTDN		0x00002000	/* ro */
+#define		DDBBISTFAIL		0x00001000	/* ro */
+#define		SCBBISTEN		0x00000400
+#define		SCBBISTDN		0x00000200	/* ro */
+#define		SCBBISTFAIL		0x00000100	/* ro */
+
+#define		MEMSEL_MASK		0x000000E0
+#define		MEMSEL_CCM_LSEQ		0x00000000
+#define		MEMSEL_CCM_IOP		0x00000020
+#define		MEMSEL_CCM_SASCTL	0x00000040
+#define		MEMSEL_DCM_LSEQ		0x00000060
+#define		MEMSEL_DCM_IOP		0x00000080
+#define		MEMSEL_OCM		0x000000A0
+
+#define		FRCERR			0x00000010
+#define		AUTORLS			0x00000001
+
+#define DCHREVISION	(REG_BASE_ADDR + 0x818)
+
+#define		DCHREVISION_MASK	0x000000FF
+
+#define DCHSTATUS	(REG_BASE_ADDR + 0x81C)
+
+#define		EN_CFIFTOERR		0x00020000
+#define		CFIFTOERR		0x00000200
+#define		CSEQINT			0x00000100	/* ro */
+#define		LSEQ7INT		0x00000080	/* ro */
+#define		LSEQ6INT		0x00000040	/* ro */
+#define		LSEQ5INT		0x00000020	/* ro */
+#define		LSEQ4INT		0x00000010	/* ro */
+#define		LSEQ3INT		0x00000008	/* ro */
+#define		LSEQ2INT		0x00000004	/* ro */
+#define		LSEQ1INT		0x00000002	/* ro */
+#define		LSEQ0INT		0x00000001	/* ro */
+
+#define		LSEQINT_MASK		(LSEQ7INT | LSEQ6INT | LSEQ5INT |\
+					 LSEQ4INT | LSEQ3INT | LSEQ2INT	|\
+					 LSEQ1INT | LSEQ0INT)
+
+#define DCHDFIFDEBUG	(REG_BASE_ADDR + 0x820)
+#define		ENFAIRMST		0x00FF0000
+#define		DISWRMST9		0x00000200
+#define		DISWRMST8		0x00000100
+#define		DISRDMST		0x000000FF
+
+#define ATOMICSTATCTL	(REG_BASE_ADDR + 0x824)
+/* 8 bit wide */
+#define		AUTOINC			0x80
+#define		ATOMICERR		0x04
+#define		ATOMICWIN		0x02
+#define		ATOMICDONE		0x01
+
+
+#define ALTCIOADR	(REG_BASE_ADDR + 0x828)
+/* 16 bit; bits 8:0 define CIO addr space of CSEQ */
+
+#define ASCBPTR		(REG_BASE_ADDR + 0x82C)
+/* 16 bit wide */
+
+#define ADDBPTR		(REG_BASE_ADDR + 0x82E)
+/* 16 bit wide */
+
+#define ANEWDATA	(REG_BASE_ADDR + 0x830)
+/* 16 bit */
+
+#define AOLDDATA	(REG_BASE_ADDR + 0x834)
+/* 16 bit */
+
+#define CTXACCESS	(REG_BASE_ADDR + 0x838)
+/* 32 bit */
+
+/* 0x83Ch - 0xFFCh are reserved */
+
+/*
+ * ARP2 External Processor Registers, Address Range : (0x00-0x1F)
+ */
+#define ARP2CTL		0x00
+
+#define		FRCSCRPERR		0x00040000
+#define		FRCARP2PERR		0x00020000
+#define		FRCARP2ILLOPC		0x00010000
+#define		ENWAITTO		0x00008000
+#define		PERRORDIS		0x00004000
+#define		FAILDIS			0x00002000
+#define		CIOPERRDIS		0x00001000
+#define		BREAKEN3		0x00000800
+#define		BREAKEN2		0x00000400
+#define		BREAKEN1		0x00000200
+#define		BREAKEN0		0x00000100
+#define		EPAUSE			0x00000008
+#define		PAUSED			0x00000004	/* ro */
+#define		STEP			0x00000002
+#define		ARP2RESET		0x00000001	/* wo */
+
+#define ARP2INT		0x04
+
+#define		HALTCODE_MASK		0x00FF0000	/* ro */
+#define		ARP2WAITTO		0x00000100
+#define		ARP2HALTC		0x00000080
+#define		ARP2ILLOPC		0x00000040
+#define		ARP2PERR		0x00000020
+#define		ARP2CIOPERR		0x00000010
+#define		ARP2BREAK3		0x00000008
+#define		ARP2BREAK2		0x00000004
+#define		ARP2BREAK1		0x00000002
+#define		ARP2BREAK0		0x00000001
+
+#define ARP2INTEN	0x08
+
+#define		EN_ARP2WAITTO		0x00000100
+#define		EN_ARP2HALTC		0x00000080
+#define		EN_ARP2ILLOPC		0x00000040
+#define		EN_ARP2PERR		0x00000020
+#define		EN_ARP2CIOPERR		0x00000010
+#define		EN_ARP2BREAK3		0x00000008
+#define		EN_ARP2BREAK2		0x00000004
+#define		EN_ARP2BREAK1		0x00000002
+#define		EN_ARP2BREAK0		0x00000001
+
+#define ARP2BREAKADR01	0x0C
+
+#define		BREAKADR1_MASK		0x0FFF0000
+#define		BREAKADR0_MASK		0x00000FFF
+
+#define	ARP2BREAKADR23	0x10
+
+#define		BREAKADR3_MASK		0x0FFF0000
+#define		BREAKADR2_MASK		0x00000FFF
+
+/* 0x14 - 0x1C are reserved */
+
+/*
+ * ARP2 Registers, Address Range : (0x00-0x1F)
+ * The definitions have the same address offset for CSEQ and LmSEQ
+ * CIO Bus Registers.
+ */
+#define MODEPTR		0x00
+
+#define		DSTMODE			0xF0
+#define		SRCMODE			0x0F
+
+#define ALTMODE		0x01
+
+#define		ALTDMODE		0xF0
+#define		ALTSMODE		0x0F
+
+#define ATOMICXCHG	0x02
+
+#define FLAG		0x04
+
+#define		INTCODE_MASK		0xF0
+#define		ALTMODEV2		0x04
+#define		CARRY_INT		0x02
+#define		CARRY			0x01
+
+#define ARP2INTCTL	0x05
+
+#define 	PAUSEDIS		0x80
+#define		RSTINTCTL		0x40
+#define		POPALTMODE		0x08
+#define		ALTMODEV		0x04
+#define		INTMASK			0x02
+#define		IRET			0x01
+
+#define STACK		0x06
+
+#define FUNCTION1	0x07
+
+#define PRGMCNT		0x08
+
+#define ACCUM		0x0A
+
+#define SINDEX		0x0C
+
+#define DINDEX		0x0E
+
+#define ALLONES		0x10
+
+#define ALLZEROS	0x11
+
+#define SINDIR		0x12
+
+#define DINDIR		0x13
+
+#define JUMLDIR		0x14
+
+#define ARP2HALTCODE	0x15
+
+#define CURRADDR	0x16
+
+#define LASTADDR	0x18
+
+#define NXTLADDR	0x1A
+
+#define DBGPORTPTR	0x1C
+
+#define DBGPORT		0x1D
+
+/*
+ * CIO Registers.
+ * The definitions have the same address offset for CSEQ and LmSEQ
+ * CIO Bus Registers.
+ */
+#define MnSCBPTR      	0x20
+
+#define MnDDBPTR      	0x22
+
+#define SCRATCHPAGE	0x24
+
+#define MnSCRATCHPAGE	0x25
+
+#define SCRATCHPAGESV	0x26
+
+#define MnSCRATCHPAGESV	0x27
+
+#define MnDMAERRS	0x46
+
+#define MnSGDMAERRS	0x47
+
+#define MnSGBUF		0x53
+
+#define MnSGDMASTAT	0x5b
+
+#define MnDDMACTL	0x5c	/* RAZOR.rspec.fm rev 1.5 is wrong */
+
+#define MnDDMASTAT	0x5d	/* RAZOR.rspec.fm rev 1.5 is wrong */
+
+#define MnDDMAMODE	0x5e	/* RAZOR.rspec.fm rev 1.5 is wrong */
+
+#define MnDMAENG	0x60
+
+#define MnPIPECTL	0x61
+
+#define MnSGBADR	0x65
+
+#define MnSCB_SITE	0x100
+
+#define MnDDB_SITE	0x180
+
+/*
+ * The common definitions below have the same address offset for both
+ * CSEQ and LmSEQ.
+ */
+#define BISTCTL0	0x4C
+
+#define BISTCTL1	0x50
+
+#define MAPPEDSCR	0x800
+
+/*
+ * CSEQ Host Register, Address Range : (0x000-0xFFC)
+ */
+#define CSEQ_HOST_REG_BASE_ADR		0xB8001000
+
+#define CARP2CTL			(CSEQ_HOST_REG_BASE_ADR	+ ARP2CTL)
+
+#define CARP2INT			(CSEQ_HOST_REG_BASE_ADR	+ ARP2INT)
+
+#define CARP2INTEN			(CSEQ_HOST_REG_BASE_ADR	+ ARP2INTEN)
+
+#define CARP2BREAKADR01			(CSEQ_HOST_REG_BASE_ADR+ARP2BREAKADR01)
+
+#define CARP2BREAKADR23			(CSEQ_HOST_REG_BASE_ADR+ARP2BREAKADR23)
+
+#define CBISTCTL			(CSEQ_HOST_REG_BASE_ADR	+ BISTCTL1)
+
+#define		CSEQRAMBISTEN		0x00000040
+#define		CSEQRAMBISTDN		0x00000020	/* ro */
+#define		CSEQRAMBISTFAIL		0x00000010	/* ro */
+#define		CSEQSCRBISTEN		0x00000004
+#define		CSEQSCRBISTDN		0x00000002	/* ro */
+#define		CSEQSCRBISTFAIL		0x00000001	/* ro */
+
+#define CMAPPEDSCR			(CSEQ_HOST_REG_BASE_ADR	+ MAPPEDSCR)
+
+/*
+ * CSEQ CIO Bus Registers, Address Range : (0x0000-0x1FFC)
+ * 16 modes, each mode is 512 bytes.
+ * Unless specified otherwise, a register is valid for all modes.
+ */
+#define CSEQ_CIO_REG_BASE_ADR		REG_BASE_ADDR_CSEQCIO
+
+#define CSEQm_CIO_REG(Mode, Reg) \
+		(CSEQ_CIO_REG_BASE_ADR  + \
+		((u32) (Mode) * CSEQ_MODE_PAGE_SIZE) + (u32) (Reg))
+
+#define CMODEPTR	(CSEQ_CIO_REG_BASE_ADR + MODEPTR)
+
+#define CALTMODE	(CSEQ_CIO_REG_BASE_ADR + ALTMODE)
+
+#define CATOMICXCHG	(CSEQ_CIO_REG_BASE_ADR + ATOMICXCHG)
+
+#define CFLAG		(CSEQ_CIO_REG_BASE_ADR + FLAG)
+
+#define CARP2INTCTL	(CSEQ_CIO_REG_BASE_ADR + ARP2INTCTL)
+
+#define CSTACK		(CSEQ_CIO_REG_BASE_ADR + STACK)
+
+#define CFUNCTION1	(CSEQ_CIO_REG_BASE_ADR + FUNCTION1)
+
+#define CPRGMCNT	(CSEQ_CIO_REG_BASE_ADR + PRGMCNT)
+
+#define CACCUM		(CSEQ_CIO_REG_BASE_ADR + ACCUM)
+
+#define CSINDEX		(CSEQ_CIO_REG_BASE_ADR + SINDEX)
+
+#define CDINDEX		(CSEQ_CIO_REG_BASE_ADR + DINDEX)
+
+#define CALLONES	(CSEQ_CIO_REG_BASE_ADR + ALLONES)
+
+#define CALLZEROS	(CSEQ_CIO_REG_BASE_ADR + ALLZEROS)
+
+#define CSINDIR		(CSEQ_CIO_REG_BASE_ADR + SINDIR)
+
+#define CDINDIR		(CSEQ_CIO_REG_BASE_ADR + DINDIR)
+
+#define CJUMLDIR	(CSEQ_CIO_REG_BASE_ADR + JUMLDIR)
+
+#define CARP2HALTCODE	(CSEQ_CIO_REG_BASE_ADR + ARP2HALTCODE)
+
+#define CCURRADDR	(CSEQ_CIO_REG_BASE_ADR + CURRADDR)
+
+#define CLASTADDR	(CSEQ_CIO_REG_BASE_ADR + LASTADDR)
+
+#define CNXTLADDR	(CSEQ_CIO_REG_BASE_ADR + NXTLADDR)
+
+#define CDBGPORTPTR	(CSEQ_CIO_REG_BASE_ADR + DBGPORTPTR)
+
+#define CDBGPORT	(CSEQ_CIO_REG_BASE_ADR + DBGPORT)
+
+#define CSCRATCHPAGE	(CSEQ_CIO_REG_BASE_ADR + SCRATCHPAGE)
+
+#define CMnSCBPTR(Mode)       CSEQm_CIO_REG(Mode, MnSCBPTR)
+
+#define CMnDDBPTR(Mode)       CSEQm_CIO_REG(Mode, MnDDBPTR)
+
+#define CMnSCRATCHPAGE(Mode)		CSEQm_CIO_REG(Mode, MnSCRATCHPAGE)
+
+#define CLINKCON	(CSEQ_CIO_REG_BASE_ADR + 0x28)
+
+#define	CCIOAACESS	(CSEQ_CIO_REG_BASE_ADR + 0x2C)
+
+/* mode 0-7; MnREQMBX is the in-mode offset that CMnREQMBX() addresses */
+#define MnREQMBX 0x30
+#define CMnREQMBX(Mode)			CSEQm_CIO_REG(Mode, MnREQMBX)
+
+/* mode 8 */
+#define CSEQCON				CSEQm_CIO_REG(8, 0x30)
+
+/* mode 0-7; MnRSPMBX is the in-mode offset that CMnRSPMBX() addresses */
+#define MnRSPMBX 0x34
+#define CMnRSPMBX(Mode)			CSEQm_CIO_REG(Mode, MnRSPMBX)
+
+/* mode 8 */
+#define CSEQCOMCTL			CSEQm_CIO_REG(8, 0x34)
+
+/* mode 8 */
+#define CSEQCOMSTAT			CSEQm_CIO_REG(8, 0x35)
+
+/* mode 8 */
+#define CSEQCOMINTEN			CSEQm_CIO_REG(8, 0x36)
+
+/* mode 8 */
+#define CSEQCOMDMACTL			CSEQm_CIO_REG(8, 0x37)
+
+#define		CSHALTERR		0x10
+#define		RESETCSDMA		0x08		/* wo */
+#define		STARTCSDMA		0x04
+#define		STOPCSDMA		0x02		/* wo */
+#define		CSDMAACT		0x01		/* ro */
+
+/* mode 0-7; MnINT is the in-mode offset that CMnINT() addresses */
+#define MnINT 0x38
+#define CMnINT(Mode)			CSEQm_CIO_REG(Mode, MnINT)
+
+#define		CMnREQMBXE		0x02
+#define		CMnRSPMBXF		0x01
+#define		CMnINT_MASK		0x00000003
+
+/* mode 8 */
+#define CSEQREQMBX			CSEQm_CIO_REG(8, 0x38)
+
+/* mode 0-7; MnINTEN is the in-mode offset that CMnINTEN() addresses */
+#define MnINTEN 0x3C
+#define CMnINTEN(Mode)			CSEQm_CIO_REG(Mode, MnINTEN)
+
+#define		EN_CMnRSPMBXF		0x01
+
+/* mode 8 */
+#define CSEQRSPMBX			CSEQm_CIO_REG(8, 0x3C)
+
+/* mode 8 */
+#define CSDMAADR			CSEQm_CIO_REG(8, 0x40)
+
+/* mode 8 */
+#define CSDMACNT			CSEQm_CIO_REG(8, 0x48)
+
+/* mode 8 */
+#define CSEQDLCTL			CSEQm_CIO_REG(8, 0x4D)
+
+#define		DONELISTEND		0x10
+#define 	DONELISTSIZE_MASK	0x0F
+#define		DONELISTSIZE_8ELEM	0x01
+#define		DONELISTSIZE_16ELEM	0x02
+#define		DONELISTSIZE_32ELEM	0x03
+#define		DONELISTSIZE_64ELEM	0x04
+#define		DONELISTSIZE_128ELEM	0x05
+#define		DONELISTSIZE_256ELEM	0x06
+#define		DONELISTSIZE_512ELEM	0x07
+#define		DONELISTSIZE_1024ELEM	0x08
+#define		DONELISTSIZE_2048ELEM	0x09
+#define		DONELISTSIZE_4096ELEM	0x0A
+#define		DONELISTSIZE_8192ELEM	0x0B
+#define		DONELISTSIZE_16384ELEM	0x0C
+
+/* mode 8 */
+#define CSEQDLOFFS			CSEQm_CIO_REG(8, 0x4E)
+
+/* mode 11 */
+#define CM11INTVEC0			CSEQm_CIO_REG(11, 0x50)
+
+/* mode 11 */
+#define CM11INTVEC1			CSEQm_CIO_REG(11, 0x52)
+
+/* mode 11 */
+#define CM11INTVEC2			CSEQm_CIO_REG(11, 0x54)
+
+#define	CCONMSK	  			(CSEQ_CIO_REG_BASE_ADR + 0x60)
+
+#define	CCONEXIST			(CSEQ_CIO_REG_BASE_ADR + 0x61)
+
+#define	CCONMODE			(CSEQ_CIO_REG_BASE_ADR + 0x62)
+
+#define CTIMERCALC			(CSEQ_CIO_REG_BASE_ADR + 0x64)
+
+#define CINTDIS				(CSEQ_CIO_REG_BASE_ADR + 0x68)
+
+/* mode 8, 32x32 bits, 128 bytes of mapped buffer */
+#define CSBUFFER			CSEQm_CIO_REG(8, 0x80)
+
+#define	CSCRATCH			(CSEQ_CIO_REG_BASE_ADR + 0x1C0)
+
+/* mode 0-8 */
+#define CMnSCRATCH(Mode)		CSEQm_CIO_REG(Mode, 0x1E0)
+
+/*
+ * CSEQ Mapped Instruction RAM Page, Address Range : (0x0000-0x1FFC)
+ */
+#define CSEQ_RAM_REG_BASE_ADR		0xB8004000
+
+/*
+ * The common definitions below have the same address offset for all the Link
+ * sequencers.
+ */
+#define MODECTL		0x40
+
+#define DBGMODE		0x44
+
+#define CONTROL		0x48
+#define LEDTIMER		0x00010000
+#define LEDTIMERS_10us		0x00000000
+#define LEDTIMERS_1ms		0x00000800
+#define LEDTIMERS_100ms		0x00001000
+#define LEDMODE_TXRX		0x00000000
+#define LEDMODE_CONNECTED	0x00000200
+#define LEDPOL			0x00000100
+
+#define LSEQRAM		0x1000
+
+/*
+ * LmSEQ Host Registers, Address Range : (0x0000-0x3FFC)
+ */
+#define LSEQ0_HOST_REG_BASE_ADR		0xB8020000
+#define LSEQ1_HOST_REG_BASE_ADR		0xB8024000
+#define LSEQ2_HOST_REG_BASE_ADR		0xB8028000
+#define LSEQ3_HOST_REG_BASE_ADR		0xB802C000
+#define LSEQ4_HOST_REG_BASE_ADR		0xB8030000
+#define LSEQ5_HOST_REG_BASE_ADR		0xB8034000
+#define LSEQ6_HOST_REG_BASE_ADR		0xB8038000
+#define LSEQ7_HOST_REG_BASE_ADR		0xB803C000
+
+#define LmARP2CTL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2CTL)
+
+#define LmARP2INT(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2INT)
+
+#define LmARP2INTEN(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2INTEN)
+
+#define LmDBGMODE(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					DBGMODE)
+
+#define LmCONTROL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					CONTROL)
+
+#define LmARP2BREAKADR01(LinkNum)	(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2BREAKADR01)
+
+#define LmARP2BREAKADR23(LinkNum)	(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2BREAKADR23)
+
+#define LmMODECTL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					MODECTL)
+
+#define		LmAUTODISCI		0x08000000
+#define		LmDSBLBITLT		0x04000000
+#define		LmDSBLANTT		0x02000000
+#define		LmDSBLCRTT		0x01000000
+#define		LmDSBLCONT		0x00000100
+#define		LmPRIMODE		0x00000080
+#define		LmDSBLHOLD		0x00000040
+#define		LmDISACK		0x00000020
+#define		LmBLIND48		0x00000010
+#define		LmRCVMODE_MASK		0x0000000C
+#define		LmRCVMODE_PLD		0x00000000
+#define		LmRCVMODE_HPC		0x00000004
+
+#define LmDBGMODE(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					DBGMODE)
+
+#define		LmFRCPERR		0x80000000
+#define		LmMEMSEL_MASK		0x30000000
+#define		LmFRCRBPERR		0x00000000
+#define		LmFRCTBPERR		0x10000000
+#define		LmFRCSGBPERR		0x20000000
+#define		LmFRCARBPERR		0x30000000
+#define		LmRCVIDW		0x00080000
+#define		LmINVDWERR		0x00040000
+#define		LmRCVDISP		0x00004000
+#define		LmDISPERR		0x00002000
+#define		LmDSBLDSCR		0x00000800
+#define		LmDSBLSCR		0x00000400
+#define		LmFRCNAK		0x00000200
+#define		LmFRCROFS		0x00000100
+#define		LmFRCCRC		0x00000080
+#define		LmFRMTYPE_MASK		0x00000070
+#define		LmSG_DATA		0x00000000
+#define		LmSG_COMMAND		0x00000010
+#define		LmSG_TASK		0x00000020
+#define		LmSG_TGTXFER		0x00000030
+#define		LmSG_RESPONSE		0x00000040
+#define		LmSG_IDENADDR		0x00000050
+#define		LmSG_OPENADDR		0x00000060
+#define		LmDISCRCGEN		0x00000008
+#define		LmDISCRCCHK		0x00000004
+#define		LmSSXMTFRM		0x00000002
+#define		LmSSRCVFRM		0x00000001
+
+#define LmCONTROL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					CONTROL)
+
+#define		LmSTEPXMTFRM		0x00000002
+#define		LmSTEPRCVFRM		0x00000001
+
+#define LmBISTCTL0(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					BISTCTL0)
+
+#define		ARBBISTEN		0x40000000
+#define		ARBBISTDN		0x20000000	/* ro */
+#define		ARBBISTFAIL		0x10000000	/* ro */
+#define		TBBISTEN		0x00000400
+#define		TBBISTDN		0x00000200	/* ro */
+#define		TBBISTFAIL		0x00000100	/* ro */
+#define		RBBISTEN		0x00000040
+#define		RBBISTDN		0x00000020	/* ro */
+#define		RBBISTFAIL		0x00000010	/* ro */
+#define		SGBISTEN		0x00000004
+#define		SGBISTDN		0x00000002	/* ro */
+#define		SGBISTFAIL		0x00000001	/* ro */
+
+#define LmBISTCTL1(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	 \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) +\
+					BISTCTL1)
+
+#define		LmRAMPAGE1		0x00000200
+#define		LmRAMPAGE0		0x00000100
+#define		LmIMEMBISTEN		0x00000040
+#define		LmIMEMBISTDN		0x00000020	/* ro */
+#define		LmIMEMBISTFAIL		0x00000010	/* ro */
+#define		LmSCRBISTEN		0x00000004
+#define		LmSCRBISTDN		0x00000002	/* ro */
+#define		LmSCRBISTFAIL		0x00000001	/* ro */
+#define		LmRAMPAGE		(LmRAMPAGE1 | LmRAMPAGE0)	/* 0x00000300 */
+#define		LmRAMPAGE_LSHIFT	0x8
+
+#define LmSCRATCH(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	   \
+					((LinkNum) * LmSEQ_HOST_REG_SIZE) +\
+					MAPPEDSCR)
+
+#define LmSEQRAM(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	   \
+					((LinkNum) * LmSEQ_HOST_REG_SIZE) +\
+					LSEQRAM)
+
+/*
+ * LmSEQ CIO Bus Register, Address Range : (0x0000-0xFFC)
+ * 8 modes, each mode is 512 bytes.
+ * Unless specified otherwise, a register is valid for all modes.
+ */
+#define LmSEQ_CIOBUS_REG_BASE		0x2000
+
+#define  LmSEQ_PHY_BASE(Mode, LinkNum) \
+		(LSEQ0_HOST_REG_BASE_ADR + \
+		(LmSEQ_HOST_REG_SIZE * (u32) (LinkNum)) + \
+		LmSEQ_CIOBUS_REG_BASE + \
+		((u32) (Mode) * LmSEQ_MODE_PAGE_SIZE))
+
+#define  LmSEQ_PHY_REG(Mode, LinkNum, Reg) \
+                 (LmSEQ_PHY_BASE(Mode, LinkNum) + (u32) (Reg))
+
+#define LmMODEPTR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, MODEPTR)
+
+#define LmALTMODE(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ALTMODE)
+
+#define LmATOMICXCHG(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ATOMICXCHG)
+
+#define LmFLAG(LinkNum)			LmSEQ_PHY_REG(0, LinkNum, FLAG)
+
+#define LmARP2INTCTL(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ARP2INTCTL)
+
+#define LmSTACK(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, STACK)
+
+#define LmFUNCTION1(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, FUNCTION1)
+
+#define LmPRGMCNT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, PRGMCNT)
+
+#define LmACCUM(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ACCUM)
+
+#define LmSINDEX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, SINDEX)
+
+#define LmDINDEX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DINDEX)
+
+#define LmALLONES(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ALLONES)
+
+#define LmALLZEROS(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ALLZEROS)
+
+#define LmSINDIR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, SINDIR)
+
+#define LmDINDIR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DINDIR)
+
+#define LmJUMLDIR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, JUMLDIR)
+
+#define LmARP2HALTCODE(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ARP2HALTCODE)
+
+#define LmCURRADDR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, CURRADDR)
+
+#define LmLASTADDR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, LASTADDR)
+
+#define LmNXTLADDR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, NXTLADDR)
+
+#define LmDBGPORTPTR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DBGPORTPTR)
+
+#define LmDBGPORT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DBGPORT)
+
+#define LmSCRATCHPAGE(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, SCRATCHPAGE)
+
+#define LmMnSCRATCHPAGE(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 	\
+						      MnSCRATCHPAGE)
+
+#define LmTIMERCALC(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x28)
+
+#define LmREQMBX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x30)
+
+#define LmRSPMBX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x34)
+
+#define LmMnINT(LinkNum, Mode)		LmSEQ_PHY_REG(Mode, LinkNum, 0x38)
+
+#define		CTXMEMSIZE		0x80000000	/* ro */
+#define		LmACKREQ		0x08000000
+#define		LmNAKREQ		0x04000000
+#define		LmMnXMTERR		0x02000000
+#define		LmM5OOBSVC		0x01000000
+#define		LmHWTINT		0x00800000
+#define		LmMnCTXDONE		0x00100000
+#define		LmM2REQMBXF		0x00080000
+#define		LmM2RSPMBXE		0x00040000
+#define		LmMnDMAERR		0x00020000
+#define		LmRCVPRIM		0x00010000
+#define		LmRCVERR		0x00008000
+#define		LmADDRRCV		0x00004000
+#define		LmMnHDRMISS		0x00002000
+#define		LmMnWAITSCB		0x00001000
+#define		LmMnRLSSCB		0x00000800
+#define		LmMnSAVECTX		0x00000400
+#define		LmMnFETCHSG		0x00000200
+#define		LmMnLOADCTX		0x00000100
+#define		LmMnCFGICL		0x00000080
+#define		LmMnCFGSATA		0x00000040
+#define		LmMnCFGEXPSATA		0x00000020
+#define		LmMnCFGCMPLT		0x00000010
+#define		LmMnCFGRBUF		0x00000008
+#define		LmMnSAVETTR		0x00000004
+#define		LmMnCFGRDAT		0x00000002
+#define		LmMnCFGHDR		0x00000001
+
+#define LmMnINTEN(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x3C)
+
+#define		EN_LmACKREQ		0x08000000
+#define		EN_LmNAKREQ		0x04000000
+#define		EN_LmMnXMTERR		0x02000000
+#define		EN_LmM5OOBSVC		0x01000000
+#define		EN_LmHWTINT		0x00800000
+#define		EN_LmMnCTXDONE		0x00100000
+#define		EN_LmM2REQMBXF		0x00080000
+#define		EN_LmM2RSPMBXE		0x00040000
+#define		EN_LmMnDMAERR		0x00020000
+#define		EN_LmRCVPRIM		0x00010000
+#define		EN_LmRCVERR		0x00008000
+#define		EN_LmADDRRCV		0x00004000
+#define		EN_LmMnHDRMISS		0x00002000
+#define		EN_LmMnWAITSCB		0x00001000
+#define		EN_LmMnRLSSCB		0x00000800
+#define		EN_LmMnSAVECTX		0x00000400
+#define		EN_LmMnFETCHSG		0x00000200
+#define		EN_LmMnLOADCTX		0x00000100
+#define		EN_LmMnCFGICL		0x00000080
+#define		EN_LmMnCFGSATA		0x00000040
+#define		EN_LmMnCFGEXPSATA	0x00000020
+#define		EN_LmMnCFGCMPLT		0x00000010
+#define		EN_LmMnCFGRBUF		0x00000008
+#define		EN_LmMnSAVETTR		0x00000004
+#define		EN_LmMnCFGRDAT		0x00000002
+#define		EN_LmMnCFGHDR		0x00000001
+
+#define		LmM0INTEN_MASK		(EN_LmMnCFGCMPLT | EN_LmMnCFGRBUF | \
+					 EN_LmMnSAVETTR | EN_LmMnCFGRDAT | \
+					 EN_LmMnCFGHDR | EN_LmRCVERR | \
+					 EN_LmADDRRCV | EN_LmMnHDRMISS | \
+					 EN_LmMnRLSSCB | EN_LmMnSAVECTX | \
+					 EN_LmMnFETCHSG | EN_LmMnLOADCTX | \
+					 EN_LmHWTINT | EN_LmMnCTXDONE | \
+					 EN_LmRCVPRIM | EN_LmMnCFGSATA | \
+					 EN_LmMnCFGEXPSATA | EN_LmMnDMAERR)
+
+#define		LmM1INTEN_MASK		(EN_LmMnCFGCMPLT | EN_LmADDRRCV | \
+					 EN_LmMnRLSSCB | EN_LmMnSAVECTX | \
+					 EN_LmMnFETCHSG | EN_LmMnLOADCTX | \
+					 EN_LmMnXMTERR | EN_LmHWTINT | \
+					 EN_LmMnCTXDONE | EN_LmRCVPRIM | \
+					 EN_LmRCVERR | EN_LmMnDMAERR)
+
+#define		LmM2INTEN_MASK		(EN_LmADDRRCV | EN_LmHWTINT | \
+					 EN_LmM2REQMBXF | EN_LmRCVPRIM | \
+					 EN_LmRCVERR)
+
+#define		LmM5INTEN_MASK		(EN_LmADDRRCV | EN_LmM5OOBSVC | \
+					 EN_LmHWTINT | EN_LmRCVPRIM | \
+					 EN_LmRCVERR)
+
+#define LmXMTPRIMD(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x40)
+
+#define LmXMTPRIMCS(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x44)
+
+#define LmCONSTAT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x45)
+
+#define LmMnDMAERRS(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x46)
+
+#define LmMnSGDMAERRS(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x47)
+
+#define LmM0EXPHDRP(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x48)
+
+#define LmM1SASALIGN(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x48)
+#define SAS_ALIGN_DEFAULT		0xFF
+
+#define LmM0MSKHDRP(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x49)
+
+#define LmM1STPALIGN(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x49)
+#define STP_ALIGN_DEFAULT		0x1F
+
+#define LmM0RCVHDRP(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x4A)
+
+#define LmM1XMTHDRP(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x4A)
+
+#define LmM0ICLADR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x4B)
+
+#define LmM1ALIGNMODE(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x4B)
+
+#define		LmDISALIGN		0x20
+#define		LmROTSTPALIGN		0x10
+#define		LmSTPALIGN		0x08
+#define		LmROTNOTIFY		0x04
+#define		LmDUALALIGN		0x02
+#define		LmROTALIGN		0x01
+
+#define LmM0EXPRCVNT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x4C)
+
+#define LmM1XMTCNT(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x4C)
+
+#define LmMnBUFSTAT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x4E)
+
+#define		LmMnBUFPERR		0x01
+
+/* mode 0-1 */
+#define LmMnXFRLVL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x59)
+
+#define		LmMnXFRLVL_128		0x05
+#define		LmMnXFRLVL_256		0x04
+#define		LmMnXFRLVL_512		0x03
+#define		LmMnXFRLVL_1024		0x02
+#define		LmMnXFRLVL_1536		0x01
+#define		LmMnXFRLVL_2048		0x00
+
+ /* mode 0-1 */
+#define LmMnSGDMACTL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5A)
+
+#define 	LmMnRESETSG		0x04
+#define 	LmMnSTOPSG		0x02
+#define 	LmMnSTARTSG		0x01
+
+/* mode 0-1 */
+#define LmMnSGDMASTAT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5B)
+
+/* mode 0-1 */
+#define LmMnDDMACTL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5C)
+
+#define 	LmMnFLUSH		0x40		/* wo */
+#define 	LmMnRLSRTRY		0x20		/* wo */
+#define 	LmMnDISCARD		0x10		/* wo */
+#define 	LmMnRESETDAT		0x08		/* wo */
+#define 	LmMnSUSDAT		0x04		/* wo */
+#define 	LmMnSTOPDAT		0x02		/* wo */
+#define 	LmMnSTARTDAT		0x01		/* wo */
+
+/* mode 0-1 */
+#define LmMnDDMASTAT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5D)
+
+#define		LmMnDPEMPTY		0x80
+#define		LmMnFLUSHING		0x40
+#define		LmMnDDMAREQ		0x20
+#define		LmMnHDMAREQ		0x10
+#define		LmMnDATFREE		0x08
+#define		LmMnDATSUS		0x04
+#define		LmMnDATACT		0x02
+#define		LmMnDATEN		0x01
+
+/* mode 0-1 */
+#define LmMnDDMAMODE(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5E)
+
+#define 	LmMnDMATYPE_NORMAL		0x0000
+#define 	LmMnDMATYPE_HOST_ONLY_TX	0x0001
+#define 	LmMnDMATYPE_DEVICE_ONLY_TX	0x0002
+#define 	LmMnDMATYPE_INVALID		0x0003
+#define 	LmMnDMATYPE_MASK	0x0003
+
+#define 	LmMnDMAWRAP		0x0004
+#define 	LmMnBITBUCKET		0x0008
+#define 	LmMnDISHDR		0x0010
+#define 	LmMnSTPCRC		0x0020
+#define 	LmXTEST			0x0040
+#define 	LmMnDISCRC		0x0080
+#define 	LmMnENINTLK		0x0100
+#define 	LmMnADDRFRM		0x0400
+#define 	LmMnENXMTCRC		0x0800
+
+/* mode 0-1 */
+#define LmMnXFRCNT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x70)
+
+/* mode 0-1 */
+#define LmMnDPSEL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x7B)
+#define 	LmMnDPSEL_MASK		0x07
+#define 	LmMnEOLPRE		0x40
+#define 	LmMnEOSPRE		0x80
+
+/* Registers used in conjunction with LmMnDPSEL and LmMnDPACC registers */
+/* Receive Mode n = 0 */
+#define LmMnHRADDR			0x00
+#define LmMnHBYTECNT			0x01
+#define LmMnHREWIND			0x02
+#define LmMnDWADDR			0x03
+#define LmMnDSPACECNT			0x04
+#define LmMnDFRMSIZE			0x05
+
+/* Registers used in conjunction with LmMnDPSEL and LmMnDPACC registers */
+/* Transmit Mode n = 1 */
+#define LmMnHWADDR			0x00
+#define LmMnHSPACECNT			0x01
+/* #define LmMnHREWIND			0x02 */
+#define LmMnDRADDR			0x03
+#define LmMnDBYTECNT			0x04
+/* #define LmMnDFRMSIZE			0x05 */
+
+/* mode 0-1 */
+#define LmMnDPACC(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x78)
+#define 	LmMnDPACC_MASK		0x00FFFFFF
+
+/* mode 0-1 */
+#define LmMnHOLDLVL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x7D)
+
+#define LmPRMSTAT0(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x80)
+#define LmPRMSTAT0BYTE0			0x80
+#define LmPRMSTAT0BYTE1			0x81
+#define LmPRMSTAT0BYTE2			0x82
+#define LmPRMSTAT0BYTE3			0x83
+
+#define		LmFRAMERCVD		0x80000000
+#define		LmXFRRDYRCVD		0x40000000
+#define		LmUNKNOWNP		0x20000000
+#define		LmBREAK			0x10000000
+#define		LmDONE			0x08000000
+#define		LmOPENACPT		0x04000000
+#define		LmOPENRJCT		0x02000000
+#define		LmOPENRTRY		0x01000000
+#define		LmCLOSERV1		0x00800000
+#define		LmCLOSERV0		0x00400000
+#define		LmCLOSENORM		0x00200000
+#define		LmCLOSECLAF		0x00100000
+#define		LmNOTIFYRV2		0x00080000
+#define		LmNOTIFYRV1		0x00040000
+#define		LmNOTIFYRV0		0x00020000
+#define		LmNOTIFYSPIN		0x00010000
+#define		LmBROADRV4		0x00008000
+#define		LmBROADRV3		0x00004000
+#define		LmBROADRV2		0x00002000
+#define		LmBROADRV1		0x00001000
+#define		LmBROADSES		0x00000800
+#define		LmBROADRVCH1		0x00000400
+#define		LmBROADRVCH0		0x00000200
+#define		LmBROADCH		0x00000100
+#define		LmAIPRVWP		0x00000080
+#define		LmAIPWP			0x00000040
+#define		LmAIPWD			0x00000020
+#define		LmAIPWC			0x00000010
+#define		LmAIPRV2		0x00000008
+#define		LmAIPRV1		0x00000004
+#define		LmAIPRV0		0x00000002
+#define		LmAIPNRML		0x00000001
+
+#define		LmBROADCAST_MASK	(LmBROADCH | LmBROADRVCH0 | \
+					 LmBROADRVCH1)
+
+#define LmPRMSTAT1(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x84)
+#define LmPRMSTAT1BYTE0			0x84
+#define LmPRMSTAT1BYTE1			0x85
+#define LmPRMSTAT1BYTE2			0x86
+#define LmPRMSTAT1BYTE3			0x87
+
+#define		LmFRMRCVDSTAT		0x80000000
+#define		LmBREAK_DET		0x04000000
+#define		LmCLOSE_DET		0x02000000
+#define		LmDONE_DET		0x01000000
+#define		LmXRDY			0x00040000
+#define 	LmSYNCSRST		0x00020000
+#define 	LmSYNC			0x00010000
+#define 	LmXHOLD			0x00008000
+#define 	LmRRDY			0x00004000
+#define 	LmHOLD			0x00002000
+#define 	LmROK			0x00001000
+#define 	LmRIP			0x00000800
+#define 	LmCRBLK			0x00000400
+#define 	LmACK			0x00000200
+#define 	LmNAK			0x00000100
+#define 	LmHARDRST		0x00000080
+#define 	LmERROR			0x00000040
+#define 	LmRERR			0x00000020
+#define 	LmPMREQP		0x00000010
+#define 	LmPMREQS		0x00000008
+#define 	LmPMACK			0x00000004
+#define 	LmPMNAK			0x00000002
+#define 	LmDMAT			0x00000001
+
+/* mode 1 */
+#define	LmMnSATAFS(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x7E)
+#define	LmMnXMTSIZE(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x93)
+
+/* mode 0 */
+#define LmMnFRMERR(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0xB0)
+
+#define		LmACRCERR		0x00000800
+#define		LmPHYOVRN		0x00000400
+#define		LmOBOVRN		0x00000200
+#define 	LmMnZERODATA		0x00000100
+#define		LmSATAINTLK		0x00000080
+#define		LmMnCRCERR		0x00000020
+#define		LmRRDYOVRN		0x00000010
+#define		LmMISSSOAF		0x00000008
+#define		LmMISSSOF		0x00000004
+#define		LmMISSEOAF		0x00000002
+#define		LmMISSEOF		0x00000001
+
+#define LmFRMERREN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xB4)
+
+#define 	EN_LmACRCERR		0x00000800
+#define 	EN_LmPHYOVRN		0x00000400
+#define 	EN_LmOBOVRN		0x00000200
+#define 	EN_LmMnZERODATA		0x00000100
+#define 	EN_LmSATAINTLK		0x00000080
+#define 	EN_LmFRMBAD		0x00000040
+#define 	EN_LmMnCRCERR		0x00000020
+#define 	EN_LmRRDYOVRN		0x00000010
+#define 	EN_LmMISSSOAF		0x00000008
+#define 	EN_LmMISSSOF		0x00000004
+#define 	EN_LmMISSEOAF		0x00000002
+#define 	EN_LmMISSEOF		0x00000001
+
+#define		LmFRMERREN_MASK		(EN_LmSATAINTLK | EN_LmMnCRCERR | \
+					 EN_LmRRDYOVRN | EN_LmMISSSOF | \
+					 EN_LmMISSEOAF | EN_LmMISSEOF | \
+					 EN_LmACRCERR | EN_LmPHYOVRN | \
+					 EN_LmOBOVRN | EN_LmMnZERODATA)	/* EN_Lm* bits only */
+
+#define LmHWTSTATEN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xC5)
+
+#define		EN_LmDONETO		0x80
+#define		EN_LmINVDISP		0x40
+#define		EN_LmINVDW		0x20
+#define		EN_LmDWSEVENT		0x08
+#define		EN_LmCRTTTO		0x04
+#define		EN_LmANTTTO		0x02
+#define		EN_LmBITLTTO		0x01
+
+#define		LmHWTSTATEN_MASK	(EN_LmINVDISP | EN_LmINVDW | \
+					 EN_LmDWSEVENT | EN_LmCRTTTO | \
+					 EN_LmANTTTO | EN_LmDONETO | \
+					 EN_LmBITLTTO)
+
+#define LmHWTSTAT(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xC7)
+
+#define		LmDONETO		0x80
+#define		LmINVDISP		0x40
+#define		LmINVDW			0x20
+#define		LmDWSEVENT		0x08
+#define		LmCRTTTO		0x04
+#define		LmANTTTO		0x02
+#define		LmBITLTTO		0x01
+
+#define LmMnDATABUFADR(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0xC8)
+#define		LmDATABUFADR_MASK	0x0FFF
+
+#define LmMnDATABUF(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0xCA)
+
+#define	LmPRIMSTAT0EN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xE0)
+
+#define 	EN_LmUNKNOWNP 		0x20000000
+#define 	EN_LmBREAK		0x10000000
+#define 	EN_LmDONE		0x08000000
+#define 	EN_LmOPENACPT		0x04000000
+#define 	EN_LmOPENRJCT		0x02000000
+#define 	EN_LmOPENRTRY		0x01000000
+#define 	EN_LmCLOSERV1		0x00800000
+#define 	EN_LmCLOSERV0		0x00400000
+#define 	EN_LmCLOSENORM		0x00200000
+#define 	EN_LmCLOSECLAF		0x00100000
+#define 	EN_LmNOTIFYRV2		0x00080000
+#define 	EN_LmNOTIFYRV1		0x00040000
+#define 	EN_LmNOTIFYRV0		0x00020000
+#define 	EN_LmNOTIFYSPIN		0x00010000
+#define 	EN_LmBROADRV4		0x00008000
+#define 	EN_LmBROADRV3		0x00004000
+#define 	EN_LmBROADRV2		0x00002000
+#define 	EN_LmBROADRV1		0x00001000
+#define 	EN_LmBROADRV0		0x00000800
+#define 	EN_LmBROADRVCH1		0x00000400
+#define 	EN_LmBROADRVCH0		0x00000200
+#define 	EN_LmBROADCH		0x00000100
+#define 	EN_LmAIPRVWP		0x00000080
+#define 	EN_LmAIPWP		0x00000040
+#define 	EN_LmAIPWD		0x00000020
+#define 	EN_LmAIPWC		0x00000010
+#define 	EN_LmAIPRV2		0x00000008
+#define 	EN_LmAIPRV1		0x00000004
+#define 	EN_LmAIPRV0		0x00000002
+#define 	EN_LmAIPNRML		0x00000001
+
+#define		LmPRIMSTAT0EN_MASK	(EN_LmBREAK | \
+					 EN_LmDONE | EN_LmOPENACPT | \
+					 EN_LmOPENRJCT | EN_LmOPENRTRY | \
+					 EN_LmCLOSERV1 | EN_LmCLOSERV0 | \
+					 EN_LmCLOSENORM | EN_LmCLOSECLAF | \
+					 EN_LmBROADRV4 | EN_LmBROADRV3 | \
+					 EN_LmBROADRV2 | EN_LmBROADRV1 | \
+					 EN_LmBROADRV0 | EN_LmBROADRVCH1 | \
+					 EN_LmBROADRVCH0 | EN_LmBROADCH | \
+					 EN_LmAIPRVWP | EN_LmAIPWP | \
+					 EN_LmAIPWD | EN_LmAIPWC | \
+					 EN_LmAIPRV2 | EN_LmAIPRV1 | \
+					 EN_LmAIPRV0 | EN_LmAIPNRML)
+
+#define LmPRIMSTAT1EN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xE4)
+
+#define		EN_LmXRDY		0x00040000
+#define		EN_LmSYNCSRST		0x00020000
+#define		EN_LmSYNC		0x00010000
+#define 	EN_LmXHOLD		0x00008000
+#define 	EN_LmRRDY		0x00004000
+#define 	EN_LmHOLD		0x00002000
+#define 	EN_LmROK		0x00001000
+#define 	EN_LmRIP		0x00000800
+#define 	EN_LmCRBLK		0x00000400
+#define 	EN_LmACK		0x00000200
+#define 	EN_LmNAK		0x00000100
+#define 	EN_LmHARDRST		0x00000080
+#define 	EN_LmERROR		0x00000040
+#define 	EN_LmRERR		0x00000020
+#define 	EN_LmPMREQP		0x00000010
+#define 	EN_LmPMREQS		0x00000008
+#define 	EN_LmPMACK		0x00000004
+#define 	EN_LmPMNAK		0x00000002
+#define 	EN_LmDMAT		0x00000001
+
+#define LmPRIMSTAT1EN_MASK		(EN_LmHARDRST | \
+					 EN_LmSYNCSRST | \
+					 EN_LmPMREQP | EN_LmPMREQS | \
+					 EN_LmPMACK | EN_LmPMNAK)
+
+#define LmSMSTATE(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xE8)
+
+#define LmSMSTATEBRK(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xEC)
+
+#define LmSMDBGCTL(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xF0)
+
+
+/*
+ * LmSEQ CIO Bus Mode 3 Register.
+ * Mode 3: Configuration and Setup, IOP Context SCB.
+ */
+#define LmM3SATATIMER(LinkNum) 		LmSEQ_PHY_REG(3, LinkNum, 0x48)
+
+#define LmM3INTVEC0(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x90)
+
+#define LmM3INTVEC1(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x92)
+
+#define LmM3INTVEC2(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x94)
+
+#define LmM3INTVEC3(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x96)
+
+#define LmM3INTVEC4(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x98)
+
+#define LmM3INTVEC5(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x9A)
+
+#define LmM3INTVEC6(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x9C)
+
+#define LmM3INTVEC7(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x9E)
+
+#define LmM3INTVEC8(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xA4)
+
+#define LmM3INTVEC9(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xA6)
+
+#define LmM3INTVEC10(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xB0)
+
+#define LmM3FRMGAP(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xB4)
+
+#define LmBITL_TIMER(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xA2)
+
+#define LmWWN(LinkNum) 			LmSEQ_PHY_REG(0, LinkNum, 0xA8)
+
+
+/*
+ * LmSEQ CIO Bus Mode 5 Registers.
+ * Mode 5: Phy/OOB Control and Status.
+ */
+#define LmSEQ_OOB_REG(phy_id, reg)	LmSEQ_PHY_REG(5, (phy_id), (reg))
+
+#define OOB_BFLTR	0x100
+
+#define		BFLTR_THR_MASK		0xF0
+#define		BFLTR_TC_MASK		0x0F
+
+#define OOB_INIT_MIN	0x102
+
+#define OOB_INIT_MAX	0x104
+
+#define OOB_INIT_NEG	0x106
+
+#define	OOB_SAS_MIN	0x108
+
+#define OOB_SAS_MAX	0x10A
+
+#define OOB_SAS_NEG	0x10C
+
+#define OOB_WAKE_MIN	0x10E
+
+#define OOB_WAKE_MAX	0x110
+
+#define OOB_WAKE_NEG	0x112
+
+#define OOB_IDLE_MAX	0x114
+
+#define OOB_BURST_MAX	0x116
+
+#define OOB_DATA_KBITS	0x126
+
+#define OOB_ALIGN_0_DATA	0x12C
+
+#define OOB_ALIGN_1_DATA	0x130
+
+#define D10_2_DATA_k		0x00
+#define SYNC_DATA_k		0x02
+#define ALIGN_1_DATA_k		0x04
+#define ALIGN_0_DATA_k		0x08
+#define BURST_DATA_k		0x10
+
+#define OOB_PHY_RESET_COUNT	0x13C
+
+#define OOB_SIG_GEN	0x140
+
+#define		START_OOB		0x80
+#define		START_DWS		0x40
+#define		ALIGN_CNT3		0x30	/* ALIGN_CNTn: encodings of bits 5:4 */
+#define 	ALIGN_CNT2		0x20
+#define 	ALIGN_CNT1		0x10
+#define 	ALIGN_CNT4		0x00	/* all-zero encoding, not a flag bit */
+#define		STOP_DWS		0x08
+#define		SEND_COMSAS		0x04
+#define		SEND_COMINIT		0x02
+#define		SEND_COMWAKE		0x01
+
+#define OOB_XMIT	0x141
+
+#define		TX_ENABLE		0x80
+#define		XMIT_OOB_BURST		0x10
+#define		XMIT_D10_2		0x08
+#define		XMIT_SYNC		0x04
+#define		XMIT_ALIGN_1		0x02
+#define		XMIT_ALIGN_0		0x01
+
+#define FUNCTION_MASK	0x142
+
+#define		SAS_MODE_DIS		0x80
+#define		SATA_MODE_DIS		0x40
+#define		SPINUP_HOLD_DIS		0x20
+#define		HOT_PLUG_DIS		0x10
+#define		SATA_PS_DIS		0x08
+#define		FUNCTION_MASK_DEFAULT	(SPINUP_HOLD_DIS | SATA_PS_DIS)
+
+#define OOB_MODE	0x143
+
+#define		SAS_MODE		0x80
+#define		SATA_MODE		0x40
+#define		SLOW_CLK		0x20
+#define		FORCE_XMIT_15		0x08
+#define		PHY_SPEED_60		0x04
+#define		PHY_SPEED_30		0x02
+#define		PHY_SPEED_15		0x01
+
+#define	CURRENT_STATUS	0x144
+
+#define		CURRENT_OOB_DONE	0x80
+#define		CURRENT_LOSS_OF_SIGNAL	0x40
+#define		CURRENT_SPINUP_HOLD	0x20
+#define		CURRENT_HOT_PLUG_CNCT	0x10
+#define		CURRENT_GTO_TIMEOUT	0x08
+#define		CURRENT_OOB_TIMEOUT	0x04
+#define		CURRENT_DEVICE_PRESENT	0x02
+#define		CURRENT_OOB_ERROR	0x01
+
+#define 	CURRENT_OOB1_ERROR	(CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_GTO_TIMEOUT)
+
+#define 	CURRENT_OOB2_ERROR	(CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_OOB_ERROR)
+
+#define		DEVICE_ADDED_W_CNT	(CURRENT_OOB_DONE | \
+					 CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_DEVICE_PRESENT)
+
+#define		DEVICE_ADDED_WO_CNT	(CURRENT_OOB_DONE | \
+					 CURRENT_DEVICE_PRESENT)
+
+#define 	DEVICE_REMOVED		CURRENT_LOSS_OF_SIGNAL
+
+#define		CURRENT_PHY_MASK	(CURRENT_OOB_DONE | \
+					 CURRENT_LOSS_OF_SIGNAL | \
+					 CURRENT_SPINUP_HOLD | \
+					 CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_GTO_TIMEOUT | \
+					 CURRENT_DEVICE_PRESENT | \
+					 CURRENT_OOB_ERROR )
+
+#define		CURRENT_ERR_MASK	(CURRENT_LOSS_OF_SIGNAL | \
+					 CURRENT_GTO_TIMEOUT | \
+					 CURRENT_OOB_TIMEOUT | \
+					 CURRENT_OOB_ERROR )
+
+#define SPEED_MASK	0x145
+
+#define		SATA_SPEED_30_DIS	0x10
+#define		SATA_SPEED_15_DIS	0x08
+#define		SAS_SPEED_60_DIS	0x04
+#define		SAS_SPEED_30_DIS	0x02
+#define		SAS_SPEED_15_DIS	0x01
+#define		SAS_SPEED_MASK_DEFAULT	0x00
+
+#define OOB_TIMER_ENABLE	0x14D
+
+#define		HOT_PLUG_EN		0x80
+#define		RCD_EN			0x40
+#define 	COMTIMER_EN		0x20
+#define		SNTT_EN			0x10
+#define		SNLT_EN			0x04
+#define		SNWT_EN			0x02
+#define		ALIGN_EN		0x01
+
+#define OOB_STATUS		0x14E
+
+#define		OOB_DONE		0x80
+#define		LOSS_OF_SIGNAL		0x40		/* ro */
+#define		SPINUP_HOLD		0x20
+#define		HOT_PLUG_CNCT		0x10		/* ro */
+#define		GTO_TIMEOUT		0x08		/* ro */
+#define		OOB_TIMEOUT		0x04		/* ro */
+#define		DEVICE_PRESENT		0x02		/* ro */
+#define		OOB_ERROR		0x01		/* ro */
+
+#define		OOB_STATUS_ERROR_MASK	(LOSS_OF_SIGNAL | GTO_TIMEOUT | \
+					 OOB_TIMEOUT | OOB_ERROR)
+
+#define OOB_STATUS_CLEAR	0x14F
+
+#define		OOB_DONE_CLR		0x80
+#define		LOSS_OF_SIGNAL_CLR 	0x40
+#define		SPINUP_HOLD_CLR		0x20
+#define		HOT_PLUG_CNCT_CLR     	0x10
+#define		GTO_TIMEOUT_CLR		0x08
+#define		OOB_TIMEOUT_CLR		0x04
+#define		OOB_ERROR_CLR		0x01
+
+#define HOT_PLUG_DELAY		0x150
+/* In 5 ms units. 20 = 100 ms. */
+#define	HOTPLUG_DELAY_TIMEOUT		20
+
+
+#define INT_ENABLE_2		0x15A
+
+#define		OOB_DONE_EN		0x80
+#define		LOSS_OF_SIGNAL_EN	0x40
+#define		SPINUP_HOLD_EN		0x20
+#define		HOT_PLUG_CNCT_EN	0x10
+#define		GTO_TIMEOUT_EN		0x08
+#define		OOB_TIMEOUT_EN		0x04
+#define		DEVICE_PRESENT_EN	0x02
+#define		OOB_ERROR_EN		0x01
+
+#define PHY_CONTROL_0		0x160
+
+#define		PHY_LOWPWREN_TX		0x80
+#define		PHY_LOWPWREN_RX		0x40
+#define		SPARE_REG_160_B5	0x20
+#define		OFFSET_CANCEL_RX	0x10
+
+/* bits 3:2 */
+#define		PHY_RXCOMCENTER_60V	0x00
+#define		PHY_RXCOMCENTER_70V	0x04
+#define		PHY_RXCOMCENTER_80V	0x08
+#define		PHY_RXCOMCENTER_90V	0x0C
+#define 	PHY_RXCOMCENTER_MASK	0x0C
+
+#define		PHY_RESET		0x02
+#define		SAS_DEFAULT_SEL		0x01
+
+#define PHY_CONTROL_1		0x161
+
+/* bits 2:0 */
+#define		SATA_PHY_DETLEVEL_50mv	0x00
+#define		SATA_PHY_DETLEVEL_75mv	0x01
+#define		SATA_PHY_DETLEVEL_100mv	0x02
+#define		SATA_PHY_DETLEVEL_125mv	0x03
+#define		SATA_PHY_DETLEVEL_150mv	0x04
+#define		SATA_PHY_DETLEVEL_175mv	0x05
+#define		SATA_PHY_DETLEVEL_200mv	0x06
+#define		SATA_PHY_DETLEVEL_225mv	0x07
+#define		SATA_PHY_DETLEVEL_MASK	0x07
+
+/* bits 5:3 -- each value is a 3-bit index shifted left by 3, within SAS_PHY_DETLEVEL_MASK */
+#define		SAS_PHY_DETLEVEL_50mv	0x00
+#define		SAS_PHY_DETLEVEL_75mv	0x08
+#define		SAS_PHY_DETLEVEL_100mv	0x10
+#define		SAS_PHY_DETLEVEL_125mv	0x18
+#define		SAS_PHY_DETLEVEL_150mv	0x20
+#define		SAS_PHY_DETLEVEL_175mv	0x28
+#define		SAS_PHY_DETLEVEL_200mv	0x30
+#define		SAS_PHY_DETLEVEL_225mv	0x38
+#define		SAS_PHY_DETLEVEL_MASK	0x38
+
+#define PHY_CONTROL_2		0x162
+
+/* bits 7:5 */
+#define 	SATA_PHY_DRV_400mv	0x00
+#define 	SATA_PHY_DRV_450mv	0x20
+#define 	SATA_PHY_DRV_500mv	0x40
+#define 	SATA_PHY_DRV_550mv	0x60
+#define 	SATA_PHY_DRV_600mv	0x80
+#define 	SATA_PHY_DRV_650mv	0xA0
+#define 	SATA_PHY_DRV_725mv	0xC0
+#define 	SATA_PHY_DRV_800mv	0xE0
+#define		SATA_PHY_DRV_MASK	0xE0
+
+/* bits 4:3 */
+#define 	SATA_PREEMP_0		0x00
+#define 	SATA_PREEMP_1		0x08
+#define 	SATA_PREEMP_2		0x10
+#define 	SATA_PREEMP_3		0x18
+#define 	SATA_PREEMP_MASK	0x18
+
+#define 	SATA_CMSH1P5		0x04
+
+/* bits 1:0 */
+#define 	SATA_SLEW_0		0x00
+#define 	SATA_SLEW_1		0x01
+#define 	SATA_SLEW_2		0x02
+#define 	SATA_SLEW_3		0x03
+#define 	SATA_SLEW_MASK		0x03
+
+#define PHY_CONTROL_3		0x163
+
+/* bits 7:5 */
+#define 	SAS_PHY_DRV_400mv	0x00
+#define 	SAS_PHY_DRV_450mv	0x20
+#define 	SAS_PHY_DRV_500mv	0x40
+#define 	SAS_PHY_DRV_550mv	0x60
+#define 	SAS_PHY_DRV_600mv	0x80
+#define 	SAS_PHY_DRV_650mv	0xA0
+#define 	SAS_PHY_DRV_725mv	0xC0
+#define 	SAS_PHY_DRV_800mv	0xE0
+#define		SAS_PHY_DRV_MASK	0xE0
+
+/* bits 4:3 */
+#define 	SAS_PREEMP_0		0x00
+#define 	SAS_PREEMP_1		0x08
+#define 	SAS_PREEMP_2		0x10
+#define 	SAS_PREEMP_3		0x18
+#define 	SAS_PREEMP_MASK		0x18
+
+#define 	SAS_CMSH1P5		0x04
+
+/* bits 1:0 */
+#define 	SAS_SLEW_0		0x00
+#define 	SAS_SLEW_1		0x01
+#define 	SAS_SLEW_2		0x02
+#define 	SAS_SLEW_3		0x03
+#define 	SAS_SLEW_MASK		0x03
+
+#define PHY_CONTROL_4		0x168
+
+#define		PHY_DONE_CAL_TX		0x80
+#define		PHY_DONE_CAL_RX		0x40
+#define		RX_TERM_LOAD_DIS	0x20
+#define		TX_TERM_LOAD_DIS	0x10
+#define		AUTO_TERM_CAL_DIS	0x08
+#define		PHY_SIGDET_FLTR_EN	0x04
+#define		OSC_FREQ		0x02
+#define		PHY_START_CAL		0x01
+
+/*
+ * HST_PCIX2 Registers, Address Range: (0x00-0xFC)
+ */
+#define PCIX_REG_BASE_ADR		0xB8040000
+
+#define PCIC_VENDOR_ID	0x00
+
+#define PCIC_DEVICE_ID	0x02
+
+#define PCIC_COMMAND	0x04
+
+#define		INT_DIS			0x0400
+#define		FBB_EN			0x0200		/* ro */
+#define		SERR_EN			0x0100
+#define		STEP_EN			0x0080		/* ro */
+#define		PERR_EN			0x0040
+#define		VGA_EN			0x0020		/* ro */
+#define		MWI_EN			0x0010
+#define		SPC_EN			0x0008
+#define		MST_EN			0x0004
+#define		MEM_EN			0x0002
+#define		IO_EN			0x0001
+
+#define	PCIC_STATUS	0x06
+
+#define		PERR_DET		0x8000
+#define		SERR_GEN		0x4000
+#define		MABT_DET		0x2000
+#define		TABT_DET		0x1000
+#define		TABT_GEN		0x0800
+#define		DPERR_DET		0x0100
+#define		CAP_LIST		0x0010
+#define		INT_STAT		0x0008
+
+#define	PCIC_DEVREV_ID	0x08
+
+#define	PCIC_CLASS_CODE	0x09
+
+#define	PCIC_CACHELINE_SIZE	0x0C
+
+#define	PCIC_MBAR0	0x10
+
+#define 	PCIC_MBAR0_OFFSET	0
+
+#define	PCIC_MBAR1	0x18
+
+#define 	PCIC_MBAR1_OFFSET	2
+
+#define	PCIC_IOBAR	0x20
+
+#define 	PCIC_IOBAR_OFFSET	4
+
+#define	PCIC_SUBVENDOR_ID	0x2C
+
+#define PCIC_SUBSYTEM_ID	0x2E
+
+#define PCIX_STATUS		0x44
+#define 	RCV_SCE		0x20000000
+#define 	UNEXP_SC	0x00080000
+#define 	SC_DISCARD	0x00040000
+
+#define ECC_CTRL_STAT		0x48
+#define 	UNCOR_ECCERR	0x00000008
+
+#define PCIC_PM_CSR		0x5C
+
+#define		PWR_STATE_D0		0
+#define		PWR_STATE_D1		1	/* not supported */
+#define		PWR_STATE_D2		2 	/* not supported */
+#define		PWR_STATE_D3		3
+
+#define PCIC_BASE1	0x6C	/* internal use only */
+
+#define		BASE1_RSVD		0xFFFFFFF8
+
+#define PCIC_BASEA	0x70	/* internal use only */
+
+#define		BASEA_RSVD		0xFFFFFFC0
+#define 	BASEA_START		0
+
+#define PCIC_BASEB	0x74	/* internal use only */
+
+#define		BASEB_RSVD		0xFFFFFF80
+#define		BASEB_IOMAP_MASK	0x7F
+#define 	BASEB_START		0x80
+
+#define PCIC_BASEC	0x78	/* internal use only */
+
+#define		BASEC_RSVD		0xFFFFFFFC
+#define 	BASEC_MASK		0x03
+#define 	BASEC_START		0x58
+
+#define PCIC_MBAR_KEY	0x7C	/* internal use only */
+
+#define 	MBAR_KEY_MASK		0xFFFFFFFF
+
+#define PCIC_HSTPCIX_CNTRL	0xA0
+
+#define 	REWIND_DIS		0x0800
+#define		SC_TMR_DIS		0x04000000
+
+#define PCIC_MBAR0_MASK	0xA8
+#define		PCIC_MBAR0_SIZE_MASK 	0x1FFFE000
+#define		PCIC_MBAR0_SIZE_SHIFT 	13
+#define		PCIC_MBAR0_SIZE(val)	\
+		    (((val) & PCIC_MBAR0_SIZE_MASK) >> PCIC_MBAR0_SIZE_SHIFT)
+
+#define PCIC_FLASH_MBAR	0xB8
+
+#define PCIC_INTRPT_STAT 0xD4
+
+#define PCIC_TP_CTRL	0xFC
+
+/*
+ * EXSI Registers, Address Range: (0x00-0xFC)
+ */
+#define EXSI_REG_BASE_ADR		REG_BASE_ADDR_EXSI
+
+#define	EXSICNFGR	(EXSI_REG_BASE_ADR + 0x00)
+
+#define		OCMINITIALIZED		0x80000000
+#define		ASIEN			0x00400000
+#define		HCMODE			0x00200000
+#define		PCIDEF			0x00100000
+#define		COMSTOCK		0x00080000
+#define		SEEPROMEND		0x00040000
+#define		MSTTIMEN		0x00020000
+#define		XREGEX			0x00000200
+#define		NVRAMW			0x00000100
+#define		NVRAMEX			0x00000080
+#define		SRAMW			0x00000040
+#define		SRAMEX			0x00000020
+#define		FLASHW			0x00000010
+#define		FLASHEX			0x00000008
+#define		SEEPROMCFG		0x00000004
+#define		SEEPROMTYP		0x00000002
+#define		SEEPROMEX		0x00000001
+
+
+#define EXSICNTRLR	(EXSI_REG_BASE_ADR + 0x04)
+
+#define		MODINT_EN		0x00000001
+
+
+#define PMSTATR		(EXSI_REG_BASE_ADR + 0x10)
+
+#define		FLASHRST		0x00000002
+#define		FLASHRDY		0x00000001
+
+
+#define FLCNFGR		(EXSI_REG_BASE_ADR + 0x14)
+
+#define		FLWEH_MASK		0x30000000
+#define		FLWESU_MASK		0x0C000000
+#define		FLWEPW_MASK		0x03F00000
+#define		FLOEH_MASK		0x000C0000
+#define 	FLOESU_MASK		0x00030000
+#define 	FLOEPW_MASK		0x0000FC00
+#define 	FLCSH_MASK		0x00000300
+#define 	FLCSSU_MASK		0x000000C0
+#define 	FLCSPW_MASK		0x0000003F
+
+#define SRCNFGR		(EXSI_REG_BASE_ADR + 0x18)
+
+#define		SRWEH_MASK		0x30000000
+#define		SRWESU_MASK		0x0C000000
+#define		SRWEPW_MASK		0x03F00000
+
+#define		SROEH_MASK		0x000C0000
+#define 	SROESU_MASK		0x00030000
+#define 	SROEPW_MASK		0x0000FC00
+#define		SRCSH_MASK		0x00000300
+#define		SRCSSU_MASK		0x000000C0
+#define		SRCSPW_MASK		0x0000003F
+
+#define NVCNFGR		(EXSI_REG_BASE_ADR + 0x1C)
+
+#define 	NVWEH_MASK		0x30000000
+#define 	NVWESU_MASK		0x0C000000
+#define 	NVWEPW_MASK		0x03F00000
+#define 	NVOEH_MASK		0x000C0000
+#define 	NVOESU_MASK		0x00030000
+#define 	NVOEPW_MASK		0x0000FC00
+#define 	NVCSH_MASK		0x00000300
+#define 	NVCSSU_MASK		0x000000C0
+#define 	NVCSPW_MASK		0x0000003F
+
+#define XRCNFGR		(EXSI_REG_BASE_ADR + 0x20)
+
+#define 	XRWEH_MASK		0x30000000
+#define 	XRWESU_MASK		0x0C000000
+#define 	XRWEPW_MASK		0x03F00000
+#define 	XROEH_MASK		0x000C0000
+#define 	XROESU_MASK		0x00030000
+#define 	XROEPW_MASK		0x0000FC00
+#define 	XRCSH_MASK		0x00000300
+#define 	XRCSSU_MASK		0x000000C0
+#define		XRCSPW_MASK		0x0000003F
+
+#define XREGADDR	(EXSI_REG_BASE_ADR + 0x24)
+
+#define 	XRADDRINCEN		0x80000000
+#define 	XREGADD_MASK		0x007FFFFF
+
+
+#define XREGDATAR	(EXSI_REG_BASE_ADR + 0x28)
+
+#define		XREGDATA_MASK 		0x0000FFFF
+
+#define GPIOOER		(EXSI_REG_BASE_ADR + 0x40)
+
+#define GPIOODENR	(EXSI_REG_BASE_ADR + 0x44)
+
+#define GPIOINVR	(EXSI_REG_BASE_ADR + 0x48)
+
+#define GPIODATAOR	(EXSI_REG_BASE_ADR + 0x4C)
+
+#define GPIODATAIR	(EXSI_REG_BASE_ADR + 0x50)
+
+#define GPIOCNFGR	(EXSI_REG_BASE_ADR + 0x54)
+
+#define		GPIO_EXTSRC		0x00000001
+
+#define SCNTRLR		(EXSI_REG_BASE_ADR + 0xA0)
+
+#define 	SXFERDONE		0x00000100
+#define 	SXFERCNT_MASK		0x000000E0
+#define 	SCMDTYP_MASK		0x0000001C
+#define 	SXFERSTART		0x00000002
+#define 	SXFEREN			0x00000001
+
+#define	SRATER		(EXSI_REG_BASE_ADR + 0xA4)
+
+#define	SADDRR		(EXSI_REG_BASE_ADR + 0xA8)
+
+#define 	SADDR_MASK		0x0000FFFF
+
+#define SDATAOR		(EXSI_REG_BASE_ADR + 0xAC)
+
+#define	SDATAOR0	(EXSI_REG_BASE_ADR + 0xAC)
+#define SDATAOR1	(EXSI_REG_BASE_ADR + 0xAD)
+#define SDATAOR2	(EXSI_REG_BASE_ADR + 0xAE)
+#define SDATAOR3	(EXSI_REG_BASE_ADR + 0xAF)
+
+#define SDATAIR		(EXSI_REG_BASE_ADR + 0xB0)
+
+#define SDATAIR0	(EXSI_REG_BASE_ADR + 0xB0)
+#define SDATAIR1	(EXSI_REG_BASE_ADR + 0xB1)
+#define SDATAIR2	(EXSI_REG_BASE_ADR + 0xB2)
+#define SDATAIR3	(EXSI_REG_BASE_ADR + 0xB3)
+
+#define ASISTAT0R	(EXSI_REG_BASE_ADR + 0xD0)
+#define 	ASIFMTERR		0x00000400
+#define 	ASISEECHKERR		0x00000200
+#define 	ASIERR			0x00000100
+
+#define ASISTAT1R	(EXSI_REG_BASE_ADR + 0xD4)
+#define 	CHECKSUM_MASK		0x0000FFFF
+
+#define ASIERRADDR	(EXSI_REG_BASE_ADR + 0xD8)
+#define ASIERRDATAR	(EXSI_REG_BASE_ADR + 0xDC)
+#define ASIERRSTATR	(EXSI_REG_BASE_ADR + 0xE0)
+#define 	CPI2ASIBYTECNT_MASK	0x00070000
+#define 	CPI2ASIBYTEEN_MASK      0x0000F000
+#define 	CPI2ASITARGERR_MASK	0x00000F00
+#define 	CPI2ASITARGMID_MASK	0x000000F0
+#define 	CPI2ASIMSTERR_MASK	0x0000000F
+
+/*
+ * XSRAM, External SRAM (DWord and any BE pattern accessible)
+ */
+#define XSRAM_REG_BASE_ADDR             0xB8100000
+#define XSRAM_SIZE                        0x100000
+
+/*
+ * NVRAM Registers, Address Range: (0x00000 - 0x3FFFF).
+ */
+#define		NVRAM_REG_BASE_ADR	0xBF800000
+#define		NVRAM_MAX_BASE_ADR	0x003FFFFF
+
+/* OCM base address */
+#define		OCM_BASE_ADDR		0xA0000000
+#define		OCM_MAX_SIZE		0x20000
+
+/*
+ * Sequencers (Central and Link) Scratch RAM page definitions.
+ */
+
+/*
+ * The Central Management Sequencer (CSEQ) Scratch Memory is a 1024
+ * byte memory.  It is dword accessible and has byte parity
+ * protection. The CSEQ accesses it in 32 byte windows, either as mode
+ * dependent or mode independent memory. Each mode has 96 bytes,
+ * (three 32 byte pages 0-2, not contiguous), leaving 128 bytes of
+ * Mode Independent memory (four 32 byte pages 4-7). Note that mode
+ * dependent scratch memory, Mode 8, page 0-3 overlaps mode
+ * independent scratch memory, pages 0-3.
+ * - 896 bytes of mode dependent scratch, 96 bytes per Modes 0-7, and
+ * 128 bytes in mode 8,
+ * - 256 bytes of mode independent scratch, common to modes 0-15.
+ *
+ * Sequencer scratch RAM is 1024 bytes.  This scratch memory is
+ * divided into mode dependent and mode independent scratch with this
+ * memory further subdivided into pages of size 32 bytes. There are 5
+ * pages (160 bytes) of mode independent scratch and 3 pages of
+ * dependent scratch memory for modes 0-7 (768 bytes). Mode 8 pages
+ * 0-2 dependent scratch overlap with pages 0-2 of mode independent
+ * scratch memory.
+ *
+ * The host accesses this scratch in a different manner from the
+ * central sequencer. The sequencer has to use CSEQ registers CSCRPAGE
+ * and CMnSCRPAGE to access the scratch memory. A flat mapping of the
+ * scratch memory is available for software convenience and to prevent
+ * corruption while the sequencer is running. This memory is mapped
+ * onto addresses 800h - BFFh, total of 400h bytes.
+ *
+ * These addresses are mapped as follows:
+ *
+ *        800h-83Fh   Mode Dependent Scratch Mode 0 Pages 0-1
+ *        840h-87Fh   Mode Dependent Scratch Mode 1 Pages 0-1
+ *        880h-8BFh   Mode Dependent Scratch Mode 2 Pages 0-1
+ *        8C0h-8FFh   Mode Dependent Scratch Mode 3 Pages 0-1
+ *        900h-93Fh   Mode Dependent Scratch Mode 4 Pages 0-1
+ *        940h-97Fh   Mode Dependent Scratch Mode 5 Pages 0-1
+ *        980h-9BFh   Mode Dependent Scratch Mode 6 Pages 0-1
+ *        9C0h-9FFh   Mode Dependent Scratch Mode 7 Pages 0-1
+ *        A00h-A5Fh   Mode Dependent Scratch Mode 8 Pages 0-2
+ *                    Mode Independent Scratch Pages 0-2
+ *        A60h-A7Fh   Mode Dependent Scratch Mode 8 Page 3
+ *                    Mode Independent Scratch Page 3
+ *        A80h-AFFh   Mode Independent Scratch Pages 4-7
+ *        B00h-B1Fh   Mode Dependent Scratch Mode 0 Page 2
+ *        B20h-B3Fh   Mode Dependent Scratch Mode 1 Page 2
+ *        B40h-B5Fh   Mode Dependent Scratch Mode 2 Page 2
+ *        B60h-B7Fh   Mode Dependent Scratch Mode 3 Page 2
+ *        B80h-B9Fh   Mode Dependent Scratch Mode 4 Page 2
+ *        BA0h-BBFh   Mode Dependent Scratch Mode 5 Page 2
+ *        BC0h-BDFh   Mode Dependent Scratch Mode 6 Page 2
+ *        BE0h-BFFh   Mode Dependent Scratch Mode 7 Page 2
+ */
+
+/* General macros */
+#define CSEQ_PAGE_SIZE			32  /* Scratch page size (in bytes) */
+
+/* All macros start with offsets from base + 0x800 (CMAPPEDSCR).
+ * Mode dependent scratch page 0, mode 0.
+ * For modes 1-7 you have to do arithmetic. */
+#define CSEQ_LRM_SAVE_SINDEX		(CMAPPEDSCR + 0x0000)
+#define CSEQ_LRM_SAVE_SCBPTR		(CMAPPEDSCR + 0x0002)
+#define CSEQ_Q_LINK_HEAD		(CMAPPEDSCR + 0x0004)
+#define CSEQ_Q_LINK_TAIL		(CMAPPEDSCR + 0x0006)
+#define CSEQ_LRM_SAVE_SCRPAGE		(CMAPPEDSCR + 0x0008)
+
+/* Mode dependent scratch page 0 mode 8 macros. */
+#define CSEQ_RET_ADDR			(CMAPPEDSCR + 0x0200)
+#define CSEQ_RET_SCBPTR			(CMAPPEDSCR + 0x0202)
+#define CSEQ_SAVE_SCBPTR		(CMAPPEDSCR + 0x0204)
+#define CSEQ_EMPTY_TRANS_CTX		(CMAPPEDSCR + 0x0206)
+#define CSEQ_RESP_LEN			(CMAPPEDSCR + 0x0208)
+#define CSEQ_TMF_SCBPTR			(CMAPPEDSCR + 0x020A)
+#define CSEQ_GLOBAL_PREV_SCB		(CMAPPEDSCR + 0x020C)
+#define CSEQ_GLOBAL_HEAD		(CMAPPEDSCR + 0x020E)
+#define CSEQ_CLEAR_LU_HEAD		(CMAPPEDSCR + 0x0210)
+#define CSEQ_TMF_OPCODE			(CMAPPEDSCR + 0x0212)
+#define CSEQ_SCRATCH_FLAGS		(CMAPPEDSCR + 0x0213)
+#define CSEQ_HSB_SITE                   (CMAPPEDSCR + 0x021A)
+#define CSEQ_FIRST_INV_SCB_SITE		(CMAPPEDSCR + 0x021C)
+#define CSEQ_FIRST_INV_DDB_SITE		(CMAPPEDSCR + 0x021E)
+
+/* Mode dependent scratch page 1 mode 8 macros. */
+#define CSEQ_LUN_TO_CLEAR		(CMAPPEDSCR + 0x0220)
+#define CSEQ_LUN_TO_CHECK		(CMAPPEDSCR + 0x0228)
+
+/* Mode dependent scratch page 2 mode 8 macros */
+#define CSEQ_HQ_NEW_POINTER		(CMAPPEDSCR + 0x0240)
+#define CSEQ_HQ_DONE_BASE		(CMAPPEDSCR + 0x0248)
+#define CSEQ_HQ_DONE_POINTER		(CMAPPEDSCR + 0x0250)
+#define CSEQ_HQ_DONE_PASS		(CMAPPEDSCR + 0x0254)
+
+/* Mode independent scratch page 4 macros. */
+#define CSEQ_Q_EXE_HEAD			(CMAPPEDSCR + 0x0280)
+#define CSEQ_Q_EXE_TAIL			(CMAPPEDSCR + 0x0282)
+#define CSEQ_Q_DONE_HEAD                (CMAPPEDSCR + 0x0284)
+#define CSEQ_Q_DONE_TAIL                (CMAPPEDSCR + 0x0286)
+#define CSEQ_Q_SEND_HEAD		(CMAPPEDSCR + 0x0288)
+#define CSEQ_Q_SEND_TAIL		(CMAPPEDSCR + 0x028A)
+#define CSEQ_Q_DMA2CHIM_HEAD		(CMAPPEDSCR + 0x028C)
+#define CSEQ_Q_DMA2CHIM_TAIL		(CMAPPEDSCR + 0x028E)
+#define CSEQ_Q_COPY_HEAD		(CMAPPEDSCR + 0x0290)
+#define CSEQ_Q_COPY_TAIL		(CMAPPEDSCR + 0x0292)
+#define CSEQ_REG0			(CMAPPEDSCR + 0x0294)
+#define CSEQ_REG1			(CMAPPEDSCR + 0x0296)
+#define CSEQ_REG2			(CMAPPEDSCR + 0x0298)
+#define CSEQ_LINK_CTL_Q_MAP		(CMAPPEDSCR + 0x029C)
+#define CSEQ_MAX_CSEQ_MODE		(CMAPPEDSCR + 0x029D)
+#define CSEQ_FREE_LIST_HACK_COUNT	(CMAPPEDSCR + 0x029E)
+
+/* Mode independent scratch page 5 macros. */
+#define CSEQ_EST_NEXUS_REQ_QUEUE	(CMAPPEDSCR + 0x02A0)
+#define CSEQ_EST_NEXUS_REQ_COUNT	(CMAPPEDSCR + 0x02A8)
+#define CSEQ_Q_EST_NEXUS_HEAD		(CMAPPEDSCR + 0x02B0)
+#define CSEQ_Q_EST_NEXUS_TAIL		(CMAPPEDSCR + 0x02B2)
+#define CSEQ_NEED_EST_NEXUS_SCB		(CMAPPEDSCR + 0x02B4)
+#define CSEQ_EST_NEXUS_REQ_HEAD		(CMAPPEDSCR + 0x02B6)
+#define CSEQ_EST_NEXUS_REQ_TAIL		(CMAPPEDSCR + 0x02B7)
+#define CSEQ_EST_NEXUS_SCB_OFFSET	(CMAPPEDSCR + 0x02B8)
+
+/* Mode independent scratch page 6 macros. */
+#define CSEQ_INT_ROUT_RET_ADDR0		(CMAPPEDSCR + 0x02C0)
+#define CSEQ_INT_ROUT_RET_ADDR1		(CMAPPEDSCR + 0x02C2)
+#define CSEQ_INT_ROUT_SCBPTR		(CMAPPEDSCR + 0x02C4)
+#define CSEQ_INT_ROUT_MODE		(CMAPPEDSCR + 0x02C6)
+#define CSEQ_ISR_SCRATCH_FLAGS		(CMAPPEDSCR + 0x02C7)
+#define CSEQ_ISR_SAVE_SINDEX		(CMAPPEDSCR + 0x02C8)
+#define CSEQ_ISR_SAVE_DINDEX		(CMAPPEDSCR + 0x02CA)
+#define CSEQ_Q_MONIRTT_HEAD		(CMAPPEDSCR + 0x02D0)
+#define CSEQ_Q_MONIRTT_TAIL		(CMAPPEDSCR + 0x02D2)
+#define CSEQ_FREE_SCB_MASK		(CMAPPEDSCR + 0x02D5)
+#define CSEQ_BUILTIN_FREE_SCB_HEAD	(CMAPPEDSCR + 0x02D6)
+#define CSEQ_BUILTIN_FREE_SCB_TAIL	(CMAPPEDSCR + 0x02D8)
+#define CSEQ_EXTENDED_FREE_SCB_HEAD	(CMAPPEDSCR + 0x02DA)
+#define CSEQ_EXTENDED_FREE_SCB_TAIL	(CMAPPEDSCR + 0x02DC)
+
+/* Mode independent scratch page 7 macros. */
+#define CSEQ_EMPTY_REQ_QUEUE		(CMAPPEDSCR + 0x02E0)
+#define CSEQ_EMPTY_REQ_COUNT		(CMAPPEDSCR + 0x02E8)
+#define CSEQ_Q_EMPTY_HEAD		(CMAPPEDSCR + 0x02F0)
+#define CSEQ_Q_EMPTY_TAIL		(CMAPPEDSCR + 0x02F2)
+#define CSEQ_NEED_EMPTY_SCB		(CMAPPEDSCR + 0x02F4)
+#define CSEQ_EMPTY_REQ_HEAD		(CMAPPEDSCR + 0x02F6)
+#define CSEQ_EMPTY_REQ_TAIL		(CMAPPEDSCR + 0x02F7)
+#define CSEQ_EMPTY_SCB_OFFSET		(CMAPPEDSCR + 0x02F8)
+#define CSEQ_PRIMITIVE_DATA		(CMAPPEDSCR + 0x02FA)
+#define CSEQ_TIMEOUT_CONST		(CMAPPEDSCR + 0x02FC)
+
+/***************************************************************************
+* Link m Sequencer scratch RAM is 512 bytes.
+* This scratch memory is divided into mode dependent and mode
+* independent scratch with this memory further subdivided into
+* pages of size 32 bytes. There are 4 pages (128 bytes) of
+* mode independent scratch and 4 pages of dependent scratch
+* memory for modes 0-2 (384 bytes).
+*
+* The host accesses this scratch in a different manner from the
+* link sequencer. The sequencer has to use LSEQ registers
+* LmSCRPAGE and LmMnSCRPAGE to access the scratch memory. A flat
+* mapping of the scratch memory is available for software
+* convenience and to prevent corruption while the sequencer is
+* running. This memory is mapped onto addresses 800h - 9FFh.
+*
+* These addresses are mapped as follows:
+*
+*        800h-85Fh   Mode Dependent Scratch Mode 0 Pages 0-2
+*        860h-87Fh   Mode Dependent Scratch Mode 0 Page 3
+*                    Mode Dependent Scratch Mode 5 Page 0
+*        880h-8DFh   Mode Dependent Scratch Mode 1 Pages 0-2
+*        8E0h-8FFh   Mode Dependent Scratch Mode 1 Page 3
+*                    Mode Dependent Scratch Mode 5 Page 1
+*        900h-95Fh   Mode Dependent Scratch Mode 2 Pages 0-2
+*        960h-97Fh   Mode Dependent Scratch Mode 2 Page 3
+*                    Mode Dependent Scratch Mode 5 Page 2
+*        980h-9DFh   Mode Independent Scratch Pages 0-2
+*        9E0h-9FFh   Mode Independent Scratch Page 3
+*                    Mode Dependent Scratch Mode 5 Page 3
+*
+****************************************************************************/
+/* General macros */
+#define LSEQ_MODE_SCRATCH_SIZE		0x80 /* Size of scratch RAM per mode */
+#define LSEQ_PAGE_SIZE			0x20 /* Scratch page size (in bytes) */
+#define LSEQ_MODE5_PAGE0_OFFSET 	0x60
+
+/* Common mode dependent scratch page 0 macros for modes 0,1,2, and 5 */
+/* Indexed using LSEQ_MODE_SCRATCH_SIZE * mode, for modes 0,1,2. */
+#define LmSEQ_RET_ADDR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0000)
+#define LmSEQ_REG0_MODE(LinkNum)	(LmSCRATCH(LinkNum) + 0x0002)
+#define LmSEQ_MODE_FLAGS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0004)
+
+/* Mode flag macros (byte 0) */
+#define		SAS_SAVECTX_OCCURRED		0x80
+#define		SAS_OOBSVC_OCCURRED		0x40
+#define		SAS_OOB_DEVICE_PRESENT		0x20
+#define		SAS_CFGHDR_OCCURRED		0x10
+#define		SAS_RCV_INTS_ARE_DISABLED	0x08
+#define		SAS_OOB_HOT_PLUG_CNCT		0x04
+#define		SAS_AWAIT_OPEN_CONNECTION	0x02
+#define		SAS_CFGCMPLT_OCCURRED		0x01
+
+/* Mode flag macros (byte 1) */
+#define		SAS_RLSSCB_OCCURRED		0x80
+#define		SAS_FORCED_HEADER_MISS		0x40
+
+#define LmSEQ_RET_ADDR2(LinkNum)	(LmSCRATCH(LinkNum) + 0x0006)
+#define LmSEQ_RET_ADDR1(LinkNum)	(LmSCRATCH(LinkNum) + 0x0008)
+#define LmSEQ_OPCODE_TO_CSEQ(LinkNum)	(LmSCRATCH(LinkNum) + 0x000B)
+#define LmSEQ_DATA_TO_CSEQ(LinkNum)	(LmSCRATCH(LinkNum) + 0x000C)
+
+/* Mode dependent scratch page 0 macros for mode 0 (non-common) */
+/* Absolute offsets */
+#define LmSEQ_FIRST_INV_DDB_SITE(LinkNum)	(LmSCRATCH(LinkNum) + 0x000E)
+#define LmSEQ_EMPTY_TRANS_CTX(LinkNum)		(LmSCRATCH(LinkNum) + 0x0010)
+#define LmSEQ_RESP_LEN(LinkNum)			(LmSCRATCH(LinkNum) + 0x0012)
+#define LmSEQ_FIRST_INV_SCB_SITE(LinkNum)	(LmSCRATCH(LinkNum) + 0x0014)
+#define LmSEQ_INTEN_SAVE(LinkNum)		(LmSCRATCH(LinkNum) + 0x0016)
+#define LmSEQ_LINK_RST_FRM_LEN(LinkNum)		(LmSCRATCH(LinkNum) + 0x001A)
+#define LmSEQ_LINK_RST_PROTOCOL(LinkNum)	(LmSCRATCH(LinkNum) + 0x001B)
+#define LmSEQ_RESP_STATUS(LinkNum)		(LmSCRATCH(LinkNum) + 0x001C)
+#define LmSEQ_LAST_LOADED_SGE(LinkNum)		(LmSCRATCH(LinkNum) + 0x001D)
+#define LmSEQ_SAVE_SCBPTR(LinkNum)		(LmSCRATCH(LinkNum) + 0x001E)
+
+/* Mode dependent scratch page 0 macros for mode 1 (non-common) */
+/* Absolute offsets */
+#define LmSEQ_Q_XMIT_HEAD(LinkNum)		(LmSCRATCH(LinkNum) + 0x008E)
+#define LmSEQ_M1_EMPTY_TRANS_CTX(LinkNum)	(LmSCRATCH(LinkNum) + 0x0090)
+#define LmSEQ_INI_CONN_TAG(LinkNum)		(LmSCRATCH(LinkNum) + 0x0092)
+#define LmSEQ_FAILED_OPEN_STATUS(LinkNum)	(LmSCRATCH(LinkNum) + 0x009A)
+#define LmSEQ_XMIT_REQUEST_TYPE(LinkNum)	(LmSCRATCH(LinkNum) + 0x009B)
+#define LmSEQ_M1_RESP_STATUS(LinkNum)		(LmSCRATCH(LinkNum) + 0x009C)
+#define LmSEQ_M1_LAST_LOADED_SGE(LinkNum)	(LmSCRATCH(LinkNum) + 0x009D)
+#define LmSEQ_M1_SAVE_SCBPTR(LinkNum)		(LmSCRATCH(LinkNum) + 0x009E)
+
+/* Mode dependent scratch page 0 macros for mode 2 (non-common) */
+#define LmSEQ_PORT_COUNTER(LinkNum)		(LmSCRATCH(LinkNum) + 0x010E)
+#define LmSEQ_PM_TABLE_PTR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0110)
+#define LmSEQ_SATA_INTERLOCK_TMR_SAVE(LinkNum)	(LmSCRATCH(LinkNum) + 0x0112)
+#define LmSEQ_IP_BITL(LinkNum)			(LmSCRATCH(LinkNum) + 0x0114)
+#define LmSEQ_COPY_SMP_CONN_TAG(LinkNum)	(LmSCRATCH(LinkNum) + 0x0116)
+#define LmSEQ_P0M2_OFFS1AH(LinkNum)		(LmSCRATCH(LinkNum) + 0x011A)
+
+/* Mode dependent scratch page 0 macros for modes 4/5 (non-common) */
+/* Absolute offsets */
+#define LmSEQ_SAVED_OOB_STATUS(LinkNum)		(LmSCRATCH(LinkNum) + 0x006E)
+#define LmSEQ_SAVED_OOB_MODE(LinkNum)		(LmSCRATCH(LinkNum) + 0x006F)
+#define LmSEQ_Q_LINK_HEAD(LinkNum)		(LmSCRATCH(LinkNum) + 0x0070)
+#define LmSEQ_LINK_RST_ERR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0072)
+#define LmSEQ_SAVED_OOB_SIGNALS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0073)
+#define LmSEQ_SAS_RESET_MODE(LinkNum)		(LmSCRATCH(LinkNum) + 0x0074)
+#define LmSEQ_LINK_RESET_RETRY_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x0075)
+#define LmSEQ_NUM_LINK_RESET_RETRIES(LinkNum)	(LmSCRATCH(LinkNum) + 0x0076)
+#define LmSEQ_OOB_INT_ENABLES(LinkNum)		(LmSCRATCH(LinkNum) + 0x007A)
+#define LmSEQ_NOTIFY_TIMER_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x007C)
+#define LmSEQ_NOTIFY_TIMER_DOWN_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x007E)
+
+/* Mode dependent scratch page 1, mode 0 and mode 1 */
+#define LmSEQ_SG_LIST_PTR_ADDR0(LinkNum)        (LmSCRATCH(LinkNum) + 0x0020)
+#define LmSEQ_SG_LIST_PTR_ADDR1(LinkNum)        (LmSCRATCH(LinkNum) + 0x0030)
+#define LmSEQ_M1_SG_LIST_PTR_ADDR0(LinkNum)     (LmSCRATCH(LinkNum) + 0x00A0)
+#define LmSEQ_M1_SG_LIST_PTR_ADDR1(LinkNum)     (LmSCRATCH(LinkNum) + 0x00B0)
+
+/* Mode dependent scratch page 1 macros for mode 2 */
+/* Absolute offsets */
+#define LmSEQ_INVALID_DWORD_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x0120)
+#define LmSEQ_DISPARITY_ERROR_COUNT(LinkNum) 	(LmSCRATCH(LinkNum) + 0x0124)
+#define LmSEQ_LOSS_OF_SYNC_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x0128)
+
+/* Mode dependent scratch page 1 macros for mode 4/5 */
+#define LmSEQ_FRAME_TYPE_MASK(LinkNum)	      (LmSCRATCH(LinkNum) + 0x00E0)
+#define LmSEQ_HASHED_DEST_ADDR_MASK(LinkNum)  (LmSCRATCH(LinkNum) + 0x00E1)
+#define LmSEQ_HASHED_SRC_ADDR_MASK_PRINT(LinkNum) (LmSCRATCH(LinkNum) + 0x00E4)
+#define LmSEQ_HASHED_SRC_ADDR_MASK(LinkNum)   (LmSCRATCH(LinkNum) + 0x00E5)
+#define LmSEQ_NUM_FILL_BYTES_MASK(LinkNum)    (LmSCRATCH(LinkNum) + 0x00EB)
+#define LmSEQ_TAG_MASK(LinkNum)		      (LmSCRATCH(LinkNum) + 0x00F0)
+#define LmSEQ_TARGET_PORT_XFER_TAG(LinkNum)   (LmSCRATCH(LinkNum) + 0x00F2)
+#define LmSEQ_DATA_OFFSET(LinkNum)	      (LmSCRATCH(LinkNum) + 0x00F4)
+
+/* Mode dependent scratch page 2 macros for mode 0 */
+/* Absolute offsets */
+#define LmSEQ_SMP_RCV_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0040)
+#define LmSEQ_DEVICE_BITS(LinkNum)		(LmSCRATCH(LinkNum) + 0x005B)
+#define LmSEQ_SDB_DDB(LinkNum)			(LmSCRATCH(LinkNum) + 0x005C)
+#define LmSEQ_SDB_NUM_TAGS(LinkNum)		(LmSCRATCH(LinkNum) + 0x005E)
+#define LmSEQ_SDB_CURR_TAG(LinkNum)		(LmSCRATCH(LinkNum) + 0x005F)
+
+/* Mode dependent scratch page 2 macros for mode 1 */
+/* Absolute offsets */
+/* byte 0 bits 1-0 are domain select. */
+#define LmSEQ_TX_ID_ADDR_FRAME(LinkNum)		(LmSCRATCH(LinkNum) + 0x00C0)
+#define LmSEQ_OPEN_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x00C8)
+#define LmSEQ_SRST_AS_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x00CC)
+#define LmSEQ_LAST_LOADED_SG_EL(LinkNum)	(LmSCRATCH(LinkNum) + 0x00D4)
+
+/* Mode dependent scratch page 2 macros for mode 2 */
+/* Absolute offsets */
+#define LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0140)
+#define LmSEQ_CLOSE_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0144)
+#define LmSEQ_BREAK_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0148)
+#define LmSEQ_DWS_RESET_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x014C)
+#define LmSEQ_SATA_INTERLOCK_TIMER_TERM_TS(LinkNum) \
+						(LmSCRATCH(LinkNum) + 0x0150)
+#define LmSEQ_MCTL_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0154)
+
+/* Mode dependent scratch page 2 macros for mode 5 */
+#define LmSEQ_COMINIT_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0160)
+#define LmSEQ_RCV_ID_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0164)
+#define LmSEQ_RCV_FIS_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0168)
+#define LmSEQ_DEV_PRES_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x016C)
+
+/* Mode dependent scratch page 3 macros for modes 0 and 1 */
+/* None defined */
+
+/* Mode dependent scratch page 3 macros for modes 2 and 5 */
+/* None defined */
+
+/* Mode Independent Scratch page 0 macros. */
+#define LmSEQ_Q_TGTXFR_HEAD(LinkNum)	(LmSCRATCH(LinkNum) + 0x0180)
+#define LmSEQ_Q_TGTXFR_TAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x0182)
+#define LmSEQ_LINK_NUMBER(LinkNum)	(LmSCRATCH(LinkNum) + 0x0186)
+#define LmSEQ_SCRATCH_FLAGS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0187)
+/*
+ * Currently only bit 0, SAS_DWSAQD, is used.
+ */
+#define		SAS_DWSAQD			0x01  /*
+						       * DWSSTATUS: DWSAQD
+						       * bit last read in ISR.
+						       */
+#define  LmSEQ_CONNECTION_STATE(LinkNum) (LmSCRATCH(LinkNum) + 0x0188)
+/* Connection states (byte 0) */
+#define		SAS_WE_OPENED_CS		0x01
+#define		SAS_DEVICE_OPENED_CS		0x02
+#define		SAS_WE_SENT_DONE_CS		0x04
+#define		SAS_DEVICE_SENT_DONE_CS		0x08
+#define		SAS_WE_SENT_CLOSE_CS		0x10
+#define		SAS_DEVICE_SENT_CLOSE_CS	0x20
+#define		SAS_WE_SENT_BREAK_CS		0x40
+#define		SAS_DEVICE_SENT_BREAK_CS	0x80
+/* Connection states (byte 1) */
+#define		SAS_OPN_TIMEOUT_OR_OPN_RJCT_CS	0x01
+#define		SAS_AIP_RECEIVED_CS		0x02
+#define		SAS_CREDIT_TIMEOUT_OCCURRED_CS	0x04
+#define		SAS_ACKNAK_TIMEOUT_OCCURRED_CS	0x08
+#define		SAS_SMPRSP_TIMEOUT_OCCURRED_CS	0x10
+#define		SAS_DONE_TIMEOUT_OCCURRED_CS	0x20
+/* Connection states (byte 2) */
+#define		SAS_SMP_RESPONSE_RECEIVED_CS	0x01
+#define		SAS_INTLK_TIMEOUT_OCCURRED_CS	0x02
+#define		SAS_DEVICE_SENT_DMAT_CS		0x04
+#define		SAS_DEVICE_SENT_SYNCSRST_CS	0x08
+#define		SAS_CLEARING_AFFILIATION_CS	0x20
+#define		SAS_RXTASK_ACTIVE_CS		0x40
+#define		SAS_TXTASK_ACTIVE_CS		0x80
+/* Connection states (byte 3) */
+#define		SAS_PHY_LOSS_OF_SIGNAL_CS	0x01
+#define		SAS_DWS_TIMER_EXPIRED_CS	0x02
+#define		SAS_LINK_RESET_NOT_COMPLETE_CS	0x04
+#define		SAS_PHY_DISABLED_CS		0x08
+#define		SAS_LINK_CTL_TASK_ACTIVE_CS	0x10
+#define		SAS_PHY_EVENT_TASK_ACTIVE_CS	0x20
+#define		SAS_DEVICE_SENT_ID_FRAME_CS	0x40
+#define		SAS_DEVICE_SENT_REG_FIS_CS	0x40	/* same bit as SAS_DEVICE_SENT_ID_FRAME_CS */
+#define		SAS_DEVICE_SENT_HARD_RESET_CS	0x80
+#define  	SAS_PHY_IS_DOWN_FLAGS	(SAS_PHY_LOSS_OF_SIGNAL_CS|\
+					 SAS_DWS_TIMER_EXPIRED_CS |\
+					 SAS_LINK_RESET_NOT_COMPLETE_CS|\
+					 SAS_PHY_DISABLED_CS)
+
+#define		SAS_LINK_CTL_PHY_EVENT_FLAGS   (SAS_LINK_CTL_TASK_ACTIVE_CS |\
+						SAS_PHY_EVENT_TASK_ACTIVE_CS |\
+						SAS_DEVICE_SENT_ID_FRAME_CS  |\
+						SAS_DEVICE_SENT_HARD_RESET_CS)
+
+#define LmSEQ_CONCTL(LinkNum)		(LmSCRATCH(LinkNum) + 0x018C)
+#define LmSEQ_CONSTAT(LinkNum)		(LmSCRATCH(LinkNum) + 0x018E)
+#define LmSEQ_CONNECTION_MODES(LinkNum)	(LmSCRATCH(LinkNum) + 0x018F)
+#define LmSEQ_REG1_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0192)
+#define LmSEQ_REG2_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0194)
+#define LmSEQ_REG3_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0196)
+#define LmSEQ_REG0_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0198)
+
+/* Mode independent scratch page 1 macros. */
+#define LmSEQ_EST_NEXUS_SCBPTR0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A0)
+#define LmSEQ_EST_NEXUS_SCBPTR1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A2)
+#define LmSEQ_EST_NEXUS_SCBPTR2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A4)
+#define LmSEQ_EST_NEXUS_SCBPTR3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A6)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A8)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A9)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AA)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AB)
+#define LmSEQ_EST_NEXUS_SCB_HEAD(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AC)
+#define LmSEQ_EST_NEXUS_SCB_TAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AD)
+#define LmSEQ_EST_NEXUS_BUF_AVAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AE)
+#define LmSEQ_TIMEOUT_CONST(LinkNum)		(LmSCRATCH(LinkNum) + 0x01B8)
+#define LmSEQ_ISR_SAVE_SINDEX(LinkNum)	        (LmSCRATCH(LinkNum) + 0x01BC)
+#define LmSEQ_ISR_SAVE_DINDEX(LinkNum)	        (LmSCRATCH(LinkNum) + 0x01BE)
+
+/* Mode independent scratch page 2 macros. */
+#define LmSEQ_EMPTY_SCB_PTR0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C0)
+#define LmSEQ_EMPTY_SCB_PTR1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C2)
+#define LmSEQ_EMPTY_SCB_PTR2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C4)
+#define LmSEQ_EMPTY_SCB_PTR3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C6)
+#define LmSEQ_EMPTY_SCB_OPCD0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C8)
+#define LmSEQ_EMPTY_SCB_OPCD1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C9)
+#define LmSEQ_EMPTY_SCB_OPCD2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CA)
+#define LmSEQ_EMPTY_SCB_OPCD3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CB)
+#define LmSEQ_EMPTY_SCB_HEAD(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CC)
+#define LmSEQ_EMPTY_SCB_TAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CD)
+#define LmSEQ_EMPTY_BUFS_AVAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CE)
+#define LmSEQ_ATA_SCR_REGS(LinkNum)	(LmSCRATCH(LinkNum) + 0x01D4)
+
+/* Mode independent scratch page 3 macros. */
+#define LmSEQ_DEV_PRES_TMR_TOUT_CONST(LinkNum)	(LmSCRATCH(LinkNum) + 0x01E0)
+#define LmSEQ_SATA_INTERLOCK_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01E4)
+#define LmSEQ_STP_SHUTDOWN_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01E8)
+#define LmSEQ_SRST_ASSERT_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01EC)
+#define LmSEQ_RCV_FIS_TIMEOUT(LinkNum)		(LmSCRATCH(LinkNum) + 0x01F0)
+#define LmSEQ_ONE_MILLISEC_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01F4)
+#define LmSEQ_TEN_MS_COMINIT_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01F8)
+#define LmSEQ_SMP_RCV_TIMEOUT(LinkNum)		(LmSCRATCH(LinkNum) + 0x01FC)
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_sas.h b/drivers/scsi/aic94xx/aic94xx_sas.h
new file mode 100644
index 0000000000000..64d2317123451
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_sas.h
@@ -0,0 +1,785 @@
+/*
+ * Aic94xx SAS/SATA driver SAS definitions and hardware interface header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_SAS_H_
+#define _AIC94XX_SAS_H_
+
+#include <scsi/libsas.h>
+
+/* ---------- DDBs ---------- */
+/* DDBs are device descriptor blocks which describe a device in the
+ * domain that this sequencer can maintain low-level connections for
+ * us.  They are 64 bytes.
+ */
+
+struct asd_ddb_ssp_smp_target_port {
+	u8     conn_type;	  /* byte 0 */
+#define DDB_TP_CONN_TYPE 0x81	  /* Initiator port and addr frame type 0x01 */
+
+	u8     conn_rate;
+	__be16 init_conn_tag;
+	u8     dest_sas_addr[8];  /* bytes 4-11 */
+
+	__le16 send_queue_head;
+	u8     sq_suspended;
+	u8     ddb_type;	  /* DDB_TYPE_TARGET */
+#define DDB_TYPE_UNUSED    0xFF
+#define DDB_TYPE_TARGET    0xFE
+#define DDB_TYPE_INITIATOR 0xFD
+#define DDB_TYPE_PM_PORT   0xFC
+
+	__le16 _r_a;
+	__be16 awt_def;
+
+	u8     compat_features;	  /* byte 20 */
+	u8     pathway_blocked_count;
+	__be16 arb_wait_time;
+	__be32 more_compat_features; /* byte 24 */
+
+	u8     conn_mask;
+	u8     flags;	  /* concurrent conn:2,2 and open:0(1) */
+#define CONCURRENT_CONN_SUPP 0x04
+#define OPEN_REQUIRED        0x01
+
+	u16    _r_b;
+	__le16 exec_queue_tail;
+	__le16 send_queue_tail;
+	__le16 sister_ddb;
+
+	__le16 _r_c;
+
+	u8     max_concurrent_conn;
+	u8     num_concurrent_conn;
+	u8     num_contexts;
+
+	u8     _r_d;
+
+	__le16 active_task_count;
+
+	u8     _r_e[9];
+
+	u8     itnl_reason;	  /* I_T nexus loss reason */
+
+	__le16 _r_f;
+
+	__le16 itnl_timeout;
+#define ITNL_TIMEOUT_CONST 0x7D0 /* 2 seconds */
+
+	__le32 itnl_timestamp;
+} __attribute__ ((packed));
+
+struct asd_ddb_stp_sata_target_port {
+	u8     conn_type;	  /* byte 0 */
+	u8     conn_rate;
+	__be16 init_conn_tag;
+	u8     dest_sas_addr[8];  /* bytes 4-11 */
+
+	__le16 send_queue_head;
+	u8     sq_suspended;
+	u8     ddb_type;	  /* DDB_TYPE_TARGET */
+
+	__le16 _r_a;
+
+	__be16 awt_def;
+	u8     compat_features;	  /* byte 20 */
+	u8     pathway_blocked_count;
+	__be16 arb_wait_time;
+	__be32 more_compat_features; /* byte 24 */
+
+	u8     conn_mask;
+	u8     flags;	  /* concurrent conn:2,2 and open:0(1) */
+#define SATA_MULTIPORT     0x80
+#define SUPPORTS_AFFIL     0x40
+#define STP_AFFIL_POL      0x20
+
+	u8     _r_b;
+	u8     flags2;		  /* STP close policy:0 */
+#define STP_CL_POL_NO_TX    0x00
+#define STP_CL_POL_BTW_CMDS 0x01
+
+	__le16 exec_queue_tail;
+	__le16 send_queue_tail;
+	__le16 sister_ddb;
+	__le16 ata_cmd_scbptr;
+	__le32 sata_tag_alloc_mask;
+	__le16 active_task_count;
+	__le16 _r_c;
+	__le32 sata_sactive;
+	u8     num_sata_tags;
+	u8     sata_status;
+	u8     sata_ending_status;
+	u8     itnl_reason;	  /* I_T nexus loss reason */
+	__le16 ncq_data_scb_ptr;
+	__le16 itnl_timeout;
+	__le32 itnl_timestamp;
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_init_port, describes the device descriptor block
+ * of an initiator port (when the sequencer is operating in target mode).
+ * Bytes [0,11] and [20,27] are from the OPEN address frame.
+ * The sequencer allocates an initiator port DDB entry.
+ */
+struct asd_ddb_init_port {
+	u8     conn_type;	  /* byte 0 */
+	u8     conn_rate;
+	__be16 init_conn_tag;     /* BE */
+	u8     dest_sas_addr[8];
+	__le16 send_queue_head;   /* LE, byte 12 */
+	u8     sq_suspended;
+	u8     ddb_type;	  /* DDB_TYPE_INITIATOR */
+	__le16 _r_a;
+	__be16 awt_def;		  /* BE */
+	u8     compat_features;
+	u8     pathway_blocked_count;
+	__be16 arb_wait_time;	  /* BE */
+	__be32 more_compat_features; /* BE */
+	u8     conn_mask;
+	u8     flags;		  /* == 5 */
+	u16    _r_b;
+	__le16 exec_queue_tail;	  /* execution queue tail */
+	__le16 send_queue_tail;
+	__le16 sister_ddb;
+	__le16 init_resp_timeout; /* initiator response timeout */
+	__le32 _r_c;
+	__le16 active_tasks;	  /* active task count */
+	__le16 init_list;	  /* initiator list link pointer */
+	__le32 _r_d;
+	u8     max_conn_to[3]; /* from Conn-Disc mode page, in us, LE */
+	u8     itnl_reason;	  /* I_T nexus loss reason */
+	__le16 bus_inact_to; /* from Conn-Disc mode page, in 100 us, LE */
+	__le16 itnl_to;		  /* from the Protocol Specific Port Ctrl MP */
+	__le32 itnl_timestamp;
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_sata_tag, describes a look-up table to be used
+ * by the sequencers.  SATA II, IDENTIFY DEVICE data, word 76, bit 8:
+ * NCQ support.  This table is used by the sequencers to find the
+ * corresponding SCB, given a SATA II tag value.
+ */
+struct asd_ddb_sata_tag {
+	__le16 scb_pointer[32];
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_sata_pm_table, describes a port number to
+ * connection handle look-up table.  SATA targets attached to a port
+ * multiplier require a 4-bit port number value.  There is one DDB
+ * entry of this type for each SATA port multiplier (sister DDB).
+ * Given a SATA PM port number, this table gives us the SATA PM Port
+ * DDB of the SATA port multiplier port (i.e. the SATA target
+ * discovered on the port).
+ */
+struct asd_ddb_sata_pm_table {
+	__le16 ddb_pointer[16];
+	__le16 _r_a[16];
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_sata_pm_port, describes the SATA port multiplier
+ * port format DDB.
+ */
+struct asd_ddb_sata_pm_port {
+	u8     _r_a[15];
+	u8     ddb_type;
+	u8     _r_b[13];
+	u8     pm_port_flags;
+#define PM_PORT_MASK  0xF0
+#define PM_PORT_SET   0x02
+	u8     _r_c[6];
+	__le16 sister_ddb;
+	__le16 ata_cmd_scbptr;
+	__le32 sata_tag_alloc_mask;
+	__le16 active_task_count;
+	__le16 parent_ddb;
+	__le32 sata_sactive;
+	u8     num_sata_tags;
+	u8     sata_status;
+	u8     sata_ending_status;
+	u8     _r_d[9];
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_seq_shared, describes a DDB shared by the
+ * central and link sequencers.  port_map_by_links is indexed by phy
+ * number [0,7]; each byte is a bit mask of all the phys that are in
+ * the same port as the indexed phy.
+ */
+struct asd_ddb_seq_shared {
+	__le16 q_free_ddb_head;
+	__le16 q_free_ddb_tail;
+	__le16 q_free_ddb_cnt;
+	__le16 q_used_ddb_head;
+	__le16 q_used_ddb_tail;
+	__le16 shared_mem_lock;
+	__le16 smp_conn_tag;
+	__le16 est_nexus_buf_cnt;
+	__le16 est_nexus_buf_thresh;
+	u32    _r_a;
+	u8     settable_max_contexts;
+	u8     _r_b[23];
+	u8     conn_not_active;
+	u8     phy_is_up;
+	u8     _r_c[8];
+	u8     port_map_by_links[8];
+} __attribute__ ((packed));
+
+/* ---------- SG Element ---------- */
+
+/* This struct sg_el, describes the hardware scatter gather buffer
+ * element.  All entries are little endian.  In an SCB, there are 2 of
+ * these, plus one more, called a link element, which indicates a
+ * sublist if needed.
+ *
+ * A link element has only the bus address set and the flags (DS) bit
+ * valid.  The bus address points to the start of the sublist.
+ *
+ * If a sublist is needed, then that sublist should also include the 2
+ * sg_el embedded in the SCB, in which case next_sg_offset is 32,
+ * since sizeof(sg_el) = 16; EOS should be 1 and EOL 0 in this case.
+ */
+struct sg_el {
+	__le64 bus_addr;
+	__le32 size;
+	__le16 _r;
+	u8     next_sg_offs;
+	u8     flags;
+#define ASD_SG_EL_DS_MASK   0x30
+#define ASD_SG_EL_DS_OCM    0x10
+#define ASD_SG_EL_DS_HM     0x00
+#define ASD_SG_EL_LIST_MASK 0xC0
+#define ASD_SG_EL_LIST_EOL  0x40
+#define ASD_SG_EL_LIST_EOS  0x80
+} __attribute__ ((packed));
+
+/* ---------- SCBs ---------- */
+
+/* An SCB (sequencer control block) is comprised of a common header
+ * and a task part, for a total of 128 bytes.  All fields are in LE
+ * order, unless otherwise noted.
+ */
+
+/* This struct scb_header, defines the SCB header format.
+ */
+struct scb_header {
+	__le64 next_scb;
+	__le16 index;		  /* transaction context */
+	u8     opcode;
+} __attribute__ ((packed));
+
+/* SCB opcodes: Execution queue
+ */
+#define INITIATE_SSP_TASK       0x00
+#define INITIATE_LONG_SSP_TASK  0x01
+#define INITIATE_BIDIR_SSP_TASK 0x02
+#define ABORT_TASK              0x03
+#define INITIATE_SSP_TMF        0x04
+#define SSP_TARG_GET_DATA       0x05
+#define SSP_TARG_GET_DATA_GOOD  0x06
+#define SSP_TARG_SEND_RESP      0x07
+#define QUERY_SSP_TASK          0x08
+#define INITIATE_ATA_TASK       0x09
+#define INITIATE_ATAPI_TASK     0x0a
+#define CONTROL_ATA_DEV         0x0b
+#define INITIATE_SMP_TASK       0x0c
+#define SMP_TARG_SEND_RESP      0x0f
+
+/* SCB opcodes: Send Queue
+ */
+#define SSP_TARG_SEND_DATA      0x40
+#define SSP_TARG_SEND_DATA_GOOD 0x41
+
+/* SCB opcodes: Link Queue
+ */
+#define CONTROL_PHY             0x80
+#define SEND_PRIMITIVE          0x81
+#define INITIATE_LINK_ADM_TASK  0x82
+
+/* SCB opcodes: other
+ */
+#define EMPTY_SCB               0xc0
+#define INITIATE_SEQ_ADM_TASK   0xc1
+#define EST_ICL_TARG_WINDOW     0xc2
+#define COPY_MEM                0xc3
+#define CLEAR_NEXUS             0xc4
+#define INITIATE_DDB_ADM_TASK   0xc6
+#define ESTABLISH_NEXUS_ESCB    0xd0
+
+#define LUN_SIZE                8
+
+/* See SAS spec, task IU
+ */
+struct ssp_task_iu {
+	u8     lun[LUN_SIZE];	  /* BE */
+	u16    _r_a;
+	u8     tmf;
+	u8     _r_b;
+	__be16 tag;		  /* BE */
+	u8     _r_c[14];
+} __attribute__ ((packed));
+
+/* See SAS spec, command IU
+ */
+struct ssp_command_iu {
+	u8     lun[LUN_SIZE];
+	u8     _r_a;
+	u8     efb_prio_attr;	  /* enable first burst, task prio & attr */
+#define EFB_MASK        0x80
+#define TASK_PRIO_MASK	0x78
+#define TASK_ATTR_MASK  0x07
+
+	u8    _r_b;
+	u8     add_cdb_len;	  /* in dwords, since bit 0,1 are reserved */
+	union {
+		u8     cdb[16];
+		struct {
+			__le64 long_cdb_addr;	  /* bus address, LE */
+			__le32 long_cdb_size;	  /* LE */
+			u8     _r_c[3];
+			u8     eol_ds;		  /* eol:6,6, ds:5,4 */
+		} long_cdb;	  /* sequencer extension */
+	};
+} __attribute__ ((packed));
+
+struct xfer_rdy_iu {
+	__be32 requested_offset;  /* BE */
+	__be32 write_data_len;	  /* BE */
+	__be32 _r_a;
+} __attribute__ ((packed));
+
+/* ---------- SCB tasks ---------- */
+
+/* This is both ssp_task and long_ssp_task
+ */
+struct initiate_ssp_task {
+	u8     proto_conn_rate;	  /* proto:6,4, conn_rate:3,0 */
+	__le32 total_xfer_len;
+	struct ssp_frame_hdr  ssp_frame;
+	struct ssp_command_iu ssp_cmd;
+	__le16 sister_scb;	  /* 0xFFFF */
+	__le16 conn_handle;	  /* index to DDB for the intended target */
+	u8     data_dir;	  /* :1,0 */
+#define DATA_DIR_NONE   0x00
+#define DATA_DIR_IN     0x01
+#define DATA_DIR_OUT    0x02
+#define DATA_DIR_BYRECIPIENT 0x03
+
+	u8     _r_a;
+	u8     retry_count;
+	u8     _r_b[5];
+	struct sg_el sg_element[3]; /* 2 real and 1 link */
+} __attribute__ ((packed));
+
+/* This defines both ata_task and atapi_task.
+ * ata: C bit of FIS should be 1,
+ * atapi: C bit of FIS should be 1, and command register should be 0xA0,
+ * to indicate a packet command.
+ */
+struct initiate_ata_task {
+	u8     proto_conn_rate;
+	__le32 total_xfer_len;
+	struct host_to_dev_fis fis;
+	__le32 data_offs;
+	u8     atapi_packet[16];
+	u8     _r_a[12];
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     ata_flags;	  /* CSMI:6,6, DTM:4,4, QT:3,3, data dir:1,0 */
+#define CSMI_TASK           0x40
+#define DATA_XFER_MODE_DMA  0x10
+#define ATA_Q_TYPE_MASK     0x08
+#define	ATA_Q_TYPE_UNTAGGED 0x00
+#define ATA_Q_TYPE_NCQ      0x08
+
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c;
+	u8     flags;
+#define STP_AFFIL_POLICY   0x20
+#define SET_AFFIL_POLICY   0x10
+#define RET_PARTIAL_SGLIST 0x02
+
+	u8     _r_d[3];
+	struct sg_el sg_element[3];
+} __attribute__ ((packed));
+
+struct initiate_smp_task {
+	u8     proto_conn_rate;
+	u8     _r_a[40];
+	struct sg_el smp_req;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     _r_c[8];
+	struct sg_el smp_resp;
+	u8     _r_d[32];
+} __attribute__ ((packed));
+
+struct control_phy {
+	u8     phy_id;
+	u8     sub_func;
+#define DISABLE_PHY            0x00
+#define ENABLE_PHY             0x01
+#define RELEASE_SPINUP_HOLD    0x02
+#define ENABLE_PHY_NO_SAS_OOB  0x03
+#define ENABLE_PHY_NO_SATA_OOB 0x04
+#define PHY_NO_OP              0x05
+#define EXECUTE_HARD_RESET     0x81
+
+	u8     func_mask;
+	u8     speed_mask;
+	u8     hot_plug_delay;
+	u8     port_type;
+	u8     flags;
+#define DEV_PRES_TIMER_OVERRIDE_ENABLE 0x01
+#define DISABLE_PHY_IF_OOB_FAILS       0x02
+
+	__le32 timeout_override;
+	u8     link_reset_retries;
+	u8     _r_a[47];
+	__le16 conn_handle;
+	u8     _r_b[56];
+} __attribute__ ((packed));
+
+struct control_ata_dev {
+	u8     proto_conn_rate;
+	__le32 _r_a;
+	struct host_to_dev_fis fis;
+	u8     _r_b[32];
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     ata_flags;	  /* 0 */
+	u8     _r_c[55];
+} __attribute__ ((packed));
+
+struct empty_scb {
+	u8     num_valid;
+	__le32 _r_a;
+#define ASD_EDBS_PER_SCB 7
+/* header+data+CRC+DMA suffix data */
+#define ASD_EDB_SIZE (24+1024+4+16)
+	struct sg_el eb[ASD_EDBS_PER_SCB];
+#define ELEMENT_NOT_VALID  0xC0
+} __attribute__ ((packed));
+
+struct initiate_link_adm {
+	u8     phy_id;
+	u8     sub_func;
+#define GET_LINK_ERROR_COUNT      0x00
+#define RESET_LINK_ERROR_COUNT    0x01
+#define ENABLE_NOTIFY_SPINUP_INTS 0x02
+
+	u8     _r_a[57];
+	__le16 conn_handle;
+	u8     _r_b[56];
+} __attribute__ ((packed));
+
+struct copy_memory {
+	u8     _r_a;
+	__le16 xfer_len;
+	__le16 _r_b;
+	__le64 src_busaddr;
+	u8     src_ds;		  /* See definition of sg_el */
+	u8     _r_c[45];
+	__le16 conn_handle;
+	__le64 _r_d;
+	__le64 dest_busaddr;
+	u8     dest_ds;		  /* See definition of sg_el */
+	u8     _r_e[39];
+} __attribute__ ((packed));
+
+struct abort_task {
+	u8     proto_conn_rate;
+	__le32 _r_a;
+	struct ssp_frame_hdr ssp_frame;
+	struct ssp_task_iu ssp_task;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     flags;	  /* ovrd_itnl_timer:3,3, suspend_data_trans:2,2 */
+#define SUSPEND_DATA_TRANS 0x04
+
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c[5];
+	__le16 index;  /* Transaction context of task to be queried */
+	__le16 itnl_to;
+	u8     _r_d[44];
+} __attribute__ ((packed));
+
+struct clear_nexus {
+	u8     nexus;
+#define NEXUS_ADAPTER  0x00
+#define NEXUS_PORT     0x01
+#define NEXUS_I_T      0x02
+#define NEXUS_I_T_L    0x03
+#define NEXUS_TAG      0x04
+#define NEXUS_TRANS_CX 0x05
+#define NEXUS_SATA_TAG 0x06
+#define NEXUS_T_L      0x07
+#define NEXUS_L        0x08
+#define NEXUS_T_TAG    0x09
+
+	__le32 _r_a;
+	u8     flags;
+#define SUSPEND_TX     0x80
+#define RESUME_TX      0x40
+#define SEND_Q         0x04
+#define EXEC_Q         0x02
+#define NOTINQ         0x01
+
+	u8     _r_b[3];
+	u8     conn_mask;
+	u8     _r_c[19];
+	struct ssp_task_iu ssp_task; /* LUN and TAG */
+	__le16 _r_d;
+	__le16 conn_handle;
+	__le64 _r_e;
+	__le16 index;  /* Transaction context of task to be cleared */
+	__le16 context;		  /* Clear nexus context */
+	u8     _r_f[44];
+} __attribute__ ((packed));
+
+struct initiate_ssp_tmf {
+	u8     proto_conn_rate;
+	__le32 _r_a;
+	struct ssp_frame_hdr ssp_frame;
+	struct ssp_task_iu ssp_task;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     flags;	  /* itnl override and suspend data tx */
+#define OVERRIDE_ITNL_TIMER  8
+
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c[5];
+	__le16 index;  /* Transaction context of task to be queried */
+	__le16 itnl_to;
+	u8     _r_d[44];
+} __attribute__ ((packed));
+
+/* Transmits an arbitrary primitive on the link.
+ * Used for NOTIFY and BROADCAST.
+ */
+struct send_prim {
+	u8     phy_id;
+	u8     wait_transmit; 	  /* :0,0 */
+	u8     xmit_flags;
+#define XMTPSIZE_MASK      0xF0
+#define XMTPSIZE_SINGLE    0x10
+#define XMTPSIZE_REPEATED  0x20
+#define XMTPSIZE_CONT      0x20
+#define XMTPSIZE_TRIPLE    0x30
+#define XMTPSIZE_REDUNDANT 0x60
+#define XMTPSIZE_INF       0
+
+#define XMTCONTEN          0x04
+#define XMTPFRM            0x02	  /* Transmit at the next frame boundary */
+#define XMTPIMM            0x01	  /* Transmit immediately */
+
+	__le16 _r_a;
+	u8     prim[4];		  /* K, D0, D1, D2 */
+	u8     _r_b[50];
+	__le16 conn_handle;
+	u8     _r_c[56];
+} __attribute__ ((packed));
+
+/* This describes both SSP Target Get Data and SSP Target Get Data And
+ * Send Good Response SCBs.  Used when the sequencer is operating in
+ * target mode...
+ */
+struct ssp_targ_get_data {
+	u8     proto_conn_rate;
+	__le32 total_xfer_len;
+	struct ssp_frame_hdr ssp_frame;
+	struct xfer_rdy_iu  xfer_rdy;
+	u8     lun[LUN_SIZE];
+	__le64 _r_a;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     data_dir;	  /* 01b */
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c[5];
+	struct sg_el sg_element[3];
+} __attribute__ ((packed));
+
+/* ---------- The actual SCB struct ---------- */
+
+struct scb {
+	struct scb_header header;
+	union {
+		struct initiate_ssp_task ssp_task;
+		struct initiate_ata_task ata_task;
+		struct initiate_smp_task smp_task;
+		struct control_phy       control_phy;
+		struct control_ata_dev   control_ata_dev;
+		struct empty_scb         escb;
+		struct initiate_link_adm link_adm;
+		struct copy_memory       cp_mem;
+		struct abort_task        abort_task;
+		struct clear_nexus       clear_nexus;
+		struct initiate_ssp_tmf  ssp_tmf;
+	};
+} __attribute__ ((packed));
+
+/* ---------- Done List ---------- */
+/* The done list entry opcode field is defined below.
+ * The mnemonic encoding and meaning is as follows:
+ * TC - Task Complete, status was received and acknowledged
+ * TF - Task Failed, indicates an error prior to receiving acknowledgment
+ *   for the command:
+ *   - no conn,
+ *   - NACK or R_ERR received in response to this command,
+ *   - credit blocked or not available, or in the case of SMP request,
+ *   - no SMP response was received.
+ *   In these four cases it is known that the target didn't receive the
+ *   command.
+ * TI - Task Interrupted, error after the command was acknowledged.  It is
+ *   known that the command was received by the target.
+ * TU - Task Unacked, command was transmitted but neither ACK (R_OK) nor NAK
+ *   (R_ERR) was received due to loss of signal, broken connection, loss of
+ *   dword sync or other reason.  The application client should send the
+ *   appropriate task query.
+ * TA - Task Aborted, see TF.
+ * _RESP - The completion includes an empty buffer containing status.
+ * TO - Timeout.
+ */
+#define TC_NO_ERROR             0x00
+#define TC_UNDERRUN             0x01
+#define TC_OVERRUN              0x02
+#define TF_OPEN_TO              0x03
+#define TF_OPEN_REJECT          0x04
+#define TI_BREAK                0x05
+#define TI_PROTO_ERR            0x06
+#define TC_SSP_RESP             0x07
+#define TI_PHY_DOWN             0x08
+#define TF_PHY_DOWN             0x09
+#define TC_LINK_ADM_RESP        0x0a
+#define TC_CSMI                 0x0b
+#define TC_ATA_RESP             0x0c
+#define TU_PHY_DOWN             0x0d
+#define TU_BREAK                0x0e
+#define TI_SATA_TO              0x0f
+#define TI_NAK                  0x10
+#define TC_CONTROL_PHY          0x11
+#define TF_BREAK                0x12
+#define TC_RESUME               0x13
+#define TI_ACK_NAK_TO           0x14
+#define TF_SMPRSP_TO            0x15
+#define TF_SMP_XMIT_RCV_ERR     0x16
+#define TC_PARTIAL_SG_LIST      0x17
+#define TU_ACK_NAK_TO           0x18
+#define TU_SATA_TO              0x19
+#define TF_NAK_RECV             0x1a
+#define TA_I_T_NEXUS_LOSS       0x1b
+#define TC_ATA_R_ERR_RECV       0x1c
+#define TF_TMF_NO_CTX           0x1d
+#define TA_ON_REQ               0x1e
+#define TF_TMF_NO_TAG           0x1f
+#define TF_TMF_TAG_FREE         0x20
+#define TF_TMF_TASK_DONE        0x21
+#define TF_TMF_NO_CONN_HANDLE   0x22
+#define TC_TASK_CLEARED         0x23
+#define TI_SYNCS_RECV           0x24
+#define TU_SYNCS_RECV           0x25
+#define TF_IRTT_TO              0x26
+#define TF_NO_SMP_CONN          0x27
+#define TF_IU_SHORT             0x28
+#define TF_DATA_OFFS_ERR        0x29
+#define TF_INV_CONN_HANDLE      0x2a
+#define TF_REQUESTED_N_PENDING  0x2b
+
+/* 0xc1 - 0xc7: empty buffer received,
+   0xd1 - 0xd7: establish nexus empty buffer received
+*/
+/* This is the ESCB mask */
+#define ESCB_RECVD              0xC0
+
+
+/* This struct done_list_struct defines the done list entry.
+ * All fields are LE.
+ */
+struct done_list_struct {
+	__le16 index;		  /* aka transaction context */
+	u8     opcode;
+	u8     status_block[4];
+	u8     toggle;		  /* bit 0 */
+#define DL_TOGGLE_MASK     0x01
+} __attribute__ ((packed));
+
+/* ---------- PHYS ---------- */
+
+struct asd_phy {
+	struct asd_sas_phy        sas_phy;
+	struct asd_phy_desc   *phy_desc; /* hw profile */
+
+	struct sas_identify_frame *identify_frame;
+	struct asd_dma_tok  *id_frm_tok;
+
+	u8         frame_rcvd[ASD_EDB_SIZE];
+};
+
+
+#define ASD_SCB_SIZE sizeof(struct scb)
+#define ASD_DDB_SIZE sizeof(struct asd_ddb_ssp_smp_target_port)
+
+/* Define this to 0 if you do not want NOTIFY (ENABLE SPINUP) sent.
+ * Default: 0x10 (it's a mask)
+ */
+#define ASD_NOTIFY_ENABLE_SPINUP  0x10
+
+/* If enabled, set this to the interval between transmission
+ * of NOTIFY (ENABLE SPINUP). In units of 200 us.
+ */
+#define ASD_NOTIFY_TIMEOUT        2500
+
+/* Initial delay after OOB, before we transmit NOTIFY (ENABLE SPINUP).
+ * If 0, transmit immediately. In milliseconds.
+ */
+#define ASD_NOTIFY_DOWN_COUNT     0
+
+/* Device present timer timeout constant, 10 ms. */
+#define ASD_DEV_PRESENT_TIMEOUT   0x2710
+
+#define ASD_SATA_INTERLOCK_TIMEOUT 0
+
+/* How long to wait before shutting down an STP connection, unless
+ * an STP target sent frame(s). 50 usec.
+ * IGNORED by the sequencer (i.e. value 0 always).
+ */
+#define ASD_STP_SHUTDOWN_TIMEOUT  0x0
+
+/* ATA soft reset timer timeout. 5 usec. */
+#define ASD_SRST_ASSERT_TIMEOUT   0x05
+
+/* 31 sec */
+#define ASD_RCV_FIS_TIMEOUT       0x01D905C0
+
+#define ASD_ONE_MILLISEC_TIMEOUT  0x03e8
+
+/* COMINIT timer */
+#define ASD_TEN_MILLISEC_TIMEOUT  0x2710
+#define ASD_COMINIT_TIMEOUT ASD_TEN_MILLISEC_TIMEOUT
+
+/* 1 sec */
+#define ASD_SMP_RCV_TIMEOUT       0x000F4240
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c
new file mode 100644
index 0000000000000..fc1b7438a9132
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_scb.c
@@ -0,0 +1,732 @@
+/*
+ * Aic94xx SAS/SATA driver SCB management.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_seq.h"
+
+#include "aic94xx_dump.h"
+
+/* ---------- EMPTY SCB ---------- */
+
+#define DL_PHY_MASK      7
+#define BYTES_DMAED      0
+#define PRIMITIVE_RECVD  0x08
+#define PHY_EVENT        0x10
+#define LINK_RESET_ERROR 0x18
+#define TIMER_EVENT      0x20
+#define REQ_TASK_ABORT   0xF0
+#define REQ_DEVICE_RESET 0xF1
+#define SIGNAL_NCQ_ERROR 0xF2
+#define CLEAR_NCQ_ERROR  0xF3
+
+#define PHY_EVENTS_STATUS (CURRENT_LOSS_OF_SIGNAL | CURRENT_OOB_DONE   \
+			   | CURRENT_SPINUP_HOLD | CURRENT_GTO_TIMEOUT \
+			   | CURRENT_OOB_ERROR)
+/* Decode oob_mode into the phy's link rate and OOB mode (SAS or SATA). */
+static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode)
+{
+	switch (oob_mode & 7) {	/* low three bits: PHY_SPEED_* code */
+	case PHY_SPEED_60:
+		/* FIXME: sas transport class doesn't have this */
+		phy->sas_phy.linkrate = PHY_LINKRATE_6;
+		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS;
+		break;
+	case PHY_SPEED_30:
+		phy->sas_phy.linkrate = PHY_LINKRATE_3;
+		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	case PHY_SPEED_15:
+		phy->sas_phy.linkrate = PHY_LINKRATE_1_5;
+		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	}	/* any other speed code leaves both link rates unchanged */
+	if (oob_mode & SAS_MODE)	/* SAS_MODE is tested first, so it wins */
+		phy->sas_phy.oob_mode = SAS_OOB_MODE;
+	else if (oob_mode & SATA_MODE)
+		phy->sas_phy.oob_mode = SATA_OOB_MODE;
+}
+/* React to the OOB status in dl: drive the phy LED and notify sas_ha. */
+static inline void asd_phy_event_tasklet(struct asd_ascb *ascb,
+					 struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+	int phy_id = dl->status_block[0] & DL_PHY_MASK;
+	struct asd_phy *phy = &asd_ha->phys[phy_id];
+	/* status_block[1]: OOB status bits; status_block[2]: speed/mode bits */
+	u8 oob_status = dl->status_block[1] & PHY_EVENTS_STATUS;
+	u8 oob_mode   = dl->status_block[2];
+	/* Status values matching no case below are silently ignored. */
+	switch (oob_status) {
+	case CURRENT_LOSS_OF_SIGNAL:
+		/* directly attached device was removed */
+		ASD_DPRINTK("phy%d: device unplugged\n", phy_id);
+		asd_turn_led(asd_ha, phy_id, 0);
+		sas_phy_disconnected(&phy->sas_phy);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
+		break;
+	case CURRENT_OOB_DONE:
+		/* hot plugged device */
+		asd_turn_led(asd_ha, phy_id, 1);
+		get_lrate_mode(phy, oob_mode);
+		ASD_DPRINTK("phy%d device plugged: lrate:0x%x, proto:0x%x\n",
+			    phy_id, phy->sas_phy.linkrate, phy->sas_phy.iproto);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+		break;
+	case CURRENT_SPINUP_HOLD:
+		/* hot plug SATA, no COMWAKE sent */
+		asd_turn_led(asd_ha, phy_id, 1);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
+		break;
+	case CURRENT_GTO_TIMEOUT:
+	case CURRENT_OOB_ERROR:
+		ASD_DPRINTK("phy%d error while OOB: oob status:0x%x\n", phy_id,
+			    dl->status_block[1]);
+		asd_turn_led(asd_ha, phy_id, 0);
+		sas_phy_disconnected(&phy->sas_phy);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
+		break;
+	}
+}
+
+/* If phys are enabled sparsely, this will do the right thing. */
+static inline unsigned ord_phy(struct asd_ha_struct *asd_ha,
+			       struct asd_phy *phy)
+{
+	u8 enabled_mask = asd_ha->hw_prof.enabled_phys;
+	int i, k = 0;	/* k: phys visited by the loop before a match */
+
+	for_each_phy(enabled_mask, enabled_mask, i) {
+		if (&asd_ha->phys[i] == phy)
+			return k;
+		k++;
+	}
+	return 0;	/* phy never visited: same value as the first position */
+}
+
+/**
+ * asd_get_attached_sas_addr -- extract/generate attached SAS address
+ * @phy: pointer to asd_phy
+ * @sas_addr: pointer to buffer where the SAS address is to be written
+ *
+ * This function extracts the SAS address from an IDENTIFY frame
+ * received.  If OOB is SATA and a device-to-host FIS (0x34) was received,
+ * the address is phy_desc->sas_addr + sata_name_base + ord_phy() instead.
+ *
+ * LOCKING: the frame_rcvd_lock needs to be held since this parses the frame
+ * buffer.
+ */
+static inline void asd_get_attached_sas_addr(struct asd_phy *phy, u8 *sas_addr)
+{
+	if (phy->sas_phy.frame_rcvd[0] == 0x34
+	    && phy->sas_phy.oob_mode == SATA_OOB_MODE) {
+		struct asd_ha_struct *asd_ha = phy->sas_phy.ha->lldd_ha;
+		/* FIS device-to-host */
+		u64 addr = be64_to_cpu(*(__be64 *)phy->phy_desc->sas_addr);
+
+		addr += asd_ha->hw_prof.sata_name_base + ord_phy(asd_ha, phy);
+		*(__be64 *)sas_addr = cpu_to_be64(addr);
+	} else {
+		struct sas_identify_frame *idframe =
+			(void *) phy->sas_phy.frame_rcvd;
+		memcpy(sas_addr, idframe->sas_addr, SAS_ADDR_SIZE);
+	}
+}
+/* Copy a received frame from its EDB into the phy and raise BYTES_DMAED. */
+static inline void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb,
+					   struct done_list_struct *dl,
+					   int edb_id, int phy_id)
+{
+	unsigned long flags;
+	int edb_el = edb_id + ascb->edb_index;
+	struct asd_dma_tok *edb = ascb->ha->seq.edb_arr[edb_el];
+	struct asd_phy *phy = &ascb->ha->phys[phy_id];
+	struct sas_ha_struct *sas_ha = phy->sas_phy.ha;
+	u16 size = ((dl->status_block[3] & 7) << 8) | dl->status_block[2];
+	/* Clamp the 11-bit length to the size of phy->frame_rcvd. */
+	size = min(size, (u16) sizeof(phy->frame_rcvd));
+	/* frame_rcvd_lock covers the copy and the address extraction. */
+	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
+	memcpy(phy->sas_phy.frame_rcvd, edb->vaddr, size);
+	phy->sas_phy.frame_rcvd_size = size;
+	asd_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr);
+	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags);
+	asd_dump_frame_rcvd(phy, dl);
+	sas_ha->notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED);
+}
+/* Report a link reset error; re-enable the phy once retries run out. */
+static inline void asd_link_reset_err_tasklet(struct asd_ascb *ascb,
+					      struct done_list_struct *dl,
+					      int phy_id)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+	struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id];
+	u8 lr_error = dl->status_block[1];
+	u8 retries_left = dl->status_block[2];
+
+	switch (lr_error) {
+	case 0:
+		ASD_DPRINTK("phy%d: Receive ID timer expired\n", phy_id);
+		break;
+	case 1:
+		ASD_DPRINTK("phy%d: Loss of signal\n", phy_id);
+		break;
+	case 2:
+		ASD_DPRINTK("phy%d: Loss of dword sync\n", phy_id);
+		break;
+	case 3:
+		ASD_DPRINTK("phy%d: Receive FIS timeout\n", phy_id);
+		break;
+	default:
+		ASD_DPRINTK("phy%d: unknown link reset error code: 0x%x\n",
+			    phy_id, lr_error);
+		break;
+	}
+	/* Whatever the error code: LED off, disconnect, notify the port. */
+	asd_turn_led(asd_ha, phy_id, 0);
+	sas_phy_disconnected(sas_phy);
+	sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+	/* No retries left: build and post an ENABLE_PHY control-phy SCB. */
+	if (retries_left == 0) {
+		int num = 1;
+		struct asd_ascb *cp = asd_ascb_alloc_list(ascb->ha, &num,
+							  GFP_ATOMIC);
+		if (!cp) {
+			asd_printk("%s: out of memory\n", __FUNCTION__);
+			goto out;
+		}
+		ASD_DPRINTK("phy%d: retries:0 performing link reset seq\n",
+			    phy_id);
+		asd_build_control_phy(cp, phy_id, ENABLE_PHY);
+		if (asd_post_ascb_list(ascb->ha, cp, 1) != 0)
+			asd_ascb_free(cp);	/* posting failed: release it */
+	}
+out:
+	;
+}
+/* Decode a received primitive from dl and raise the matching port event. */
+static inline void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb,
+					      struct done_list_struct *dl,
+					      int phy_id)
+{
+	unsigned long flags;
+	struct sas_ha_struct *sas_ha = &ascb->ha->sas_ha;
+	struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id];
+	u8  reg  = dl->status_block[1];
+	u32 cont = (u32) dl->status_block[2] << ((reg & 3)*8);
+	/* u32 cast: shifting a promoted int by 24 could hit the sign bit. */
+	reg &= ~3;	/* drop the byte-lane bits used in the shift above */
+	switch (reg) {
+	case LmPRMSTAT0BYTE0:
+		switch (cont) {
+		case LmBROADCH:
+		case LmBROADRVCH0:
+		case LmBROADRVCH1:
+		case LmBROADSES:
+			ASD_DPRINTK("phy%d: BROADCAST change received:%d\n",
+				    phy_id, cont);
+			spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
+			sas_phy->sas_prim = ffs(cont);
+			spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
+			sas_ha->notify_port_event(sas_phy,PORTE_BROADCAST_RCVD);
+			break;
+
+		case LmUNKNOWNP:
+			ASD_DPRINTK("phy%d: unknown BREAK\n", phy_id);
+			break;
+
+		default:
+			ASD_DPRINTK("phy%d: primitive reg:0x%x, cont:0x%04x\n",
+				    phy_id, reg, cont);
+			break;
+		}
+		break;
+	case LmPRMSTAT1BYTE0:
+		switch (cont) {
+		case LmHARDRST:
+			ASD_DPRINTK("phy%d: HARD_RESET primitive rcvd\n",
+				    phy_id);
+			/* The sequencer disables all phys on that port.
+			 * We have to re-enable the phys ourselves. */
+			sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET);
+			break;
+
+		default:
+			ASD_DPRINTK("phy%d: primitive reg:0x%x, cont:0x%04x\n",
+				    phy_id, reg, cont);
+			break;
+		}
+		break;
+	default:
+		ASD_DPRINTK("unknown primitive register:0x%x\n",
+			    dl->status_block[1]);
+		break;
+	}
+}
+
+/**
+ * asd_invalidate_edb -- retire one empty data buffer (EDB) of an ESCB
+ * @ascb: the empty SCB that owns the buffer
+ * @edb_id: which of the ESCB's buffers to retire; used unchecked as an
+ *          index into escb->eb[] and, offset by ascb->edb_index, into
+ *          seq->edb_arr[]
+ *
+ * The buffer is zeroed (ASD_EDB_SIZE bytes), its element is flagged
+ * ELEMENT_NOT_VALID and the ESCB's count of valid buffers drops by one.
+ * When that count reaches zero all ASD_EDBS_PER_SCB elements are marked
+ * valid again, the ascb is unlinked from whatever list it is on, and the
+ * ESCB is handed back with asd_post_escb_list().  A failure to post is
+ * only logged.
+ *
+ * Context is tasklet/IRQ.
+ */
+void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id)
+{
+	struct empty_scb *escb = &ascb->scb->escb;
+	struct asd_dma_tok *tok;
+	int k, rc;
+
+	/* wipe the buffer and flag its element as consumed */
+	tok = ascb->ha->seq.edb_arr[ascb->edb_index + edb_id];
+	memset(tok->vaddr, 0, ASD_EDB_SIZE);
+	escb->eb[edb_id].flags |= ELEMENT_NOT_VALID;
+
+	if (--escb->num_valid != 0)
+		return;
+
+	/* every EDB is used up: revalidate them all and repost */
+	escb->num_valid = ASD_EDBS_PER_SCB;
+	for (k = 0; k < ASD_EDBS_PER_SCB; k++)
+		escb->eb[k].flags = 0;
+	if (!list_empty(&ascb->list))
+		list_del_init(&ascb->list);
+
+	rc = asd_post_escb_list(ascb->ha, ascb, 1);
+	if (rc)
+		asd_printk("couldn't post escb, err:%d\n", rc);
+}
+
+/* Debug-log everything known about an ESCB event that looks malformed. */
+static void escb_dump_event(struct asd_ascb *ascb, struct done_list_struct *dl,
+			    int edb, u8 sb_opcode, int phy_id)
+{
+	ASD_DPRINTK("edb is 0x%x! dl->opcode is 0x%x\n",
+		    edb, dl->opcode);
+	ASD_DPRINTK("sb_opcode : 0x%x, phy_id: 0x%x\n",
+		    sb_opcode, phy_id);
+	ASD_DPRINTK("escb: vaddr: 0x%p, "
+		    "dma_handle: 0x%llx, next: 0x%llx, "
+		    "index:%d, opcode:0x%02x\n",
+		    ascb->dma_scb.vaddr,
+		    (unsigned long long)ascb->dma_scb.dma_handle,
+		    (unsigned long long)
+		    le64_to_cpu(ascb->scb->header.next_scb),
+		    le16_to_cpu(ascb->scb->header.index),
+		    ascb->scb->header.opcode);
+}
+
+/* Completion handler for empty SCBs: decode the event the sequencer left in
+ * @dl, dispatch it, then give the consumed EDB back via asd_invalidate_edb.
+ * An entry whose EDB index is outside [0,6] is logged and dropped, because
+ * every later step would index ESCB arrays with it. */
+static void escb_tasklet_complete(struct asd_ascb *ascb,
+				  struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+	int edb = (dl->opcode & DL_PHY_MASK) - 1; /* [0xc1,0xc7] -> [0,6] */
+	u8  sb_opcode = dl->status_block[0];
+	int phy_id = sb_opcode & DL_PHY_MASK;
+	struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id];
+
+	if (edb > 6 || edb < 0) {
+		escb_dump_event(ascb, dl, edb, sb_opcode, phy_id);
+		return;
+	}
+
+	sb_opcode &= ~DL_PHY_MASK;
+
+	switch (sb_opcode) {
+	case BYTES_DMAED:
+		ASD_DPRINTK("%s: phy%d: BYTES_DMAED\n", __FUNCTION__, phy_id);
+		asd_bytes_dmaed_tasklet(ascb, dl, edb, phy_id);
+		break;
+	case PRIMITIVE_RECVD:
+		ASD_DPRINTK("%s: phy%d: PRIMITIVE_RECVD\n", __FUNCTION__,
+			    phy_id);
+		asd_primitive_rcvd_tasklet(ascb, dl, phy_id);
+		break;
+	case PHY_EVENT:
+		ASD_DPRINTK("%s: phy%d: PHY_EVENT\n", __FUNCTION__, phy_id);
+		asd_phy_event_tasklet(ascb, dl);
+		break;
+	case LINK_RESET_ERROR:
+		ASD_DPRINTK("%s: phy%d: LINK_RESET_ERROR\n", __FUNCTION__,
+			    phy_id);
+		asd_link_reset_err_tasklet(ascb, dl, phy_id);
+		break;
+	case TIMER_EVENT:
+		ASD_DPRINTK("%s: phy%d: TIMER_EVENT, lost dw sync\n",
+			    __FUNCTION__, phy_id);
+		asd_turn_led(asd_ha, phy_id, 0);
+		/* the device is gone */
+		sas_phy_disconnected(sas_phy);
+		sas_ha->notify_port_event(sas_phy, PORTE_TIMER_EVENT);
+		break;
+	case REQ_TASK_ABORT:
+		ASD_DPRINTK("%s: phy%d: REQ_TASK_ABORT\n", __FUNCTION__,
+			    phy_id);
+		break;
+	case REQ_DEVICE_RESET:
+		ASD_DPRINTK("%s: phy%d: REQ_DEVICE_RESET\n", __FUNCTION__,
+			    phy_id);
+		break;
+	case SIGNAL_NCQ_ERROR:
+		ASD_DPRINTK("%s: phy%d: SIGNAL_NCQ_ERROR\n", __FUNCTION__,
+			    phy_id);
+		break;
+	case CLEAR_NCQ_ERROR:
+		ASD_DPRINTK("%s: phy%d: CLEAR_NCQ_ERROR\n", __FUNCTION__,
+			    phy_id);
+		break;
+	default:
+		ASD_DPRINTK("%s: phy%d: unknown event:0x%x\n", __FUNCTION__,
+			    phy_id, sb_opcode);
+		escb_dump_event(ascb, dl, edb, sb_opcode, phy_id);
+		break;
+	}
+
+	/* done with this event: invalidate its EDB */
+	asd_invalidate_edb(ascb, edb);
+}
+
+/* Hook escb_tasklet_complete into every ESCB, then post them as one list. */
+int asd_init_post_escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int n = 0;
+
+	while (n < seq->num_escbs)
+		seq->escb_arr[n++]->tasklet_complete = escb_tasklet_complete;
+	ASD_DPRINTK("posting %d escbs\n", n);
+	return asd_post_escb_list(asd_ha, seq->escb_arr[0], seq->num_escbs);
+}
+
+/* ---------- CONTROL PHY ---------- */
+
+#define CONTROL_PHY_STATUS (CURRENT_DEVICE_PRESENT | CURRENT_OOB_DONE   \
+			    | CURRENT_SPINUP_HOLD | CURRENT_GTO_TIMEOUT \
+			    | CURRENT_OOB_ERROR) /* five CURRENT_* bits */
+
+/**
+ * control_phy_tasklet_complete -- tasklet complete for CONTROL PHY ascb
+ * @ascb: pointer to an ascb
+ * @dl: pointer to the done list entry
+ *
+ * This function completes a CONTROL PHY scb and frees the ascb.
+ * A note on LEDs:
+ *  - an LED blinks if there is IO through it,
+ *  - if a device is connected to the LED, it is lit,
+ *  - if no device is connected to the LED, it is dimmed (off).
+ */
+static void control_phy_tasklet_complete(struct asd_ascb *ascb,
+					 struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct scb *scb = ascb->scb;
+	struct control_phy *control_phy = &scb->control_phy;
+	u8 phy_id = control_phy->phy_id;
+	struct asd_phy *phy = &ascb->ha->phys[phy_id];
+
+	u8 status     = dl->status_block[0];
+	u8 oob_status = dl->status_block[1];
+	u8 oob_mode   = dl->status_block[2];
+	/* u8 oob_signals= dl->status_block[3]; */
+
+	if (status != 0) {	/* non-zero status: log it and just free */
+		ASD_DPRINTK("%s: phy%d status block opcode:0x%x\n",
+			    __FUNCTION__, phy_id, status);
+		goto out;
+	}
+
+	switch (control_phy->sub_func) {
+	case DISABLE_PHY:
+		asd_ha->hw_prof.enabled_phys &= ~(1 << phy_id);
+		asd_turn_led(asd_ha, phy_id, 0);
+		asd_control_led(asd_ha, phy_id, 0);
+		ASD_DPRINTK("%s: disable phy%d\n", __FUNCTION__, phy_id);
+		break;
+
+	case ENABLE_PHY:
+		asd_control_led(asd_ha, phy_id, 1);
+		/* the first oob_status test that matches decides the outcome */
+		if (oob_status & CURRENT_OOB_DONE) {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			get_lrate_mode(phy, oob_mode);
+			asd_turn_led(asd_ha, phy_id, 1);
+			ASD_DPRINTK("%s: phy%d, lrate:0x%x, proto:0x%x\n",
+				    __FUNCTION__, phy_id,phy->sas_phy.linkrate,
+				    phy->sas_phy.iproto);
+		} else if (oob_status & CURRENT_SPINUP_HOLD) {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			asd_turn_led(asd_ha, phy_id, 1);
+			ASD_DPRINTK("%s: phy%d, spinup hold\n", __FUNCTION__,
+				    phy_id);
+		} else if (oob_status & CURRENT_ERR_MASK) {
+			/* only branch that leaves enabled_phys untouched */
+			asd_turn_led(asd_ha, phy_id, 0);
+			ASD_DPRINTK("%s: phy%d: error: oob status:0x%02x\n",
+				    __FUNCTION__, phy_id, oob_status);
+		} else if (oob_status & (CURRENT_HOT_PLUG_CNCT
+					 | CURRENT_DEVICE_PRESENT))  {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			asd_turn_led(asd_ha, phy_id, 1);
+			ASD_DPRINTK("%s: phy%d: hot plug or device present\n",
+				    __FUNCTION__, phy_id);
+		} else {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			asd_turn_led(asd_ha, phy_id, 0);
+			ASD_DPRINTK("%s: phy%d: no device present: "
+				    "oob_status:0x%x\n",
+				    __FUNCTION__, phy_id, oob_status);
+		}
+		break;
+	case RELEASE_SPINUP_HOLD:
+	case PHY_NO_OP:
+	case EXECUTE_HARD_RESET:
+		ASD_DPRINTK("%s: phy%d: sub_func:0x%x\n", __FUNCTION__,
+			    phy_id, control_phy->sub_func);
+		/* XXX finish */
+		break;
+	default:
+		ASD_DPRINTK("%s: phy%d: sub_func:0x%x?\n", __FUNCTION__,
+			    phy_id, control_phy->sub_func);
+		break;
+	}
+out:
+	asd_ascb_free(ascb);	/* reached on every path */
+}
+
+static inline void set_speed_mask(u8 *speed_mask, struct asd_phy_desc *pd)
+{
+	/* disable all speeds, then enable defaults */
+	*speed_mask = SAS_SPEED_60_DIS | SAS_SPEED_30_DIS | SAS_SPEED_15_DIS
+		| SATA_SPEED_30_DIS | SATA_SPEED_15_DIS;
+
+	switch (pd->max_sas_lrate) {	/* enable the max rate and all below */
+	case PHY_LINKRATE_6:
+		*speed_mask &= ~SAS_SPEED_60_DIS;	/* fall through */
+	default:
+	case PHY_LINKRATE_3:
+		*speed_mask &= ~SAS_SPEED_30_DIS;	/* fall through */
+	case PHY_LINKRATE_1_5:
+		*speed_mask &= ~SAS_SPEED_15_DIS;
+	}
+
+	switch (pd->min_sas_lrate) {	/* re-disable rates below the min */
+	case PHY_LINKRATE_6:
+		*speed_mask |= SAS_SPEED_30_DIS;	/* fall through */
+	case PHY_LINKRATE_3:
+		*speed_mask |= SAS_SPEED_15_DIS;	/* fall through */
+	default:
+	case PHY_LINKRATE_1_5:
+		/* nothing to do */
+		;
+	}
+
+	switch (pd->max_sata_lrate) {	/* same scheme for the SATA rates */
+	case PHY_LINKRATE_3:
+		*speed_mask &= ~SATA_SPEED_30_DIS;	/* fall through */
+	default:
+	case PHY_LINKRATE_1_5:
+		*speed_mask &= ~SATA_SPEED_15_DIS;
+	}
+
+	switch (pd->min_sata_lrate) {
+	case PHY_LINKRATE_3:
+		*speed_mask |= SATA_SPEED_15_DIS;	/* fall through */
+	default:
+	case PHY_LINKRATE_1_5:
+		/* nothing to do */
+		;
+	}
+}
+
+/**
+ * asd_build_control_phy -- build a CONTROL PHY SCB
+ * @ascb: pointer to an ascb
+ * @phy_id: phy id to control, integer
+ * @subfunc: subfunction, what to actually do to the phy
+ *
+ * This function builds a CONTROL PHY scb.  No allocation of any kind
+ * is performed. @ascb is allocated with the list function.
+ * The caller can override the ascb->tasklet_complete to point
+ * to its own callback function.  It must call asd_ascb_free()
+ * at its tasklet complete function.
+ * See the default implementation.
+ */
+void asd_build_control_phy(struct asd_ascb *ascb, int phy_id, u8 subfunc)
+{
+	struct asd_phy *phy = &ascb->ha->phys[phy_id];
+	struct scb *scb = ascb->scb;
+	struct control_phy *control_phy = &scb->control_phy;
+
+	scb->header.opcode = CONTROL_PHY;
+	control_phy->phy_id = (u8) phy_id;
+	control_phy->sub_func = subfunc;
+
+	switch (subfunc) {	/* other sub-functions set no extra fields */
+	case EXECUTE_HARD_RESET:  /* 0x81 */
+	case ENABLE_PHY:          /* 0x01 */
+		/* decide hot plug delay */
+		control_phy->hot_plug_delay = HOTPLUG_DELAY_TIMEOUT;
+
+		/* decide speed mask */
+		set_speed_mask(&control_phy->speed_mask, phy->phy_desc);
+
+		/* initiator port settings are in the hi nibble */
+		if (phy->sas_phy.role == PHY_ROLE_INITIATOR)
+			control_phy->port_type = SAS_PROTO_ALL << 4;
+		else if (phy->sas_phy.role == PHY_ROLE_TARGET)
+			control_phy->port_type = SAS_PROTO_ALL;
+		else
+			control_phy->port_type =
+				(SAS_PROTO_ALL << 4) | SAS_PROTO_ALL;
+
+		/* link reset retries, this should be nominal */
+		control_phy->link_reset_retries = 10;
+		/* fall through: these two also need the func_mask */
+	case RELEASE_SPINUP_HOLD: /* 0x02 */
+		/* decide the func_mask */
+		control_phy->func_mask = FUNCTION_MASK_DEFAULT;
+		if (phy->phy_desc->flags & ASD_SATA_SPINUP_HOLD)
+			control_phy->func_mask &= ~SPINUP_HOLD_DIS;
+		else
+			control_phy->func_mask |= SPINUP_HOLD_DIS;
+	}
+
+	control_phy->conn_handle = cpu_to_le16(0xFFFF);
+
+	ascb->tasklet_complete = control_phy_tasklet_complete;
+}
+
+/* ---------- INITIATE LINK ADM TASK ---------- */
+
+/* Completion for an INITIATE LINK ADM TASK scb: report the done-list opcode
+ * (loudly unless it is TC_NO_ERROR) and free the ascb. */
+static void link_adm_tasklet_complete(struct asd_ascb *ascb,
+				      struct done_list_struct *dl)
+{
+	struct initiate_link_adm *task = &ascb->scb->link_adm;
+	u8 phy_id = task->phy_id;
+	u8 status = dl->opcode;
+
+	if (status != TC_NO_ERROR)
+		asd_printk("phy%d: link adm task 0x%x completed with error "
+			   "0x%x\n", phy_id, task->sub_func, status);
+	ASD_DPRINTK("phy%d: link adm task 0x%x: 0x%x\n",
+		    phy_id, task->sub_func, status);
+	asd_ascb_free(ascb);
+}
+
+/* Fill in an INITIATE LINK ADM TASK scb for @phy_id; nothing is allocated. */
+void asd_build_initiate_link_adm_task(struct asd_ascb *ascb, int phy_id,
+				      u8 subfunc)
+{
+	struct initiate_link_adm *task = &ascb->scb->link_adm;
+
+	ascb->scb->header.opcode = INITIATE_LINK_ADM_TASK;
+	task->phy_id = phy_id;
+	task->sub_func = subfunc;
+	task->conn_handle = cpu_to_le16(0xFFFF);
+
+	/* the completion logs the outcome and frees the ascb */
+	ascb->tasklet_complete = link_adm_tasklet_complete;
+}
+
+/* ---------- SCB timer ---------- */
+
+/**
+ * asd_ascb_timedout -- default handler for an expired SCB timer
+ * @data: the timed-out ascb, passed as an unsigned long
+ *
+ * Logs the opcode, takes the ascb off the pending queue (under
+ * pend_q_lock, decrementing the pending count) and frees it.
+ * Upper layers may install their own timeout function to release
+ * whatever they hold for this SCB and call this one last.  To do so,
+ * set ascb->timer.{function, data, expires} before calling the post
+ * function; the post function starts the timer.
+ */
+void asd_ascb_timedout(unsigned long data)
+{
+	struct asd_ascb *expired = (void *) data;
+	struct asd_seq_data *seq = &expired->ha->seq;
+	unsigned long irq_flags;
+
+	ASD_DPRINTK("scb:0x%x timed out\n", expired->scb->header.opcode);
+
+	spin_lock_irqsave(&seq->pend_q_lock, irq_flags);
+	list_del_init(&expired->list);
+	seq->pending--;
+	spin_unlock_irqrestore(&seq->pend_q_lock, irq_flags);
+
+	asd_ascb_free(expired);
+}
+
+/* ---------- CONTROL PHY ---------- */
+
+/* Map enum phy_func (spec value) to a CONTROL PHY sub_func; gaps are 0. */
+static const int phy_func_table[] = {
+	[PHY_FUNC_NOP]        = PHY_NO_OP,
+	[PHY_FUNC_LINK_RESET] = ENABLE_PHY,
+	[PHY_FUNC_HARD_RESET] = EXECUTE_HARD_RESET,
+	[PHY_FUNC_DISABLE]    = DISABLE_PHY,
+	[PHY_FUNC_RELEASE_SPINUP_HOLD] = RELEASE_SPINUP_HOLD,
+};
+
+/* Post a CONTROL PHY scb for @func.  Returns -ENOSYS if @func has no slot in
+ * phy_func_table, -ENOMEM without an ascb, else asd_post_ascb_list()'s code. */
+int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func)
+{
+	struct asd_ha_struct *asd_ha = phy->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	int res = 1;	/* in: how many ascbs the allocator should return */
+
+	if (func == PHY_FUNC_CLEAR_ERROR_LOG ||
+	    (unsigned int) func >= ARRAY_SIZE(phy_func_table))
+		return -ENOSYS;
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL);
+	if (!ascb)
+		return -ENOMEM;
+	asd_build_control_phy(ascb, phy->id, phy_func_table[func]);
+	res = asd_post_ascb_list(asd_ha, ascb, 1);
+	if (res)
+		asd_ascb_free(ascb);
+	return res;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c
new file mode 100644
index 0000000000000..eec1e0db0e0f0
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_sds.c
@@ -0,0 +1,1136 @@
+/*
+ * Aic94xx SAS/SATA driver access to shared data structures and memory
+ * maps.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+
+/* ---------- OCM stuff ---------- */
+
+struct asd_ocm_dir_ent {
+	u8 type;	/* matched by asd_find_dir_entry() */
+	u8 offs[3];	/* 24-bit value, byte 0 least significant */
+	u8 _r1;
+	u8 size[3];	/* 24-bit value, byte 0 least significant */
+} __attribute__ ((packed));
+
+struct asd_ocm_dir {
+	char sig[2];         /* 'M', 'O' */
+	u8   _r1[2];
+	u8   major;          /* 0 */
+	u8   minor;          /* 0 */
+	u8   _r2;
+	u8   num_de;         /* asd_read_ocm_dir() keeps the low 4 bits */
+	struct asd_ocm_dir_ent entry[15];
+} __attribute__ ((packed));
+
+#define	OCM_DE_OCM_DIR			0x00
+#define	OCM_DE_WIN_DRVR			0x01
+#define	OCM_DE_BIOS_CHIM		0x02	/* == OCM_BIOS_CHIM_DE */
+#define	OCM_DE_RAID_ENGN		0x03
+#define	OCM_DE_BIOS_INTL		0x04
+#define	OCM_DE_BIOS_CHIM_OSM		0x05
+#define	OCM_DE_BIOS_CHIM_DYNAMIC	0x06
+#define	OCM_DE_ADDC2C_RES0		0x07
+#define	OCM_DE_ADDC2C_RES1		0x08
+#define	OCM_DE_ADDC2C_RES2		0x09
+#define	OCM_DE_ADDC2C_RES3		0x0A
+
+#define OCM_INIT_DIR_ENTRIES	5	/* length of OCMDirEntriesInit[] */
+/***************************************************************************
+*  OCM directory default
+***************************************************************************/
+static struct asd_ocm_dir OCMDirInit =
+{
+	.sig = {0x4D, 0x4F},	/* signature: 'M', 'O' */
+	.num_de = OCM_INIT_DIR_ENTRIES,	/* no. of directory entries */
+};
+
+/***************************************************************************
+*  OCM directory Entries default (offs/size bytes are least significant first)
+***************************************************************************/
+static struct asd_ocm_dir_ent OCMDirEntriesInit[OCM_INIT_DIR_ENTRIES] =
+{
+	{
+		.type = (OCM_DE_ADDC2C_RES0),	/* Entry type  */
+		.offs = {128},			/* Offset 0x000080 */
+		.size = {0, 4},			/* size 0x000400 */
+	},
+	{
+		.type = (OCM_DE_ADDC2C_RES1),	/* Entry type  */
+		.offs = {128, 4},		/* Offset 0x000480 */
+		.size = {0, 4},			/* size 0x000400 */
+	},
+	{
+		.type = (OCM_DE_ADDC2C_RES2),	/* Entry type  */
+		.offs = {128, 8},		/* Offset 0x000880 */
+		.size = {0, 4},			/* size 0x000400 */
+	},
+	{
+		.type = (OCM_DE_ADDC2C_RES3),	/* Entry type  */
+		.offs = {128, 12},		/* Offset 0x000C80 */
+		.size = {0, 4},			/* size 0x000400 */
+	},
+	{
+		.type = (OCM_DE_WIN_DRVR),	/* Entry type  */
+		.offs = {128, 16},		/* Offset 0x001080 */
+		.size = {128, 235, 1},		/* size 0x01EB80 */
+	},
+};
+
+struct asd_bios_chim_struct {
+	char sig[4];         /* "SOIB" or "IPSA" */
+	u8   major;          /* 1 */
+	u8   minor;          /* 0 */
+	u8   bios_major;
+	u8   bios_minor;
+	__le32  bios_build;
+	u8   flags;          /* BC_BIOS_PRESENT is tested here */
+	u8   pci_slot;
+	__le16  ue_num;
+	__le16  ue_size;
+	u8  _r[14];
+	/* The unit element array (ue_num * ue_size bytes) is right here.
+	 */
+} __attribute__ ((packed));
+
+/**
+ * asd_read_ocm_seg - read an on chip memory (OCM) segment
+ * @asd_ha: pointer to the host adapter structure
+ * @buffer: where to write the read data
+ * @offs: offset into OCM where to read from
+ * @size: how many bytes to read
+ *
+ * Return the number of bytes not read. Return 0 on success.
+ */
+static int asd_read_ocm_seg(struct asd_ha_struct *asd_ha, void *buffer,
+			    u32 offs, int size)
+{
+	u8 *p = buffer;
+	if (unlikely(asd_ha->iospace)) {
+		asd_read_reg_string(asd_ha, buffer, offs+OCM_BASE_ADDR, size);
+		return 0;	/* the string read reports no short count */
+	}
+	for ( ; size > 0; size--, offs++, p++)
+		*p = asd_read_ocm_byte(asd_ha, offs);
+	return size;
+}
+
+/* Read the OCM directory at @offs into @dir and sanity-check its header. */
+static int asd_read_ocm_dir(struct asd_ha_struct *asd_ha,
+			    struct asd_ocm_dir *dir, u32 offs)
+{
+	int unread = asd_read_ocm_seg(asd_ha, dir, offs, sizeof(*dir));
+	if (unread != 0) {
+		ASD_DPRINTK("couldn't read ocm segment\n");
+		return unread;
+	}
+	if (!(dir->sig[0] == 'M' && dir->sig[1] == 'O')) {
+		ASD_DPRINTK("no valid dir signature(%c%c) at start of OCM\n",
+			    dir->sig[0], dir->sig[1]);
+		return -ENOENT;
+	}
+	if (dir->major) {
+		asd_printk("unsupported major version of ocm dir:0x%x\n",
+			   dir->major);
+		return -ENOENT;
+	}
+	dir->num_de &= 0xf;	/* only the low nibble is kept */
+	return 0;
+}
+
+/**
+ * asd_write_ocm_seg - write an on chip memory (OCM) segment
+ * @asd_ha: pointer to the host adapter structure
+ * @buffer: where to read the write data
+ * @offs: offset into OCM to write to
+ * @size: how many bytes to write
+ *
+ * Nothing is returned; no write status is available to the caller.
+ */
+static void asd_write_ocm_seg(struct asd_ha_struct *asd_ha, void *buffer,
+			    u32 offs, int size)
+{
+	u8 *p = buffer;
+
+	if (unlikely(asd_ha->iospace)) {
+		asd_write_reg_string(asd_ha, buffer, offs+OCM_BASE_ADDR, size);
+		return;
+	}
+	for ( ; size > 0; size--, offs++, p++)
+		asd_write_ocm_byte(asd_ha, offs, *p);
+}
+
+/* decode a 3-byte field, least significant byte first */
+#define THREE_TO_NUM(X) ((X)[0] | ((X)[1] << 8) | ((X)[2] << 16))
+
+static int asd_find_dir_entry(struct asd_ocm_dir *dir, u8 type,
+			      u32 *offs, u32 *size)
+{
+	struct asd_ocm_dir_ent *ent = dir->entry;
+	int left;
+
+	/* the first entry of the wanted type wins */
+	for (left = dir->num_de; left > 0; left--, ent++) {
+		if (ent->type != type)
+			continue;
+		*offs = (u32) THREE_TO_NUM(ent->offs);
+		*size = (u32) THREE_TO_NUM(ent->size);
+		return 0;
+	}
+	return -ENOENT;
+}
+
+#define OCM_BIOS_CHIM_DE  2	/* directory entry type looked up below */
+#define BC_BIOS_PRESENT   1	/* bit in asd_bios_chim_struct.flags */
+
+static int asd_get_bios_chim(struct asd_ha_struct *asd_ha,
+			     struct asd_ocm_dir *dir)
+{
+	int err;
+	struct asd_bios_chim_struct *bc_struct;
+	u32 offs, size;
+
+	err = asd_find_dir_entry(dir, OCM_BIOS_CHIM_DE, &offs, &size);
+	if (err) {
+		ASD_DPRINTK("couldn't find BIOS_CHIM dir ent\n");
+		goto out;
+	}
+	err = -ENOMEM;
+	bc_struct = kmalloc(sizeof(*bc_struct), GFP_KERNEL);
+	if (!bc_struct) {
+		asd_printk("no memory for bios_chim struct\n");
+		goto out;
+	}
+	err = asd_read_ocm_seg(asd_ha, (void *)bc_struct, offs,
+			       sizeof(*bc_struct));
+	if (err) {
+		ASD_DPRINTK("couldn't read ocm segment\n");
+		goto out2;
+	}
+	if (strncmp(bc_struct->sig, "SOIB", 4)	/* neither signature matches */
+	    && strncmp(bc_struct->sig, "IPSA", 4)) {
+		ASD_DPRINTK("BIOS_CHIM entry has no valid sig(%c%c%c%c)\n",
+			    bc_struct->sig[0], bc_struct->sig[1],
+			    bc_struct->sig[2], bc_struct->sig[3]);
+		err = -ENOENT;
+		goto out2;
+	}
+	if (bc_struct->major != 1) {
+		asd_printk("BIOS_CHIM unsupported major version:0x%x\n",
+			   bc_struct->major);
+		err = -ENOENT;
+		goto out2;
+	}
+	if (bc_struct->flags & BC_BIOS_PRESENT) {
+		asd_ha->hw_prof.bios.present = 1;
+		asd_ha->hw_prof.bios.maj = bc_struct->bios_major;
+		asd_ha->hw_prof.bios.min = bc_struct->bios_minor;
+		asd_ha->hw_prof.bios.bld = le32_to_cpu(bc_struct->bios_build);
+		ASD_DPRINTK("BIOS present (%d,%d), %d\n",
+			    asd_ha->hw_prof.bios.maj,
+			    asd_ha->hw_prof.bios.min,
+			    asd_ha->hw_prof.bios.bld);
+	}
+	asd_ha->hw_prof.ue.num = le16_to_cpu(bc_struct->ue_num);
+	asd_ha->hw_prof.ue.size= le16_to_cpu(bc_struct->ue_size);
+	ASD_DPRINTK("ue num:%d, ue size:%d\n", asd_ha->hw_prof.ue.num,
+		    asd_ha->hw_prof.ue.size);
+	size = asd_ha->hw_prof.ue.num * asd_ha->hw_prof.ue.size;
+	if (size > 0) {	/* unit elements sit right behind the header */
+		err = -ENOMEM;
+		asd_ha->hw_prof.ue.area = kmalloc(size, GFP_KERNEL);
+		if (!asd_ha->hw_prof.ue.area)
+			goto out2;
+		err = asd_read_ocm_seg(asd_ha, (void *)asd_ha->hw_prof.ue.area,
+				       offs + sizeof(*bc_struct), size);
+		if (err) {	/* undo the ue state; err is still returned */
+			kfree(asd_ha->hw_prof.ue.area);
+			asd_ha->hw_prof.ue.area = NULL;
+			asd_ha->hw_prof.ue.num  = 0;
+			asd_ha->hw_prof.ue.size = 0;
+			ASD_DPRINTK("couldn't read ue entries(%d)\n", err);
+		}
+	}
+out2:
+	kfree(bc_struct);	/* the header copy is only needed in here */
+out:
+	return err;
+}
+
+/* Zero all of OCM, then write the default directory header and entries. */
+static void
+asd_hwi_initialize_ocm_dir (struct asd_ha_struct *asd_ha)
+{
+	int i;
+	u32 dst;
+
+	/* Zero OCM, one dword at a time */
+	for (i = 0; i < OCM_MAX_SIZE; i += 4)
+		asd_write_ocm_dword(asd_ha, i, 0);
+
+	/* Dir header goes to offset 0, the entries are packed behind it */
+	asd_write_ocm_seg(asd_ha, &OCMDirInit, 0, sizeof(OCMDirInit));
+
+	dst = sizeof(OCMDirInit);
+	for (i = 0; i < OCM_INIT_DIR_ENTRIES; i++) {
+		asd_write_ocm_seg(asd_ha, &OCMDirEntriesInit[i], dst,
+				  sizeof(OCMDirEntriesInit[i]));
+		dst += sizeof(OCMDirEntriesInit[i]);
+	}
+}
+
+/* If the BIOS left OCM uninitialized: write back the set PCIC_INTRPT_STAT
+ * bits and install a default OCM directory.  Returns 0 or a PCI access error. */
+static int
+asd_hwi_check_ocm_access (struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pcidev = asd_ha->pcidev;
+	u32 reg;
+	int err = 0;
+	u32 v;
+
+	/* check if OCM has been initialized by BIOS */
+	reg = asd_read_reg_dword(asd_ha, EXSICNFGR);
+	if (!(reg & OCMINITIALIZED)) {
+		err = pci_read_config_dword(pcidev, PCIC_INTRPT_STAT, &v);
+		if (err) {
+			asd_printk("couldn't access PCIC_INTRPT_STAT of %s\n",
+					pci_name(pcidev));
+			goto out;
+		}
+
+		printk(KERN_INFO "OCM is not initialized by BIOS, "
+		       "reinitialize it and ignore it, current IntrptStatus "
+		       "is 0x%x\n", v);
+
+		if (v)	/* only write when some status bit is set */
+			err = pci_write_config_dword(pcidev,
+						     PCIC_INTRPT_STAT, v);
+		if (err) {
+			asd_printk("couldn't write PCIC_INTRPT_STAT of %s\n",
+					pci_name(pcidev));
+			goto out;
+		}
+
+		asd_hwi_initialize_ocm_dir(asd_ha);
+	}
+out:
+	return err;
+}
+
+/**
+ * asd_read_ocm - read on chip memory (OCM)
+ * @asd_ha: pointer to the host adapter structure
+ *
+ * Returns -1 if the OCM access check fails, -ENOMEM without memory for the
+ * directory, else what asd_read_ocm_dir() or asd_get_bios_chim() returns.
+ */
+int asd_read_ocm(struct asd_ha_struct *asd_ha)
+{
+	struct asd_ocm_dir *dir;
+	int err;
+
+	if (asd_hwi_check_ocm_access(asd_ha) != 0)
+		return -1;
+
+	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
+	if (dir == NULL) {
+		asd_printk("no memory for ocm dir\n");
+		return -ENOMEM;
+	}
+
+	err = asd_read_ocm_dir(asd_ha, dir, 0);
+	if (!err)
+		err = asd_get_bios_chim(asd_ha, dir);
+	kfree(dir);
+	return err;
+}
+
+/* ---------- FLASH stuff ---------- */
+
+#define FLASH_RESET			0xF0	/* byte written by asd_reset_flash() */
+#define FLASH_MANUF_AMD                 1
+
+#define FLASH_SIZE                      0x200000 /* range scanned for the dir */
+#define FLASH_DIR_COOKIE                "*** ADAPTEC FLASH DIRECTORY *** "
+#define FLASH_NEXT_ENTRY_OFFS		0x2000	/* stride of that scan */
+#define FLASH_MAX_DIR_ENTRIES		32
+
+#define FLASH_DE_TYPE_MASK              0x3FFFFFFF /* for asd_flash_de.type */
+#define FLASH_DE_MS                     0x120
+#define FLASH_DE_CTRL_A_USER            0xE0
+
+struct asd_flash_de {
+	__le32   type;		/* compared after & FLASH_DE_TYPE_MASK */
+	__le32   offs;
+	__le32   pad_size;	/* what asd_find_flash_de() reports as size */
+	__le32   image_size;
+	__le32   chksum;
+	u8       _r[12];
+	u8       version[32];
+} __attribute__ ((packed));
+
+struct asd_flash_dir {
+	u8    cookie[32];	  /* FLASH_DIR_COOKIE without its NUL */
+	__le32   rev;		  /* 2 */
+	__le32   chksum;
+	__le32   chksum_antidote;
+	__le32   bld;
+	u8    bld_id[32];	  /* build id data */
+	u8    ver_data[32];	  /* date and time of build */
+	__le32   ae_mask;
+	__le32   v_mask;
+	__le32   oc_mask;
+	u8    _r[20];
+	struct asd_flash_de dir_entry[FLASH_MAX_DIR_ENTRIES];
+} __attribute__ ((packed));
+
+struct asd_manuf_sec {
+	char  sig[2];		  /* 'S', 'M' */
+	u16   offs_next;	  /* LE; asd_validate_ms() converts in place */
+	u8    maj;           /* 0 */
+	u8    min;           /* 0 */
+	u16   chksum;		  /* LE; asd_validate_ms() converts in place */
+	u16   size;		  /* LE; asd_validate_ms() converts in place */
+	u8    _r[6];
+	u8    sas_addr[SAS_ADDR_SIZE];
+	u8    pcba_sn[ASD_PCBA_SN_SIZE];
+	/* Here start the other segments */
+	u8    linked_list[0];
+} __attribute__ ((packed));
+
+struct asd_manuf_phy_desc {	/* element of asd_manuf_phy_param.phy_desc[] */
+	u8    state;         /* low 4 bits */
+#define MS_PHY_STATE_ENABLEABLE 0
+#define MS_PHY_STATE_REPORTED   1
+#define MS_PHY_STATE_HIDDEN     2
+	u8    phy_id;
+	u16   _r;
+	u8    phy_control_0; /* mode 5 reg 0x160 */
+	u8    phy_control_1; /* mode 5 reg 0x161 */
+	u8    phy_control_2; /* mode 5 reg 0x162 */
+	u8    phy_control_3; /* mode 5 reg 0x163 */
+} __attribute__ ((packed));
+
+struct asd_manuf_phy_param {
+	char  sig[2];		  /* 'P', 'M' */
+	u16   next;		  /* NOTE(review): plain u16, byte order unmarked */
+	u8    maj;           /* 0 */
+	u8    min;           /* 2 */
+	u8    num_phy_desc;  /* 8 */
+	u8    phy_desc_size; /* 8 */
+	u8    _r[3];
+	u8    usage_model_id;
+	u32   _r2;
+	struct asd_manuf_phy_desc phy_desc[ASD_MAX_PHYS];
+} __attribute__ ((packed));
+
+#if 0	/* compiled out; presumably names for asd_ms_sb_desc.type -- confirm */
+static const char *asd_sb_type[] = {
+	"unknown",
+	"SGPIO",
+	[2 ... 0x7F] = "unknown",
+	[0x80] = "ADPT_I2C",
+	[0x81 ... 0xFF] = "VENDOR_UNIQUExx"
+};
+#endif
+
+struct asd_ms_sb_desc {
+	u8    type;
+	u8    node_desc_index;
+	u8    conn_desc_index;
+	u8    _recvd[0];	/* zero-length: occupies no space in the struct */
+} __attribute__ ((packed));
+
+#if 0	/* compiled out; presumably names for type and location -- confirm */
+static const char *asd_conn_type[] = {
+	[0 ... 7] = "unknown",
+	"SFF8470",
+	"SFF8482",
+	"SFF8484",
+	[0x80] = "PCIX_DAUGHTER0",
+	[0x81] = "SAS_DAUGHTER0",
+	[0x82 ... 0xFF] = "VENDOR_UNIQUExx"
+};
+
+static const char *asd_conn_location[] = {
+	"unknown",
+	"internal",
+	"external",
+	"board_to_board",
+};
+#endif
+
+struct asd_ms_conn_desc {
+	u8    type;
+	u8    location;
+	u8    num_sideband_desc;
+	u8    size_sideband_desc;
+	u32   _resvd;
+	u8    name[16];
+	struct asd_ms_sb_desc sb_desc[0];	/* zero-length trailing array */
+} __attribute__ ((packed));
+
+struct asd_nd_phy_desc {	/* element type of asd_ms_node_desc.phy_desc[] */
+	u8    vp_attch_type;
+	u8    attch_specific[0];	/* zero-length trailing array */
+} __attribute__ ((packed));
+
+#if 0	/* compiled out; presumably names for asd_ms_node_desc.type */
+static const char *asd_node_type[] = {
+	"IOP",
+	"IO_CONTROLLER",
+	"EXPANDER",
+	"PORT_MULTIPLIER",
+	"PORT_MULTIPLEXER",
+	"MULTI_DROP_I2C_BUS",
+};
+#endif
+
+struct asd_ms_node_desc {
+	u8    type;
+	u8    num_phy_desc;
+	u8    size_phy_desc;
+	u8    _resvd;
+	u8    name[16];
+	struct asd_nd_phy_desc phy_desc[0];	/* zero-length trailing array */
+} __attribute__ ((packed));
+
+struct asd_ms_conn_map {
+	char  sig[2];		  /* 'M', 'C' */
+	__le16 next;
+	u8    maj;		  /* 0 */
+	u8    min;		  /* 0 */
+	__le16 cm_size;		  /* size of this struct */
+	u8    num_conn;
+	u8    conn_size;
+	u8    num_nodes;
+	u8    usage_model_id;
+	u32   _resvd;
+	struct asd_ms_conn_desc conn_desc[0];
+	struct asd_ms_node_desc node_desc[0]; /* same offset as conn_desc[] */
+} __attribute__ ((packed));
+
+struct asd_ctrla_phy_entry {	/* element of asd_ctrla_phy_settings.phy_ent[] */
+	u8    sas_addr[SAS_ADDR_SIZE];
+	u8    sas_link_rates;  /* max in hi bits, min in low bits */
+	u8    flags;
+	u8    sata_link_rates; /* presumably packed like sas_link_rates */
+	u8    _r[5];
+} __attribute__ ((packed));
+
+struct asd_ctrla_phy_settings {
+	u8    id0;		  /* P'h'y */
+	u8    _r;
+	u16   next;		  /* NOTE(review): plain u16, byte order unmarked */
+	u8    num_phys;	      /* number of PHYs in the PCI function */
+	u8    _r2[3];
+	struct asd_ctrla_phy_entry phy_ent[ASD_MAX_PHYS];
+} __attribute__ ((packed));
+
+struct asd_ll_el {	/* two id bytes and a little-endian 'next' field */
+	u8   id0;
+	u8   id1;
+	__le16  next;
+	u8   something_here[0];	/* zero-length trailing array */
+} __attribute__ ((packed));
+
+/* Poll (up to 5000 times, 5us apart) until two successive reads agree. */
+static int asd_poll_flash(struct asd_ha_struct *asd_ha)
+{
+	int tries = 5000;
+	u8 first, second;
+	while (tries-- > 0) {
+		first  = asd_read_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar);
+		second = asd_read_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar);
+		if (first == second)
+			return 0;
+		udelay(5);
+	}
+	return -ENOENT;
+}
+
+/* Write FLASH_RESET to the flash base, polling with asd_poll_flash() both
+ * before and after; the first poll failure is returned unchanged. */
+static int asd_reset_flash(struct asd_ha_struct *asd_ha)
+{
+	int rc = asd_poll_flash(asd_ha);
+
+	if (rc != 0)
+		return rc;
+
+	asd_write_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar, FLASH_RESET);
+	return asd_poll_flash(asd_ha);
+}
+
+static inline int asd_read_flash_seg(struct asd_ha_struct *asd_ha,
+				     void *buffer, u32 offs, int size)
+{
+	u32 addr = asd_ha->hw_prof.flash.bar + offs;	/* flash base + offs */
+	asd_read_reg_string(asd_ha, buffer, addr, size);
+	return 0;	/* unconditional */
+}
+
+/**
+ * asd_find_flash_dir - finds and reads the flash directory
+ * @asd_ha: pointer to the host adapter structure
+ * @flash_dir: pointer to flash directory structure
+ *
+ * Probes every FLASH_NEXT_ENTRY_OFFS bytes below FLASH_SIZE for the
+ * directory cookie.  On a hit the offset is saved in hw_prof.flash.dir_offs,
+ * the whole directory is read into @flash_dir and 1 is returned; else 0.
+ */
+static int asd_find_flash_dir(struct asd_ha_struct *asd_ha,
+			      struct asd_flash_dir *flash_dir)
+{
+	const int cookie_len = sizeof(FLASH_DIR_COOKIE) - 1;
+	u32 pos;
+
+	for (pos = 0; pos < FLASH_SIZE; pos += FLASH_NEXT_ENTRY_OFFS) {
+		asd_read_flash_seg(asd_ha, flash_dir, pos, cookie_len);
+		if (memcmp(flash_dir->cookie, FLASH_DIR_COOKIE, cookie_len))
+			continue;
+		asd_ha->hw_prof.flash.dir_offs = pos;
+		asd_read_flash_seg(asd_ha, flash_dir, pos, sizeof(*flash_dir));
+		return 1;
+	}
+	return 0;
+}
+
+static int asd_flash_getid(struct asd_ha_struct *asd_ha)
+{
+	int err = 0;
+	u32 reg, inc;
+
+	reg = asd_read_reg_dword(asd_ha, EXSICNFGR);
+
+	if (!(reg & FLASHEX)) {
+		ASD_DPRINTK("flash doesn't exist\n");
+		return -ENOENT;
+	}
+	if (pci_read_config_dword(asd_ha->pcidev, PCI_CONF_FLSH_BAR,
+				  &asd_ha->hw_prof.flash.bar)) {
+		asd_printk("couldn't read PCI_CONF_FLSH_BAR of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return -ENOENT;
+	}
+	asd_ha->hw_prof.flash.present = 1;
+	asd_ha->hw_prof.flash.wide = reg & FLASHW ? 1 : 0;
+	err = asd_reset_flash(asd_ha);
+	if (err) {
+		ASD_DPRINTK("couldn't reset flash(%d)\n", err);
+		return err;
+	}
+	/* Get flash info. This would most likely be AMD Am29LV family flash.
+	 * First try the sequence for word mode.  It is the same as for
+	 * 008B (byte mode only), 160B (word mode) and 800D (word mode).
+	 */
+	reg = asd_ha->hw_prof.flash.bar;	/* reg now holds the flash base */
+	inc = asd_ha->hw_prof.flash.wide ? 2 : 1;
+	asd_write_reg_byte(asd_ha, reg + 0x555, 0xAA);
+	asd_write_reg_byte(asd_ha, reg + 0x2AA, 0x55);
+	asd_write_reg_byte(asd_ha, reg + 0x555, 0x90);
+	asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg);
+	asd_ha->hw_prof.flash.dev_id= asd_read_reg_byte(asd_ha,reg+inc);
+	asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha,reg+inc+inc);
+	/* Get out of autoselect mode. */
+	err = asd_reset_flash(asd_ha);	/* NOTE(review): result never checked */
+
+	if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) {
+		ASD_DPRINTK("0Found FLASH(%d) manuf:%d, dev_id:0x%x, "
+			    "sec_prot:%d\n",
+			    asd_ha->hw_prof.flash.wide ? 16 : 8,
+			    asd_ha->hw_prof.flash.manuf,
+			    asd_ha->hw_prof.flash.dev_id,
+			    asd_ha->hw_prof.flash.sec_prot);
+		return 0;
+	}
+
+	/* Ok, try the sequence for byte mode of 160B and 800D.
+	 * We may actually never need this.
+	 */
+	asd_write_reg_byte(asd_ha, reg + 0xAAA, 0xAA);
+	asd_write_reg_byte(asd_ha, reg + 0x555, 0x55);
+	asd_write_reg_byte(asd_ha, reg + 0xAAA, 0x90);
+	asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg);
+	asd_ha->hw_prof.flash.dev_id = asd_read_reg_byte(asd_ha, reg + 2);
+	asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha, reg + 4);
+	err = asd_reset_flash(asd_ha);	/* NOTE(review): result never checked */
+
+	if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) {
+		ASD_DPRINTK("1Found FLASH(%d) manuf:%d, dev_id:0x%x, "
+			    "sec_prot:%d\n",
+			    asd_ha->hw_prof.flash.wide ? 16 : 8,
+			    asd_ha->hw_prof.flash.manuf,
+			    asd_ha->hw_prof.flash.dev_id,
+			    asd_ha->hw_prof.flash.sec_prot);
+		return 0;
+	}
+
+	return -ENOENT;	/* neither sequence saw FLASH_MANUF_AMD */
+}
+
+static u16 asd_calc_flash_chksum(u16 *p, int size)
+{
+	u16 sum = 0;	/* 16-bit sum, wraps on overflow */
+	int w;		/* @size counts u16 words, not bytes */
+
+	for (w = 0; w < size; w++)
+		sum += p[w];
+	return sum;
+}
+
+
+static int asd_find_flash_de(struct asd_flash_dir *flash_dir, u32 entry_type,
+			     u32 *offs, u32 *size)
+{
+	int idx;
+
+	/* Return the offset and padded size of the first matching entry. */
+	for (idx = 0; idx < FLASH_MAX_DIR_ENTRIES; idx++) {
+		struct asd_flash_de *de = &flash_dir->dir_entry[idx];
+		u32 de_type = le32_to_cpu(de->type) & FLASH_DE_TYPE_MASK;
+
+		if (de_type != entry_type)
+			continue;
+		*offs = le32_to_cpu(de->offs);
+		*size = le32_to_cpu(de->pad_size);
+		return 0;
+	}
+
+	/* @offs and @size are left untouched when nothing matches. */
+	return -ENOENT;
+}
+
+static int asd_validate_ms(struct asd_manuf_sec *ms)
+{
+	const int sig_ok = ms->sig[0] == 'S' && ms->sig[1] == 'M';
+
+	if (!sig_ok) {
+		ASD_DPRINTK("manuf sec: no valid sig(%c%c)\n",
+			    ms->sig[0], ms->sig[1]);
+		return -ENOENT;
+	} else if (ms->maj) {
+		asd_printk("unsupported manuf. sector. major version:%x\n",
+			   ms->maj);
+		return -ENOENT;
+	}
+	/* Convert the 16-bit header fields to CPU byte order in place. */
+	ms->size = le16_to_cpu((__force __le16) ms->size);
+	ms->chksum = le16_to_cpu((__force __le16) ms->chksum);
+	ms->offs_next = le16_to_cpu((__force __le16) ms->offs_next);
+	/* A non-zero sum is only reported; the sector is still accepted. */
+	if (asd_calc_flash_chksum((u16 *)ms, ms->size/2) != 0)
+		asd_printk("failed manuf sector checksum\n");
+	return 0;
+}
+
+static int asd_ms_get_sas_addr(struct asd_ha_struct *asd_ha,
+			       struct asd_manuf_sec *ms)
+{
+	memcpy(asd_ha->hw_prof.sas_addr, ms->sas_addr, SAS_ADDR_SIZE);
+	return 0;	/* a plain copy; there is no failure path */
+}
+
+static int asd_ms_get_pcba_sn(struct asd_ha_struct *asd_ha,
+			      struct asd_manuf_sec *ms)
+{
+	memcpy(asd_ha->hw_prof.pcba_sn, ms->pcba_sn, ASD_PCBA_SN_SIZE);
+	asd_ha->hw_prof.pcba_sn[ASD_PCBA_SN_SIZE] = '\0'; /* NUL right after the copied bytes */
+	return 0;	/* NOTE(review): needs pcba_sn[] of ASD_PCBA_SN_SIZE + 1 bytes -- confirm */
+}
+
+/**
+ * asd_find_ll_by_id - look up a linked list element by its id bytes
+ * @start: base of the buffer holding the list; also its first element
+ * @id0: value to match against the element's id0 byte
+ * @id1: value to match against the element's id1 byte, or 0xFF
+ *
+ * Each element's "next" field is a little-endian 16-bit offset from
+ * @start, so @start must be the base element and not some later one.
+ * The walk stops when "next" leads back to @start.  Passing 0xFF as
+ * @id1 matches on @id0 alone.  Returns the element, or NULL if none.
+ */
+static void *asd_find_ll_by_id(void * const start, const u8 id0, const u8 id1)
+{
+	struct asd_ll_el *cur = start;
+
+	do {
+		/* An id1 of 0xFF is a wildcard: only id0 has to match. */
+		const int id1_ok = (id1 == 0xFF) || (cur->id1 == id1);
+
+		if (id1_ok && cur->id0 == id0)
+			return cur;
+
+		/* Offsets are relative to @start, not to the element. */
+		cur = start + le16_to_cpu(cur->next);
+	} while (cur != start);
+
+	return NULL;
+}
+
+/**
+ * asd_ms_get_phy_params - get phy parameters from the manufacturing sector
+ * @asd_ha: pointer to the host adapter structure
+ * @manuf_sec: pointer to the manufacturing sector
+ *
+ * The manufacturing sector also contains the linked list of sub-segments,
+ * since when it was read, its size was taken from the flash directory,
+ * not from the structure size.  If it has no 'P','M' segment, a default
+ * set with every phy in state 0 is used.  Returns 0, or -ENOENT if the
+ * segment's major version is not 0.
+ *
+ * HIDDEN phys are skipped and not counted.  REPORTED phys are counted
+ * but their bit in hw_prof.enabled_phys stays clear.  ENABLEABLE phys
+ * are counted, get their bit set and have their phy_control_0..3 bytes
+ * copied into hw_prof.phy_desc[].  hw_prof.max_phys is set to the
+ * REPORTED plus ENABLEABLE count, hw_prof.num_phys to the ENABLEABLE
+ * count.  E.g., with phys 0-2 HIDDEN, phys 3-5 REPORTED and phys 6-7
+ * ENABLEABLE, max_phys is 5, num_phys is 2 and enabled_phys is 0xC0.
+ * NOTE(review): num_phy_desc is not checked against ASD_MAX_PHYS here.
+ */
+static int asd_ms_get_phy_params(struct asd_ha_struct *asd_ha,
+				 struct asd_manuf_sec *manuf_sec)
+{
+	int i;
+	int en_phys = 0;
+	int rep_phys = 0;
+	struct asd_manuf_phy_param *phy_param;
+	struct asd_manuf_phy_param dflt_phy_param;
+
+	phy_param = asd_find_ll_by_id(manuf_sec, 'P', 'M');
+	if (!phy_param) {
+		ASD_DPRINTK("ms: no phy parameters found\n");
+		ASD_DPRINTK("ms: Creating default phy parameters\n");
+		dflt_phy_param.sig[0] = 'P';
+		dflt_phy_param.sig[1] = 'M';
+		dflt_phy_param.maj = 0;
+		dflt_phy_param.min = 2;
+		dflt_phy_param.num_phy_desc = 8;
+		dflt_phy_param.phy_desc_size = sizeof(struct asd_manuf_phy_desc);
+		for (i =0; i < ASD_MAX_PHYS; i++) {
+			dflt_phy_param.phy_desc[i].state = 0;
+			dflt_phy_param.phy_desc[i].phy_id = i;
+			dflt_phy_param.phy_desc[i].phy_control_0 = 0xf6;
+			dflt_phy_param.phy_desc[i].phy_control_1 = 0x10;
+			dflt_phy_param.phy_desc[i].phy_control_2 = 0x43;
+			dflt_phy_param.phy_desc[i].phy_control_3 = 0xeb;
+		}
+
+		phy_param = &dflt_phy_param;
+
+	}
+
+	if (phy_param->maj != 0) {
+		asd_printk("unsupported manuf. phy param major version:0x%x\n",
+			   phy_param->maj);
+		return -ENOENT;
+	}
+
+	ASD_DPRINTK("ms: num_phy_desc: %d\n", phy_param->num_phy_desc);
+	asd_ha->hw_prof.enabled_phys = 0;
+	for (i = 0; i < phy_param->num_phy_desc; i++) {
+		struct asd_manuf_phy_desc *pd = &phy_param->phy_desc[i];
+		switch (pd->state & 0xF) {
+		case MS_PHY_STATE_HIDDEN:
+			ASD_DPRINTK("ms: phy%d: HIDDEN\n", i);
+			continue;
+		case MS_PHY_STATE_REPORTED:
+			ASD_DPRINTK("ms: phy%d: REPORTED\n", i);
+			asd_ha->hw_prof.enabled_phys &= ~(1 << i);
+			rep_phys++;
+			continue;
+		case MS_PHY_STATE_ENABLEABLE:
+			ASD_DPRINTK("ms: phy%d: ENEBLEABLE\n", i);
+			asd_ha->hw_prof.enabled_phys |= (1 << i);
+			en_phys++;
+			break;
+		}
+		asd_ha->hw_prof.phy_desc[i].phy_control_0 = pd->phy_control_0;
+		asd_ha->hw_prof.phy_desc[i].phy_control_1 = pd->phy_control_1;
+		asd_ha->hw_prof.phy_desc[i].phy_control_2 = pd->phy_control_2;
+		asd_ha->hw_prof.phy_desc[i].phy_control_3 = pd->phy_control_3;
+	}
+	asd_ha->hw_prof.max_phys = rep_phys + en_phys;
+	asd_ha->hw_prof.num_phys = en_phys;
+	ASD_DPRINTK("ms: max_phys:0x%x, num_phys:0x%x\n",
+		    asd_ha->hw_prof.max_phys, asd_ha->hw_prof.num_phys);
+	ASD_DPRINTK("ms: enabled_phys:0x%x\n", asd_ha->hw_prof.enabled_phys);
+	return 0;
+}
+
+static int asd_ms_get_connector_map(struct asd_ha_struct *asd_ha,
+				    struct asd_manuf_sec *manuf_sec)
+{
+	struct asd_ms_conn_map *conn_map;
+
+	conn_map = asd_find_ll_by_id(manuf_sec, 'M', 'C');
+
+	/* A missing connector map is not an error. */
+	if (conn_map == NULL) {
+		ASD_DPRINTK("ms: no connector map found\n");
+		return 0;
+	}
+
+	/* XXX Only the major version is checked; the map is not used yet. */
+	if (conn_map->maj == 0)
+		return 0;
+
+	ASD_DPRINTK("ms: unsupported: connector map major version 0x%x"
+		    "\n", conn_map->maj);
+	return -ENOENT;
+}
+
+
+/**
+ * asd_process_ms - read the manufacturing sector and apply its settings
+ * @asd_ha: pointer to the host adapter structure
+ * @flash_dir: flash directory used to locate the sector
+ */
+static int asd_process_ms(struct asd_ha_struct *asd_ha,
+			  struct asd_flash_dir *flash_dir)
+{
+	struct asd_manuf_sec *ms;
+	u32 ms_offs, ms_size;
+	int err;
+
+	err = asd_find_flash_de(flash_dir, FLASH_DE_MS, &ms_offs, &ms_size);
+	if (err) {
+		ASD_DPRINTK("Couldn't find the manuf. sector\n");
+		return err;
+	}
+
+	/* An empty sector is not an error: err is still 0 here. */
+	if (ms_size == 0)
+		return err;
+
+	/* Buffer for the whole sector, sized by the flash directory. */
+	ms = kmalloc(ms_size, GFP_KERNEL);
+	if (ms == NULL) {
+		ASD_DPRINTK("no mem for manuf sector\n");
+		return -ENOMEM;
+	}
+
+	err = asd_read_flash_seg(asd_ha, (void *)ms, ms_offs, ms_size);
+	if (err) {
+		ASD_DPRINTK("couldn't read manuf sector at 0x%x, size 0x%x\n",
+			    ms_offs, ms_size);
+		goto out_free;
+	}
+
+	err = asd_validate_ms(ms);
+	if (err) {
+		ASD_DPRINTK("couldn't validate manuf sector\n");
+		goto out_free;
+	}
+
+	err = asd_ms_get_sas_addr(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("couldn't read the SAS_ADDR\n");
+		goto out_free;
+	}
+	ASD_DPRINTK("manuf sect SAS_ADDR %llx\n",
+		    SAS_ADDR(asd_ha->hw_prof.sas_addr));
+
+	err = asd_ms_get_pcba_sn(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("couldn't read the PCBA SN\n");
+		goto out_free;
+	}
+	ASD_DPRINTK("manuf sect PCBA SN %s\n", asd_ha->hw_prof.pcba_sn);
+
+	err = asd_ms_get_phy_params(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("ms: couldn't get phy parameters\n");
+		goto out_free;
+	}
+
+	/* Last step: its result, 0 or not, is what gets returned. */
+	err = asd_ms_get_connector_map(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("ms: couldn't get connector map\n");
+	}
+
+out_free:
+	kfree(ms);
+	return err;
+}
+
+static int asd_process_ctrla_phy_settings(struct asd_ha_struct *asd_ha,
+					  struct asd_ctrla_phy_settings *ps)
+{
+	int phy;
+	for (phy = 0; phy < ps->num_phys; phy++) {
+		struct asd_ctrla_phy_entry *ent = &ps->phy_ent[phy];
+
+		if (!PHY_ENABLED(asd_ha, phy))
+			continue;
+		if (*(u64 *)ent->sas_addr == 0) {
+			asd_ha->hw_prof.enabled_phys &= ~(1 << phy);
+			continue;
+		}
+		/* This address is meant to be sent in IDENTIFY. */
+		memcpy(asd_ha->hw_prof.phy_desc[phy].sas_addr, ent->sas_addr,
+		       SAS_ADDR_SIZE);
+		/* Link rates: max in the high nibble, min in the low one. */
+		asd_ha->hw_prof.phy_desc[phy].max_sas_lrate =
+			(ent->sas_link_rates >> 4) & 0x0F;
+		asd_ha->hw_prof.phy_desc[phy].min_sas_lrate =
+			ent->sas_link_rates & 0x0F;
+		asd_ha->hw_prof.phy_desc[phy].max_sata_lrate =
+			(ent->sata_link_rates >> 4) & 0x0F;
+		asd_ha->hw_prof.phy_desc[phy].min_sata_lrate =
+			ent->sata_link_rates & 0x0F;
+		asd_ha->hw_prof.phy_desc[phy].flags = ent->flags;
+		ASD_DPRINTK("ctrla: phy%d: sas_addr: %llx, sas rate:0x%x-0x%x,"
+			    " sata rate:0x%x-0x%x, flags:0x%x\n",
+			    phy,
+			    SAS_ADDR(asd_ha->hw_prof.phy_desc[phy].sas_addr),
+			    asd_ha->hw_prof.phy_desc[phy].max_sas_lrate,
+			    asd_ha->hw_prof.phy_desc[phy].min_sas_lrate,
+			    asd_ha->hw_prof.phy_desc[phy].max_sata_lrate,
+			    asd_ha->hw_prof.phy_desc[phy].min_sata_lrate,
+			    asd_ha->hw_prof.phy_desc[phy].flags);
+	}
+	return 0;
+}
+
+/**
+ * asd_process_ctrl_a_user - apply CTRL-A user phy settings
+ * @asd_ha: pointer to the host adapter structure
+ * @flash_dir: flash directory; built-in defaults are used if it has no entry
+ */
+static int asd_process_ctrl_a_user(struct asd_ha_struct *asd_ha,
+				   struct asd_flash_dir *flash_dir)
+{
+	int err, i;
+	u32 offs, size;
+	struct asd_ll_el *el = NULL;
+	struct asd_ctrla_phy_settings *ps;
+	struct asd_ctrla_phy_settings dflt_ps;
+
+	err = asd_find_flash_de(flash_dir, FLASH_DE_CTRL_A_USER, &offs, &size);
+	if (err) {
+		ASD_DPRINTK("couldn't find CTRL-A user settings section\n");
+		ASD_DPRINTK("Creating default CTRL-A user settings section\n");
+		/* Defaults: every phy gets the adapter's own SAS address. */
+		dflt_ps.id0 = 'h';
+		dflt_ps.num_phys = 8;
+		for (i =0; i < ASD_MAX_PHYS; i++) {
+			memcpy(dflt_ps.phy_ent[i].sas_addr,
+			       asd_ha->hw_prof.sas_addr, SAS_ADDR_SIZE);
+			dflt_ps.phy_ent[i].sas_link_rates = 0x98;
+			dflt_ps.phy_ent[i].flags = 0x0;
+			dflt_ps.phy_ent[i].sata_link_rates = 0x0;
+		}
+		/* No flash section: offs is unset, so skip the flash read. */
+		ps = &dflt_ps;
+		goto out_process;
+	}
+
+	if (size == 0)
+		goto out;
+
+	err = -ENOMEM;
+	el = kmalloc(size, GFP_KERNEL);
+	if (!el) {
+		ASD_DPRINTK("no mem for ctrla user settings section\n");
+		goto out;
+	}
+
+	err = asd_read_flash_seg(asd_ha, (void *)el, offs, size);
+	if (err) {
+		ASD_DPRINTK("couldn't read ctrla phy settings section\n");
+		goto out2;
+	}
+
+	err = -ENOENT;
+	ps = asd_find_ll_by_id(el, 'h', 0xFF);
+	if (!ps) {
+		ASD_DPRINTK("couldn't find ctrla phy settings struct\n");
+		goto out2;
+	}
+out_process:
+	err = asd_process_ctrla_phy_settings(asd_ha, ps);
+	if (err) {
+		ASD_DPRINTK("couldn't process ctrla phy settings\n");
+		goto out2;
+	}
+out2:
+	kfree(el);
+out:
+	return err;
+}
+
+/**
+ * asd_read_flash - read the adapter settings stored in flash
+ * @asd_ha: pointer to the host adapter structure
+ */
+int asd_read_flash(struct asd_ha_struct *asd_ha)
+{
+	struct asd_flash_dir *dir;
+	int rc = asd_flash_getid(asd_ha);
+
+	if (rc != 0)
+		return rc;
+
+	/* Scratch copy of the flash directory, freed before returning. */
+	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
+	if (dir == NULL)
+		return -ENOMEM;
+
+	/* Both directory checks below fail with -ENOENT. */
+	rc = -ENOENT;
+	if (asd_find_flash_dir(asd_ha, dir) == 0) {
+		ASD_DPRINTK("couldn't find flash directory\n");
+		goto free_dir;
+	}
+
+	if (le32_to_cpu(dir->rev) != 2) {
+		asd_printk("unsupported flash dir version:0x%x\n",
+			   le32_to_cpu(dir->rev));
+		goto free_dir;
+	}
+
+	rc = asd_process_ms(asd_ha, dir);
+	if (rc != 0) {
+		ASD_DPRINTK("couldn't process manuf sector settings\n");
+		goto free_dir;
+	}
+
+	/* Last step: fall through to the cleanup with its result. */
+	rc = asd_process_ctrl_a_user(asd_ha, dir);
+	if (rc != 0)
+		ASD_DPRINTK("couldn't process CTRL-A user settings\n");
+
+free_dir:
+	kfree(dir);
+	return rc;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_seq.c b/drivers/scsi/aic94xx/aic94xx_seq.c
new file mode 100644
index 0000000000000..9050c6f3f6bde
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_seq.c
@@ -0,0 +1,1401 @@
+/*
+ * Aic94xx SAS/SATA driver sequencer interface.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * Parts of this code adapted from David Chaw's adp94xx_seq.c.
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/firmware.h>
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+
+#include "aic94xx_seq.h"
+#include "aic94xx_dump.h"
+
+/* It takes no more than 0.05 us for an instruction
+ * to complete. So waiting for 1 us should be more than
+ * plenty.
+ */
+#define PAUSE_DELAY 1
+#define PAUSE_TRIES 1000
+
+static const struct firmware *sequencer_fw;
+static const char *sequencer_version;
+static u16 cseq_vecs[CSEQ_NUM_VECS], lseq_vecs[LSEQ_NUM_VECS], mode2_task,
+	cseq_idle_loop, lseq_idle_loop;
+static u8 *cseq_code, *lseq_code;
+static u32 cseq_code_size, lseq_code_size;
+
+static u16 first_scb_site_no = 0xFFFF;
+static u16 last_scb_site_no;
+
+/* ---------- Pause/Unpause CSEQ/LSEQ ---------- */
+
+/**
+ * asd_pause_cseq - halt the central sequencer (CSEQ)
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Returns 0 once PAUSED is seen in CARP2CTL, -1 if it never shows up.
+ */
+int asd_pause_cseq(struct asd_ha_struct *asd_ha)
+{
+	u32 ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+	int tries;
+
+	if (ctl & PAUSED)
+		return 0;
+
+	/* Request the pause, then poll until the sequencer reports it. */
+	asd_write_reg_dword(asd_ha, CARP2CTL, ctl | EPAUSE);
+	for (tries = 0; tries < PAUSE_TRIES; tries++) {
+		ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+		if (ctl & PAUSED)
+			return 0;
+		udelay(PAUSE_DELAY);
+	}
+
+	ASD_DPRINTK("couldn't pause CSEQ\n");
+	return -1;
+}
+
+/**
+ * asd_unpause_cseq - let the central sequencer (CSEQ) run again
+ * @asd_ha: pointer to host adapter structure.
+ *
+ * Returns 0 once PAUSED is clear in CARP2CTL, -1 if it stays set.
+ */
+int asd_unpause_cseq(struct asd_ha_struct *asd_ha)
+{
+	u32 ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+	int left;
+
+	if (!(ctl & PAUSED))
+		return 0;
+
+	/* Drop the pause request, then poll until PAUSED clears. */
+	asd_write_reg_dword(asd_ha, CARP2CTL, ctl & ~EPAUSE);
+	for (left = PAUSE_TRIES; left > 0; left--) {
+		ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+		if (!(ctl & PAUSED))
+			return 0;
+		udelay(PAUSE_DELAY);
+	}
+
+	ASD_DPRINTK("couldn't unpause the CSEQ\n");
+	return -1;
+}
+
+/**
+ * asd_seq_pause_lseq - halt a single link sequencer
+ * @asd_ha: pointer to a host adapter structure
+ * @lseq: number of the link sequencer to pause
+ *
+ * Returns 0 once LmARP2CTL(lseq) shows PAUSED, -1 if it never does.
+ */
+static inline int asd_seq_pause_lseq(struct asd_ha_struct *asd_ha, int lseq)
+{
+	u32 ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+	int attempt;
+
+	if (ctl & PAUSED)
+		return 0;
+
+	/* Set EPAUSE and give the sequencer PAUSE_TRIES polls to stop. */
+	asd_write_reg_dword(asd_ha, LmARP2CTL(lseq), ctl | EPAUSE);
+	for (attempt = 1; attempt <= PAUSE_TRIES; attempt++) {
+		ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+		if (ctl & PAUSED)
+			return 0;
+		udelay(PAUSE_DELAY);
+	}
+
+	ASD_DPRINTK("couldn't pause LSEQ %d\n", lseq);
+	return -1;
+}
+
+/**
+ * asd_pause_lseq - pause every link sequencer named in a mask
+ * @asd_ha: pointer to host adapter structure
+ * @lseq_mask: mask of link sequencers of interest
+ *
+ * Returns 0 if all of them paused, else the first failure's result.
+ */
+int asd_pause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask)
+{
+	int lseq;
+
+	/* Stop at the first link sequencer that fails to pause. */
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		int err = asd_seq_pause_lseq(asd_ha, lseq);
+
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/**
+ * asd_seq_unpause_lseq - unpause a link sequencer
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: link sequencer of interest
+ * Always returns 0; a poll timeout is only logged via ASD_DPRINTK.
+ * NOTE(review): asd_unpause_cseq returns -1 on timeout -- confirm intent.
+ */
+static inline int asd_seq_unpause_lseq(struct asd_ha_struct *asd_ha, int lseq)
+{
+	u32 arp2ctl;
+	int count = PAUSE_TRIES;
+
+	arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+	if (!(arp2ctl & PAUSED))
+		return 0;
+
+	asd_write_reg_dword(asd_ha, LmARP2CTL(lseq), arp2ctl & ~EPAUSE);
+	do {
+		arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+		if (!(arp2ctl & PAUSED))
+			return 0;
+		udelay(PAUSE_DELAY);
+	} while (--count > 0);
+
+	ASD_DPRINTK("couldn't unpause LSEQ %d\n", lseq);
+	return 0;
+}
+
+
+/**
+ * asd_unpause_lseq - resume the link sequencer(s) named in a mask
+ * @asd_ha: pointer to host adapter structure
+ * @lseq_mask: mask of link sequencers of interest
+ *
+ * Returns the first non-zero result of asd_seq_unpause_lseq(), else 0.
+ */
+int asd_unpause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask)
+{
+	int lseq;
+
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		const int rc = asd_seq_unpause_lseq(asd_ha, lseq);
+
+		/* Give up on the first sequencer reporting an error. */
+		if (rc != 0)
+			return rc;
+	}
+	return 0;
+}
+
+/* ---------- Downloading CSEQ/LSEQ microcode ---------- */
+
+static int asd_verify_cseq(struct asd_ha_struct *asd_ha, const u8 *_prog,
+			   u32 size)
+{
+	const u32 *words = (u32 *) _prog;
+	u32 offs;	/* byte offset into the image and into CSEQ RAM */
+
+	for (offs = 0; offs < size; offs += 4) {
+		u32 want = le32_to_cpu(words[offs / 4]);
+		u32 got = asd_read_reg_dword(asd_ha,
+					     CSEQ_RAM_REG_BASE_ADR + offs);
+
+		if (want == got)
+			continue;
+		asd_printk("%s: cseq verify failed at %u "
+			   "read:0x%x, wanted:0x%x\n",
+			   pci_name(asd_ha->pcidev), offs, got, want);
+		return -1;
+	}
+	ASD_DPRINTK("verified %d bytes, passed\n", size);
+	return 0;
+}
+
+/**
+ * asd_verify_lseq - verify the microcode of a link sequencer
+ * @asd_ha: pointer to host adapter structure
+ * @_prog: pointer to the microcode, stored as little-endian dwords
+ * @size: size of the microcode in bytes; stepped down 4 at a time
+ * @lseq: link sequencer of interest
+ *
+ * The link sequencer code is accessed in 4 KB pages, which are selected
+ * by setting LmRAMPAGE (bits 8 and 9) of the LmBISTCTL1 register.
+ * The 10 KB LSEQm instruction code is mapped, page at a time, at
+ * LmSEQRAM address.  Returns 0 on a full match, -1 at the first mismatch.
+ */
+static int asd_verify_lseq(struct asd_ha_struct *asd_ha, const u8 *_prog,
+			   u32 size, int lseq)
+{
+#define LSEQ_CODEPAGE_SIZE 4096
+	int pages =  (size + LSEQ_CODEPAGE_SIZE - 1) / LSEQ_CODEPAGE_SIZE;
+	u32 page;
+	const u32 *prog = (u32 *) _prog;
+
+	for (page = 0; page < pages; page++) {
+		u32 i;
+
+		asd_write_reg_dword(asd_ha, LmBISTCTL1(lseq),
+				    page << LmRAMPAGE_LSHIFT);
+		for (i = 0; size > 0 && i < LSEQ_CODEPAGE_SIZE;
+		     i += 4, prog++, size-=4) {
+
+			u32 val = asd_read_reg_dword(asd_ha, LmSEQRAM(lseq)+i);
+
+			if (le32_to_cpu(*prog) != val) {
+				asd_printk("%s: LSEQ%d verify failed "
+					   "page:%d, offs:%d\n",
+					   pci_name(asd_ha->pcidev),
+					   lseq, page, i);
+				return -1;
+			}
+		}
+	}
+	ASD_DPRINTK("LSEQ%d verified %d bytes, passed\n", lseq,
+		    (int)((u8 *)prog-_prog));
+	return 0;
+}
+
+/**
+ * asd_verify_seq -- compare downloaded CSEQ/LSEQ microcode with its image
+ * @asd_ha: pointer to host adapter structure
+ * @prog: microcode image to compare against
+ * @size: size of the image in bytes
+ * @lseq_mask: 0 to check the CSEQ, otherwise the mask of LSEQs to check
+ *
+ * Returns 0 if every checked sequencer matches, negative on a mismatch.
+ */
+static int asd_verify_seq(struct asd_ha_struct *asd_ha, const u8 *prog,
+			      u32 size, u8 lseq_mask)
+{
+	int lseq;
+
+	/* An empty mask selects the central sequencer. */
+	if (!lseq_mask)
+		return asd_verify_cseq(asd_ha, prog, size);
+
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		int err = asd_verify_lseq(asd_ha, prog, size, lseq);
+
+		if (err)
+			return err;
+	}
+	return 0;
+}
+#define ASD_DMA_MODE_DOWNLOAD
+#ifdef ASD_DMA_MODE_DOWNLOAD
+/* This is the size of the CSEQ Mapped instruction page */
+#define MAX_DMA_OVLY_COUNT ((1U << 14)-1)
+static int asd_download_seq(struct asd_ha_struct *asd_ha,
+			    const u8 * const prog, u32 size, u8 lseq_mask)
+{
+	u32 comstaten;
+	u32 reg;
+	int page;
+	const int pages = (size + MAX_DMA_OVLY_COUNT - 1) / MAX_DMA_OVLY_COUNT;
+	struct asd_dma_tok *token;
+	int err = 0;
+
+	if (size % 4) {
+		asd_printk("sequencer program not multiple of 4\n");
+		return -1;
+	}
+
+	asd_pause_cseq(asd_ha);
+	asd_pause_lseq(asd_ha, 0xFF);
+
+	/* Save COMSTATEN (restored at out:), then mask and clear interrupts. */
+	comstaten = asd_read_reg_dword(asd_ha, COMSTATEN);
+	asd_write_reg_dword(asd_ha, COMSTATEN, 0);
+	asd_write_reg_dword(asd_ha, COMSTAT, COMSTAT_MASK);
+
+	asd_write_reg_dword(asd_ha, CHIMINTEN, RST_CHIMINTEN);
+	asd_write_reg_dword(asd_ha, CHIMINT, CHIMINT_MASK);
+
+	token = asd_alloc_coherent(asd_ha, MAX_DMA_OVLY_COUNT, GFP_KERNEL);
+	if (!token) {
+		asd_printk("out of memory for dma SEQ download\n");
+		err = -ENOMEM;
+		goto out;
+	}
+	ASD_DPRINTK("dma-ing %d bytes\n", size);
+
+	for (page = 0; page < pages; page++) {
+		int i;
+		u32 left = min(size-page*MAX_DMA_OVLY_COUNT,
+			       (u32)MAX_DMA_OVLY_COUNT);
+
+		memcpy(token->vaddr, prog + page*MAX_DMA_OVLY_COUNT, left);
+		asd_write_reg_addr(asd_ha, OVLYDMAADR, token->dma_handle);
+		asd_write_reg_dword(asd_ha, OVLYDMACNT, left);
+		reg = !page ? RESETOVLYDMA : 0;	/* only set for the first chunk */
+		reg |= (STARTOVLYDMA | OVLYHALTERR);
+		reg |= (lseq_mask ? (((u32)lseq_mask) << 8) : OVLYCSEQ);
+		/* Start DMA, then poll below until OVLYDMAACT drops or we time out. */
+		asd_write_reg_dword(asd_ha, OVLYDMACTL, reg);
+
+		for (i = PAUSE_TRIES*100; i > 0; i--) {
+			u32 dmadone = asd_read_reg_dword(asd_ha, OVLYDMACTL);
+			if (!(dmadone & OVLYDMAACT))
+				break;
+			udelay(PAUSE_DELAY);
+		}
+	}
+
+	reg = asd_read_reg_dword(asd_ha, COMSTAT);
+	if (!(reg & OVLYDMADONE) || (reg & OVLYERR)
+	    || (asd_read_reg_dword(asd_ha, CHIMINT) & DEVEXCEPT_MASK)){
+		asd_printk("%s: error DMA-ing sequencer code\n",
+			   pci_name(asd_ha->pcidev));
+		err = -ENODEV;
+	}
+
+	asd_free_coherent(asd_ha, token);
+ out:
+	asd_write_reg_dword(asd_ha, COMSTATEN, comstaten);
+
+	return err ? : asd_verify_seq(asd_ha, prog, size, lseq_mask);
+}
+#else /* ASD_DMA_MODE_DOWNLOAD */
+static int asd_download_seq(struct asd_ha_struct *asd_ha, const u8 *_prog,
+			    u32 size, u8 lseq_mask)
+{
+	const u32 *word = (u32 *) _prog;
+	u32 ctl;
+	int sent;
+
+	if (size % 4 != 0) {
+		asd_printk("sequencer program not multiple of 4\n");
+		return -1;
+	}
+
+	asd_pause_cseq(asd_ha);
+	asd_pause_lseq(asd_ha, 0xFF);
+
+	/* Select the target sequencer(s) and set PIOCMODE for the transfer. */
+	ctl = lseq_mask ? (((u32)lseq_mask) << 8) : OVLYCSEQ;
+	ctl |= PIOCMODE;
+	asd_write_reg_dword(asd_ha, OVLYDMACNT, size);
+	asd_write_reg_dword(asd_ha, OVLYDMACTL, ctl);
+
+	ASD_DPRINTK("downloading %s sequencer%s in PIO mode...\n",
+		    lseq_mask ? "LSEQ" : "CSEQ", lseq_mask ? "s" : "");
+
+	/* Feed the image through SPIODATA one dword at a time. */
+	for (sent = 0; sent < size; sent += 4)
+		asd_write_reg_dword(asd_ha, SPIODATA, word[sent / 4]);
+
+	ctl = (ctl & ~PIOCMODE) | OVLYHALTERR;
+	asd_write_reg_dword(asd_ha, OVLYDMACTL, ctl);
+	return asd_verify_seq(asd_ha, _prog, size, lseq_mask);
+}
+#endif /* ASD_DMA_MODE_DOWNLOAD */
+
+/**
+ * asd_seq_download_seqs - load the CSEQ and LSEQ microcode
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Returns -ENODEV if no phy is enabled, else the last download result.
+ */
+static int asd_seq_download_seqs(struct asd_ha_struct *asd_ha)
+{
+	u8 lseq, todo;
+	int rc;
+
+	if (asd_ha->hw_prof.enabled_phys == 0) {
+		asd_printk("%s: no enabled phys!\n", pci_name(asd_ha->pcidev));
+		return -ENODEV;
+	}
+
+	/* The central sequencer goes first. */
+	ASD_DPRINTK("downloading CSEQ...\n");
+	rc = asd_download_seq(asd_ha, cseq_code, cseq_code_size, 0);
+	if (rc != 0) {
+		asd_printk("CSEQ download failed:%d\n", rc);
+		return rc;
+	}
+
+	/* Then the link sequencers of all enabled phys in a single
+	 * download.
+	 */
+	ASD_DPRINTK("downloading LSEQs...\n");
+	rc = asd_download_seq(asd_ha, lseq_code, lseq_code_size,
+			      asd_ha->hw_prof.enabled_phys);
+	if (rc == 0)
+		return 0;
+
+	/* That failed: retry one link sequencer at a time. */
+	todo = asd_ha->hw_prof.enabled_phys;
+	for_each_sequencer(todo, todo, lseq) {
+		rc = asd_download_seq(asd_ha, lseq_code, lseq_code_size,
+				      1<<lseq);
+		if (rc != 0)
+			break;
+	}
+	if (rc != 0)
+		asd_printk("LSEQs download failed:%d\n", rc);
+
+	return rc;
+}
+
+/* ---------- Initializing the chip, chip memory, etc. ---------- */
+
+/**
+ * asd_init_cseq_mip - initialize CSEQ mode independent pages 4-7
+ * @asd_ha: pointer to host adapter structure
+ */
+static void asd_init_cseq_mip(struct asd_ha_struct *asd_ha)
+{
+	/* CSEQ Mode Independent, page 4 setup: queue heads/tails are set to 0xFFFF. */
+	asd_write_reg_word(asd_ha, CSEQ_Q_EXE_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EXE_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DONE_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DONE_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_SEND_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_SEND_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DMA2CHIM_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DMA2CHIM_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_COPY_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_COPY_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_REG0, 0);
+	asd_write_reg_word(asd_ha, CSEQ_REG1, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_REG2, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_LINK_CTL_Q_MAP, 0);
+	{
+		u8 con = asd_read_reg_byte(asd_ha, CCONEXIST);
+		u8 val = hweight8(con);	/* number of bits set in CCONEXIST */
+		asd_write_reg_byte(asd_ha, CSEQ_MAX_CSEQ_MODE, (val<<4)|val);
+	}
+	asd_write_reg_word(asd_ha, CSEQ_FREE_LIST_HACK_COUNT, 0);
+
+	/* CSEQ Mode independent, page 5 setup. */
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_QUEUE, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_QUEUE+4, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_COUNT, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_COUNT+4, 0);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EST_NEXUS_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EST_NEXUS_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_NEED_EST_NEXUS_SCB, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EST_NEXUS_REQ_HEAD, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EST_NEXUS_REQ_TAIL, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EST_NEXUS_SCB_OFFSET, 0);
+
+	/* CSEQ Mode independent, page 6 setup. */
+	asd_write_reg_word(asd_ha, CSEQ_INT_ROUT_RET_ADDR0, 0);
+	asd_write_reg_word(asd_ha, CSEQ_INT_ROUT_RET_ADDR1, 0);
+	asd_write_reg_word(asd_ha, CSEQ_INT_ROUT_SCBPTR, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_INT_ROUT_MODE, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_ISR_SCRATCH_FLAGS, 0);
+	asd_write_reg_word(asd_ha, CSEQ_ISR_SAVE_SINDEX, 0);
+	asd_write_reg_word(asd_ha, CSEQ_ISR_SAVE_DINDEX, 0);
+	asd_write_reg_word(asd_ha, CSEQ_Q_MONIRTT_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_MONIRTT_TAIL, 0xFFFF);
+	/* Free SCB mask: bits 15:8 of ~(cmdctx_size/128 - 1). */
+	{
+		u16 cmdctx = asd_get_cmdctx_size(asd_ha);
+		cmdctx = (~((cmdctx/128)-1)) >> 8;
+		asd_write_reg_byte(asd_ha, CSEQ_FREE_SCB_MASK, (u8)cmdctx);
+	}
+	asd_write_reg_word(asd_ha, CSEQ_BUILTIN_FREE_SCB_HEAD,
+			   first_scb_site_no);
+	asd_write_reg_word(asd_ha, CSEQ_BUILTIN_FREE_SCB_TAIL,
+			   last_scb_site_no);
+	asd_write_reg_word(asd_ha, CSEQ_EXTENDED_FREE_SCB_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_EXTENDED_FREE_SCB_TAIL, 0xFFFF);
+
+	/* CSEQ Mode independent, page 7 setup. */
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_QUEUE, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_QUEUE+4, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_COUNT, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_COUNT+4, 0);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EMPTY_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EMPTY_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_NEED_EMPTY_SCB, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EMPTY_REQ_HEAD, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EMPTY_REQ_TAIL, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EMPTY_SCB_OFFSET, 0);
+	asd_write_reg_word(asd_ha, CSEQ_PRIMITIVE_DATA, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_TIMEOUT_CONST, 0);
+}
+
+/**
+ * asd_init_cseq_mdp - initialize CSEQ Mode dependent pages
+ * @asd_ha: pointer to host adapter structure
+ */
+static void asd_init_cseq_mdp(struct asd_ha_struct *asd_ha)
+{
+	int	i;
+	int	moffs;
+
+	moffs = CSEQ_PAGE_SIZE * 2;	/* register stride between modes, as used below */
+
+	/* CSEQ Mode dependent, modes 0-7, page 0 setup. */
+	for (i = 0; i < 8; i++) {
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_LRM_SAVE_SINDEX, 0);
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_LRM_SAVE_SCBPTR, 0);
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_Q_LINK_HEAD, 0xFFFF);
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_Q_LINK_TAIL, 0xFFFF);
+		asd_write_reg_byte(asd_ha, i*moffs+CSEQ_LRM_SAVE_SCRPAGE, 0);
+	}
+
+	/* CSEQ Mode dependent, mode 0-7, page 1 and 2 shall be ignored. */
+
+	/* CSEQ Mode dependent, mode 8, page 0 setup. */
+	asd_write_reg_word(asd_ha, CSEQ_RET_ADDR, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_RET_SCBPTR, 0);
+	asd_write_reg_word(asd_ha, CSEQ_SAVE_SCBPTR, 0);
+	asd_write_reg_word(asd_ha, CSEQ_EMPTY_TRANS_CTX, 0);
+	asd_write_reg_word(asd_ha, CSEQ_RESP_LEN, 0);
+	asd_write_reg_word(asd_ha, CSEQ_TMF_SCBPTR, 0);
+	asd_write_reg_word(asd_ha, CSEQ_GLOBAL_PREV_SCB, 0);
+	asd_write_reg_word(asd_ha, CSEQ_GLOBAL_HEAD, 0);
+	asd_write_reg_word(asd_ha, CSEQ_CLEAR_LU_HEAD, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_TMF_OPCODE, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_SCRATCH_FLAGS, 0);
+	asd_write_reg_word(asd_ha, CSEQ_HSB_SITE, 0);
+	asd_write_reg_word(asd_ha, CSEQ_FIRST_INV_SCB_SITE,
+			   (u16)last_scb_site_no+1);
+	asd_write_reg_word(asd_ha, CSEQ_FIRST_INV_DDB_SITE,
+			   (u16)asd_ha->hw_prof.max_ddbs);
+
+	/* CSEQ Mode dependent, mode 8, page 1 setup. */
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CLEAR, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CLEAR + 4, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CHECK, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CHECK + 4, 0);
+
+	/* CSEQ Mode dependent, mode 8, page 2 setup. */
+	/* Tell the sequencer the bus address of the first SCB. */
+	asd_write_reg_addr(asd_ha, CSEQ_HQ_NEW_POINTER,
+			   asd_ha->seq.next_scb.dma_handle);
+	ASD_DPRINTK("First SCB dma_handle: 0x%llx\n",
+		    (unsigned long long)asd_ha->seq.next_scb.dma_handle);
+
+	/* Tell the sequencer the first Done List entry address. */
+	asd_write_reg_addr(asd_ha, CSEQ_HQ_DONE_BASE,
+			   asd_ha->seq.actual_dl->dma_handle);
+
+	/* Initialize CSEQ_HQ_DONE_POINTER with the least significant
+	 * 4 bytes of the first Done List address. */
+	asd_write_reg_dword(asd_ha, CSEQ_HQ_DONE_POINTER,
+			    ASD_BUSADDR_LO(asd_ha->seq.actual_dl->dma_handle));
+
+	asd_write_reg_byte(asd_ha, CSEQ_HQ_DONE_PASS, ASD_DEF_DL_TOGGLE);
+
+	/* CSEQ Mode dependent, mode 8, page 3 shall be ignored. */
+}
+
+/**
+ * asd_init_cseq_scratch -- setup and init CSEQ
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Set up the central sequencer's scratch pages: initialize first the mode
+ * independent and then the mode dependent pages to their default settings.
+ */
+static void asd_init_cseq_scratch(struct asd_ha_struct *asd_ha)
+{
+	asd_init_cseq_mip(asd_ha);
+	asd_init_cseq_mdp(asd_ha);
+}
+
+/**
+ * asd_init_lseq_mip -- initialize LSEQ Mode independent pages 0-3
+ * @asd_ha: pointer to host adapter structure
+ */
+static void asd_init_lseq_mip(struct asd_ha_struct *asd_ha, u8 lseq)
+{
+	int i;
+
+	/* LSEQ Mode independent page 0 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_Q_TGTXFR_HEAD(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_Q_TGTXFR_TAIL(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_NUMBER(lseq), lseq);
+	asd_write_reg_byte(asd_ha, LmSEQ_SCRATCH_FLAGS(lseq),
+			   ASD_NOTIFY_ENABLE_SPINUP);
+	asd_write_reg_dword(asd_ha, LmSEQ_CONNECTION_STATE(lseq),0x08000000);
+	asd_write_reg_word(asd_ha, LmSEQ_CONCTL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_CONSTAT(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_CONNECTION_MODES(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_REG1_ISR(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_REG2_ISR(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_REG3_ISR(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_REG0_ISR(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_REG0_ISR(lseq)+4, 0);
+
+	/* LSEQ Mode independent page 1 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR0(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR1(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR2(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR3(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE0(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE1(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE2(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE3(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_HEAD(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_TAIL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_BUF_AVAIL(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_TIMEOUT_CONST(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_ISR_SAVE_SINDEX(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_ISR_SAVE_DINDEX(lseq), 0);
+
+	/* LSEQ Mode Independent page 2 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR0(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR1(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR2(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR3(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD0(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD1(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD2(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD3(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_HEAD(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_TAIL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_BUFS_AVAIL(lseq), 0);
+	for (i = 0; i < 12; i += 4)
+		asd_write_reg_dword(asd_ha, LmSEQ_ATA_SCR_REGS(lseq) + i, 0);
+
+	/* LSEQ Mode Independent page 3 setup. */
+
+	/* Device present timer timeout */
+	asd_write_reg_dword(asd_ha, LmSEQ_DEV_PRES_TMR_TOUT_CONST(lseq),
+			    ASD_DEV_PRESENT_TIMEOUT);
+
+	/* SATA interlock timer disabled */
+	asd_write_reg_dword(asd_ha, LmSEQ_SATA_INTERLOCK_TIMEOUT(lseq),
+			    ASD_SATA_INTERLOCK_TIMEOUT);
+
+	/* STP shutdown timer timeout constant, IGNORED by the sequencer,
+	 * always 0. */
+	asd_write_reg_dword(asd_ha, LmSEQ_STP_SHUTDOWN_TIMEOUT(lseq),
+			    ASD_STP_SHUTDOWN_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_SRST_ASSERT_TIMEOUT(lseq),
+			    ASD_SRST_ASSERT_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_RCV_FIS_TIMEOUT(lseq),
+			    ASD_RCV_FIS_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_ONE_MILLISEC_TIMEOUT(lseq),
+			    ASD_ONE_MILLISEC_TIMEOUT);
+
+	/* COM_INIT timer */
+	asd_write_reg_dword(asd_ha, LmSEQ_TEN_MS_COMINIT_TIMEOUT(lseq),
+			    ASD_TEN_MILLISEC_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_SMP_RCV_TIMEOUT(lseq),
+			    ASD_SMP_RCV_TIMEOUT);
+}
+
+/**
+ * asd_init_lseq_mdp -- initialize LSEQ mode dependent pages.
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: link sequencer whose scratch pages are initialized
+ */
+static void asd_init_lseq_mdp(struct asd_ha_struct *asd_ha, int lseq)
+{
+	int    i;
+	u32    moffs;
+	u16 ret_addr[] = {
+		0xFFFF,		  /* mode 0 */
+		0xFFFF,		  /* mode 1 */
+		mode2_task,	  /* mode 2 */
+		0,
+		0xFFFF,		  /* mode 4/5 */
+		0xFFFF,		  /* mode 4/5 */
+	};
+
+	/*
+	 * Modes 0, 1, 2 and 4/5 share the layout of the first 14 bytes of
+	 * page 0: set up modes 0-2 here, mode 5 right after the loop.
+	 */
+	for (i = 0; i < 3; i++) {
+		moffs = i * LSEQ_MODE_SCRATCH_SIZE;
+		asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq)+moffs,
+				   ret_addr[i]);
+		asd_write_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq)+moffs, 0);
+		asd_write_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq)+moffs, 0);
+		asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq)+moffs,0xFFFF);
+		asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq)+moffs,0xFFFF);
+		asd_write_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq)+moffs,0);
+		asd_write_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq)+moffs,0);
+	}
+	/* Mode 5 page 0 overlaps the same scratch page with Mode 0 page 3. */
+	asd_write_reg_word(asd_ha,
+			   LmSEQ_RET_ADDR(lseq)+LSEQ_MODE5_PAGE0_OFFSET,
+			   ret_addr[5]);
+	asd_write_reg_word(asd_ha,
+			   LmSEQ_REG0_MODE(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0);
+	asd_write_reg_word(asd_ha,
+			   LmSEQ_MODE_FLAGS(lseq)+LSEQ_MODE5_PAGE0_OFFSET, 0);
+	asd_write_reg_word(asd_ha,
+			   LmSEQ_RET_ADDR2(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0xFFFF);
+	asd_write_reg_word(asd_ha,
+			   LmSEQ_RET_ADDR1(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0xFFFF);
+	asd_write_reg_byte(asd_ha,
+			   LmSEQ_OPCODE_TO_CSEQ(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0);
+	asd_write_reg_word(asd_ha,
+			   LmSEQ_DATA_TO_CSEQ(lseq)+LSEQ_MODE5_PAGE0_OFFSET, 0);
+
+	/* LSEQ Mode dependent 0, page 0 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_FIRST_INV_DDB_SITE(lseq),
+			   (u16)asd_ha->hw_prof.max_ddbs);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_TRANS_CTX(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_RESP_LEN(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_FIRST_INV_SCB_SITE(lseq),
+			   (u16)last_scb_site_no+1);
+	/* Mask and shift before the cast: bits 31:16, then bits 15:0. */
+	asd_write_reg_word(asd_ha, LmSEQ_INTEN_SAVE(lseq),
+			    (u16) ((LmM0INTEN_MASK & 0xFFFF0000) >> 16));
+	asd_write_reg_word(asd_ha, LmSEQ_INTEN_SAVE(lseq) + 2,
+			    (u16) (LmM0INTEN_MASK & 0xFFFF));
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_FRM_LEN(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_PROTOCOL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_RESP_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_LAST_LOADED_SGE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_SAVE_SCBPTR(lseq), 0);
+
+	/* LSEQ mode dependent, mode 1, page 0 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_Q_XMIT_HEAD(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_M1_EMPTY_TRANS_CTX(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_INI_CONN_TAG(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_FAILED_OPEN_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_XMIT_REQUEST_TYPE(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_M1_RESP_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_M1_LAST_LOADED_SGE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_M1_SAVE_SCBPTR(lseq), 0);
+
+	/* LSEQ Mode dependent mode 2, page 0 setup */
+	asd_write_reg_word(asd_ha, LmSEQ_PORT_COUNTER(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_PM_TABLE_PTR(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_SATA_INTERLOCK_TMR_SAVE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_IP_BITL(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_COPY_SMP_CONN_TAG(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_P0M2_OFFS1AH(lseq), 0);
+
+	/* LSEQ Mode dependent, mode 4/5, page 0 setup. */
+	asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_MODE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_Q_LINK_HEAD(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_ERR(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_SIGNALS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SAS_RESET_MODE(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RESET_RETRY_COUNT(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_NUM_LINK_RESET_RETRIES(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_OOB_INT_ENABLES(lseq), 0);
+	/*
+	 * Set the desired interval between transmissions of the NOTIFY
+	 * (ENABLE SPINUP) primitive.  Must be initialized to val - 1.
+	 */
+	asd_write_reg_word(asd_ha, LmSEQ_NOTIFY_TIMER_TIMEOUT(lseq),
+			   ASD_NOTIFY_TIMEOUT - 1);
+	/* No delay for the first NOTIFY to be sent to the attached target. */
+	asd_write_reg_word(asd_ha, LmSEQ_NOTIFY_TIMER_DOWN_COUNT(lseq),
+			   ASD_NOTIFY_DOWN_COUNT);
+
+	/* LSEQ Mode dependent, mode 0 and 1, page 1 setup. */
+	for (i = 0; i < 2; i++)	{
+		int j;
+		/* Start from Page 1 of Mode 0 and 1. */
+		moffs = LSEQ_PAGE_SIZE + i*LSEQ_MODE_SCRATCH_SIZE;
+		/* All the fields of page 1 can be initialized to 0. */
+		for (j = 0; j < LSEQ_PAGE_SIZE; j += 4)
+			asd_write_reg_dword(asd_ha, LmSCRATCH(lseq)+moffs+j,0);
+	}
+
+	/* LSEQ Mode dependent, mode 2, page 1 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_INVALID_DWORD_COUNT(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_DISPARITY_ERROR_COUNT(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_LOSS_OF_SYNC_COUNT(lseq), 0);
+
+	/* LSEQ Mode dependent, mode 4/5, page 1. */
+	for (i = 0; i < LSEQ_PAGE_SIZE; i+=4)
+		asd_write_reg_dword(asd_ha, LmSEQ_FRAME_TYPE_MASK(lseq)+i, 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_FRAME_TYPE_MASK(lseq), 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq), 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq)+1,0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq)+2,0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq), 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq)+1, 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq)+2, 0xFF);
+	asd_write_reg_dword(asd_ha, LmSEQ_DATA_OFFSET(lseq), 0xFFFFFFFF);
+
+	/* LSEQ Mode dependent, mode 0, page 2 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_SMP_RCV_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_DEVICE_BITS(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_SDB_DDB(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SDB_NUM_TAGS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SDB_CURR_TAG(lseq), 0);
+
+	/* LSEQ Mode Dependent 1, page 2 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(lseq)+4, 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_OPEN_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_SRST_AS_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_LAST_LOADED_SG_EL(lseq), 0);
+
+	/* LSEQ Mode Dependent 2, page 2 setup. */
+	/* The LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS is IGNORED by the sequencer,
+	 * i.e. always 0. */
+	asd_write_reg_dword(asd_ha, LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS(lseq),0);
+	asd_write_reg_dword(asd_ha, LmSEQ_CLOSE_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_BREAK_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_DWS_RESET_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha,LmSEQ_SATA_INTERLOCK_TIMER_TERM_TS(lseq),0);
+	asd_write_reg_dword(asd_ha, LmSEQ_MCTL_TIMER_TERM_TS(lseq), 0);
+
+	/* LSEQ Mode Dependent 4/5, page 2 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_COMINIT_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_RCV_ID_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_RCV_FIS_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_DEV_PRES_TIMER_TERM_TS(lseq), 0);
+}
+
+/**
+ * asd_init_lseq_scratch -- init the scratch pages of the link sequencers
+ * @asd_ha: pointer to host adapter struct; hw_prof.enabled_phys is the mask
+ */
+static void asd_init_lseq_scratch(struct asd_ha_struct *asd_ha)
+{
+	u8 lseq;
+	u8 lseq_mask;
+
+	lseq_mask = asd_ha->hw_prof.enabled_phys;
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		asd_init_lseq_mip(asd_ha, lseq);
+		asd_init_lseq_mdp(asd_ha, lseq);
+	}
+}
+
+/**
+ * asd_init_scb_sites -- initialize sequencer SCB sites (memory).
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Must run before the CSEQ and LSEQ scratch is initialized, since
+ * that code uses last_scb_site_no which is computed here.  Also sets
+ * first_scb_site_no and hw_prof.max_scbs (count of valid sites).
+ */
+static void asd_init_scb_sites(struct asd_ha_struct *asd_ha)
+{
+	u16	site_no;
+	u16     max_scbs = 0;
+
+	for (site_no = asd_ha->hw_prof.max_scbs-1;
+	     site_no != (u16) -1;
+	     site_no--) {
+		u16	i;
+
+		/* Initialize all fields in the SCB site to 0. */
+		for (i = 0; i < ASD_SCB_SIZE; i += 4)
+			asd_scbsite_write_dword(asd_ha, site_no, i, 0);
+
+		/* Workaround needed by SEQ to fix a SATA issue is to exclude
+		 * certain SCB sites from the free list. */
+		if (!SCB_SITE_VALID(site_no))
+			continue;
+
+		if (last_scb_site_no == 0)
+			last_scb_site_no = site_no;
+
+		/* For every SCB site, we need to initialize the
+		 * following fields: Q_NEXT, SCB_OPCODE, SCB_FLAGS,
+		 * and SG Element Flag. */
+
+		/* Q_NEXT: next valid site up; invalidated for the last SCB. */
+		asd_scbsite_write_word(asd_ha, site_no, 0, first_scb_site_no);
+
+		/* Initialize SCB Site Opcode field to invalid. */
+		asd_scbsite_write_byte(asd_ha, site_no,
+				       offsetof(struct scb_header, opcode),
+				       0xFF);
+
+		/* Initialize SCB Site Flags field to mean a response
+		 * frame has been received.  This causes inadvertently
+		 * received frames to be dropped. */
+		asd_scbsite_write_byte(asd_ha, site_no, 0x49, 0x01);
+
+		first_scb_site_no = site_no;
+		max_scbs++;
+	}
+	asd_ha->hw_prof.max_scbs = max_scbs;
+	ASD_DPRINTK("max_scbs:%d\n", asd_ha->hw_prof.max_scbs);
+	ASD_DPRINTK("first_scb_site_no:0x%x\n", first_scb_site_no);
+	ASD_DPRINTK("last_scb_site_no:0x%x\n", last_scb_site_no);
+}
+
+/**
+ * asd_init_cseq_cio - initialize CSEQ CIO registers
+ * @asd_ha: pointer to host adapter structure
+ */
+static void asd_init_cseq_cio(struct asd_ha_struct *asd_ha)
+{
+	int i;
+
+	asd_write_reg_byte(asd_ha, CSEQCOMINTEN, 0);
+	asd_write_reg_byte(asd_ha, CSEQDLCTL, ASD_DL_SIZE_BITS);
+	asd_write_reg_byte(asd_ha, CSEQDLOFFS, 0);
+	asd_write_reg_byte(asd_ha, CSEQDLOFFS+1, 0);
+	asd_ha->seq.scbpro = 0;
+	asd_write_reg_dword(asd_ha, SCBPRO, 0);
+	asd_write_reg_dword(asd_ha, CSEQCON, 0);
+
+	/* Initialize CSEQ Mode 11 Interrupt Vectors.
+	 * The addresses are 16 bit wide and in dword units.
+	 * The values in cseq_vecs[] are taken from the firmware file
+	 * and are written unchanged (no division by 4 is done here). */
+	asd_write_reg_word(asd_ha, CM11INTVEC0, cseq_vecs[0]);
+	asd_write_reg_word(asd_ha, CM11INTVEC1, cseq_vecs[1]);
+	asd_write_reg_word(asd_ha, CM11INTVEC2, cseq_vecs[2]);
+
+	/* Enable ARP2HALTC (ARP2 Halted from Halt Code Write). */
+	asd_write_reg_byte(asd_ha, CARP2INTEN, EN_ARP2HALTC);
+
+	/* Initialize CSEQ Scratch Page to 0x04. */
+	asd_write_reg_byte(asd_ha, CSCRATCHPAGE, 0x04);
+
+	/* Initialize CSEQ Mode[0-8] Dependent registers. */
+	/* Initialize Scratch Page to 0. */
+	for (i = 0; i < 9; i++)
+		asd_write_reg_byte(asd_ha, CMnSCRATCHPAGE(i), 0);
+
+	/* Set the ARP2 Program Count to the CSEQ idle loop. */
+	asd_write_reg_word(asd_ha, CPRGMCNT, cseq_idle_loop);
+
+	for (i = 0; i < 8; i++) {
+		/* Initialize Mode n Link m Interrupt Enable. */
+		asd_write_reg_dword(asd_ha, CMnINTEN(i), EN_CMnRSPMBXF);
+		/* Initialize Mode n Request Mailbox. */
+		asd_write_reg_dword(asd_ha, CMnREQMBX(i), 0);
+	}
+}
+
+/**
+ * asd_init_lseq_cio -- initialize LmSEQ CIO registers
+ * @asd_ha: pointer to host adapter structure
+ */
+static void asd_init_lseq_cio(struct asd_ha_struct *asd_ha, int lseq)
+{
+	u8  *sas_addr;
+	int  i;
+
+	/* Enable ARP2HALTC (ARP2 Halted from Halt Code Write). */
+	asd_write_reg_dword(asd_ha, LmARP2INTEN(lseq), EN_ARP2HALTC);
+
+	asd_write_reg_byte(asd_ha, LmSCRATCHPAGE(lseq), 0);
+
+	/* Initialize Mode 0,1, and 2 SCRATCHPAGE to 0. */
+	for (i = 0; i < 3; i++)
+		asd_write_reg_byte(asd_ha, LmMnSCRATCHPAGE(lseq, i), 0);
+
+	/* Initialize Mode 5 SCRATCHPAGE to 0. */
+	asd_write_reg_byte(asd_ha, LmMnSCRATCHPAGE(lseq, 5), 0);
+
+	asd_write_reg_dword(asd_ha, LmRSPMBX(lseq), 0);
+	/* Initialize Mode 0,1,2 and 5 Interrupt Enable and
+	 * Interrupt registers. */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 0), LmM0INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 0), 0xFFFFFFFF);
+	/* Mode 1 */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 1), LmM1INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 1), 0xFFFFFFFF);
+	/* Mode 2 */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 2), LmM2INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 2), 0xFFFFFFFF);
+	/* Mode 5 */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 5), LmM5INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 5), 0xFFFFFFFF);
+
+	/* Enable HW Timer status. */
+	asd_write_reg_byte(asd_ha, LmHWTSTATEN(lseq), LmHWTSTATEN_MASK);
+
+	/* Enable Primitive Status 0 and 1. */
+	asd_write_reg_dword(asd_ha, LmPRIMSTAT0EN(lseq), LmPRIMSTAT0EN_MASK);
+	asd_write_reg_dword(asd_ha, LmPRIMSTAT1EN(lseq), LmPRIMSTAT1EN_MASK);
+
+	/* Enable Frame Error. */
+	asd_write_reg_dword(asd_ha, LmFRMERREN(lseq), LmFRMERREN_MASK);
+	asd_write_reg_byte(asd_ha, LmMnHOLDLVL(lseq, 0), 0x50);
+
+	/* Initialize Mode 0 Transfer Level to 512. */
+	asd_write_reg_byte(asd_ha,  LmMnXFRLVL(lseq, 0), LmMnXFRLVL_512);
+	/* Initialize Mode 1 Transfer Level to 256. */
+	asd_write_reg_byte(asd_ha, LmMnXFRLVL(lseq, 1), LmMnXFRLVL_256);
+
+	/* Initialize Program Count. */
+	asd_write_reg_word(asd_ha, LmPRGMCNT(lseq), lseq_idle_loop);
+
+	/* Enable Blind SG Move. */
+	asd_write_reg_dword(asd_ha, LmMODECTL(lseq), LmBLIND48);
+	asd_write_reg_word(asd_ha, LmM3SATATIMER(lseq),
+			   ASD_SATA_INTERLOCK_TIMEOUT);
+
+	(void) asd_read_reg_dword(asd_ha, LmREQMBX(lseq));
+
+	/* Clear Primitive Status 0 and 1. */
+	asd_write_reg_dword(asd_ha, LmPRMSTAT0(lseq), 0xFFFFFFFF);
+	asd_write_reg_dword(asd_ha, LmPRMSTAT1(lseq), 0xFFFFFFFF);
+
+	/* Clear HW Timer status. */
+	asd_write_reg_byte(asd_ha, LmHWTSTAT(lseq), 0xFF);
+
+	/* Clear DMA Errors for Mode 0 and 1. */
+	asd_write_reg_byte(asd_ha, LmMnDMAERRS(lseq, 0), 0xFF);
+	asd_write_reg_byte(asd_ha, LmMnDMAERRS(lseq, 1), 0xFF);
+
+	/* Clear SG DMA Errors for Mode 0 and 1. */
+	asd_write_reg_byte(asd_ha, LmMnSGDMAERRS(lseq, 0), 0xFF);
+	asd_write_reg_byte(asd_ha, LmMnSGDMAERRS(lseq, 1), 0xFF);
+
+	/* Clear Mode 0 Buffer Parity Error. */
+	asd_write_reg_byte(asd_ha, LmMnBUFSTAT(lseq, 0), LmMnBUFPERR);
+
+	/* Clear Mode 0 Frame Error register. */
+	asd_write_reg_dword(asd_ha, LmMnFRMERR(lseq, 0), 0xFFFFFFFF);
+
+	/* Reset LSEQ external interrupt arbiter. */
+	asd_write_reg_byte(asd_ha, LmARP2INTCTL(lseq), RSTINTCTL);
+
+	/* Set the Phy SAS for the LmSEQ WWN. */
+	sas_addr = asd_ha->phys[lseq].phy_desc->sas_addr;
+	for (i = 0; i < SAS_ADDR_SIZE; i++)
+		asd_write_reg_byte(asd_ha, LmWWN(lseq) + i, sas_addr[i]);
+
+	/* Set the Transmit Size to 1024 bytes, 0 = 256 Dwords. */
+	asd_write_reg_byte(asd_ha, LmMnXMTSIZE(lseq, 1), 0);
+
+	/* Set the Bus Inactivity Time Limit Timer. */
+	asd_write_reg_word(asd_ha, LmBITL_TIMER(lseq), 9);
+
+	/* Enable SATA Port Multiplier. */
+	asd_write_reg_byte(asd_ha, LmMnSATAFS(lseq, 1), 0x80);
+
+	/* Initialize Interrupt Vector[0-10] address in Mode 3.
+	 * See the comment on CSEQ_INT_* */
+	asd_write_reg_word(asd_ha, LmM3INTVEC0(lseq), lseq_vecs[0]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC1(lseq), lseq_vecs[1]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC2(lseq), lseq_vecs[2]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC3(lseq), lseq_vecs[3]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC4(lseq), lseq_vecs[4]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC5(lseq), lseq_vecs[5]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC6(lseq), lseq_vecs[6]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC7(lseq), lseq_vecs[7]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC8(lseq), lseq_vecs[8]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC9(lseq), lseq_vecs[9]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC10(lseq), lseq_vecs[10]);
+	/*
+	 * Program the Link LED control, applicable only for
+	 * Chip Rev. B or later.
+	 */
+	asd_write_reg_dword(asd_ha, LmCONTROL(lseq),
+			    (LEDTIMER | LEDMODE_TXRX | LEDTIMERS_100ms));
+
+	/* Set the Align Rate for SAS and STP mode. */
+	asd_write_reg_byte(asd_ha, LmM1SASALIGN(lseq), SAS_ALIGN_DEFAULT);
+	asd_write_reg_byte(asd_ha, LmM1STPALIGN(lseq), STP_ALIGN_DEFAULT);
+}
+
+
+/**
+ * asd_post_init_cseq -- clear CSEQ Mode n Int. status and Response mailbox
+ * @asd_ha: pointer to host adapter struct
+ */
+static void asd_post_init_cseq(struct asd_ha_struct *asd_ha)
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		asd_write_reg_dword(asd_ha, CMnINT(i), 0xFFFFFFFF);
+	for (i = 0; i < 8; i++)
+		asd_read_reg_dword(asd_ha, CMnRSPMBX(i));
+	/* Reset the external interrupt arbiter. */
+	asd_write_reg_byte(asd_ha, CARP2INTCTL, RSTINTCTL);
+}
+
+/**
+ * asd_init_ddb_0 -- initialize DDB 0
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Initialize DDB site 0 which is used internally by the sequencer.
+ */
+static void asd_init_ddb_0(struct asd_ha_struct *asd_ha)
+{
+	int	i;
+
+	/* Zero out the DDB explicitly */
+	for (i = 0; i < sizeof(struct asd_ddb_seq_shared); i+=4)
+		asd_ddbsite_write_dword(asd_ha, 0, i, 0);
+
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, q_free_ddb_head), 0);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, q_free_ddb_tail),
+			       asd_ha->hw_prof.max_ddbs-1);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, q_free_ddb_cnt), 0);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, q_used_ddb_head), 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, q_used_ddb_tail), 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, shared_mem_lock), 0);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, smp_conn_tag), 0);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, est_nexus_buf_cnt), 0);
+	asd_ddbsite_write_word(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, est_nexus_buf_thresh),
+			       asd_ha->hw_prof.num_phys * 2);
+	asd_ddbsite_write_byte(asd_ha, 0,
+		 offsetof(struct asd_ddb_seq_shared, settable_max_contexts),0);
+	asd_ddbsite_write_byte(asd_ha, 0,
+	       offsetof(struct asd_ddb_seq_shared, conn_not_active), 0xFF);
+	asd_ddbsite_write_byte(asd_ha, 0,
+	       offsetof(struct asd_ddb_seq_shared, phy_is_up), 0x00);
+	/* DDB 0 is reserved */
+	set_bit(0, asd_ha->hw_prof.ddb_bitmap);
+}
+
+/**
+ * asd_seq_setup_seqs -- setup and initialize central and link sequencers
+ * @asd_ha: pointer to host adapter structure
+ */
+static void asd_seq_setup_seqs(struct asd_ha_struct *asd_ha)
+{
+	int 		lseq;
+	u8		lseq_mask;
+
+	/* Initialize SCB sites. Done first to compute some values which
+	 * the rest of the init code depends on. */
+	asd_init_scb_sites(asd_ha);
+
+	/* Initialize CSEQ Scratch RAM registers. */
+	asd_init_cseq_scratch(asd_ha);
+
+	/* Initialize LmSEQ Scratch RAM registers. */
+	asd_init_lseq_scratch(asd_ha);
+
+	/* Initialize CSEQ CIO registers. */
+	asd_init_cseq_cio(asd_ha);
+
+	asd_init_ddb_0(asd_ha);
+
+	/* Initialize LmSEQ CIO registers. */
+	lseq_mask = asd_ha->hw_prof.enabled_phys;
+	for_each_sequencer(lseq_mask, lseq_mask, lseq)
+		asd_init_lseq_cio(asd_ha, lseq);
+	asd_post_init_cseq(asd_ha);
+}
+
+
+/**
+ * asd_seq_start_cseq -- start the central sequencer, CSEQ
+ * @asd_ha: pointer to host adapter structure
+ */
+static int asd_seq_start_cseq(struct asd_ha_struct *asd_ha)
+{
+	/* Set the ARP2 program count to the CSEQ idle loop address. */
+	asd_write_reg_word(asd_ha, CPRGMCNT, cseq_idle_loop);
+
+	/* Unpause the CSEQ and hand its result back to the caller. */
+	return asd_unpause_cseq(asd_ha);
+}
+
+/**
+ * asd_seq_start_lseq -- start a link sequencer
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: the link sequencer of interest
+ */
+static int asd_seq_start_lseq(struct asd_ha_struct *asd_ha, int lseq)
+{
+	/* Set the ARP2 program count to the LSEQ idle loop address. */
+	asd_write_reg_word(asd_ha, LmPRGMCNT(lseq), lseq_idle_loop);
+
+	/* Unpause the LmSEQ and hand its result back to the caller. */
+	return asd_seq_unpause_lseq(asd_ha, lseq);
+}
+
+/*
+ * Load and validate the sequencer firmware once.  A file that fails
+ * validation is released again so that a later call can retry.
+ */
+static int asd_request_firmware(struct asd_ha_struct *asd_ha)
+{
+	int err, i;
+	struct sequencer_file_header header, *hdr_ptr;
+	u32 csum = 0;
+	u16 *ptr_cseq_vecs, *ptr_lseq_vecs;
+
+	if (sequencer_fw)	/* already loaded */
+		return 0;
+	err = request_firmware(&sequencer_fw, SAS_RAZOR_SEQUENCER_FW_FILE,
+			       &asd_ha->pcidev->dev);
+	if (err)
+		return err;
+
+	err = -EINVAL;
+	if (sequencer_fw->size < sizeof(header)) {
+		asd_printk("Firmware file too short\n");
+		goto out_release;
+	}
+	hdr_ptr = (struct sequencer_file_header *)sequencer_fw->data;
+	header.csum = le32_to_cpu(hdr_ptr->csum);
+	header.cseq_table_offset = le32_to_cpu(hdr_ptr->cseq_table_offset);
+	header.cseq_table_size = le32_to_cpu(hdr_ptr->cseq_table_size);
+	header.lseq_table_offset = le32_to_cpu(hdr_ptr->lseq_table_offset);
+	header.lseq_table_size = le32_to_cpu(hdr_ptr->lseq_table_size);
+	header.cseq_code_offset = le32_to_cpu(hdr_ptr->cseq_code_offset);
+	header.cseq_code_size = le32_to_cpu(hdr_ptr->cseq_code_size);
+	header.lseq_code_offset = le32_to_cpu(hdr_ptr->lseq_code_offset);
+	header.lseq_code_size = le32_to_cpu(hdr_ptr->lseq_code_size);
+
+	for (i = sizeof(header.csum); i < sequencer_fw->size; i++)
+		csum += sequencer_fw->data[i];
+	if (csum != header.csum) {
+		asd_printk("Firmware file checksum mismatch\n");
+		goto out_release;
+	}
+	if (header.cseq_table_size != CSEQ_NUM_VECS ||
+	    header.lseq_table_size != LSEQ_NUM_VECS) {
+		asd_printk("Firmware file table size mismatch\n");
+		goto out_release;
+	}
+
+	/* The file passed the checks: only now publish pointers into it. */
+	sequencer_version = hdr_ptr->version;
+	ptr_cseq_vecs = (u16 *)&sequencer_fw->data[header.cseq_table_offset];
+	ptr_lseq_vecs = (u16 *)&sequencer_fw->data[header.lseq_table_offset];
+	mode2_task = le16_to_cpu(hdr_ptr->mode2_task);
+	cseq_idle_loop = le16_to_cpu(hdr_ptr->cseq_idle_loop);
+	lseq_idle_loop = le16_to_cpu(hdr_ptr->lseq_idle_loop);
+	for (i = 0; i < CSEQ_NUM_VECS; i++)
+		cseq_vecs[i] = le16_to_cpu(ptr_cseq_vecs[i]);
+	for (i = 0; i < LSEQ_NUM_VECS; i++)
+		lseq_vecs[i] = le16_to_cpu(ptr_lseq_vecs[i]);
+	cseq_code = &sequencer_fw->data[header.cseq_code_offset];
+	cseq_code_size = header.cseq_code_size;
+	lseq_code = &sequencer_fw->data[header.lseq_code_offset];
+	lseq_code_size = header.lseq_code_size;
+	return 0;
+
+out_release:
+	release_firmware(sequencer_fw);
+	sequencer_fw = NULL;
+	return err;
+}
+
+int asd_init_seqs(struct asd_ha_struct *asd_ha)
+{
+	int err;
+
+	err = asd_request_firmware(asd_ha);
+
+	if (err) {
+		asd_printk("Failed to load sequencer firmware file %s, error %d\n",
+			   SAS_RAZOR_SEQUENCER_FW_FILE, err);
+		return err;
+	}
+
+	asd_printk("using sequencer %s\n", sequencer_version);
+	err = asd_seq_download_seqs(asd_ha);
+	if (err) {
+		asd_printk("couldn't download sequencers for %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	asd_seq_setup_seqs(asd_ha);
+
+	return 0;
+}
+
+int asd_start_seqs(struct asd_ha_struct *asd_ha)
+{
+	int err;
+	u8  lseq_mask;
+	int lseq;
+
+	err = asd_seq_start_cseq(asd_ha);
+	if (err) {
+		asd_printk("couldn't start CSEQ for %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	lseq_mask = asd_ha->hw_prof.enabled_phys;
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		err = asd_seq_start_lseq(asd_ha, lseq);
+		if (err) {
+			asd_printk("coudln't start LSEQ %d for %s\n", lseq,
+				   pci_name(asd_ha->pcidev));
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * asd_update_port_links -- update port_map_by_links and phy_is_up
+ * @sas_phy: pointer to the phy which has been added to a port
+ *
+ * 1) When a link reset has completed and we got BYTES DMAED with a
+ * valid frame we call this function for that phy, to indicate that
+ * the phy is up, i.e. we update the phy_is_up in DDB 0.  The
+ * sequencer checks phy_is_up when pending SCBs are to be sent, and
+ * when an open address frame has been received.
+ *
+ * 2) When we know of ports, we call this function to update the map
+ * of phys participating in that port, i.e. we update the
+ * port_map_by_links in DDB 0.  When a HARD_RESET primitive has been
+ * received, the sequencer disables all phys in that port.
+ * port_map_by_links is also used as the conn_mask byte in the
+ * initiator/target port DDB.
+ */
+void asd_update_port_links(struct asd_sas_phy *sas_phy)
+{
+	struct asd_ha_struct *asd_ha = sas_phy->ha->lldd_ha;
+	const u8 phy_mask = (u8) sas_phy->port->phy_mask;
+	u8  phy_is_up;
+	u8  mask;
+	int i, err;
+
+	for_each_phy(phy_mask, mask, i)
+		asd_ddbsite_write_byte(asd_ha, 0,
+				       offsetof(struct asd_ddb_seq_shared,
+						port_map_by_links)+i,phy_mask);
+
+	for (i = 0; i < 12; i++) {
+		phy_is_up = asd_ddbsite_read_byte(asd_ha, 0,
+			  offsetof(struct asd_ddb_seq_shared, phy_is_up));
+		err = asd_ddbsite_update_byte(asd_ha, 0,
+				offsetof(struct asd_ddb_seq_shared, phy_is_up),
+				phy_is_up,
+				phy_is_up | phy_mask);
+		if (!err)
+			break;
+		else if (err == -EFAULT) {
+			asd_printk("phy_is_up: parity error in DDB 0\n");
+			break;
+		}
+	}
+
+	if (err)
+		asd_printk("couldn't update DDB 0:error:%d\n", err);
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_seq.h b/drivers/scsi/aic94xx/aic94xx_seq.h
new file mode 100644
index 0000000000000..42281c36153bb
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_seq.h
@@ -0,0 +1,70 @@
+/*
+ * Aic94xx SAS/SATA driver sequencer interface header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_SEQ_H_
+#define _AIC94XX_SEQ_H_
+
+#define CSEQ_NUM_VECS	3
+#define LSEQ_NUM_VECS	11
+
+#define SAS_RAZOR_SEQUENCER_FW_FILE "aic94xx-seq.fw"
+
+/* Note:  All quantities in the sequencer file are little endian */
+struct sequencer_file_header {
+	/* Sum of all the bytes of the sequencer file, excluding
+	 * these four bytes */
+	u32	csum;
+	/* numeric major version */
+	u32	major;
+	/* numeric minor version */
+	u32	minor;
+	/* version string printed by driver */
+	char	version[16];
+	u32	cseq_table_offset;	/* byte offset into the file */
+	u32	cseq_table_size;	/* in u16 vectors: CSEQ_NUM_VECS */
+	u32	lseq_table_offset;	/* byte offset into the file */
+	u32	lseq_table_size;	/* in u16 vectors: LSEQ_NUM_VECS */
+	u32	cseq_code_offset;	/* byte offset into the file */
+	u32	cseq_code_size;
+	u32	lseq_code_offset;	/* byte offset into the file */
+	u32	lseq_code_size;
+	u16	mode2_task;		/* LSEQ mode 2 return address */
+	u16	cseq_idle_loop;		/* written to CPRGMCNT */
+	u16	lseq_idle_loop;		/* written to LmPRGMCNT */
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+int asd_pause_cseq(struct asd_ha_struct *asd_ha);
+int asd_unpause_cseq(struct asd_ha_struct *asd_ha);
+int asd_pause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask);
+int asd_unpause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask);
+int asd_init_seqs(struct asd_ha_struct *asd_ha);
+int asd_start_seqs(struct asd_ha_struct *asd_ha);
+
+void asd_update_port_links(struct asd_sas_phy *phy);
+#endif
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c
new file mode 100644
index 0000000000000..285e70dae933d
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_task.c
@@ -0,0 +1,642 @@
+/*
+ * Aic94xx SAS/SATA Tasks
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/spinlock.h>
+#include "aic94xx.h"
+#include "aic94xx_sas.h"
+#include "aic94xx_hwi.h"
+
+static void asd_unbuild_ata_ascb(struct asd_ascb *a);
+static void asd_unbuild_smp_ascb(struct asd_ascb *a);
+static void asd_unbuild_ssp_ascb(struct asd_ascb *a);
+
+static inline void asd_can_dequeue(struct asd_ha_struct *asd_ha, int num)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags);
+	asd_ha->seq.can_queue += num;	/* hand @num slots back (cf. asd_can_queue) */
+	spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags);
+}
+
+/* Lookup table: the index is a PCI_DMA_* direction (task->data_dir below),
+ * the value is the matching DATA_DIR_* flag that is written into the SCB. */
+static const u8 data_dir_flags[] = {
+	[PCI_DMA_BIDIRECTIONAL] = DATA_DIR_BYRECIPIENT,	/* UNSPECIFIED */
+	[PCI_DMA_TODEVICE]      = DATA_DIR_OUT, /* OUTBOUND */
+	[PCI_DMA_FROMDEVICE]    = DATA_DIR_IN, /* INBOUND */
+	[PCI_DMA_NONE]          = DATA_DIR_NONE, /* NO TRANSFER */
+};
+
+static inline int asd_map_scatterlist(struct sas_task *task,
+				      struct sg_el *sg_arr,
+				      unsigned long gfp_flags)
+{
+	struct asd_ascb *ascb = task->lldd_task;
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct scatterlist *sc;
+	int num_sg, res;
+
+	if (task->data_dir == PCI_DMA_NONE)	/* no data: nothing to map */
+		return 0;
+
+	if (task->num_scatter == 0) {	/* task->scatter is one flat buffer */
+		void *p = task->scatter;
+		dma_addr_t dma = pci_map_single(asd_ha->pcidev, p,
+						task->total_xfer_len,
+						task->data_dir);
+		sg_arr[0].bus_addr = cpu_to_le64((u64)dma);
+		sg_arr[0].size = cpu_to_le32(task->total_xfer_len);
+		sg_arr[0].flags |= ASD_SG_EL_LIST_EOL;
+		return 0;
+	}
+
+	num_sg = pci_map_sg(asd_ha->pcidev, task->scatter, task->num_scatter,
+			    task->data_dir);
+	if (num_sg == 0)	/* treated as a mapping failure */
+		return -ENOMEM;
+
+	if (num_sg > 3) {	/* more than sg_arr[0..2] can describe */
+		int i;
+
+		ascb->sg_arr = asd_alloc_coherent(asd_ha,
+						  num_sg*sizeof(struct sg_el),
+						  gfp_flags);
+		if (!ascb->sg_arr) {
+			res = -ENOMEM;
+			goto err_unmap;
+		}
+		for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) {
+			struct sg_el *sg =	/* i-th entry of the full list */
+				&((struct sg_el *)ascb->sg_arr->vaddr)[i];
+			sg->bus_addr = cpu_to_le64((u64)sg_dma_address(sc));
+			sg->size = cpu_to_le32((u32)sg_dma_len(sc));
+			if (i == num_sg-1)
+				sg->flags |= ASD_SG_EL_LIST_EOL;
+		}
+
+		for (sc = task->scatter, i = 0; i < 2; i++, sc++) {
+			sg_arr[i].bus_addr =	/* first two also go in sg_arr */
+				cpu_to_le64((u64)sg_dma_address(sc));
+			sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc));
+		}
+		sg_arr[1].next_sg_offs = 2 * sizeof(*sg_arr);
+		sg_arr[1].flags |= ASD_SG_EL_LIST_EOS;
+
+		memset(&sg_arr[2], 0, sizeof(*sg_arr));	/* third slot: address of the full list */
+		sg_arr[2].bus_addr=cpu_to_le64((u64)ascb->sg_arr->dma_handle);
+	} else {
+		int i;
+		for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) {
+			sg_arr[i].bus_addr =
+				cpu_to_le64((u64)sg_dma_address(sc));
+			sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc));
+		}
+		sg_arr[i-1].flags |= ASD_SG_EL_LIST_EOL;	/* mark the last used entry */
+	}
+
+	return 0;
+err_unmap:
+	pci_unmap_sg(asd_ha->pcidev, task->scatter, task->num_scatter,
+		     task->data_dir);	/* undo pci_map_sg() above */
+	return res;
+}
+
+static inline void asd_unmap_scatterlist(struct asd_ascb *ascb)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_task *task = ascb->uldd_task;
+
+	if (task->data_dir == PCI_DMA_NONE)	/* nothing was mapped */
+		return;
+
+	if (task->num_scatter == 0) {	/* flat buffer: undo pci_map_single() */
+		dma_addr_t dma = (dma_addr_t)	/* read through the ssp_task view */
+		       le64_to_cpu(ascb->scb->ssp_task.sg_element[0].bus_addr);
+		pci_unmap_single(ascb->ha->pcidev, dma, task->total_xfer_len,
+				 task->data_dir);
+		return;
+	}
+
+	asd_free_coherent(asd_ha, ascb->sg_arr);	/* NOTE(review): sg_arr may be unset; confirm NULL-safe */
+	pci_unmap_sg(asd_ha->pcidev, task->scatter, task->num_scatter,
+		     task->data_dir);
+}
+
+/* ---------- Task complete tasklet ---------- */
+
+static void asd_get_response_tasklet(struct asd_ascb *ascb,
+				     struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_task *task = ascb->uldd_task;
+	struct task_status_struct *ts = &task->task_status;
+	unsigned long flags;
+	struct tc_resp_sb_struct {	/* layout overlaid on dl->status_block */
+		__le16 index_escb;
+		u8     len_lsb;
+		u8     flags;
+	} __attribute__ ((packed)) *resp_sb = (void *) dl->status_block;
+
+/* 	int  size   = ((resp_sb->flags & 7) << 8) | resp_sb->len_lsb; */
+	int  edb_id = ((resp_sb->flags & 0x70) >> 4)-1;	/* flag bits 6:4, minus one */
+	struct asd_ascb *escb;
+	struct asd_dma_tok *edb;
+	void *r;
+
+	spin_lock_irqsave(&asd_ha->seq.tc_index_lock, flags);
+	escb = asd_tc_index_find(&asd_ha->seq,
+				 (int)le16_to_cpu(resp_sb->index_escb));
+	spin_unlock_irqrestore(&asd_ha->seq.tc_index_lock, flags);
+
+	if (!escb) {
+		ASD_DPRINTK("Uh-oh! No escb for this dl?!\n");
+		return;	/* ts keeps whatever the caller already stored */
+	}
+
+	ts->buf_valid_size = 0;
+	edb = asd_ha->seq.edb_arr[edb_id + escb->edb_index];
+	r = edb->vaddr;	/* start of the buffer holding the response */
+	if (task->task_proto == SAS_PROTO_SSP) {
+		struct ssp_response_iu *iu =
+			r + 16 + sizeof(struct ssp_frame_hdr);
+
+		ts->residual = le32_to_cpu(*(__le32 *)r);	/* first 4 bytes of the buffer */
+		ts->resp = SAS_TASK_COMPLETE;
+		if (iu->datapres == 0)
+			ts->stat = iu->status;
+		else if (iu->datapres == 1)
+			ts->stat = iu->resp_data[3];
+		else if (iu->datapres == 2) {	/* sense data is copied out below */
+			ts->stat = SAM_CHECK_COND;
+			ts->buf_valid_size = min((u32) SAS_STATUS_BUF_SIZE,
+					 be32_to_cpu(iu->sense_data_len));
+			memcpy(ts->buf, iu->sense_data, ts->buf_valid_size);
+			if (iu->status != SAM_CHECK_COND) {
+				ASD_DPRINTK("device %llx sent sense data, but "
+					    "stat(0x%x) is not CHECK_CONDITION"
+					    "\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    ts->stat);
+			}
+		}
+	}  else {
+		struct ata_task_resp *resp = (void *) &ts->buf[0];
+
+		ts->residual = le32_to_cpu(*(__le32 *)r);
+
+		if (SAS_STATUS_BUF_SIZE >= sizeof(*resp)) {	/* only if ts->buf can hold *resp */
+			resp->frame_len = le16_to_cpu(*(__le16 *)(r+6));
+			memcpy(&resp->ending_fis[0], r+16, 24);
+			ts->buf_valid_size = sizeof(*resp);
+		}
+	}
+
+	asd_invalidate_edb(escb, edb_id);
+}
+
+static void asd_task_tasklet_complete(struct asd_ascb *ascb,
+				      struct done_list_struct *dl)
+{
+	struct sas_task *task = ascb->uldd_task;
+	struct task_status_struct *ts = &task->task_status;
+	unsigned long flags;
+	u8 opcode = dl->opcode;
+
+	asd_can_dequeue(ascb->ha, 1);	/* give back the slot taken in asd_execute_task */
+
+Again:
+	switch (opcode) {	/* translate the done-list opcode into ts->resp / ts->stat */
+	case TC_NO_ERROR:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAM_GOOD;
+		break;
+	case TC_UNDERRUN:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_UNDERRUN;
+		ts->residual = le32_to_cpu(*(__le32 *)dl->status_block);
+		break;
+	case TC_OVERRUN:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		ts->residual = 0;
+		break;
+	case TC_SSP_RESP:
+	case TC_ATA_RESP:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_PROTO_RESPONSE;
+		asd_get_response_tasklet(ascb, dl);	/* may overwrite ts->resp / ts->stat */
+		break;
+	case TF_OPEN_REJECT:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_OPEN_REJECT;
+		if (dl->status_block[1] & 2)
+			ts->open_rej_reason = 1 + dl->status_block[2];
+		else if (dl->status_block[1] & 1)
+			ts->open_rej_reason = (dl->status_block[2] >> 4)+10;
+		else
+			ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case TF_OPEN_TO:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case TF_PHY_DOWN:
+	case TU_PHY_DOWN:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_PHY_DOWN;
+		break;
+	case TI_PHY_DOWN:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_PHY_DOWN;
+		break;
+	case TI_BREAK:
+	case TI_PROTO_ERR:
+	case TI_NAK:
+	case TI_ACK_NAK_TO:
+	case TF_SMP_XMIT_RCV_ERR:
+	case TC_ATA_R_ERR_RECV:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_INTERRUPTED;
+		break;
+	case TF_BREAK:
+	case TU_BREAK:
+	case TU_ACK_NAK_TO:
+	case TF_SMPRSP_TO:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	case TF_NAK_RECV:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case TA_I_T_NEXUS_LOSS:
+		opcode = dl->status_block[0];	/* re-dispatch on the opcode stored there */
+		goto Again;
+		break;	/* not reached */
+	case TF_INV_CONN_HANDLE:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEVICE_UNKNOWN;
+		break;
+	case TF_REQUESTED_N_PENDING:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_PENDING;
+		break;
+	case TC_TASK_CLEARED:
+	case TA_ON_REQ:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_ABORTED_TASK;
+		break;
+
+	case TF_NO_SMP_CONN:
+	case TF_TMF_NO_CTX:
+	case TF_TMF_NO_TAG:
+	case TF_TMF_TAG_FREE:
+	case TF_TMF_TASK_DONE:
+	case TF_TMF_NO_CONN_HANDLE:
+	case TF_IRTT_TO:
+	case TF_IU_SHORT:
+	case TF_DATA_OFFS_ERR:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+
+	case TC_LINK_ADM_RESP:
+	case TC_CONTROL_PHY:
+	case TC_RESUME:
+	case TC_PARTIAL_SG_LIST:
+	default:	/* only logged: ts->resp and ts->stat are not set here */
+		ASD_DPRINTK("%s: dl opcode: 0x%x?\n", __FUNCTION__, opcode);
+		break;
+	}
+
+	switch (task->task_proto) {	/* undo what the matching asd_build_*_ascb() mapped */
+	case SATA_PROTO:
+	case SAS_PROTO_STP:
+		asd_unbuild_ata_ascb(ascb);
+		break;
+	case SAS_PROTO_SMP:
+		asd_unbuild_smp_ascb(ascb);
+		break;
+	case SAS_PROTO_SSP:
+		asd_unbuild_ssp_ascb(ascb);	/* fallthrough */
+	default:
+		break;
+	}
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	task->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	task->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (unlikely((task->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		ASD_DPRINTK("task 0x%p done with opcode 0x%x resp 0x%x "
+			    "stat 0x%x but aborted by upper layer!\n",
+			    task, opcode, ts->resp, ts->stat);
+		complete(&ascb->completion);	/* signal the waiter; ascb is not freed here */
+	} else {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		task->lldd_task = NULL;	/* detach before the ascb is freed */
+		asd_ascb_free(ascb);
+		mb();
+		task->task_done(task);
+	}
+}
+
+/* ---------- ATA ---------- */
+
+static int asd_build_ata_ascb(struct asd_ascb *ascb, struct sas_task *task,
+			      unsigned long gfp_flags)
+{
+	struct domain_device *dev = task->dev;
+	struct scb *scb;
+	u8     flags;
+	int    res = 0;	/* stays 0 unless mapping the data below fails */
+
+	scb = ascb->scb;
+
+	if (unlikely(task->ata_task.device_control_reg_update))
+		scb->header.opcode = CONTROL_ATA_DEV;
+	else if (dev->sata_dev.command_set == ATA_COMMAND_SET)
+		scb->header.opcode = INITIATE_ATA_TASK;
+	else
+		scb->header.opcode = INITIATE_ATAPI_TASK;
+
+	scb->ata_task.proto_conn_rate = (1 << 5); /* STP */
+	if (dev->port->oob_mode == SAS_OOB_MODE)
+		scb->ata_task.proto_conn_rate |= dev->linkrate;
+
+	scb->ata_task.total_xfer_len = cpu_to_le32(task->total_xfer_len);
+	scb->ata_task.fis = task->ata_task.fis;	/* struct copy; fields patched below */
+	scb->ata_task.fis.fis_type = 0x27;
+	if (likely(!task->ata_task.device_control_reg_update))
+		scb->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
+	scb->ata_task.fis.flags &= 0xF0; /* PM_PORT field shall be 0 */
+	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET)
+		memcpy(scb->ata_task.atapi_packet, task->ata_task.atapi_packet,
+		       16);
+	scb->ata_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->ata_task.conn_handle = cpu_to_le16(
+		(u16)(unsigned long)dev->lldd_dev);	/* lldd_dev carries the handle value */
+
+	if (likely(!task->ata_task.device_control_reg_update)) {
+		flags = 0;	/* first use: ata_flags */
+		if (task->ata_task.dma_xfer)
+			flags |= DATA_XFER_MODE_DMA;
+		if (task->ata_task.use_ncq &&
+		    dev->sata_dev.command_set != ATAPI_COMMAND_SET)
+			flags |= ATA_Q_TYPE_NCQ;
+		flags |= data_dir_flags[task->data_dir];
+		scb->ata_task.ata_flags = flags;
+
+		scb->ata_task.retry_count = task->ata_task.retry_count;
+
+		flags = 0;	/* second use: affiliation policy flags */
+		if (task->ata_task.set_affil_pol)
+			flags |= SET_AFFIL_POLICY;
+		if (task->ata_task.stp_affil_pol)
+			flags |= STP_AFFIL_POLICY;
+		scb->ata_task.flags = flags;
+	}
+	ascb->tasklet_complete = asd_task_tasklet_complete;
+
+	if (likely(!task->ata_task.device_control_reg_update))	/* data is mapped only for non-control tasks */
+		res = asd_map_scatterlist(task, scb->ata_task.sg_element,
+					  gfp_flags);
+
+	return res;
+}
+
+static void asd_unbuild_ata_ascb(struct asd_ascb *a)
+{
+	asd_unmap_scatterlist(a);	/* only the data mapping needs undoing */
+}
+
+/* ---------- SMP ---------- */
+
+/* Fill in an INITIATE_SMP_TASK SCB for @task and DMA-map its request and
+ * response buffers.  @gfp_flags is not used here.  Always returns 0. */
+static int asd_build_smp_ascb(struct asd_ascb *ascb, struct sas_task *task,
+			      unsigned long gfp_flags)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct domain_device *dev = task->dev;
+	struct scb *scb = ascb->scb;
+
+	pci_map_sg(asd_ha->pcidev, &task->smp_task.smp_req, 1,
+		   PCI_DMA_TODEVICE);	/* request: memory -> device */
+	pci_map_sg(asd_ha->pcidev, &task->smp_task.smp_resp, 1,
+		   PCI_DMA_FROMDEVICE);	/* response: device -> memory */
+
+	scb->header.opcode = INITIATE_SMP_TASK;
+
+	scb->smp_task.proto_conn_rate = dev->linkrate;
+
+	scb->smp_task.smp_req.bus_addr =
+		cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req));
+	scb->smp_task.smp_req.size =	/* 4 bytes less than the mapped length */
+		cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_req)-4);
+
+	scb->smp_task.smp_resp.bus_addr =
+		cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_resp));
+	scb->smp_task.smp_resp.size =	/* 4 bytes less than the mapped length */
+		cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_resp)-4);
+
+	scb->smp_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->smp_task.conn_handle = cpu_to_le16((u16)
+						(unsigned long)dev->lldd_dev);
+
+	ascb->tasklet_complete = asd_task_tasklet_complete;
+
+	return 0;
+}
+
+static void asd_unbuild_smp_ascb(struct asd_ascb *a)
+{
+	struct sas_task *task = a->uldd_task;
+
+	BUG_ON(!task);
+	pci_unmap_sg(a->ha->pcidev, &task->smp_task.smp_req, 1,
+		     PCI_DMA_TODEVICE);	/* must match the direction used to map */
+	pci_unmap_sg(a->ha->pcidev, &task->smp_task.smp_resp, 1,
+		     PCI_DMA_FROMDEVICE);
+}
+
+/* ---------- SSP ---------- */
+
+static int asd_build_ssp_ascb(struct asd_ascb *ascb, struct sas_task *task,
+			      unsigned long gfp_flags)
+{
+	struct domain_device *dev = task->dev;
+	struct scb *scb;
+	int    res = 0;
+
+	scb = ascb->scb;
+
+	scb->header.opcode = INITIATE_SSP_TASK;
+
+	scb->ssp_task.proto_conn_rate  = (1 << 4); /* SSP */
+	scb->ssp_task.proto_conn_rate |= dev->linkrate;
+	scb->ssp_task.total_xfer_len = cpu_to_le32(task->total_xfer_len);
+	scb->ssp_task.ssp_frame.frame_type = SSP_DATA;
+	memcpy(scb->ssp_task.ssp_frame.hashed_dest_addr, dev->hashed_sas_addr,
+	       HASHED_SAS_ADDR_SIZE);
+	memcpy(scb->ssp_task.ssp_frame.hashed_src_addr,
+	       dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	scb->ssp_task.ssp_frame.tptt = cpu_to_be16(0xFFFF);	/* frame fields are big endian */
+
+	memcpy(scb->ssp_task.ssp_cmd.lun, task->ssp_task.LUN, 8);
+	if (task->ssp_task.enable_first_burst)
+		scb->ssp_task.ssp_cmd.efb_prio_attr |= EFB_MASK;
+	scb->ssp_task.ssp_cmd.efb_prio_attr |= (task->ssp_task.task_prio << 3);
+	scb->ssp_task.ssp_cmd.efb_prio_attr |= (task->ssp_task.task_attr & 7);
+	memcpy(scb->ssp_task.ssp_cmd.cdb, task->ssp_task.cdb, 16);
+
+	scb->ssp_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->ssp_task.conn_handle = cpu_to_le16(
+		(u16)(unsigned long)dev->lldd_dev);
+	scb->ssp_task.data_dir = data_dir_flags[task->data_dir];
+	scb->ssp_task.retry_count = scb->ssp_task.retry_count; /* NOTE(review): self-assignment, a no-op; confirm the intended source */
+
+	ascb->tasklet_complete = asd_task_tasklet_complete;
+
+	res = asd_map_scatterlist(task, scb->ssp_task.sg_element, gfp_flags);	/* 0 or -ENOMEM */
+
+	return res;
+}
+
+static void asd_unbuild_ssp_ascb(struct asd_ascb *a)
+{
+	asd_unmap_scatterlist(a);	/* only the data mapping needs undoing */
+}
+
+/* ---------- Execute Task ---------- */
+
+static inline int asd_can_queue(struct asd_ha_struct *asd_ha, int num)
+{
+	unsigned long irq_flags;
+	int rc = -SAS_QUEUE_FULL;	/* changed to 0 only if @num slots fit */
+
+	spin_lock_irqsave(&asd_ha->seq.pend_q_lock, irq_flags);
+	if ((asd_ha->seq.can_queue - num) >= 0) {
+		asd_ha->seq.can_queue -= num;	/* reserve the slots */
+		rc = 0;
+	}
+	spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, irq_flags);
+
+	return rc;
+}
+
+int asd_execute_task(struct sas_task *task, const int num,
+		     unsigned long gfp_flags)
+{
+	int res = 0;
+	LIST_HEAD(alist);	/* temporary head used to walk the ascb ring */
+	struct sas_task *t = task;
+	struct asd_ascb *ascb = NULL, *a;
+	struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha;
+
+	res = asd_can_queue(asd_ha, num);	/* reserve @num slots or bail out */
+	if (res)
+		return res;
+
+	res = num;	/* in: how many ascbs are wanted */
+	ascb = asd_ascb_alloc_list(asd_ha, &res, gfp_flags);
+	if (res) {	/* nonzero afterwards is treated as failure */
+		res = -ENOMEM;
+		goto out_err;
+	}
+
+	__list_add(&alist, ascb->list.prev, &ascb->list);	/* insert alist just before ascb */
+	list_for_each_entry(a, &alist, list) {	/* pair ascbs with tasks, in order */
+		a->uldd_task = t;
+		t->lldd_task = a;
+		t = list_entry(t->list.next, struct sas_task, list);
+	}
+	list_for_each_entry(a, &alist, list) {
+		t = a->uldd_task;
+		a->uldd_timer = 1;
+		if (t->task_proto & SAS_PROTO_STP)	/* any STP bit: handle as plain STP */
+			t->task_proto = SAS_PROTO_STP;
+		switch (t->task_proto) {
+		case SATA_PROTO:
+		case SAS_PROTO_STP:
+			res = asd_build_ata_ascb(a, t, gfp_flags);
+			break;
+		case SAS_PROTO_SMP:
+			res = asd_build_smp_ascb(a, t, gfp_flags);
+			break;
+		case SAS_PROTO_SSP:
+			res = asd_build_ssp_ascb(a, t, gfp_flags);
+			break;
+		default:
+			asd_printk("unknown sas_task proto: 0x%x\n",
+				   t->task_proto);
+			res = -ENOMEM;	/* unknown protocol is reported as -ENOMEM */
+			break;
+		}
+		if (res)
+			goto out_err_unmap;	/* 'a' is the ascb that failed to build */
+	}
+	list_del_init(&alist);	/* drop the temporary head again */
+
+	res = asd_post_ascb_list(asd_ha, ascb, num);
+	if (unlikely(res)) {
+		a = NULL;	/* no stop marker: the loop below unbuilds every ascb */
+		__list_add(&alist, ascb->list.prev, &ascb->list);
+		goto out_err_unmap;
+	}
+
+	return 0;
+out_err_unmap:
+	{
+		struct asd_ascb *b = a;	/* first ascb that was NOT built, or NULL */
+		list_for_each_entry(a, &alist, list) {
+			if (a == b)
+				break;
+			t = a->uldd_task;
+			switch (t->task_proto) {
+			case SATA_PROTO:
+			case SAS_PROTO_STP:
+				asd_unbuild_ata_ascb(a);
+				break;
+			case SAS_PROTO_SMP:
+				asd_unbuild_smp_ascb(a);
+				break;
+			case SAS_PROTO_SSP:
+				asd_unbuild_ssp_ascb(a);	/* fallthrough */
+			default:
+				break;
+			}
+			t->lldd_task = NULL;	/* NOTE(review): not cleared for b and later tasks */
+		}
+	}
+	list_del_init(&alist);
+out_err:
+	if (ascb)
+		asd_ascb_free_list(ascb);
+	asd_can_dequeue(asd_ha, num);	/* release the slots reserved above */
+	return res;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_tmf.c b/drivers/scsi/aic94xx/aic94xx_tmf.c
new file mode 100644
index 0000000000000..61234384503b6
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_tmf.c
@@ -0,0 +1,636 @@
+/*
+ * Aic94xx Task Management Functions
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/spinlock.h>
+#include "aic94xx.h"
+#include "aic94xx_sas.h"
+#include "aic94xx_hwi.h"
+
+/* ---------- Internal enqueue ---------- */
+
+static int asd_enqueue_internal(struct asd_ascb *ascb,
+		void (*tasklet_complete)(struct asd_ascb *,
+					 struct done_list_struct *),
+				void (*timed_out)(unsigned long))
+{
+	int res;
+
+	ascb->tasklet_complete = tasklet_complete;
+	ascb->uldd_timer = 1;
+
+	ascb->timer.data = (unsigned long) ascb;	/* handed to @timed_out as its argument */
+	ascb->timer.function = timed_out;
+	ascb->timer.expires = jiffies + AIC94XX_SCB_TIMEOUT;
+
+	add_timer(&ascb->timer);	/* armed before the SCB is posted */
+
+	res = asd_post_ascb_list(ascb->ha, ascb, 1);
+	if (unlikely(res))
+		del_timer(&ascb->timer);	/* posting failed: cancel the timeout */
+	return res;
+}
+
+static inline void asd_timedout_common(unsigned long data)
+{
+	struct asd_ascb *expired = (struct asd_ascb *) data;
+	struct asd_seq_data *seq_data = &expired->ha->seq;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&seq_data->pend_q_lock, irq_flags);
+	list_del_init(&expired->list);	/* unlink the timed-out ascb */
+	seq_data->pending--;
+	spin_unlock_irqrestore(&seq_data->pend_q_lock, irq_flags);
+}
+
+/* ---------- CLEAR NEXUS ---------- */
+
+static void asd_clear_nexus_tasklet_complete(struct asd_ascb *ascb,
+					     struct done_list_struct *dl)
+{
+	ASD_DPRINTK("%s: here\n", __FUNCTION__);
+	if (!del_timer(&ascb->timer)) {	/* timer was no longer pending */
+		ASD_DPRINTK("%s: couldn't delete timer\n", __FUNCTION__);
+		return;	/* presumably left to asd_clear_nexus_timedout; confirm */
+	}
+	ASD_DPRINTK("%s: opcode: 0x%x\n", __FUNCTION__, dl->opcode);
+	ascb->uldd_task = (void *) (unsigned long) dl->opcode;	/* result is read back by CLEAR_NEXUS_POST */
+	complete(&ascb->completion);
+}
+
+static void asd_clear_nexus_timedout(unsigned long data)
+{
+	struct asd_ascb *ascb = (void *) data;	/* set as timer.data in asd_enqueue_internal */
+
+	ASD_DPRINTK("%s: here\n", __FUNCTION__);
+	asd_timedout_common(data);
+	ascb->uldd_task = (void *) TMF_RESP_FUNC_FAILED;	/* result is read back by CLEAR_NEXUS_POST */
+	complete(&ascb->completion);
+}
+
+#define CLEAR_NEXUS_PRE /* expects asd_ha, ascb, scb and res in scope */ \
+	ASD_DPRINTK("%s: PRE\n", __FUNCTION__); \
+        res = 1;                \
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL); \
+	if (!ascb)              \
+		return -ENOMEM; /* returns from the enclosing function */ \
+                                \
+	scb = ascb->scb;        \
+	scb->header.opcode = CLEAR_NEXUS
+
+#define CLEAR_NEXUS_POST /* always returns: must be the last statement */ \
+	ASD_DPRINTK("%s: POST\n", __FUNCTION__); \
+	res = asd_enqueue_internal(ascb, asd_clear_nexus_tasklet_complete, \
+				   asd_clear_nexus_timedout);              \
+	if (res)                \
+		goto out_err;   \
+	ASD_DPRINTK("%s: clear nexus posted, waiting...\n", __FUNCTION__); \
+	wait_for_completion(&ascb->completion); \
+	res = (int) (unsigned long) ascb->uldd_task; /* set by the callbacks */ \
+	if (res == TC_NO_ERROR) \
+		res = TMF_RESP_FUNC_COMPLETE;   \
+out_err:                        \
+	asd_ascb_free(ascb);    \
+	return res
+
+int asd_clear_nexus_ha(struct sas_ha_struct *sas_ha)
+{
+	struct asd_ha_struct *asd_ha = sas_ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+
+	CLEAR_NEXUS_PRE;	/* allocates ascb/scb; returns -ENOMEM on failure */
+	scb->clear_nexus.nexus = NEXUS_ADAPTER;
+	CLEAR_NEXUS_POST;	/* posts the SCB, waits, frees ascb, returns res */
+}
+
+int asd_clear_nexus_port(struct asd_sas_port *port)
+{
+	struct asd_ha_struct *asd_ha = port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+
+	CLEAR_NEXUS_PRE;	/* allocates ascb/scb; returns -ENOMEM on failure */
+	scb->clear_nexus.nexus = NEXUS_PORT;
+	scb->clear_nexus.conn_mask = port->phy_mask;
+	CLEAR_NEXUS_POST;	/* posts the SCB, waits, frees ascb, returns res */
+}
+
+#if 0
+static int asd_clear_nexus_I_T(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+
+	CLEAR_NEXUS_PRE;	/* allocates ascb/scb; returns -ENOMEM on failure */
+	scb->clear_nexus.nexus = NEXUS_I_T;
+	scb->clear_nexus.flags = SEND_Q | EXEC_Q | NOTINQ;
+	if (dev->tproto)
+		scb->clear_nexus.flags |= SUSPEND_TX;
+	scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+						   dev->lldd_dev);
+	CLEAR_NEXUS_POST;	/* posts the SCB, waits, frees ascb, returns res */
+}
+#endif
+
+static int asd_clear_nexus_I_T_L(struct domain_device *dev, u8 *lun)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+
+	CLEAR_NEXUS_PRE;	/* allocates ascb/scb; returns -ENOMEM on failure */
+	scb->clear_nexus.nexus = NEXUS_I_T_L;
+	scb->clear_nexus.flags = SEND_Q | EXEC_Q | NOTINQ;
+	if (dev->tproto)
+		scb->clear_nexus.flags |= SUSPEND_TX;
+	memcpy(scb->clear_nexus.ssp_task.lun, lun, 8);	/* @lun must point at 8 bytes */
+	scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+						   dev->lldd_dev);
+	CLEAR_NEXUS_POST;	/* posts the SCB, waits, frees ascb, returns res */
+}
+
+static int asd_clear_nexus_tag(struct sas_task *task)
+{
+	struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha;
+	struct asd_ascb *tascb = task->lldd_task;	/* ascb of the task being cleared */
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+
+	CLEAR_NEXUS_PRE;	/* allocates ascb/scb; returns -ENOMEM on failure */
+	scb->clear_nexus.nexus = NEXUS_TAG;
+	memcpy(scb->clear_nexus.ssp_task.lun, task->ssp_task.LUN, 8);
+	scb->clear_nexus.ssp_task.tag = tascb->tag;	/* copied as stored, no byte swap */
+	if (task->dev->tproto)
+		scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+							  task->dev->lldd_dev);
+	CLEAR_NEXUS_POST;	/* posts the SCB, waits, frees ascb, returns res */
+}
+
+static int asd_clear_nexus_index(struct sas_task *task)
+{
+	struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha;
+	struct asd_ascb *tascb = task->lldd_task;	/* ascb of the task being cleared */
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+
+	CLEAR_NEXUS_PRE;	/* allocates ascb/scb; returns -ENOMEM on failure */
+	scb->clear_nexus.nexus = NEXUS_TRANS_CX;
+	if (task->dev->tproto)
+		scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+							  task->dev->lldd_dev);
+	scb->clear_nexus.index = cpu_to_le16(tascb->tc_index);
+	CLEAR_NEXUS_POST;	/* posts the SCB, waits, frees ascb, returns res */
+}
+
+/* ---------- TMFs ---------- */
+
+static void asd_tmf_timedout(unsigned long data)
+{
+	struct asd_ascb *ascb = (void *) data;	/* set as timer.data in asd_enqueue_internal */
+
+	ASD_DPRINTK("tmf timed out\n");
+	asd_timedout_common(data);
+	ascb->uldd_task = (void *) TMF_RESP_FUNC_FAILED;	/* result for the waiter */
+	complete(&ascb->completion);
+}
+
+static int asd_get_tmf_resp_tasklet(struct asd_ascb *ascb,
+				    struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	unsigned long flags;
+	struct tc_resp_sb_struct {	/* layout overlaid on dl->status_block */
+		__le16 index_escb;
+		u8     len_lsb;
+		u8     flags;
+	} __attribute__ ((packed)) *resp_sb = (void *) dl->status_block;
+
+	int  edb_id = ((resp_sb->flags & 0x70) >> 4)-1;	/* flag bits 6:4, minus one */
+	struct asd_ascb *escb;
+	struct asd_dma_tok *edb;
+	struct ssp_frame_hdr *fh;
+	struct ssp_response_iu   *ru;
+	int res = TMF_RESP_FUNC_FAILED;	/* returned if no escb is found */
+
+	ASD_DPRINTK("tmf resp tasklet\n");
+
+	spin_lock_irqsave(&asd_ha->seq.tc_index_lock, flags);
+	escb = asd_tc_index_find(&asd_ha->seq,
+				 (int)le16_to_cpu(resp_sb->index_escb));
+	spin_unlock_irqrestore(&asd_ha->seq.tc_index_lock, flags);
+
+	if (!escb) {
+		ASD_DPRINTK("Uh-oh! No escb for this dl?!\n");
+		return res;
+	}
+
+	edb = asd_ha->seq.edb_arr[edb_id + escb->edb_index];
+	ascb->tag = *(__be16 *)(edb->vaddr+4);	/* stored as read, no byte swap */
+	fh = edb->vaddr + 16;	/* only referenced by the disabled code below */
+	ru = edb->vaddr + 16 + sizeof(*fh);
+	res = ru->status;
+	if (ru->datapres == 1)	  /* Response data present */
+		res = ru->resp_data[3];
+#if 0
+	ascb->tag = fh->tag;
+#endif
+	ascb->tag_valid = 1;
+
+	asd_invalidate_edb(escb, edb_id);
+	return res;
+}
+
+static void asd_tmf_tasklet_complete(struct asd_ascb *ascb,
+				     struct done_list_struct *dl)
+{
+	if (!del_timer(&ascb->timer))	/* timer was no longer pending */
+		return;
+
+	ASD_DPRINTK("tmf tasklet complete\n");
+
+	if (dl->opcode == TC_SSP_RESP)
+		ascb->uldd_task = (void *) (unsigned long)
+			asd_get_tmf_resp_tasklet(ascb, dl);
+	else	/* other opcodes are reported as 0xFF00 + opcode */
+		ascb->uldd_task = (void *) 0xFF00 + (unsigned long) dl->opcode;
+
+	complete(&ascb->completion);
+}
+
+static inline int asd_clear_nexus(struct sas_task *task)
+{
+	int res = TMF_RESP_FUNC_FAILED;
+	struct asd_ascb *tascb = task->lldd_task;
+	unsigned long flags;
+
+	ASD_DPRINTK("task not done, clearing nexus\n");
+	if (tascb->tag_valid)	/* a tag was recorded: clear by tag */
+		res = asd_clear_nexus_tag(task);
+	else	/* otherwise clear by tc_index */
+		res = asd_clear_nexus_index(task);
+	wait_for_completion_timeout(&tascb->completion,
+				    AIC94XX_SCB_TIMEOUT);	/* return value is ignored */
+	ASD_DPRINTK("came back from clear nexus\n");
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE)
+		res = TMF_RESP_FUNC_COMPLETE;	/* overrides the clear-nexus result */
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	return res;
+}
+
+/**
+ * asd_abort_task -- ABORT TASK TMF
+ * @task: the task to be aborted
+ *
+ * Before calling ABORT TASK the task state flags should be ORed with
+ * SAS_TASK_STATE_ABORTED (unless SAS_TASK_STATE_DONE is set) under
+ * the task_state_lock IRQ spinlock, then ABORT TASK *must* be called.
+ *
+ * Implements the ABORT TASK TMF, I_T_L_Q nexus.
+ * Returns: SAS TMF responses (see sas_task.h),
+ *          -ENOMEM,
+ *          -SAS_QUEUE_FULL.
+ *
+ * When ABORT TASK returns, the caller of ABORT TASK checks first the
+ * task->task_state_flags, and then the return value of ABORT TASK.
+ *
+ * If the task has task state bit SAS_TASK_STATE_DONE set, then the
+ * task was completed successfully prior to it being aborted.  The
+ * caller of ABORT TASK has responsibility to call task->task_done()
+ * xor free the task, depending on their framework.  Note: the code
+ * below returns TMF_RESP_FUNC_COMPLETE whenever it sees this bit set.
+ *
+ * Else the SAS_TASK_STATE_DONE bit is not set,
+ * 	If the return code is TMF_RESP_FUNC_COMPLETE, then
+ * 		the task was aborted successfully.  The caller of
+ * 		ABORT TASK has responsibility to call task->task_done()
+ *              to finish the task, xor free the task depending on their
+ *		framework.
+ *	else
+ * 		the ABORT TASK returned some kind of error. The task
+ *              was _not_ cancelled.  Nothing can be assumed.
+ *		The caller of ABORT TASK may wish to retry.
+ */
+int asd_abort_task(struct sas_task *task)
+{
+	struct asd_ascb *tascb = task->lldd_task;	/* ascb of the task to abort */
+	struct asd_ha_struct *asd_ha = tascb->ha;
+	int res = 1;	/* first used as the count for asd_ascb_alloc_list() */
+	unsigned long flags;
+	struct asd_ascb *ascb = NULL;	/* ascb that will carry the ABORT TASK scb */
+	struct scb *scb;
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		res = TMF_RESP_FUNC_COMPLETE;
+		ASD_DPRINTK("%s: task 0x%p done\n", __FUNCTION__, task);
+		goto out_done;	/* NOTE(review): ascb is still NULL on this path */
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL);
+	if (!ascb)
+		return -ENOMEM;
+	scb = ascb->scb;
+
+	scb->header.opcode = ABORT_TASK;
+
+	switch (task->task_proto) {
+	case SATA_PROTO:
+	case SAS_PROTO_STP:
+		scb->abort_task.proto_conn_rate = (1 << 5); /* STP */
+		break;
+	case SAS_PROTO_SSP:
+		scb->abort_task.proto_conn_rate  = (1 << 4); /* SSP */
+		scb->abort_task.proto_conn_rate |= task->dev->linkrate;
+		break;
+	case SAS_PROTO_SMP:
+		break;
+	default:
+		break;
+	}
+
+	if (task->task_proto == SAS_PROTO_SSP) {	/* frame fields are filled in for SSP only */
+		scb->abort_task.ssp_frame.frame_type = SSP_TASK;
+		memcpy(scb->abort_task.ssp_frame.hashed_dest_addr,
+		       task->dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+		memcpy(scb->abort_task.ssp_frame.hashed_src_addr,
+		       task->dev->port->ha->hashed_sas_addr,
+		       HASHED_SAS_ADDR_SIZE);
+		scb->abort_task.ssp_frame.tptt = cpu_to_be16(0xFFFF);
+
+		memcpy(scb->abort_task.ssp_task.lun, task->ssp_task.LUN, 8);
+		scb->abort_task.ssp_task.tmf = TMF_ABORT_TASK;
+		scb->abort_task.ssp_task.tag = cpu_to_be16(0xFFFF);
+	}
+
+	scb->abort_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->abort_task.conn_handle = cpu_to_le16(
+		(u16)(unsigned long)task->dev->lldd_dev);
+	scb->abort_task.retry_count = 1;
+	scb->abort_task.index = cpu_to_le16((u16)tascb->tc_index);	/* identifies the task to abort */
+	scb->abort_task.itnl_to = cpu_to_le16(ITNL_TIMEOUT_CONST);
+
+	res = asd_enqueue_internal(ascb, asd_tmf_tasklet_complete,
+				   asd_tmf_timedout);
+	if (res)
+		goto out;
+	wait_for_completion(&ascb->completion);
+	ASD_DPRINTK("tmf came back\n");
+
+	res = (int) (unsigned long) ascb->uldd_task;	/* set by the tasklet or the timeout */
+	tascb->tag = ascb->tag;
+	tascb->tag_valid = ascb->tag_valid;
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		res = TMF_RESP_FUNC_COMPLETE;
+		ASD_DPRINTK("%s: task 0x%p done\n", __FUNCTION__, task);
+		goto out_done;
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	switch (res) {
+	/* The task to be aborted has been sent to the device.
+	 * We got a Response IU for the ABORT TASK TMF. */
+	case TC_NO_ERROR + 0xFF00:
+	case TMF_RESP_FUNC_COMPLETE:
+	case TMF_RESP_FUNC_FAILED:
+		res = asd_clear_nexus(task);
+		break;
+	case TMF_RESP_INVALID_FRAME:
+	case TMF_RESP_OVERLAPPED_TAG:
+	case TMF_RESP_FUNC_ESUPP:
+	case TMF_RESP_NO_LUN:
+		goto out_done; break;
+	}
+	/* In the following we assume that the managing layer
+	 * will _never_ make a mistake, when issuing ABORT TASK.
+	 */
+	switch (res) {	/* res may already have been replaced by asd_clear_nexus() above */
+	default:
+		res = asd_clear_nexus(task);
+		/* fallthrough */
+	case TC_NO_ERROR + 0xFF00:
+	case TMF_RESP_FUNC_COMPLETE:
+		break;
+	/* The task hasn't been sent to the device xor we never got
+	 * a (sane) Response IU for the ABORT TASK TMF.
+	 */
+	case TF_NAK_RECV + 0xFF00:
+		res = TMF_RESP_INVALID_FRAME;
+		break;
+	case TF_TMF_TASK_DONE + 0xFF00:	/* done but not reported yet */
+		res = TMF_RESP_FUNC_FAILED;
+		wait_for_completion_timeout(&tascb->completion,
+					    AIC94XX_SCB_TIMEOUT);
+		spin_lock_irqsave(&task->task_state_lock, flags);
+		if (task->task_state_flags & SAS_TASK_STATE_DONE)
+			res = TMF_RESP_FUNC_COMPLETE;
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		goto out_done;
+	case TF_TMF_NO_TAG + 0xFF00:
+	case TF_TMF_TAG_FREE + 0xFF00: /* the tag is in the free list */
+	case TF_TMF_NO_CONN_HANDLE + 0xFF00: /* no such device */
+		res = TMF_RESP_FUNC_COMPLETE;
+		goto out_done;
+	case TF_TMF_NO_CTX + 0xFF00: /* not in seq, or proto != SSP */
+		res = TMF_RESP_FUNC_ESUPP;
+		goto out;
+	}
+out_done:
+	if (res == TMF_RESP_FUNC_COMPLETE) {	/* on COMPLETE the task's own ascb is released */
+		task->lldd_task = NULL;
+		mb();
+		asd_ascb_free(tascb);
+	}
+out:
+	asd_ascb_free(ascb);	/* NOTE(review): NULL on the early-done path; confirm asd_ascb_free() accepts it */
+	ASD_DPRINTK("task 0x%p aborted, res: 0x%x\n", task, res);
+	return res;
+}
+
+/**
+ * asd_initiate_ssp_tmf -- send a TMF to an I_T_L or I_T_L_Q nexus
+ * @dev: pointer to struct domain_device of interest
+ * @lun: pointer to u8[8] which is the LUN
+ * @tmf: the TMF to be performed (see sas_task.h or the SAS spec)
+ * @index: the transaction context of the task to be queried if QT TMF
+ *
+ * This function is used to send ABORT TASK SET, CLEAR ACA,
+ * CLEAR TASK SET, LU RESET and QUERY TASK TMFs.
+ *
+ * No SCBs should be queued to the I_T_L nexus when this SCB is
+ * pending.
+ *
+ * Returns: TMF response code (see sas_task.h or the SAS spec)
+ */
+static int asd_initiate_ssp_tmf(struct domain_device *dev, u8 *lun,
+				int tmf, int index)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	int res = 1;
+	struct scb *scb;
+
+	if (!(dev->tproto & SAS_PROTO_SSP))
+		return TMF_RESP_FUNC_ESUPP;
+
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL);
+	if (!ascb)
+		return -ENOMEM;
+	scb = ascb->scb;
+
+	if (tmf == TMF_QUERY_TASK)
+		scb->header.opcode = QUERY_SSP_TASK;
+	else
+		scb->header.opcode = INITIATE_SSP_TMF;
+
+	scb->ssp_tmf.proto_conn_rate  = (1 << 4); /* SSP */
+	scb->ssp_tmf.proto_conn_rate |= dev->linkrate;
+	/* SSP frame header */
+	scb->ssp_tmf.ssp_frame.frame_type = SSP_TASK;
+	memcpy(scb->ssp_tmf.ssp_frame.hashed_dest_addr,
+	       dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	memcpy(scb->ssp_tmf.ssp_frame.hashed_src_addr,
+	       dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	scb->ssp_tmf.ssp_frame.tptt = cpu_to_be16(0xFFFF);
+	/* SSP Task IU */
+	memcpy(scb->ssp_tmf.ssp_task.lun, lun, 8);
+	scb->ssp_tmf.ssp_task.tmf = tmf;
+
+	scb->ssp_tmf.sister_scb = cpu_to_le16(0xFFFF);
+	scb->ssp_tmf.conn_handle= cpu_to_le16((u16)(unsigned long)
+					      dev->lldd_dev);
+	scb->ssp_tmf.retry_count = 1;
+	scb->ssp_tmf.itnl_to = cpu_to_le16(ITNL_TIMEOUT_CONST);
+	if (tmf == TMF_QUERY_TASK)
+		scb->ssp_tmf.index = cpu_to_le16(index);
+
+	res = asd_enqueue_internal(ascb, asd_tmf_tasklet_complete,
+				   asd_tmf_timedout);
+	if (res)
+		goto out_err;
+	wait_for_completion(&ascb->completion);
+	res = (int) (unsigned long) ascb->uldd_task;
+
+	switch (res) {
+	case TC_NO_ERROR + 0xFF00:
+		res = TMF_RESP_FUNC_COMPLETE;
+		break;
+	case TF_NAK_RECV + 0xFF00:
+		res = TMF_RESP_INVALID_FRAME;
+		break;
+	case TF_TMF_TASK_DONE + 0xFF00:
+		res = TMF_RESP_FUNC_FAILED;
+		break;
+	case TF_TMF_NO_TAG + 0xFF00:
+	case TF_TMF_TAG_FREE + 0xFF00: /* the tag is in the free list */
+	case TF_TMF_NO_CONN_HANDLE + 0xFF00: /* no such device */
+		res = TMF_RESP_FUNC_COMPLETE;
+		break;
+	case TF_TMF_NO_CTX + 0xFF00: /* not in seq, or proto != SSP */
+		res = TMF_RESP_FUNC_ESUPP;
+		break;
+	default:
+		ASD_DPRINTK("%s: converting result 0x%x to TMF_RESP_FUNC_FAILED\n",
+			    __FUNCTION__, res);
+		res = TMF_RESP_FUNC_FAILED;
+		break;
+	}
+out_err:
+	asd_ascb_free(ascb);
+	return res;
+}
+
+int asd_abort_task_set(struct domain_device *dev, u8 *lun)
+{
+	int res = asd_initiate_ssp_tmf(dev, lun, TMF_ABORT_TASK_SET, 0);
+
+	if (res == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return res;
+}
+
+int asd_clear_aca(struct domain_device *dev, u8 *lun)
+{
+	int res = asd_initiate_ssp_tmf(dev, lun, TMF_CLEAR_ACA, 0);
+
+	if (res == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return res;
+}
+
+int asd_clear_task_set(struct domain_device *dev, u8 *lun)
+{
+	int res = asd_initiate_ssp_tmf(dev, lun, TMF_CLEAR_TASK_SET, 0);
+
+	if (res == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return res;
+}
+
+int asd_lu_reset(struct domain_device *dev, u8 *lun)
+{
+	int res = asd_initiate_ssp_tmf(dev, lun, TMF_LU_RESET, 0);
+
+	if (res == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return res;
+}
+
+/**
+ * asd_query_task -- send a QUERY TASK TMF to an I_T_L_Q nexus
+ * @task: pointer to sas_task struct of interest
+ *
+ * Returns: TMF_RESP_FUNC_COMPLETE if the task is not in the task set,
+ * or TMF_RESP_FUNC_SUCC if the task is in the task set.
+ *
+ * Normally the management layer sets the task to aborted state,
+ * and then calls query task and then abort task.
+ */
+int asd_query_task(struct sas_task *task)
+{
+	struct asd_ascb *ascb = task->lldd_task;
+	int index;
+
+	if (ascb) {
+		index = ascb->tc_index;
+		return asd_initiate_ssp_tmf(task->dev, task->ssp_task.LUN,
+					    TMF_QUERY_TASK, index);
+	}
+	return TMF_RESP_FUNC_COMPLETE;
+}
diff --git a/drivers/scsi/libsas/Kconfig b/drivers/scsi/libsas/Kconfig
new file mode 100644
index 0000000000000..aafdc92f83123
--- /dev/null
+++ b/drivers/scsi/libsas/Kconfig
@@ -0,0 +1,39 @@
+#
+# Kernel configuration file for the SAS Class
+#
+# Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+#
+
+config SCSI_SAS_LIBSAS
+	tristate "SAS Domain Transport Attributes"
+	depends on SCSI
+	select SCSI_SAS_ATTRS
+	help
+	  This provides transport specific helpers for SAS drivers which
+	  use the domain device construct (like the aic94xx).
+
+config SCSI_SAS_LIBSAS_DEBUG
+	bool "Compile the SAS Domain Transport Attributes in debug mode"
+	default y
+	depends on SCSI_SAS_LIBSAS
+	help
+		Compiles the SAS Layer in debug mode.  In debug mode, the
+		SAS Layer prints diagnostic and debug messages.
diff --git a/drivers/scsi/libsas/Makefile b/drivers/scsi/libsas/Makefile
new file mode 100644
index 0000000000000..44d972a3b4bda
--- /dev/null
+++ b/drivers/scsi/libsas/Makefile
@@ -0,0 +1,36 @@
+#
+# Kernel Makefile for the libsas helpers
+#
+# Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+ifeq ($(CONFIG_SCSI_SAS_LIBSAS_DEBUG),y)
+	EXTRA_CFLAGS += -DSAS_DEBUG
+endif
+
+obj-$(CONFIG_SCSI_SAS_LIBSAS) += libsas.o
+libsas-y +=  sas_init.o     \
+		sas_phy.o      \
+		sas_port.o     \
+		sas_event.o    \
+		sas_dump.o     \
+		sas_discover.o \
+		sas_expander.o \
+		sas_scsi_host.o
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
new file mode 100644
index 0000000000000..d977bd492d8d0
--- /dev/null
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -0,0 +1,749 @@
+/*
+ * Serial Attached SCSI (SAS) Discover process
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_eh.h>
+#include "sas_internal.h"
+
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+/* ---------- Basic task processing for discovery purposes ---------- */
+
+void sas_init_dev(struct domain_device *dev)
+{
+        INIT_LIST_HEAD(&dev->siblings);
+        INIT_LIST_HEAD(&dev->dev_list_node);
+        switch (dev->dev_type) {
+        case SAS_END_DEV:
+                break;
+        case EDGE_DEV:
+        case FANOUT_DEV:
+                INIT_LIST_HEAD(&dev->ex_dev.children);
+                break;
+        case SATA_DEV:
+        case SATA_PM:
+        case SATA_PM_PORT:
+                INIT_LIST_HEAD(&dev->sata_dev.children);
+                break;
+        default:
+                break;
+        }
+}
+
+static void sas_task_timedout(unsigned long _task)
+{
+	struct sas_task *task = (void *) _task;
+	unsigned long flags;
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (!(task->task_state_flags & SAS_TASK_STATE_DONE))
+		task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	complete(&task->completion);
+}
+
+static void sas_disc_task_done(struct sas_task *task)
+{
+	if (!del_timer(&task->timer))
+		return;
+	complete(&task->completion);
+}
+
+#define SAS_DEV_TIMEOUT 10
+
+/**
+ * sas_execute_task -- Basic task processing for discovery
+ * @task: the task to be executed
+ * @buffer: pointer to buffer to do I/O
+ * @size: size of @buffer
+ * @pci_dma_dir: PCI_DMA_...
+ */
+static int sas_execute_task(struct sas_task *task, void *buffer, int size,
+			    int pci_dma_dir)
+{
+	int res = -ENOMEM;	/* result if scatterlist alloc fails */
+	struct scatterlist *scatter = NULL;
+	struct task_status_struct *ts = &task->task_status;
+	int num_scatter = 0;
+	int retries = 0;
+	struct sas_internal *i =
+		to_sas_internal(task->dev->port->ha->core.shost->transportt);
+
+	if (pci_dma_dir != PCI_DMA_NONE) {
+		scatter = kzalloc(sizeof(*scatter), GFP_KERNEL);
+		if (!scatter)
+			goto out;
+
+		sg_init_one(scatter, buffer, size);
+		num_scatter = 1;
+	}
+
+	task->task_proto = task->dev->tproto;
+	task->scatter = scatter;
+	task->num_scatter = num_scatter;
+	task->total_xfer_len = size;
+	task->data_dir = pci_dma_dir;
+	task->task_done = sas_disc_task_done;
+
+	for (retries = 0; retries < 5; retries++) {
+		task->task_state_flags = SAS_TASK_STATE_PENDING;
+		init_completion(&task->completion);
+
+		task->timer.data = (unsigned long) task;
+		task->timer.function = sas_task_timedout;
+		task->timer.expires = jiffies + SAS_DEV_TIMEOUT*HZ;
+		add_timer(&task->timer);
+
+		res = i->dft->lldd_execute_task(task, 1, GFP_KERNEL);
+		if (res) {
+			del_timer(&task->timer);
+			SAS_DPRINTK("executing SAS discovery task failed:%d\n",
+				    res);
+			goto ex_err;
+		}
+		wait_for_completion(&task->completion);
+		res = -ETASK;
+		if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+			int res2;
+			SAS_DPRINTK("task aborted, flags:0x%x\n",
+				    task->task_state_flags);
+			res2 = i->dft->lldd_abort_task(task);
+			SAS_DPRINTK("came back from abort task\n");
+			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+				if (res2 == TMF_RESP_FUNC_COMPLETE)
+					continue; /* Retry the task */
+				else
+					goto ex_err;
+			}
+		}
+		if (task->task_status.stat == SAM_BUSY ||
+			   task->task_status.stat == SAM_TASK_SET_FULL ||
+			   task->task_status.stat == SAS_QUEUE_FULL) {
+			SAS_DPRINTK("task: q busy, sleeping...\n");
+			schedule_timeout_interruptible(HZ);
+		} else if (task->task_status.stat == SAM_CHECK_COND) {
+			struct scsi_sense_hdr shdr;
+
+			if (!scsi_normalize_sense(ts->buf, ts->buf_valid_size,
+						  &shdr)) {
+				SAS_DPRINTK("couldn't normalize sense\n");
+				continue;
+			}
+			if ((shdr.sense_key == 6 && shdr.asc == 0x29) ||
+			    (shdr.sense_key == 2 && shdr.asc == 4 &&
+			     shdr.ascq == 1)) {
+				SAS_DPRINTK("device %016llx LUN: %016llx "
+					    "powering up or not ready yet, "
+					    "sleeping...\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    SAS_ADDR(task->ssp_task.LUN));
+
+				schedule_timeout_interruptible(5*HZ);
+			} else if (shdr.sense_key == 1) {
+				res = 0;
+				break;
+			} else if (shdr.sense_key == 5) {
+				break;
+			} else {
+				SAS_DPRINTK("dev %016llx LUN: %016llx "
+					    "sense key:0x%x ASC:0x%x ASCQ:0x%x"
+					    "\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    SAS_ADDR(task->ssp_task.LUN),
+					    shdr.sense_key,
+					    shdr.asc, shdr.ascq);
+			}
+		} else if (task->task_status.resp != SAS_TASK_COMPLETE ||
+			   task->task_status.stat != SAM_GOOD) {
+			SAS_DPRINTK("task finished with resp:0x%x, "
+				    "stat:0x%x\n",
+				    task->task_status.resp,
+				    task->task_status.stat);
+			goto ex_err;
+		} else {
+			res = 0;
+			break;
+		}
+	}
+ex_err:
+	if (pci_dma_dir != PCI_DMA_NONE)
+		kfree(scatter);
+out:
+	return res;
+}
+
+/* ---------- Domain device discovery ---------- */
+
+/**
+ * sas_get_port_device -- Discover devices which caused port creation
+ * @port: pointer to struct sas_port of interest
+ *
+ * Devices directly attached to a HA port, have no parent.  This is
+ * how we know they are (domain) "root" devices.  All other devices
+ * do, and should have their "parent" pointer set appropriately as
+ * soon as a child device is discovered.
+ */
+static int sas_get_port_device(struct asd_sas_port *port)
+{
+	unsigned long flags;
+	struct asd_sas_phy *phy;
+	struct sas_rphy *rphy;
+	struct domain_device *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&port->phy_list_lock, flags);
+	if (list_empty(&port->phy_list)) {
+		spin_unlock_irqrestore(&port->phy_list_lock, flags);
+		kfree(dev);
+		return -ENODEV;
+	}
+	phy = container_of(port->phy_list.next, struct asd_sas_phy, port_phy_el);
+	spin_lock(&phy->frame_rcvd_lock);
+	memcpy(dev->frame_rcvd, phy->frame_rcvd, min(sizeof(dev->frame_rcvd),
+					     (size_t)phy->frame_rcvd_size));
+	spin_unlock(&phy->frame_rcvd_lock);
+	spin_unlock_irqrestore(&port->phy_list_lock, flags);
+
+	if (dev->frame_rcvd[0] == 0x34 && port->oob_mode == SATA_OOB_MODE) {
+		struct dev_to_host_fis *fis =
+			(struct dev_to_host_fis *) dev->frame_rcvd;
+		if (fis->interrupt_reason == 1 && fis->lbal == 1 &&
+		    fis->byte_count_low==0x69 && fis->byte_count_high == 0x96
+		    && (fis->device & ~0x10) == 0)
+			dev->dev_type = SATA_PM;
+		else
+			dev->dev_type = SATA_DEV;
+		dev->tproto = SATA_PROTO;
+	} else {
+		struct sas_identify_frame *id =
+			(struct sas_identify_frame *) dev->frame_rcvd;
+		dev->dev_type = id->dev_type;
+		dev->iproto = id->initiator_bits;
+		dev->tproto = id->target_bits;
+	}
+
+	sas_init_dev(dev);
+
+	switch (dev->dev_type) {
+	case SAS_END_DEV:
+		rphy = sas_end_device_alloc(port->port);
+		break;
+	case EDGE_DEV:
+		rphy = sas_expander_alloc(port->port,
+					  SAS_EDGE_EXPANDER_DEVICE);
+		break;
+	case FANOUT_DEV:
+		rphy = sas_expander_alloc(port->port,
+					  SAS_FANOUT_EXPANDER_DEVICE);
+		break;
+	case SATA_DEV:
+	default:
+		printk("ERROR: Unidentified device type %d\n", dev->dev_type);
+		rphy = NULL;
+		break;
+	}
+
+	if (!rphy) {
+		kfree(dev);
+		return -ENODEV;
+	}
+	rphy->identify.phy_identifier = phy->phy->identify.phy_identifier;
+	memcpy(dev->sas_addr, port->attached_sas_addr, SAS_ADDR_SIZE);
+	sas_fill_in_rphy(dev, rphy);
+	sas_hash_addr(dev->hashed_sas_addr, dev->sas_addr);
+	port->port_dev = dev;
+	dev->port = port;
+	dev->linkrate = port->linkrate;
+	dev->min_linkrate = port->linkrate;
+	dev->max_linkrate = port->linkrate;
+	dev->pathways = port->num_phys;
+	memset(port->disc.fanout_sas_addr, 0, SAS_ADDR_SIZE);
+	memset(port->disc.eeds_a, 0, SAS_ADDR_SIZE);
+	memset(port->disc.eeds_b, 0, SAS_ADDR_SIZE);
+	port->disc.max_level = 0;
+
+	dev->rphy = rphy;
+	spin_lock(&port->dev_list_lock);
+	list_add_tail(&dev->dev_list_node, &port->dev_list);
+	spin_unlock(&port->dev_list_lock);
+
+	return 0;
+}
+
+/* ---------- Discover and Revalidate ---------- */
+
+/* ---------- SATA ---------- */
+
+static void sas_get_ata_command_set(struct domain_device *dev)
+{
+	struct dev_to_host_fis *fis =
+		(struct dev_to_host_fis *) dev->frame_rcvd;
+
+	if ((fis->sector_count == 1 && /* ATA */
+	     fis->lbal         == 1 &&
+	     fis->lbam         == 0 &&
+	     fis->lbah         == 0 &&
+	     fis->device       == 0)
+	    ||
+	    (fis->sector_count == 0 && /* CE-ATA (mATA) */
+	     fis->lbal         == 0 &&
+	     fis->lbam         == 0xCE &&
+	     fis->lbah         == 0xAA &&
+	     (fis->device & ~0x10) == 0))
+
+		dev->sata_dev.command_set = ATA_COMMAND_SET;
+
+	else if ((fis->interrupt_reason == 1 &&	/* ATAPI */
+		  fis->lbal             == 1 &&
+		  fis->byte_count_low   == 0x14 &&
+		  fis->byte_count_high  == 0xEB &&
+		  (fis->device & ~0x10) == 0))
+
+		dev->sata_dev.command_set = ATAPI_COMMAND_SET;
+
+	else if ((fis->sector_count == 1 && /* SEMB */
+		  fis->lbal         == 1 &&
+		  fis->lbam         == 0x3C &&
+		  fis->lbah         == 0xC3 &&
+		  fis->device       == 0)
+		||
+		 (fis->interrupt_reason == 1 &&	/* SATA PM */
+		  fis->lbal             == 1 &&
+		  fis->byte_count_low   == 0x69 &&
+		  fis->byte_count_high  == 0x96 &&
+		  (fis->device & ~0x10) == 0))
+
+		/* Treat it as a superset? */
+		dev->sata_dev.command_set = ATAPI_COMMAND_SET;
+}
+
+/**
+ * sas_issue_ata_cmd -- Basic SATA command processing for discovery
+ * @dev: the device to send the command to
+ * @command: the command register
+ * @features: the features register
+ * @buffer: pointer to buffer to do I/O
+ * @size: size of @buffer
+ * @pci_dma_dir: PCI_DMA_...
+ */
+static int sas_issue_ata_cmd(struct domain_device *dev, u8 command,
+			     u8 features, void *buffer, int size,
+			     int pci_dma_dir)
+{
+	int res = 0;
+	struct sas_task *task;
+	struct dev_to_host_fis *d2h_fis = (struct dev_to_host_fis *)
+		&dev->frame_rcvd[0];
+
+	res = -ENOMEM;
+	task = sas_alloc_task(GFP_KERNEL);
+	if (!task)
+		goto out;
+
+	task->dev = dev;
+
+	task->ata_task.fis.command = command;
+	task->ata_task.fis.features = features;
+	task->ata_task.fis.device = d2h_fis->device;
+	task->ata_task.retry_count = 1;
+
+	res = sas_execute_task(task, buffer, size, pci_dma_dir);
+
+	sas_free_task(task);
+out:
+	return res;
+}
+
+static void sas_sata_propagate_sas_addr(struct domain_device *dev)
+{
+	unsigned long flags;
+	struct asd_sas_port *port = dev->port;
+	struct asd_sas_phy  *phy;
+
+	BUG_ON(dev->parent);
+
+	memcpy(port->attached_sas_addr, dev->sas_addr, SAS_ADDR_SIZE);
+	spin_lock_irqsave(&port->phy_list_lock, flags);
+	list_for_each_entry(phy, &port->phy_list, port_phy_el)
+		memcpy(phy->attached_sas_addr, dev->sas_addr, SAS_ADDR_SIZE);
+	spin_unlock_irqrestore(&port->phy_list_lock, flags);
+}
+
+#define ATA_IDENTIFY_DEV         0xEC
+#define ATA_IDENTIFY_PACKET_DEV  0xA1
+#define ATA_SET_FEATURES         0xEF
+#define ATA_FEATURE_PUP_STBY_SPIN_UP 0x07
+
+/**
+ * sas_discover_sata_dev -- discover a STP/SATA device (SATA_DEV)
+ * @dev: STP/SATA device of interest (ATA/ATAPI)
+ *
+ * The LLDD has already been notified of this device, so that we can
+ * send FISes to it.  Here we try to get IDENTIFY DEVICE or IDENTIFY
+ * PACKET DEVICE, if ATAPI device, so that the LLDD can fine-tune its
+ * performance for this device.
+ */
+static int sas_discover_sata_dev(struct domain_device *dev)
+{
+	int     res;
+	__le16  *identify_x;
+	u8      command;
+
+	identify_x = kzalloc(512, GFP_KERNEL);
+	if (!identify_x)
+		return -ENOMEM;
+
+	if (dev->sata_dev.command_set == ATA_COMMAND_SET) {
+		dev->sata_dev.identify_device = identify_x;
+		command = ATA_IDENTIFY_DEV;
+	} else {
+		dev->sata_dev.identify_packet_device = identify_x;
+		command = ATA_IDENTIFY_PACKET_DEV;
+	}
+
+	res = sas_issue_ata_cmd(dev, command, 0, identify_x, 512,
+				PCI_DMA_FROMDEVICE);
+	if (res)
+		goto out_err;
+
+	/* lives on the media? */
+	if (le16_to_cpu(identify_x[0]) & 4) {
+		/* incomplete response */
+		SAS_DPRINTK("sending SET FEATURE/PUP_STBY_SPIN_UP to "
+			    "dev %llx\n", SAS_ADDR(dev->sas_addr));
+		if (!(le16_to_cpu(identify_x[83]) & (1<<6))) /* swap first */
+			goto cont1;
+		res = sas_issue_ata_cmd(dev, ATA_SET_FEATURES,
+					ATA_FEATURE_PUP_STBY_SPIN_UP,
+					NULL, 0, PCI_DMA_NONE);
+		if (res)
+			goto cont1;
+
+		schedule_timeout_interruptible(5*HZ); /* More time? */
+		res = sas_issue_ata_cmd(dev, command, 0, identify_x, 512,
+					PCI_DMA_FROMDEVICE);
+		if (res)
+			goto out_err;
+	}
+cont1:
+	/* Get WWN */
+	if (dev->port->oob_mode != SATA_OOB_MODE) {
+		memcpy(dev->sas_addr, dev->sata_dev.rps_resp.rps.stp_sas_addr,
+		       SAS_ADDR_SIZE);
+	} else if (dev->sata_dev.command_set == ATA_COMMAND_SET &&
+		   (le16_to_cpu(dev->sata_dev.identify_device[108]) & 0xF000)
+		   == 0x5000) {
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			dev->sas_addr[2*i] =
+	     (le16_to_cpu(dev->sata_dev.identify_device[108+i]) & 0xFF00) >> 8;
+			dev->sas_addr[2*i+1] =
+	      le16_to_cpu(dev->sata_dev.identify_device[108+i]) & 0x00FF;
+		}
+	}
+	sas_hash_addr(dev->hashed_sas_addr, dev->sas_addr);
+	if (!dev->parent)
+		sas_sata_propagate_sas_addr(dev);
+
+	/* XXX Hint: register this SATA device with SATL.
+	   When this returns, dev->sata_dev->lu is alive and
+	   present.
+	sas_satl_register_dev(dev);
+	*/
+	return 0;
+out_err:
+	dev->sata_dev.identify_packet_device = NULL;
+	dev->sata_dev.identify_device = NULL;
+	kfree(identify_x);
+	return res;
+}
+
+static int sas_discover_sata_pm(struct domain_device *dev)
+{
+	return -ENODEV;
+}
+
+int sas_notify_lldd_dev_found(struct domain_device *dev)
+{
+	int res = 0;
+	struct sas_ha_struct *sas_ha = dev->port->ha;
+	struct Scsi_Host *shost = sas_ha->core.shost;
+	struct sas_internal *i = to_sas_internal(shost->transportt);
+
+	if (i->dft->lldd_dev_found) {
+		res = i->dft->lldd_dev_found(dev);
+		if (res) {
+			printk("sas: driver on pcidev %s cannot handle "
+			       "device %llx, error:%d\n",
+			       pci_name(sas_ha->pcidev),
+			       SAS_ADDR(dev->sas_addr), res);
+		}
+	}
+	return res;
+}
+
+
+void sas_notify_lldd_dev_gone(struct domain_device *dev)
+{
+	struct sas_ha_struct *sas_ha = dev->port->ha;
+	struct Scsi_Host *shost = sas_ha->core.shost;
+	struct sas_internal *i = to_sas_internal(shost->transportt);
+
+	if (i->dft->lldd_dev_gone)
+		i->dft->lldd_dev_gone(dev);
+}
+
+/* ---------- Common/dispatchers ---------- */
+
+/**
+ * sas_discover_sata -- discover an STP/SATA domain device
+ * @dev: pointer to struct domain_device of interest
+ *
+ * First we notify the LLDD of this device, so we can send frames to
+ * it.  Then depending on the type of device we call the appropriate
+ * discover functions.  Once device discover is done, we notify the
+ * LLDD so that it can fine-tune its parameters for the device, by
+ * removing it and then adding it.  That is, the second time around,
+ * the driver would have certain fields, that it is looking at, set.
+ * Finally we initialize the kobj so that the device can be added to
+ * the system at registration time.  Devices directly attached to a HA
+ * port, have no parents.  All other devices do, and should have their
+ * "parent" pointer set appropriately before calling this function.
+ */
+int sas_discover_sata(struct domain_device *dev)
+{
+	int res;
+
+	sas_get_ata_command_set(dev);
+
+	res = sas_notify_lldd_dev_found(dev);
+	if (res)
+		return res;
+
+	switch (dev->dev_type) {
+	case SATA_DEV:
+		res = sas_discover_sata_dev(dev);
+		break;
+	case SATA_PM:
+		res = sas_discover_sata_pm(dev);
+		break;
+	default:
+		break;
+	}
+
+	sas_notify_lldd_dev_gone(dev);
+	if (!res) {
+		sas_notify_lldd_dev_found(dev);
+	}
+	return res;
+}
+
+/**
+ * sas_discover_end_dev -- discover an end device (SSP, etc)
+ * @dev: pointer to domain device of interest
+ *
+ * See comment in sas_discover_sata().
+ */
+int sas_discover_end_dev(struct domain_device *dev)
+{
+	int res;
+
+	res = sas_notify_lldd_dev_found(dev);
+	if (res)
+		return res;
+
+	res = sas_rphy_add(dev->rphy);
+	if (res)
+		goto out_err;
+
+	/* do this to get the end device port attributes which will have
+	 * been scanned in sas_rphy_add */
+	sas_notify_lldd_dev_gone(dev);
+	sas_notify_lldd_dev_found(dev);
+
+	return 0;
+
+out_err:
+	sas_notify_lldd_dev_gone(dev);
+	return res;
+}
+
+/* ---------- Device registration and unregistration ---------- */
+
+static inline void sas_unregister_common_dev(struct domain_device *dev)
+{
+	sas_notify_lldd_dev_gone(dev);
+	if (!dev->parent)
+		dev->port->port_dev = NULL;
+	else
+		list_del_init(&dev->siblings);
+	list_del_init(&dev->dev_list_node);
+}
+
+void sas_unregister_dev(struct domain_device *dev)
+{
+	if (dev->rphy) {
+		sas_remove_children(&dev->rphy->dev);
+		sas_rphy_delete(dev->rphy);
+		dev->rphy = NULL;
+	}
+	if (dev->dev_type == EDGE_DEV || dev->dev_type == FANOUT_DEV) {
+		/* remove the phys and ports, everything else should be gone */
+		kfree(dev->ex_dev.ex_phy);
+		dev->ex_dev.ex_phy = NULL;
+	}
+	sas_unregister_common_dev(dev);
+}
+
+void sas_unregister_domain_devices(struct asd_sas_port *port)
+{
+	struct domain_device *dev, *n;
+
+	list_for_each_entry_safe_reverse(dev,n,&port->dev_list,dev_list_node)
+		sas_unregister_dev(dev);
+
+	port->port->rphy = NULL;
+
+}
+
+/* ---------- Discovery and Revalidation ---------- */
+
+/**
+ * sas_discover_domain -- discover the domain
+ * @port: port to the domain of interest
+ *
+ * NOTE: this process _must_ quit (return) as soon as any connection
+ * errors are encountered.  Connection recovery is done elsewhere.
+ * Discover process only interrogates devices in order to discover the
+ * domain.
+ */
+static void sas_discover_domain(void *data)
+{
+	int error = 0;
+	struct asd_sas_port *port = data;
+
+	sas_begin_event(DISCE_DISCOVER_DOMAIN, &port->disc.disc_event_lock,
+			&port->disc.pending);
+
+	if (port->port_dev)
+		return ;
+	else {
+		error = sas_get_port_device(port);
+		if (error)
+			return;
+	}
+
+	SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id,
+		    current->pid);
+
+	switch (port->port_dev->dev_type) {
+	case SAS_END_DEV:
+		error = sas_discover_end_dev(port->port_dev);
+		break;
+	case EDGE_DEV:
+	case FANOUT_DEV:
+		error = sas_discover_root_expander(port->port_dev);
+		break;
+	case SATA_DEV:
+	case SATA_PM:
+		error = sas_discover_sata(port->port_dev);
+		break;
+	default:
+		SAS_DPRINTK("unhandled device %d\n", port->port_dev->dev_type);
+		break;
+	}
+
+	if (error) {
+		kfree(port->port_dev); /* not kobject_register-ed yet */
+		port->port_dev = NULL;
+	}
+
+	SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
+		    current->pid, error);
+}
+
+static void sas_revalidate_domain(void *data)
+{
+	int res = 0;
+	struct asd_sas_port *port = data;
+
+	sas_begin_event(DISCE_REVALIDATE_DOMAIN, &port->disc.disc_event_lock,
+			&port->disc.pending);
+
+	SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
+		    current->pid);
+	if (port->port_dev)
+		res = sas_ex_revalidate_domain(port->port_dev);
+
+	SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
+		    port->id, current->pid, res);
+}
+
+/* ---------- Events ---------- */
+
+int sas_discover_event(struct asd_sas_port *port, enum discover_event ev)
+{
+	struct sas_discovery *disc;
+
+	if (!port)
+		return 0;
+	disc = &port->disc;
+
+	BUG_ON(ev >= DISC_NUM_EVENTS);
+
+	sas_queue_event(ev, &disc->disc_event_lock, &disc->pending,
+			&disc->disc_work[ev], port->ha->core.shost);
+
+	return 0;
+}
+
+/**
+ * sas_init_disc -- initialize the discovery struct in the port
+ * @port: pointer to struct asd_sas_port handed to the discovery work items
+ *
+ * Called when the ports are being initialized.
+ */
+void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
+{
+	int i;
+
+	static void (*sas_event_fns[DISC_NUM_EVENTS])(void *) = {
+		[DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
+		[DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
+	};
+
+	spin_lock_init(&disc->disc_event_lock);
+	disc->pending = 0;
+	for (i = 0; i < DISC_NUM_EVENTS; i++)
+		INIT_WORK(&disc->disc_work[i], sas_event_fns[i], port);
+}
diff --git a/drivers/scsi/libsas/sas_dump.c b/drivers/scsi/libsas/sas_dump.c
new file mode 100644
index 0000000000000..f1246d2c9bef8
--- /dev/null
+++ b/drivers/scsi/libsas/sas_dump.c
@@ -0,0 +1,76 @@
+/*
+ * Serial Attached SCSI (SAS) Dump/Debugging routines
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include "sas_dump.h"
+
+#ifdef SAS_DEBUG
+
+static const char *sas_hae_str[] = {
+	[0] = "HAE_RESET",
+};
+
+static const char *sas_porte_str[] = {
+	[0] = "PORTE_BYTES_DMAED",
+	[1] = "PORTE_BROADCAST_RCVD",
+	[2] = "PORTE_LINK_RESET_ERR",
+	[3] = "PORTE_TIMER_EVENT",
+	[4] = "PORTE_HARD_RESET",
+};
+
+static const char *sas_phye_str[] = {
+	[0] = "PHYE_LOSS_OF_SIGNAL",
+	[1] = "PHYE_OOB_DONE",
+	[2] = "PHYE_OOB_ERROR",
+	[3] = "PHYE_SPINUP_HOLD",
+};
+
+void sas_dprint_porte(int phyid, enum port_event pe)
+{
+	SAS_DPRINTK("phy%d: port event: %s\n", phyid, sas_porte_str[pe]);
+}
+void sas_dprint_phye(int phyid, enum phy_event pe)
+{
+	SAS_DPRINTK("phy%d: phy event: %s\n", phyid, sas_phye_str[pe]);
+}
+
+void sas_dprint_hae(struct sas_ha_struct *sas_ha, enum ha_event he)
+{
+	SAS_DPRINTK("ha %s: %s event\n", pci_name(sas_ha->pcidev),
+		    sas_hae_str[he]);
+}
+
+void sas_dump_port(struct asd_sas_port *port)
+{
+	SAS_DPRINTK("port%d: class:0x%x\n", port->id, port->class);
+	SAS_DPRINTK("port%d: sas_addr:%llx\n", port->id,
+		    SAS_ADDR(port->sas_addr));
+	SAS_DPRINTK("port%d: attached_sas_addr:%llx\n", port->id,
+		    SAS_ADDR(port->attached_sas_addr));
+	SAS_DPRINTK("port%d: iproto:0x%x\n", port->id, port->iproto);
+	SAS_DPRINTK("port%d: tproto:0x%x\n", port->id, port->tproto);
+	SAS_DPRINTK("port%d: oob_mode:0x%x\n", port->id, port->oob_mode);
+	SAS_DPRINTK("port%d: num_phys:%d\n", port->id, port->num_phys);
+}
+
+#endif /* SAS_DEBUG */
diff --git a/drivers/scsi/libsas/sas_dump.h b/drivers/scsi/libsas/sas_dump.h
new file mode 100644
index 0000000000000..47b45d4f5258a
--- /dev/null
+++ b/drivers/scsi/libsas/sas_dump.h
@@ -0,0 +1,42 @@
+/*
+ * Serial Attached SCSI (SAS) Dump/Debugging routines header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include "sas_internal.h"
+
+#ifdef SAS_DEBUG
+/* Debug build: out-of-line definitions, compiled under the same guard. */
+void sas_dprint_porte(int phyid, enum port_event pe);
+void sas_dprint_phye(int phyid, enum phy_event pe);
+void sas_dprint_hae(struct sas_ha_struct *sas_ha, enum ha_event he);
+void sas_dump_port(struct asd_sas_port *port);
+
+#else /* SAS_DEBUG */
+/* SAS_DEBUG off: empty inline stubs with the same signatures. */
+static inline void sas_dprint_porte(int phyid, enum port_event pe) { }
+static inline void sas_dprint_phye(int phyid, enum phy_event pe) { }
+static inline void sas_dprint_hae(struct sas_ha_struct *sas_ha,
+				  enum ha_event he) { }
+static inline void sas_dump_port(struct asd_sas_port *port) { }
+
+#endif /* SAS_DEBUG */
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
new file mode 100644
index 0000000000000..19110ed1c89ca
--- /dev/null
+++ b/drivers/scsi/libsas/sas_event.c
@@ -0,0 +1,75 @@
+/*
+ * Serial Attached SCSI (SAS) Event processing
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <scsi/scsi_host.h>
+#include "sas_internal.h"
+#include "sas_dump.h"
+
+static void notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event event)
+{
+	BUG_ON(event >= HA_NUM_EVENTS);
+	/* @event is used both as the event code and as the ha_events[] index. */
+	sas_queue_event(event, &sas_ha->event_lock, &sas_ha->pending,
+			&sas_ha->ha_events[event], sas_ha->core.shost);
+}
+
+static void notify_port_event(struct asd_sas_phy *phy, enum port_event event)
+{
+	/* Guards the port_events[event] lookup below. */
+	BUG_ON(event >= PORT_NUM_EVENTS);
+	/* Pending state and work item are per phy; lock and host are the HA's. */
+	sas_queue_event(event, &phy->ha->event_lock,
+			&phy->port_events_pending, &phy->port_events[event],
+			phy->ha->core.shost);
+}
+
+static void notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
+{
+	/* Guards the phy_events[event] lookup below. */
+	BUG_ON(event >= PHY_NUM_EVENTS);
+	/* Pending state and work item are per phy; lock and host are the HA's. */
+	sas_queue_event(event, &phy->ha->event_lock,
+			&phy->phy_events_pending, &phy->phy_events[event],
+			phy->ha->core.shost);
+}
+
+int sas_init_events(struct sas_ha_struct *sas_ha)
+{
+	/* Work handler for each HA event, indexed by enum ha_event. */
+	static void (*ha_handlers[HA_NUM_EVENTS])(void *) = {
+		[HAE_RESET] = sas_hae_reset,
+	};
+	int ev = 0;
+
+	spin_lock_init(&sas_ha->event_lock);
+	while (ev < HA_NUM_EVENTS) {
+		INIT_WORK(&sas_ha->ha_events[ev], ha_handlers[ev], sas_ha);
+		ev++;
+	}
+	/* Publish the notifier entry points on the HA structure. */
+	sas_ha->notify_ha_event = notify_ha_event;
+	sas_ha->notify_port_event = notify_port_event;
+	sas_ha->notify_phy_event = notify_phy_event;
+	return 0;
+}
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
new file mode 100644
index 0000000000000..b653a263f76a7
--- /dev/null
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -0,0 +1,1862 @@
+/*
+ * Serial Attached SCSI (SAS) Expander discovery and configuration
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+
+#include "sas_internal.h"
+
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+/* Forward declarations of static helpers that are called before they are defined. */
+static int sas_discover_expander(struct domain_device *dev);
+static int sas_configure_routing(struct domain_device *dev, u8 *sas_addr);
+static int sas_configure_phy(struct domain_device *dev, int phy_id,
+			     u8 *sas_addr, int include);
+static int sas_disable_routing(struct domain_device *dev,  u8 *sas_addr);
+
+#if 0
+/* FIXME: smp needs to migrate into the sas class */
+static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t);
+static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t);
+#endif
+
+/* ---------- SMP task management ---------- */
+
+static void smp_task_timedout(unsigned long _task)
+{
+	unsigned long irq_state;
+	struct sas_task *timed_out = (struct sas_task *) _task;
+
+	/* Flag the task as aborted unless it is already marked done. */
+	spin_lock_irqsave(&timed_out->task_state_lock, irq_state);
+	if ((timed_out->task_state_flags & SAS_TASK_STATE_DONE) == 0)
+		timed_out->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	spin_unlock_irqrestore(&timed_out->task_state_lock, irq_state);
+	complete(&timed_out->completion);	/* wake the waiter either way */
+}
+
+static void smp_task_done(struct sas_task *task)
+{
+	/* del_timer() returns 0 when the timer was no longer pending; then
+	 * smp_task_timedout() is the one that signals the completion. */
+	if (del_timer(&task->timer))
+		complete(&task->completion);
+}
+
+/* Give it some long enough timeout. In seconds. */
+#define SMP_TIMEOUT 10
+/* Send SMP @req to @dev and wait for @resp; 0 only on COMPLETE + GOOD. */
+static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
+			    void *resp, int resp_size)
+{
+	int res;
+	struct sas_task *task = sas_alloc_task(GFP_KERNEL);
+	struct sas_internal *i =
+		to_sas_internal(dev->port->ha->core.shost->transportt);
+
+	if (!task)
+		return -ENOMEM;
+
+	task->dev = dev;
+	task->task_proto = dev->tproto;
+	sg_init_one(&task->smp_task.smp_req, req, req_size);
+	sg_init_one(&task->smp_task.smp_resp, resp, resp_size);
+
+	task->task_done = smp_task_done;
+	/* Arm the SMP_TIMEOUT-second timer before handing the task over. */
+	task->timer.data = (unsigned long) task;
+	task->timer.function = smp_task_timedout;
+	task->timer.expires = jiffies + SMP_TIMEOUT*HZ;
+	add_timer(&task->timer);
+
+	res = i->dft->lldd_execute_task(task, 1, GFP_KERNEL);
+
+	if (res) {
+		del_timer(&task->timer);
+		SAS_DPRINTK("executing SMP task failed:%d\n", res);
+		goto ex_err;
+	}
+	/* Completed by smp_task_done() or by smp_task_timedout(). */
+	wait_for_completion(&task->completion);
+	res = -ETASK;	/* stays unless the status check below succeeds */
+	if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
+		SAS_DPRINTK("smp task timed out or aborted\n");
+		i->dft->lldd_abort_task(task);
+		if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+			SAS_DPRINTK("SMP task aborted and not done\n");
+			goto ex_err;
+		}
+	}
+	if (task->task_status.resp == SAS_TASK_COMPLETE &&
+	    task->task_status.stat == SAM_GOOD)
+		res = 0;
+	else
+		SAS_DPRINTK("%s: task to dev %016llx response: 0x%x "
+			    "status 0x%x\n", __FUNCTION__,
+			    SAS_ADDR(dev->sas_addr),
+			    task->task_status.resp,
+			    task->task_status.stat);
+ex_err:
+	sas_free_task(task);
+	return res;
+}
+
+/* ---------- Allocations ---------- */
+
+static inline void *alloc_smp_req(int size)
+{
+	u8 *frame = kzalloc(size, GFP_KERNEL);
+	if (frame != NULL)
+		frame[0] = SMP_REQUEST;	/* tag byte 0 of the zeroed buffer */
+	return frame;
+}
+/* Zeroed response buffer; unlike alloc_smp_req(), byte 0 is left at 0. */
+static inline void *alloc_smp_resp(int size)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
+
+/* ---------- Expander configuration ---------- */
+/* Store DISCOVER response @disc_resp for phy @phy_id of expander @dev. */
+static void sas_set_ex_phy(struct domain_device *dev, int phy_id,
+			   void *disc_resp)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *phy = &ex->ex_phy[phy_id];
+	struct smp_resp *resp = disc_resp;
+	struct discover_resp *dr = &resp->disc;
+	struct sas_rphy *rphy = dev->rphy;
+	int rediscover = (phy->phy != NULL);
+
+	if (!rediscover) {
+		phy->phy = sas_phy_alloc(&rphy->dev, phy_id);
+
+		/* FIXME: error_handling */
+		BUG_ON(!phy->phy);
+	}
+	/* Only SMP_RESP_FUNC_ACC goes on to copy data; others just set state. */
+	switch (resp->result) {
+	case SMP_RESP_PHY_VACANT:
+		phy->phy_state = PHY_VACANT;
+		return;
+	default:
+		phy->phy_state = PHY_NOT_PRESENT;
+		return;
+	case SMP_RESP_FUNC_ACC:
+		phy->phy_state = PHY_EMPTY; /* do not know yet */
+		break;
+	}
+
+	phy->phy_id = phy_id;
+	phy->attached_dev_type = dr->attached_dev_type;
+	phy->linkrate = dr->linkrate;
+	phy->attached_sata_host = dr->attached_sata_host;
+	phy->attached_sata_dev  = dr->attached_sata_dev;
+	phy->attached_sata_ps   = dr->attached_sata_ps;
+	phy->attached_iproto = dr->iproto << 1;
+	phy->attached_tproto = dr->tproto << 1;
+	memcpy(phy->attached_sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE);
+	phy->attached_phy_id = dr->attached_phy_id;
+	phy->phy_change_count = dr->change_count;
+	phy->routing_attr = dr->routing_attr;
+	phy->virtual = dr->virtual;
+	phy->last_da_index = -1;
+	/* Mirror the result into the sas_phy object (phy->phy). */
+	phy->phy->identify.initiator_port_protocols = phy->attached_iproto;
+	phy->phy->identify.target_port_protocols = phy->attached_tproto;
+	phy->phy->identify.phy_identifier = phy_id;
+	phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+	phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+	phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+	phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS;
+	switch (phy->linkrate) {
+	case PHY_LINKRATE_1_5:
+		phy->phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	case PHY_LINKRATE_3:
+		phy->phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	case PHY_LINKRATE_6:
+		phy->phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS;
+		break;
+	default:
+		phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN;
+		break;
+	}
+
+	if (!rediscover)	/* add the sas_phy only when it was just allocated */
+		sas_phy_add(phy->phy);
+
+	SAS_DPRINTK("ex %016llx phy%02d:%c attached: %016llx\n",
+		    SAS_ADDR(dev->sas_addr), phy->phy_id,
+		    phy->routing_attr == TABLE_ROUTING ? 'T' :
+		    phy->routing_attr == DIRECT_ROUTING ? 'D' :
+		    phy->routing_attr == SUBTRACTIVE_ROUTING ? 'S' : '?',
+		    SAS_ADDR(phy->attached_sas_addr));
+
+	return;
+}
+
+#define DISCOVER_REQ_SIZE  16
+#define DISCOVER_RESP_SIZE 56
+
+/* Issue SMP DISCOVER to phy @single of expander @dev, or to every phy when
+ * @single is not a valid phy index, recording each answer via
+ * sas_set_ex_phy().  Returns 0, -ENOMEM, or the first SMP execution error.
+ */
+static int sas_ex_phy_discover(struct domain_device *dev, int single)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int  res = 0;
+	int  i = 0, end = ex->num_phys;
+	u8   *disc_req;
+	u8   *disc_resp;
+
+	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	if (!disc_req)
+		return -ENOMEM;
+
+	/* The response buffer must not be pre-tagged as a request frame. */
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	if (!disc_resp) {
+		kfree(disc_req);
+		return -ENOMEM;
+	}
+
+	disc_req[1] = SMP_DISCOVER;
+
+	/* A valid @single narrows the scan to that one phy. */
+	if (0 <= single && single < end) {
+		i = single;
+		end = i+1;
+	}
+
+	for ( ; i < end; i++) {
+		disc_req[9] = i;
+		res = smp_execute_task(dev, disc_req, DISCOVER_REQ_SIZE,
+				       disc_resp, DISCOVER_RESP_SIZE);
+		if (res)
+			break;
+		sas_set_ex_phy(dev, i, disc_resp);
+	}
+
+	kfree(disc_resp);
+	kfree(disc_req);
+	return res;
+}
+
+/* Allocate the expander's ex_phy[] table and run DISCOVER on every phy. */
+static int sas_expander_discover(struct domain_device *dev)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int err;
+
+	ex->ex_phy = kzalloc(sizeof(*ex->ex_phy)*ex->num_phys, GFP_KERNEL);
+	if (ex->ex_phy == NULL)
+		return -ENOMEM;
+
+	/* -1 is never a valid phy index, so every phy gets discovered. */
+	err = sas_ex_phy_discover(dev, -1);
+	if (err == 0)
+		return 0;
+	/* Discovery failed: do not leave a partially filled table behind. */
+	kfree(ex->ex_phy);
+	ex->ex_phy = NULL;
+	return err;
+}
+
+#define MAX_EXPANDER_PHYS 128
+/* Copy the REPORT GENERAL fields used by libsas from @resp to dev->ex_dev. */
+static void ex_assign_report_general(struct domain_device *dev,
+					    struct smp_resp *resp)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct report_general_resp *general = &resp->rg;
+	ex->ex_change_count = be16_to_cpu(general->change_count);
+	ex->max_route_indexes = be16_to_cpu(general->route_indexes);
+	ex->num_phys = min(general->num_phys, (u8)MAX_EXPANDER_PHYS);
+	ex->conf_route_table = general->conf_route_table;
+	ex->configuring = general->configuring;
+	memcpy(ex->enclosure_logical_id, general->enclosure_logical_id, 8);
+}
+
+#define RG_REQ_SIZE   8
+#define RG_RESP_SIZE 32
+/* Issue REPORT GENERAL to @dev, retrying while it reports "configuring". */
+static int sas_ex_general(struct domain_device *dev)
+{
+	u8 *rg_req;
+	struct smp_resp *rg_resp;
+	int res;
+	int i;
+
+	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	if (!rg_req)
+		return -ENOMEM;
+
+	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
+	if (!rg_resp) {
+		kfree(rg_req);
+		return -ENOMEM;
+	}
+
+	rg_req[1] = SMP_REPORT_GENERAL;
+
+	for (i = 0; i < 5; i++) {	/* at most 5 attempts */
+		res = smp_execute_task(dev, rg_req, RG_REQ_SIZE, rg_resp,
+				       RG_RESP_SIZE);
+
+		if (res) {
+			SAS_DPRINTK("RG to ex %016llx failed:0x%x\n",
+				    SAS_ADDR(dev->sas_addr), res);
+			goto out;
+		} else if (rg_resp->result != SMP_RESP_FUNC_ACC) {
+			SAS_DPRINTK("RG:ex %016llx returned SMP result:0x%x\n",
+				    SAS_ADDR(dev->sas_addr), rg_resp->result);
+			res = rg_resp->result;	/* SMP result doubles as error */
+			goto out;
+		}
+
+		ex_assign_report_general(dev, rg_resp);
+		/* Still configuring: sleep up to 5*HZ jiffies, then ask again. */
+		if (dev->ex_dev.configuring) {
+			SAS_DPRINTK("RG: ex %llx self-configuring...\n",
+				    SAS_ADDR(dev->sas_addr));
+			schedule_timeout_interruptible(5*HZ);
+		} else
+			break;
+	}
+out:
+	kfree(rg_req);
+	kfree(rg_resp);
+	return res;
+}
+
+/* Copy the manufacturer-info payload at @_mi_resp into the expander rphy. */
+static void ex_assign_manuf_info(struct domain_device *dev, void *_mi_resp)
+{
+	const u8 *info = _mi_resp;
+	struct sas_expander_device *edev = rphy_to_expander_device(dev->rphy);
+
+	memcpy(edev->vendor_id, info + 12, SAS_EXPANDER_VENDOR_ID_LEN);
+	memcpy(edev->product_id, info + 20, SAS_EXPANDER_PRODUCT_ID_LEN);
+	memcpy(edev->product_rev, info + 36, SAS_EXPANDER_PRODUCT_REV_LEN);
+
+	/* Bit 0 of byte 8 gates the component fields; skip them if clear. */
+	if (!(info[8] & 1))
+		return;
+
+	memcpy(edev->component_vendor_id, info + 40,
+	       SAS_EXPANDER_COMPONENT_VENDOR_ID_LEN);
+	edev->component_id = info[48] << 8 | info[49];
+	edev->component_revision_id = info[50];
+}
+
+#define MI_REQ_SIZE   8
+#define MI_RESP_SIZE 64
+
+/*
+ * Fetch REPORT MANUFACTURER INFORMATION from expander @dev.  The data is
+ * stored only if the function was accepted; a rejection is just logged.
+ */
+static int sas_ex_manuf_info(struct domain_device *dev)
+{
+	int err = -ENOMEM;
+	u8 *request = alloc_smp_req(MI_REQ_SIZE);
+	u8 *reply;
+
+	if (request == NULL)
+		return err;
+
+	reply = alloc_smp_resp(MI_RESP_SIZE);
+	if (reply == NULL)
+		goto free_request;
+
+	request[1] = SMP_REPORT_MANUF_INFO;
+	err = smp_execute_task(dev, request, MI_REQ_SIZE,
+			       reply, MI_RESP_SIZE);
+	if (err) {
+		SAS_DPRINTK("MI: ex %016llx failed:0x%x\n",
+			    SAS_ADDR(dev->sas_addr), err);
+	} else if (reply[2] != SMP_RESP_FUNC_ACC) {
+		SAS_DPRINTK("MI ex %016llx returned SMP result:0x%x\n",
+			    SAS_ADDR(dev->sas_addr), reply[2]);
+	} else {
+		ex_assign_manuf_info(dev, reply);
+	}
+
+	kfree(reply);
+free_request:
+	kfree(request);
+	return err;
+}
+
+#define PC_REQ_SIZE  44
+#define PC_RESP_SIZE 8
+
+/* Send SMP PHY CONTROL @phy_func to phy @phy_id of expander @dev. */
+int sas_smp_phy_control(struct domain_device *dev, int phy_id,
+			enum phy_func phy_func)
+{
+	int status = -ENOMEM;
+	u8 *request = alloc_smp_req(PC_REQ_SIZE);
+	u8 *reply;
+
+	if (request == NULL)
+		return status;
+
+	reply = alloc_smp_resp(PC_RESP_SIZE);
+	if (reply == NULL)
+		goto free_request;
+
+	request[1] = SMP_PHY_CONTROL;
+	request[9] = phy_id;
+	request[10] = phy_func;
+
+	/* Only the execution status is reported; the reply is not parsed. */
+	status = smp_execute_task(dev, request, PC_REQ_SIZE,
+				  reply, PC_RESP_SIZE);
+	kfree(reply);
+free_request:
+	kfree(request);
+	return status;
+}
+
+static void sas_ex_disable_phy(struct domain_device *dev, int phy_id)
+{
+	/* The SMP result is ignored: the phy is recorded as disabled locally
+	 * whether or not the PHY_FUNC_DISABLE request succeeded. */
+	sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE);
+
+	dev->ex_dev.ex_phy[phy_id].linkrate = PHY_DISABLED;
+}
+
+/* Disable every present phy of @dev whose attached address is @sas_addr. */
+static void sas_ex_disable_port(struct domain_device *dev, u8 *sas_addr)
+{
+	struct expander_device *expander = &dev->ex_dev;
+	int idx;
+
+	for (idx = 0; idx < expander->num_phys; idx++) {
+		struct ex_phy *cur = &expander->ex_phy[idx];
+		int present = cur->phy_state != PHY_VACANT &&
+			      cur->phy_state != PHY_NOT_PRESENT;
+
+		if (present &&
+		    SAS_ADDR(cur->attached_sas_addr) == SAS_ADDR(sas_addr))
+			sas_ex_disable_phy(dev, idx);
+	}
+}
+
+/* 1 if @sas_addr is the port itself or a device on port->dev_list, else 0. */
+static int sas_dev_present_in_domain(struct asd_sas_port *port, u8 *sas_addr)
+{
+	struct domain_device *known;
+	int found = SAS_ADDR(port->sas_addr) == SAS_ADDR(sas_addr);
+
+	list_for_each_entry(known, &port->dev_list, dev_list_node) {
+		if (found)
+			break;
+		found = SAS_ADDR(known->sas_addr) == SAS_ADDR(sas_addr);
+	}
+	return found;
+}
+
+#define RPEL_REQ_SIZE	16
+#define RPEL_RESP_SIZE	32
+/* Refresh @phy's error counters from SMP REPORT PHY ERROR LOG.  Returns 0,
+ * -ENOMEM, or the SMP execution error (counters are then left untouched). */
+int sas_smp_get_phy_events(struct sas_phy *phy)
+{
+	int res = -ENOMEM;
+	struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
+	struct domain_device *dev = sas_find_dev_by_rphy(rphy);
+	u8 *req = alloc_smp_req(RPEL_REQ_SIZE);
+	u8 *resp = alloc_smp_resp(RPEL_RESP_SIZE);
+
+	if (!req || !resp)
+		goto out;
+
+	req[1] = SMP_REPORT_PHY_ERR_LOG;
+	req[9] = phy->number;
+
+	res = smp_execute_task(dev, req, RPEL_REQ_SIZE,
+			       resp, RPEL_RESP_SIZE);
+	if (res)	/* failed: resp holds nothing worth parsing */
+		goto out;
+
+	phy->invalid_dword_count = scsi_to_u32(&resp[12]);
+	phy->running_disparity_error_count = scsi_to_u32(&resp[16]);
+	phy->loss_of_dword_sync_count = scsi_to_u32(&resp[20]);
+	phy->phy_reset_problem_count = scsi_to_u32(&resp[24]);
+ out:
+	kfree(resp);	/* kfree(NULL) is a no-op */
+	kfree(req);
+	return res;
+}
+
+#define RPS_REQ_SIZE  16
+#define RPS_RESP_SIZE 60
+
+/* Fetch SMP REPORT PHY SATA for phy @phy_id of expander @dev into
+ * @rps_resp.  Returns 0, -ENOMEM, or the SMP execution error. */
+static int sas_get_report_phy_sata(struct domain_device *dev,
+				   int phy_id, struct smp_resp *rps_resp)
+{
+	int res;
+	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE);
+
+	if (!rps_req)
+		return -ENOMEM;
+
+	rps_req[1] = SMP_REPORT_PHY_SATA;
+	rps_req[9] = phy_id;
+
+	res = smp_execute_task(dev, rps_req, RPS_REQ_SIZE,
+			       rps_resp, RPS_RESP_SIZE);
+	kfree(rps_req);
+	return res;	/* propagate failure so the caller's check can fire */
+}
+/* Add each parent phy attached to @child to parent_phy's port; set rates. */
+static void sas_ex_get_linkrate(struct domain_device *parent,
+				       struct domain_device *child,
+				       struct ex_phy *parent_phy)
+{
+	struct expander_device *parent_ex = &parent->ex_dev;
+	struct sas_port *port;
+	int i;
+
+	child->pathways = 0;
+
+	port = parent_phy->port;
+
+	for (i = 0; i < parent_ex->num_phys; i++) {
+		struct ex_phy *phy = &parent_ex->ex_phy[i];
+
+		if (phy->phy_state == PHY_VACANT ||
+		    phy->phy_state == PHY_NOT_PRESENT)
+			continue;
+
+		if (SAS_ADDR(phy->attached_sas_addr) ==
+		    SAS_ADDR(child->sas_addr)) {
+			/* NOTE(review): each match recomputes from the parent's rates. */
+			child->min_linkrate = min(parent->min_linkrate,
+						  phy->linkrate);
+			child->max_linkrate = max(parent->max_linkrate,
+						  phy->linkrate);
+			child->pathways++;	/* one more phy reaching the child */
+			sas_port_add_phy(port, phy->phy);
+		}
+	}
+	child->linkrate = min(parent_phy->linkrate, child->max_linkrate);
+	child->pathways = min(child->pathways, parent->pathways);
+}
+/* Create and discover the end device behind phy @phy_id; NULL on failure. */
+static struct domain_device *sas_ex_discover_end_dev(
+	struct domain_device *parent, int phy_id)
+{
+	struct expander_device *parent_ex = &parent->ex_dev;
+	struct ex_phy *phy = &parent_ex->ex_phy[phy_id];
+	struct domain_device *child = NULL;
+	struct sas_rphy *rphy;
+	int res;
+
+	if (phy->attached_sata_host || phy->attached_sata_ps)
+		return NULL;
+
+	child = kzalloc(sizeof(*child), GFP_KERNEL);
+	if (!child)
+		return NULL;
+
+	child->parent = parent;
+	child->port   = parent->port;
+	child->iproto = phy->attached_iproto;
+	memcpy(child->sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE);
+	sas_hash_addr(child->hashed_sas_addr, child->sas_addr);
+	phy->port = sas_port_alloc(&parent->rphy->dev, phy_id);
+	BUG_ON(!phy->port);
+	/* FIXME: better error handling*/
+	BUG_ON(sas_port_add(phy->port) != 0);
+	sas_ex_get_linkrate(parent, child, phy);
+
+	if ((phy->attached_tproto & SAS_PROTO_STP) || phy->attached_sata_dev) {
+		child->dev_type = SATA_DEV;
+		if (phy->attached_tproto & SAS_PROTO_STP)
+			child->tproto = phy->attached_tproto;
+		if (phy->attached_sata_dev)
+			child->tproto |= SATA_DEV;
+		res = sas_get_report_phy_sata(parent, phy_id,
+					      &child->sata_dev.rps_resp);
+		if (res) {
+			SAS_DPRINTK("report phy sata to %016llx:0x%x returned "
+				    "0x%x\n", SAS_ADDR(parent->sas_addr),
+				    phy_id, res);
+			kfree(child);	/* NOTE(review): phy->port is not released */
+			return NULL;
+		}
+		memcpy(child->frame_rcvd, &child->sata_dev.rps_resp.rps.fis,
+		       sizeof(struct dev_to_host_fis));
+		sas_init_dev(child);
+		res = sas_discover_sata(child);
+		if (res) {
+			SAS_DPRINTK("sas_discover_sata() for device %16llx at "
+				    "%016llx:0x%x returned 0x%x\n",
+				    SAS_ADDR(child->sas_addr),
+				    SAS_ADDR(parent->sas_addr), phy_id, res);
+			kfree(child);	/* NOTE(review): phy->port is not released */
+			return NULL;
+		}
+	} else if (phy->attached_tproto & SAS_PROTO_SSP) {
+		child->dev_type = SAS_END_DEV;
+		rphy = sas_end_device_alloc(phy->port);
+		/* FIXME: error handling */
+		BUG_ON(!rphy);
+		child->tproto = phy->attached_tproto;
+		sas_init_dev(child);
+
+		child->rphy = rphy;
+		sas_fill_in_rphy(child, rphy);
+
+		spin_lock(&parent->port->dev_list_lock);
+		list_add_tail(&child->dev_list_node, &parent->port->dev_list);
+		spin_unlock(&parent->port->dev_list_lock);
+
+		res = sas_discover_end_dev(child);
+		if (res) {
+			SAS_DPRINTK("sas_discover_end_dev() for device %16llx "
+				    "at %016llx:0x%x returned 0x%x\n",
+				    SAS_ADDR(child->sas_addr),
+				    SAS_ADDR(parent->sas_addr), phy_id, res);
+			/* FIXME: this kfrees list elements without removing them */
+			//kfree(child);
+			return NULL;
+		}
+	} else {
+		SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
+			    phy->attached_tproto, SAS_ADDR(parent->sas_addr),
+			    phy_id);
+	}
+	/* NOTE(review): also reached for the unhandled-protocol case above. */
+	list_add_tail(&child->siblings, &parent_ex->children);
+	return child;
+}
+
+/* Register and discover the expander behind phy @phy_id; NULL on failure. */
+static struct domain_device *sas_ex_discover_expander(
+	struct domain_device *parent, int phy_id)
+{
+	struct sas_expander_device *parent_ex = rphy_to_expander_device(parent->rphy);
+	struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id];
+	struct domain_device *child = NULL;
+	struct sas_rphy *rphy;
+	struct sas_expander_device *edev;
+	int res;
+
+	if (phy->routing_attr == DIRECT_ROUTING) {
+		SAS_DPRINTK("ex %016llx:0x%x:D <--> ex %016llx:0x%x is not "
+			    "allowed\n",
+			    SAS_ADDR(parent->sas_addr), phy_id,
+			    SAS_ADDR(phy->attached_sas_addr),
+			    phy->attached_phy_id);
+		return NULL;
+	}
+	child = kzalloc(sizeof(*child), GFP_KERNEL);
+	if (!child)
+		return NULL;
+
+	phy->port = sas_port_alloc(&parent->rphy->dev, phy_id);
+	/* FIXME: better error handling */
+	BUG_ON(sas_port_add(phy->port) != 0);
+	switch (phy->attached_dev_type) {
+	case EDGE_DEV:
+		rphy = sas_expander_alloc(phy->port,
+					  SAS_EDGE_EXPANDER_DEVICE);
+		break;
+	case FANOUT_DEV:
+		rphy = sas_expander_alloc(phy->port,
+					  SAS_FANOUT_EXPANDER_DEVICE);
+		break;
+	default:
+		rphy = NULL;	/* shut gcc up */
+		BUG();
+	}
+	child->rphy = rphy;
+	edev = rphy_to_expander_device(rphy);
+	child->dev_type = phy->attached_dev_type;
+	child->parent = parent;
+	child->port = parent->port;
+	child->iproto = phy->attached_iproto;
+	child->tproto = phy->attached_tproto;
+	memcpy(child->sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE);
+	sas_hash_addr(child->hashed_sas_addr, child->sas_addr);
+	sas_ex_get_linkrate(parent, child, phy);
+	edev->level = parent_ex->level + 1;
+	parent->port->disc.max_level = max(parent->port->disc.max_level,
+					   edev->level);
+	sas_init_dev(child);
+	sas_fill_in_rphy(child, rphy);
+	sas_rphy_add(rphy);
+
+	spin_lock(&parent->port->dev_list_lock);
+	list_add_tail(&child->dev_list_node, &parent->port->dev_list);
+	spin_unlock(&parent->port->dev_list_lock);
+	res = sas_discover_expander(child);
+	if (res) {
+		/* Unlink before freeing, or dev_list keeps a dangling node. */
+		spin_lock(&parent->port->dev_list_lock);
+		list_del(&child->dev_list_node);
+		spin_unlock(&parent->port->dev_list_lock);
+		kfree(child);
+		return NULL;
+	}
+	list_add_tail(&child->siblings, &parent->ex_dev.children);
+	return child;
+}
+/* Examine phy @phy_id of expander @dev and discover what is attached. */
+static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *ex_phy = &ex->ex_phy[phy_id];
+	struct domain_device *child = NULL;
+	int res = 0;
+
+	/* Phy state */
+	if (ex_phy->linkrate == PHY_SPINUP_HOLD) {
+		if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET))
+			res = sas_ex_phy_discover(dev, phy_id);
+		if (res)
+			return res;
+	}
+
+	/* Parent and domain coherency */
+	if (!dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) ==
+			     SAS_ADDR(dev->port->sas_addr))) {
+		sas_add_parent_port(dev, phy_id);
+		return 0;
+	}
+	if (dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) ==
+			    SAS_ADDR(dev->parent->sas_addr))) {
+		sas_add_parent_port(dev, phy_id);
+		if (ex_phy->routing_attr == TABLE_ROUTING)
+			sas_configure_phy(dev, phy_id, dev->port->sas_addr, 1);
+		return 0;
+	}
+	/* Address already present in this domain: disable the phys on it. */
+	if (sas_dev_present_in_domain(dev->port, ex_phy->attached_sas_addr))
+		sas_ex_disable_port(dev, ex_phy->attached_sas_addr);
+
+	if (ex_phy->attached_dev_type == NO_DEVICE) {
+		if (ex_phy->routing_attr == DIRECT_ROUTING) {
+			memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
+			sas_configure_routing(dev, ex_phy->attached_sas_addr);
+		}
+		return 0;
+	} else if (ex_phy->linkrate == PHY_LINKRATE_UNKNOWN)
+		return 0;
+
+	if (ex_phy->attached_dev_type != SAS_END_DEV &&
+	    ex_phy->attached_dev_type != FANOUT_DEV &&
+	    ex_phy->attached_dev_type != EDGE_DEV) {
+		SAS_DPRINTK("unknown device type(0x%x) attached to ex %016llx "
+			    "phy 0x%x\n", ex_phy->attached_dev_type,
+			    SAS_ADDR(dev->sas_addr),
+			    phy_id);
+		return 0;
+	}
+	/* Set up routing first; on failure undo it and give up on this phy. */
+	res = sas_configure_routing(dev, ex_phy->attached_sas_addr);
+	if (res) {
+		SAS_DPRINTK("configure routing for dev %016llx "
+			    "reported 0x%x. Forgotten\n",
+			    SAS_ADDR(ex_phy->attached_sas_addr), res);
+		sas_disable_routing(dev, ex_phy->attached_sas_addr);
+		return res;
+	}
+
+	switch (ex_phy->attached_dev_type) {
+	case SAS_END_DEV:
+		child = sas_ex_discover_end_dev(dev, phy_id);
+		break;
+	case FANOUT_DEV:
+		if (SAS_ADDR(dev->port->disc.fanout_sas_addr)) {
+			SAS_DPRINTK("second fanout expander %016llx phy 0x%x "
+				    "attached to ex %016llx phy 0x%x\n",
+				    SAS_ADDR(ex_phy->attached_sas_addr),
+				    ex_phy->attached_phy_id,
+				    SAS_ADDR(dev->sas_addr),
+				    phy_id);
+			sas_ex_disable_phy(dev, phy_id);
+			break;
+		} else
+			memcpy(dev->port->disc.fanout_sas_addr,
+			       ex_phy->attached_sas_addr, SAS_ADDR_SIZE);
+		/* fallthrough */
+	case EDGE_DEV:
+		child = sas_ex_discover_expander(dev, phy_id);
+		break;
+	default:
+		break;
+	}
+	/* Mark every present phy attached to the new child as discovered. */
+	if (child) {
+		int i;
+
+		for (i = 0; i < ex->num_phys; i++) {
+			if (ex->ex_phy[i].phy_state == PHY_VACANT ||
+			    ex->ex_phy[i].phy_state == PHY_NOT_PRESENT)
+				continue;
+
+			if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) ==
+			    SAS_ADDR(child->sas_addr))
+				ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED;
+		}
+	}
+
+	return res;	/* 0 here even when no child could be created */
+}
+
+/* Copy into @sub_addr the attached address of the first present phy of @dev
+ * that subtractively routes to an expander; return 1 if found, else 0. */
+static int sas_find_sub_addr(struct domain_device *dev, u8 *sub_addr)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int n;
+
+	for (n = 0; n < ex->num_phys; n++) {
+		struct ex_phy *cand = &ex->ex_phy[n];
+
+		if (cand->phy_state == PHY_VACANT ||
+		    cand->phy_state == PHY_NOT_PRESENT)
+			continue;
+		if (cand->routing_attr != SUBTRACTIVE_ROUTING)
+			continue;
+		if (cand->attached_dev_type != EDGE_DEV &&
+		    cand->attached_dev_type != FANOUT_DEV)
+			continue;
+		memcpy(sub_addr, cand->attached_sas_addr, SAS_ADDR_SIZE);
+		return 1;
+	}
+	return 0;
+}
+
+/* Disable child-expander ports whose subtractive address differs from the first. */
+static int sas_check_level_subtractive_boundary(struct domain_device *dev)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct domain_device *child;
+	u8 sub_addr[8] = {0, };
+
+	list_for_each_entry(child, &ex->children, siblings) {
+		u8 s2[8];
+
+		if (child->dev_type != EDGE_DEV &&
+		    child->dev_type != FANOUT_DEV)
+			continue;
+		/* First byte still zero: no reference address recorded yet. */
+		if (sub_addr[0] == 0) {
+			sas_find_sub_addr(child, sub_addr);
+			continue;
+		}
+		if (!sas_find_sub_addr(child, s2) ||
+		    SAS_ADDR(sub_addr) == SAS_ADDR(s2))
+			continue;
+
+		SAS_DPRINTK("ex %016llx->%016llx-?->%016llx "
+			    "diverges from subtractive "
+			    "boundary %016llx\n",
+			    SAS_ADDR(dev->sas_addr),
+			    SAS_ADDR(child->sas_addr),
+			    SAS_ADDR(s2),
+			    SAS_ADDR(sub_addr));
+		sas_ex_disable_port(child, s2);
+	}
+	return 0;
+}
+/**
+ * sas_ex_discover_devices -- discover devices attached to this expander
+ * @dev: pointer to the expander domain device
+ * @single: phy to process; any value outside 0..num_phys-1 (e.g. -1) = all
+ *
+ * Configure this expander for use with its devices and register the
+ * devices of this expander.
+ */
+static int sas_ex_discover_devices(struct domain_device *dev, int single)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int i = 0, end = ex->num_phys;
+	int res = 0;
+
+	if (0 <= single && single < end) {
+		i = single;
+		end = i+1;
+	}
+
+	for ( ; i < end; i++) {
+		struct ex_phy *ex_phy = &ex->ex_phy[i];
+
+		if (ex_phy->phy_state == PHY_VACANT ||
+		    ex_phy->phy_state == PHY_NOT_PRESENT ||
+		    ex_phy->phy_state == PHY_DEVICE_DISCOVERED)
+			continue;
+
+		switch (ex_phy->linkrate) {
+		case PHY_DISABLED:
+		case PHY_RESET_PROBLEM:
+		case PHY_PORT_SELECTOR:
+			continue;
+		default:
+			res = sas_ex_discover_dev(dev, i);
+			if (res)
+				break;	/* NOTE(review): exits switch, not loop */
+			continue;
+		}
+	}
+	/* res is whatever the last sas_ex_discover_dev() call returned. */
+	if (!res)
+		sas_check_level_subtractive_boundary(dev);
+
+	return res;
+}
+
+/* On an edge expander, disable subtractive expander phys whose attached
+ * address differs from the first such phy found.  Always returns 0. */
+static int sas_check_ex_subtractive_boundary(struct domain_device *dev)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	u8 *boundary = NULL;
+	int n;
+
+	if (dev->dev_type != EDGE_DEV)
+		return 0;
+	for (n = 0; n < ex->num_phys; n++) {
+		struct ex_phy *cur = &ex->ex_phy[n];
+
+		if (cur->phy_state == PHY_VACANT ||
+		    cur->phy_state == PHY_NOT_PRESENT)
+			continue;
+		if (cur->routing_attr != SUBTRACTIVE_ROUTING)
+			continue;
+		if (cur->attached_dev_type != FANOUT_DEV &&
+		    cur->attached_dev_type != EDGE_DEV)
+			continue;
+		if (boundary == NULL) {
+			boundary = cur->attached_sas_addr;
+			continue;
+		}
+		if (SAS_ADDR(boundary) == SAS_ADDR(cur->attached_sas_addr))
+			continue;
+		SAS_DPRINTK("ex %016llx phy 0x%x "
+			    "diverges(%016llx) on subtractive "
+			    "boundary(%016llx). Disabled\n",
+			    SAS_ADDR(dev->sas_addr), n,
+			    SAS_ADDR(cur->attached_sas_addr),
+			    SAS_ADDR(boundary));
+		sas_ex_disable_phy(dev, n);
+	}
+	return 0;
+}
+
+/* Print, via sas_printk, both ends of the link between @child and its
+ * parent together with the routing attribute of each of the two phys. */
+static void sas_print_parent_topology_bug(struct domain_device *child,
+						 struct ex_phy *parent_phy,
+						 struct ex_phy *child_phy)
+{
+	static const char route_tag[] = {
+		[DIRECT_ROUTING] = 'D',
+		[SUBTRACTIVE_ROUTING] = 'S',
+		[TABLE_ROUTING] = 'T',
+	};
+	static const char *kind_name[] = {
+		[EDGE_DEV] = "edge",
+		[FANOUT_DEV] = "fanout",
+	};
+	struct domain_device *parent = child->parent;
+
+	/* Arguments come in (type, address, phy) triples: parent, then child. */
+	sas_printk("%s ex %016llx phy 0x%x <--> %s ex %016llx phy 0x%x "
+		   "has %c:%c routing link!\n",
+		   kind_name[parent->dev_type],
+		   SAS_ADDR(parent->sas_addr),
+		   parent_phy->phy_id,
+		   kind_name[child->dev_type],
+		   SAS_ADDR(child->sas_addr),
+		   child_phy->phy_id,
+		   route_tag[parent_phy->routing_attr],
+		   route_tag[child_phy->routing_attr]);
+}
+/* Check an edge<->edge link against disc.fanout_sas_addr and disc.eeds_a/b. */
+static int sas_check_eeds(struct domain_device *child,
+				 struct ex_phy *parent_phy,
+				 struct ex_phy *child_phy)
+{
+	int res = 0;	/* 0 = link accepted, -ENODEV = rejected */
+	struct domain_device *parent = child->parent;
+
+	if (SAS_ADDR(parent->port->disc.fanout_sas_addr) != 0) {
+		res = -ENODEV;
+		SAS_DPRINTK("edge ex %016llx phy S:0x%x <--> edge ex %016llx "
+			    "phy S:0x%x, while there is a fanout ex %016llx\n",
+			    SAS_ADDR(parent->sas_addr),
+			    parent_phy->phy_id,
+			    SAS_ADDR(child->sas_addr),
+			    child_phy->phy_id,
+			    SAS_ADDR(parent->port->disc.fanout_sas_addr));
+	} else if (SAS_ADDR(parent->port->disc.eeds_a) == 0) {
+		/* Nothing recorded yet: remember this parent/child pair. */
+		memcpy(parent->port->disc.eeds_a, parent->sas_addr,
+		       SAS_ADDR_SIZE);
+		memcpy(parent->port->disc.eeds_b, child->sas_addr,
+		       SAS_ADDR_SIZE);
+	} else if (((SAS_ADDR(parent->port->disc.eeds_a) ==
+		    SAS_ADDR(parent->sas_addr)) ||
+		   (SAS_ADDR(parent->port->disc.eeds_a) ==
+		    SAS_ADDR(child->sas_addr)))
+		   &&
+		   ((SAS_ADDR(parent->port->disc.eeds_b) ==
+		     SAS_ADDR(parent->sas_addr)) ||
+		    (SAS_ADDR(parent->port->disc.eeds_b) ==
+		     SAS_ADDR(child->sas_addr))))
+		;	/* both recorded addresses match this pair: accepted */
+	else {
+		res = -ENODEV;
+		SAS_DPRINTK("edge ex %016llx phy 0x%x <--> edge ex %016llx "
+			    "phy 0x%x link forms a third EEDS!\n",
+			    SAS_ADDR(parent->sas_addr),
+			    parent_phy->phy_id,
+			    SAS_ADDR(child->sas_addr),
+			    child_phy->phy_id);
+	}
+
+	return res;
+}
+
+/* Check the routing attributes of every parent<->child expander link; returns 0
+ * or a negative errno.  Here we spill over 80 columns.  It is intentional. */
+static int sas_check_parent_topology(struct domain_device *child)
+{
+	struct expander_device *child_ex = &child->ex_dev;
+	struct expander_device *parent_ex;
+	int i;
+	int res = 0;
+
+	if (!child->parent)	/* root device: nothing to check against */
+		return 0;
+
+	if (child->parent->dev_type != EDGE_DEV &&
+	    child->parent->dev_type != FANOUT_DEV)
+		return 0;
+
+	parent_ex = &child->parent->ex_dev;
+
+	for (i = 0; i < parent_ex->num_phys; i++) {
+		struct ex_phy *parent_phy = &parent_ex->ex_phy[i];
+		struct ex_phy *child_phy;
+
+		if (parent_phy->phy_state == PHY_VACANT ||
+		    parent_phy->phy_state == PHY_NOT_PRESENT)
+			continue;
+
+		if (SAS_ADDR(parent_phy->attached_sas_addr) != SAS_ADDR(child->sas_addr))
+			continue;	/* this parent phy does not lead to the child */
+
+		child_phy = &child_ex->ex_phy[parent_phy->attached_phy_id];	/* NOTE(review): index is not range-checked against child_ex->num_phys */
+
+		switch (child->parent->dev_type) {
+		case EDGE_DEV:
+			if (child->dev_type == FANOUT_DEV) {	/* edge parent, fanout child: only S:T is accepted */
+				if (parent_phy->routing_attr != SUBTRACTIVE_ROUTING ||
+				    child_phy->routing_attr != TABLE_ROUTING) {
+					sas_print_parent_topology_bug(child, parent_phy, child_phy);
+					res = -ENODEV;
+				}
+			} else if (parent_phy->routing_attr == SUBTRACTIVE_ROUTING) {
+				if (child_phy->routing_attr == SUBTRACTIVE_ROUTING) {	/* S:S needs the extra pair check */
+					res = sas_check_eeds(child, parent_phy, child_phy);
+				} else if (child_phy->routing_attr != TABLE_ROUTING) {
+					sas_print_parent_topology_bug(child, parent_phy, child_phy);
+					res = -ENODEV;
+				}
+			} else if (parent_phy->routing_attr == TABLE_ROUTING &&
+				   child_phy->routing_attr != SUBTRACTIVE_ROUTING) {
+				sas_print_parent_topology_bug(child, parent_phy, child_phy);
+				res = -ENODEV;
+			}
+			break;
+		case FANOUT_DEV:	/* fanout parent: only T:S is accepted */
+			if (parent_phy->routing_attr != TABLE_ROUTING ||
+			    child_phy->routing_attr != SUBTRACTIVE_ROUTING) {
+				sas_print_parent_topology_bug(child, parent_phy, child_phy);
+				res = -ENODEV;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	return res;	/* NOTE(review): a later link's sas_check_eeds() result can overwrite an earlier -ENODEV */
+}
+
+#define RRI_REQ_SIZE  16	/* bytes allocated for the SMP_REPORT_ROUTE_INFO request */
+#define RRI_RESP_SIZE 44	/* bytes allocated for its response */
+
+static int sas_configure_present(struct domain_device *dev, int phy_id,
+				 u8 *sas_addr, int *index, int *present)
+{	/* Scan phy_id's route table for sas_addr; reports the slot in *index and whether it is enabled in *present. */
+	int i, res = 0;
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *phy = &ex->ex_phy[phy_id];
+	u8 *rri_req;
+	u8 *rri_resp;
+
+	*present = 0;
+	*index = 0;
+
+	rri_req = alloc_smp_req(RRI_REQ_SIZE);
+	if (!rri_req)
+		return -ENOMEM;
+
+	rri_resp = alloc_smp_resp(RRI_RESP_SIZE);
+	if (!rri_resp) {
+		kfree(rri_req);
+		return -ENOMEM;
+	}
+
+	rri_req[1] = SMP_REPORT_ROUTE_INFO;
+	rri_req[9] = phy_id;
+
+	for (i = 0; i < ex->max_route_indexes ; i++) {
+		*(__be16 *)(rri_req+6) = cpu_to_be16(i);	/* route index, big-endian, request bytes 6-7 */
+		res = smp_execute_task(dev, rri_req, RRI_REQ_SIZE, rri_resp,
+				       RRI_RESP_SIZE);
+		if (res)
+			goto out;
+		res = rri_resp[2];	/* SMP function result byte */
+		if (res == SMP_RESP_NO_INDEX) {
+			SAS_DPRINTK("overflow of indexes: dev %016llx "
+				    "phy 0x%x index 0x%x\n",
+				    SAS_ADDR(dev->sas_addr), phy_id, i);
+			goto out;
+		} else if (res != SMP_RESP_FUNC_ACC) {
+			SAS_DPRINTK("%s: dev %016llx phy 0x%x index 0x%x "
+				    "result 0x%x\n", __FUNCTION__,
+				    SAS_ADDR(dev->sas_addr), phy_id, i, res);
+			goto out;
+		}
+		if (SAS_ADDR(sas_addr) != 0) {
+			if (SAS_ADDR(rri_resp+16) == SAS_ADDR(sas_addr)) {	/* slot already holds this address */
+				*index = i;
+				if ((rri_resp[12] & 0x80) == 0x80)	/* bit 7 of byte 12 set: treated as not present */
+					*present = 0;
+				else
+					*present = 1;
+				goto out;
+			} else if (SAS_ADDR(rri_resp+16) == 0) {	/* first zero-address slot: usable for a new entry */
+				*index = i;
+				*present = 0;
+				goto out;
+			}
+		} else if (SAS_ADDR(rri_resp+16) == 0 &&
+			   phy->last_da_index < i) {	/* zero sas_addr: hand out zero-address slots past last_da_index */
+			phy->last_da_index = i;
+			*index = i;
+			*present = 0;
+			goto out;
+		}
+	}
+	res = -1;	/* table exhausted without a match or a free slot */
+out:	/* on a hit res still holds SMP_RESP_FUNC_ACC; the caller treats non-zero as failure */
+	kfree(rri_req);
+	kfree(rri_resp);
+	return res;
+}
+
+#define CRI_REQ_SIZE  44	/* bytes allocated for the SMP_CONF_ROUTE_INFO request */
+#define CRI_RESP_SIZE  8	/* bytes allocated for its response */
+
+static int sas_configure_set(struct domain_device *dev, int phy_id,
+			     u8 *sas_addr, int index, int include)
+{	/* Write sas_addr into route slot 'index' of phy_id; returns the transport error or the SMP result byte. */
+	int res;
+	u8 *cri_req;
+	u8 *cri_resp;
+
+	cri_req = alloc_smp_req(CRI_REQ_SIZE);
+	if (!cri_req)
+		return -ENOMEM;
+
+	cri_resp = alloc_smp_resp(CRI_RESP_SIZE);
+	if (!cri_resp) {
+		kfree(cri_req);
+		return -ENOMEM;
+	}
+
+	cri_req[1] = SMP_CONF_ROUTE_INFO;
+	*(__be16 *)(cri_req+6) = cpu_to_be16(index);	/* route index, big-endian, request bytes 6-7 */
+	cri_req[9] = phy_id;
+	if (SAS_ADDR(sas_addr) == 0 || !include)
+		cri_req[12] |= 0x80;	/* bit 7 of byte 12 is set for a zero address or include == 0; assumes a zeroed request buffer */
+	memcpy(cri_req+16, sas_addr, SAS_ADDR_SIZE);
+
+	res = smp_execute_task(dev, cri_req, CRI_REQ_SIZE, cri_resp,
+			       CRI_RESP_SIZE);
+	if (res)
+		goto out;
+	res = cri_resp[2];	/* SMP function result byte is returned as-is */
+	if (res == SMP_RESP_NO_INDEX) {
+		SAS_DPRINTK("overflow of indexes: dev %016llx phy 0x%x "
+			    "index 0x%x\n",
+			    SAS_ADDR(dev->sas_addr), phy_id, index);
+	}
+out:
+	kfree(cri_req);
+	kfree(cri_resp);
+	return res;
+}
+
+static int sas_configure_phy(struct domain_device *dev, int phy_id,
+				    u8 *sas_addr, int include)
+{	/* Bring phy_id's route entry for sas_addr in line with 'include'. */
+	int slot;	/* route index reported by sas_configure_present() */
+	int in_table;	/* "present" flag reported by sas_configure_present() */
+	int rc;
+
+	rc = sas_configure_present(dev, phy_id, sas_addr, &slot, &in_table);
+	if (rc)
+		return rc;
+	if (!(include ^ in_table))	/* table already matches the request */
+		return 0;
+
+	return sas_configure_set(dev, phy_id, sas_addr, slot, include);
+}
+
+/**
+ * sas_configure_parent -- configure routing table of parent
+ * @parent: parent expander
+ * @child: child expander
+ * @sas_addr: SAS port identifier of device directly attached to child
+ */
+static int sas_configure_parent(struct domain_device *parent,
+				struct domain_device *child,
+				u8 *sas_addr, int include)	/* include: 1 from sas_configure_routing(), 0 from sas_disable_routing() */
+{
+	struct expander_device *ex_parent = &parent->ex_dev;
+	int res = 0;
+	int i;
+
+	if (parent->parent) {	/* recurse upwards first, so the topmost ancestor is configured first */
+		res = sas_configure_parent(parent->parent, parent, sas_addr,
+					   include);
+		if (res)
+			return res;
+	}
+
+	if (ex_parent->conf_route_table == 0) {
+		SAS_DPRINTK("ex %016llx has self-configuring routing table\n",
+			    SAS_ADDR(parent->sas_addr));
+		return 0;
+	}
+
+	for (i = 0; i < ex_parent->num_phys; i++) {
+		struct ex_phy *phy = &ex_parent->ex_phy[i];
+
+		if ((phy->routing_attr == TABLE_ROUTING) &&
+		    (SAS_ADDR(phy->attached_sas_addr) ==
+		     SAS_ADDR(child->sas_addr))) {	/* every table-routed phy that leads to the child */
+			res = sas_configure_phy(parent, i, sas_addr, include);
+			if (res)
+				return res;
+		}
+	}
+
+	return res;
+}
+
+/**
+ * sas_configure_routing -- add a route for sas_addr in the ancestors' tables
+ * @dev: expander device
+ * @sas_addr: port identifier of device directly attached to the expander device
+ */
+static int sas_configure_routing(struct domain_device *dev, u8 *sas_addr)
+{
+	if (!dev->parent)	/* no ancestor whose table could need the route */
+		return 0;
+	return sas_configure_parent(dev->parent, dev, sas_addr, 1);
+}
+
+static int sas_disable_routing(struct domain_device *dev,  u8 *sas_addr)
+{	/* Counterpart of sas_configure_routing(): same walk with include == 0. */
+	if (!dev->parent)	/* no ancestor whose table could hold the route */
+		return 0;
+	return sas_configure_parent(dev->parent, dev, sas_addr, 0);
+}
+
+#if 0	/* compiled out together with the SMP portal read/write handlers */
+#define SMP_BIN_ATTR_NAME "smp_portal"
+
+static void sas_ex_smp_hook(struct domain_device *dev)
+{	/* Fill in the expander's "smp_portal" binary attribute and reset the portal state. */
+	struct expander_device *ex_dev = &dev->ex_dev;
+	struct bin_attribute *bin_attr = &ex_dev->smp_bin_attr;
+
+	memset(bin_attr, 0, sizeof(*bin_attr));
+
+	bin_attr->attr.name = SMP_BIN_ATTR_NAME;
+	bin_attr->attr.owner = THIS_MODULE;
+	bin_attr->attr.mode = 0600;	/* owner read/write only */
+
+	bin_attr->size = 0;
+	bin_attr->private = NULL;
+	bin_attr->read = smp_portal_read;
+	bin_attr->write= smp_portal_write;
+	bin_attr->mmap = NULL;
+
+	ex_dev->smp_portal_pid = -1;	/* -1: no process currently owns the portal */
+	init_MUTEX(&ex_dev->smp_sema);
+}
+#endif
+
+/**
+ * sas_discover_expander -- expander discovery
+ * @dev: pointer to expander domain device
+ *
+ * See comment in sas_discover_sata().
+ */
+static int sas_discover_expander(struct domain_device *dev)
+{
+	int res;
+
+	res = sas_notify_lldd_dev_found(dev);
+	if (res)
+		return res;	/* nothing to undo yet */
+
+	res = sas_ex_general(dev);
+	if (res)
+		goto out_err;
+	res = sas_ex_manuf_info(dev);
+	if (res)
+		goto out_err;
+
+	res = sas_expander_discover(dev);
+	if (res) {
+		SAS_DPRINTK("expander %016llx discovery failed(0x%x)\n",
+			    SAS_ADDR(dev->sas_addr), res);
+		goto out_err;
+	}
+
+	sas_check_ex_subtractive_boundary(dev);	/* always returns 0, so the result is not checked */
+	res = sas_check_parent_topology(dev);
+	if (res)
+		goto out_err;
+	return 0;
+out_err:	/* every failure after the "found" notification is paired with a "gone" */
+	sas_notify_lldd_dev_gone(dev);
+	return res;
+}
+
+static int sas_ex_level_discovery(struct asd_sas_port *port, const int level)
+{	/* Run device discovery for the expanders on the port's device list, keyed on 'level'. */
+	int res = 0;
+	struct domain_device *dev;
+
+	list_for_each_entry(dev, &port->dev_list, dev_list_node) {
+		if (dev->dev_type == EDGE_DEV ||
+		    dev->dev_type == FANOUT_DEV) {
+			struct sas_expander_device *ex =
+				rphy_to_expander_device(dev->rphy);
+
+			if (level == ex->level)
+				res = sas_ex_discover_devices(dev, -1);	/* -1 is passed as the phy id */
+			else if (level > 0)	/* NOTE(review): other-level expanders trigger discovery on port->port_dev -- confirm intended */
+				res = sas_ex_discover_devices(port->port_dev, -1);
+
+		}
+	}
+
+	return res;	/* only the result of the last discovery call survives */
+}
+
+static int sas_ex_bfs_disc(struct asd_sas_port *port)
+{	/* Repeat level discovery for as long as a pass raises port->disc.max_level. */
+	int res;
+	int level;
+
+	do {
+		level = port->disc.max_level;	/* snapshot before the pass */
+		res = sas_ex_level_discovery(port, level);
+		mb();
+	} while (level < port->disc.max_level);	/* max_level grew during the pass: go again */
+
+	return res;	/* result of the last pass only */
+}
+
+int sas_discover_root_expander(struct domain_device *dev)
+{	/* Register the root expander's rphy, discover it, then discover what lies behind it. */
+	int res;
+	struct sas_expander_device *ex = rphy_to_expander_device(dev->rphy);
+
+	sas_rphy_add(dev->rphy);	/* return value is not checked */
+
+	ex->level = dev->port->disc.max_level; /* 0 */
+	res = sas_discover_expander(dev);
+	if (!res)
+		sas_ex_bfs_disc(dev->port);	/* its result is deliberately not propagated */
+
+	return res;
+}
+
+/* ---------- Domain revalidation ---------- */
+
+static int sas_get_phy_discover(struct domain_device *dev,
+				int phy_id, struct smp_resp *disc_resp)
+{	/* Issue SMP_DISCOVER for phy_id into the caller-owned disc_resp. */
+	int res;
+	u8 *disc_req;
+
+	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	if (!disc_req)
+		return -ENOMEM;
+
+	disc_req[1] = SMP_DISCOVER;
+	disc_req[9] = phy_id;
+
+	res = smp_execute_task(dev, disc_req, DISCOVER_REQ_SIZE,
+			       disc_resp, DISCOVER_RESP_SIZE);
+	if (res)
+		goto out;
+	else if (disc_resp->result != SMP_RESP_FUNC_ACC) {
+		res = disc_resp->result;	/* a non-accepted SMP result is returned as a positive code */
+		goto out;
+	}
+out:	/* success leaves res at the 0 returned by smp_execute_task() */
+	kfree(disc_req);
+	return res;
+}
+
+static int sas_get_phy_change_count(struct domain_device *dev,
+				    int phy_id, int *pcc)
+{	/* Read phy_id's change count via SMP DISCOVER. */
+	int res;
+	struct smp_resp *disc_resp;
+
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	if (!disc_resp)
+		return -ENOMEM;
+
+	res = sas_get_phy_discover(dev, phy_id, disc_resp);
+	if (!res)
+		*pcc = disc_resp->disc.change_count;	/* *pcc is left untouched on failure */
+
+	kfree(disc_resp);
+	return res;
+}
+
+static int sas_get_phy_attached_sas_addr(struct domain_device *dev,
+					 int phy_id, u8 *attached_sas_addr)
+{	/* Fetch the address attached to phy_id; the 8-byte output is written only on success. */
+	int res;
+	struct smp_resp *disc_resp;
+	struct discover_resp *dr;
+
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	if (!disc_resp)
+		return -ENOMEM;
+	dr = &disc_resp->disc;
+
+	res = sas_get_phy_discover(dev, phy_id, disc_resp);
+	if (!res) {
+		memcpy(attached_sas_addr,disc_resp->disc.attached_sas_addr,8);
+		if (dr->attached_dev_type == 0)	/* device type 0: report an all-zero address instead */
+			memset(attached_sas_addr, 0, 8);
+	}
+	kfree(disc_resp);
+	return res;	/* on failure attached_sas_addr is NOT initialised */
+}
+
+static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id,
+			      int from_phy)
+{	/* Find the first phy at or after from_phy whose change count differs from the cached one. */
+	struct expander_device *ex = &dev->ex_dev;
+	int res = 0;
+	int i;
+
+	for (i = from_phy; i < ex->num_phys; i++) {
+		int phy_change_count = 0;
+
+		res = sas_get_phy_change_count(dev, i, &phy_change_count);
+		if (res)
+			goto out;
+		else if (phy_change_count != ex->ex_phy[i].phy_change_count) {
+			ex->ex_phy[i].phy_change_count = phy_change_count;	/* cache the new count */
+			*phy_id = i;
+			return 0;
+		}
+	}
+out:	/* *phy_id is left untouched when no phy changed or a query failed */
+	return res;
+}
+
+static int sas_get_ex_change_count(struct domain_device *dev, int *ecc)
+{	/* Read the expander change count via SMP_REPORT_GENERAL. */
+	int res;
+	u8  *rg_req;
+	struct smp_resp  *rg_resp;
+
+	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	if (!rg_req)
+		return -ENOMEM;
+
+	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
+	if (!rg_resp) {
+		kfree(rg_req);
+		return -ENOMEM;
+	}
+
+	rg_req[1] = SMP_REPORT_GENERAL;
+
+	res = smp_execute_task(dev, rg_req, RG_REQ_SIZE, rg_resp,
+			       RG_RESP_SIZE);
+	if (res)
+		goto out;
+	if (rg_resp->result != SMP_RESP_FUNC_ACC) {
+		res = rg_resp->result;	/* a non-accepted SMP result is returned as a positive code */
+		goto out;
+	}
+
+	*ecc = be16_to_cpu(rg_resp->rg.change_count);	/* 16-bit big-endian field; *ecc untouched on failure */
+out:
+	kfree(rg_resp);
+	kfree(rg_req);
+	return res;
+}
+
+static int sas_find_bcast_dev(struct domain_device *dev,
+			      struct domain_device **src_dev)
+{	/* Depth-first search for the expander whose change count moved; caller pre-sets *src_dev to NULL. */
+	struct expander_device *ex = &dev->ex_dev;
+	int ex_change_count = -1;
+	int res;
+
+	res = sas_get_ex_change_count(dev, &ex_change_count);
+	if (res)
+		goto out;
+	if (ex_change_count != -1 &&
+	    ex_change_count != ex->ex_change_count) {
+		*src_dev = dev;	/* this expander originated the change */
+		ex->ex_change_count = ex_change_count;
+	} else {
+		struct domain_device *ch;
+
+		list_for_each_entry(ch, &ex->children, siblings) {
+			if (ch->dev_type == EDGE_DEV ||
+			    ch->dev_type == FANOUT_DEV) {
+				res = sas_find_bcast_dev(ch, src_dev);
+				if (res || *src_dev)	/* stop on error or once a source is found; keep scanning siblings otherwise */
+					return res;
+			}
+		}
+	}
+out:
+	return res;
+}
+
+static void sas_unregister_ex_tree(struct domain_device *dev)
+{	/* Unregister everything below an expander, then the expander itself. */
+	struct expander_device *ex = &dev->ex_dev;
+	struct domain_device *child, *n;
+
+	list_for_each_entry_safe(child, n, &ex->children, siblings) {	/* _safe variant is used while children are being unregistered */
+		if (child->dev_type == EDGE_DEV ||
+		    child->dev_type == FANOUT_DEV)
+			sas_unregister_ex_tree(child);	/* recurse into child expanders */
+		else
+			sas_unregister_dev(child);
+	}
+	sas_unregister_dev(dev);	/* children first, the expander last */
+}
+
+static void sas_unregister_devs_sas_addr(struct domain_device *parent,
+					 int phy_id)
+{	/* Tear down whatever was attached to parent's phy_id and forget its address. */
+	struct expander_device *ex_dev = &parent->ex_dev;
+	struct ex_phy *phy = &ex_dev->ex_phy[phy_id];
+	struct domain_device *child, *n;
+
+	list_for_each_entry_safe(child, n, &ex_dev->children, siblings) {
+		if (SAS_ADDR(child->sas_addr) ==
+		    SAS_ADDR(phy->attached_sas_addr)) {
+			if (child->dev_type == EDGE_DEV ||
+			    child->dev_type == FANOUT_DEV)
+				sas_unregister_ex_tree(child);
+			else
+				sas_unregister_dev(child);
+			break;	/* only the first child with this address is removed */
+		}
+	}
+	sas_disable_routing(parent, phy->attached_sas_addr);	/* must run before the address is zeroed below */
+	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
+	sas_port_delete_phy(phy->port, phy->phy);	/* NOTE(review): phy->port is used without a NULL check -- confirm it is always set here */
+	if (phy->port->num_phys == 0)	/* last phy gone: drop the port as well */
+		sas_port_delete(phy->port);
+	phy->port = NULL;
+}
+
+static int sas_discover_bfs_by_root_level(struct domain_device *root,
+					  const int level)
+{	/* Run device discovery on the expanders at 'level' in the subtree under root. */
+	struct expander_device *ex_root = &root->ex_dev;
+	struct domain_device *child;
+	int res = 0;
+
+	list_for_each_entry(child, &ex_root->children, siblings) {
+		if (child->dev_type == EDGE_DEV ||
+		    child->dev_type == FANOUT_DEV) {
+			struct sas_expander_device *ex =
+				rphy_to_expander_device(child->rphy);
+
+			if (level > ex->level)	/* target level is deeper: descend */
+				res = sas_discover_bfs_by_root_level(child,
+								     level);
+			else if (level == ex->level)
+				res = sas_ex_discover_devices(child, -1);
+		}
+	}
+	return res;	/* only the last child's result survives */
+}
+
+static int sas_discover_bfs_by_root(struct domain_device *dev)
+{	/* Discover dev, then each deeper level up to the port's max_level. */
+	int res;
+	struct sas_expander_device *ex = rphy_to_expander_device(dev->rphy);
+	int level = ex->level+1;	/* first level below dev */
+
+	res = sas_ex_discover_devices(dev, -1);
+	if (res)
+		goto out;
+	do {
+		res = sas_discover_bfs_by_root_level(dev, level);
+		mb();
+		level += 1;
+	} while (level <= dev->port->disc.max_level);	/* max_level is re-read on every pass */
+out:
+	return res;	/* result of the last level processed */
+}
+
+static int sas_discover_new(struct domain_device *dev, int phy_id)
+{	/* Discover a device newly attached to phy_id and, if it is an expander, its subtree. */
+	struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id];
+	struct domain_device *child;
+	int res;
+
+	SAS_DPRINTK("ex %016llx phy%d new device attached\n",
+		    SAS_ADDR(dev->sas_addr), phy_id);
+	res = sas_ex_phy_discover(dev, phy_id);	/* refresh this phy's data first */
+	if (res)
+		goto out;
+	res = sas_ex_discover_devices(dev, phy_id);
+	if (res)
+		goto out;
+	list_for_each_entry(child, &dev->ex_dev.children, siblings) {
+		if (SAS_ADDR(child->sas_addr) ==
+		    SAS_ADDR(ex_phy->attached_sas_addr)) {	/* the child now attached to this phy */
+			if (child->dev_type == EDGE_DEV ||
+			    child->dev_type == FANOUT_DEV)
+				res = sas_discover_bfs_by_root(child);
+			break;
+		}
+	}
+out:
+	return res;
+}
+
+static int sas_rediscover_dev(struct domain_device *dev, int phy_id)
+{	/* Re-query phy_id and unregister, ignore or discover according to what is attached now. */
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *phy = &ex->ex_phy[phy_id];
+	u8 attached_sas_addr[8];	/* filled in only when the query below succeeds */
+	int res;
+
+	res = sas_get_phy_attached_sas_addr(dev, phy_id, attached_sas_addr);
+	switch (res) {
+	case SMP_RESP_NO_PHY:
+		phy->phy_state = PHY_NOT_PRESENT;
+		sas_unregister_devs_sas_addr(dev, phy_id);
+		return res;
+	case SMP_RESP_PHY_VACANT:
+		phy->phy_state = PHY_VACANT;
+		sas_unregister_devs_sas_addr(dev, phy_id);
+		return res;
+	default:	/* any other failure: the address buffer was never written, so bail out */
+		if (res && res != SMP_RESP_FUNC_ACC)
+			return res;
+	}
+
+	if (SAS_ADDR(attached_sas_addr) == 0) {	/* nothing attached any more */
+		phy->phy_state = PHY_EMPTY;
+		sas_unregister_devs_sas_addr(dev, phy_id);
+	} else if (SAS_ADDR(attached_sas_addr) ==
+		   SAS_ADDR(phy->attached_sas_addr)) {	/* same device as before: nothing to do */
+		SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n",
+			    SAS_ADDR(dev->sas_addr), phy_id);
+	} else
+		res = sas_discover_new(dev, phy_id);
+	return res;
+}
+
+static int sas_rediscover(struct domain_device *dev, const int phy_id)
+{	/* Handle a change reported for phy_id of expander dev. */
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *changed_phy = &ex->ex_phy[phy_id];
+	int res = 0;
+	int i;
+
+	SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n",
+		    SAS_ADDR(dev->sas_addr), phy_id);
+
+	if (SAS_ADDR(changed_phy->attached_sas_addr) != 0) {	/* something was attached before the change */
+		for (i = 0; i < ex->num_phys; i++) {
+			struct ex_phy *phy = &ex->ex_phy[i];
+
+			if (i == phy_id)
+				continue;
+			if (SAS_ADDR(phy->attached_sas_addr) ==
+			    SAS_ADDR(changed_phy->attached_sas_addr)) {	/* another phy shares the address: leave it alone */
+				SAS_DPRINTK("phy%d part of wide port with "
+					    "phy%d\n", phy_id, i);
+				goto out;
+			}
+		}
+		res = sas_rediscover_dev(dev, phy_id);
+	} else	/* the phy was empty before: treat as a new attachment */
+		res = sas_discover_new(dev, phy_id);
+out:
+	return res;
+}
+
+/**
+ * sas_ex_revalidate_domain -- revalidate the domain
+ * @port_dev: device at which the search for the changed expander starts
+ *
+ * NOTE: this process _must_ quit (return) as soon as any connection
+ * errors are encountered.  Connection recovery is done elsewhere.
+ * Discover process only interrogates devices in order to discover the
+ * domain.
+ */
+int sas_ex_revalidate_domain(struct domain_device *port_dev)
+{
+	int res;
+	struct domain_device *dev = NULL;	/* set by sas_find_bcast_dev() when a changed expander is found */
+
+	res = sas_find_bcast_dev(port_dev, &dev);
+	if (res)
+		goto out;
+	if (dev) {
+		struct expander_device *ex = &dev->ex_dev;
+		int i = 0, phy_id;
+
+		do {
+			phy_id = -1;	/* stays -1 when no further phy changed or the query failed */
+			res = sas_find_bcast_phy(dev, &phy_id, i);
+			if (phy_id == -1)
+				break;
+			res = sas_rediscover(dev, phy_id);	/* NOTE(review): a failure here does not end the loop */
+			i = phy_id + 1;	/* resume the scan after the phy just handled */
+		} while (i < ex->num_phys);
+	}
+out:
+	return res;
+}
+
+#if 0
+/* ---------- SMP portal ---------- */
+
+static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs,
+				size_t size)
+{	/* (compiled out) Store a request for a later read by the same pid. */
+	struct domain_device *dev = to_dom_device(kobj);
+	struct expander_device *ex = &dev->ex_dev;
+
+	if (offs != 0)	/* only whole-request writes at offset 0 are accepted */
+		return -EFBIG;
+	else if (size == 0)
+		return 0;
+
+	down_interruptible(&ex->smp_sema);	/* NOTE(review): return value ignored -- an interrupted wait proceeds without the semaphore */
+	if (ex->smp_req)
+		kfree(ex->smp_req);	/* a previously stored request is replaced */
+	ex->smp_req = kzalloc(size, GFP_USER);
+	if (!ex->smp_req) {
+		up(&ex->smp_sema);
+		return -ENOMEM;
+	}
+	memcpy(ex->smp_req, buf, size);
+	ex->smp_req_size = size;
+	ex->smp_portal_pid = current->pid;	/* remember the writer; the read side checks it */
+	up(&ex->smp_sema);
+
+	return size;
+}
+
+static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs,
+			       size_t size)
+{	/* (compiled out) Execute the stored request and copy 'size' response bytes to buf. */
+	struct domain_device *dev = to_dom_device(kobj);
+	struct expander_device *ex = &dev->ex_dev;
+	u8 *smp_resp;
+	int res = -EINVAL;
+
+	/* XXX: sysfs gives us an offset of 0x10 or 0x8 while in fact
+	 *  it should be 0.
+	 */
+
+	down_interruptible(&ex->smp_sema);	/* NOTE(review): return value ignored */
+	if (!ex->smp_req || ex->smp_portal_pid != current->pid)	/* no request stored, or stored by another pid */
+		goto out;
+
+	res = 0;
+	if (size == 0)
+		goto out;
+
+	res = -ENOMEM;
+	smp_resp = alloc_smp_resp(size);
+	if (!smp_resp)
+		goto out;
+	res = smp_execute_task(dev, ex->smp_req, ex->smp_req_size,
+			       smp_resp, size);
+	if (!res) {
+		memcpy(buf, smp_resp, size);
+		res = size;
+	}
+
+	kfree(smp_resp);
+out:	/* every exit path discards the stored request, including the pid-mismatch path */
+	kfree(ex->smp_req);
+	ex->smp_req = NULL;
+	ex->smp_req_size = 0;
+	ex->smp_portal_pid = -1;
+	up(&ex->smp_sema);
+	return res;
+}
+#endif
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
new file mode 100644
index 0000000000000..b961664b81060
--- /dev/null
+++ b/drivers/scsi/libsas/sas_init.c
@@ -0,0 +1,227 @@
+/*
+ * Serial Attached SCSI (SAS) Transport Layer initialization
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+
+#include "sas_internal.h"
+
+#include "../scsi_sas_internal.h"
+
+kmem_cache_t *sas_task_cache;
+
+/*------------ SAS addr hash -----------*/
+void sas_hash_addr(u8 *hashed, const u8 *sas_addr)
+{
+        const u32 poly = 0x00DB2777;
+        u32     crc = 0;
+        int     byte, bit;
+
+        for (byte = 0; byte < 8; byte++) {      /* 8 address bytes, MSB first */
+                for (bit = 7; bit >= 0; bit--) {
+                        u32 in = (sas_addr[byte] >> bit) & 1;
+                        u32 out;
+
+                        crc <<= 1;
+                        out = (crc >> 24) & 1;  /* bit shifted out of the low 24 */
+                        if (in != out)
+                                crc ^= poly;
+                }
+        }
+
+        hashed[0] = (crc >> 16) & 0xFF;         /* low 24 bits, big-endian */
+        hashed[1] = (crc >> 8) & 0xFF;
+        hashed[2] = crc & 0xFF;
+}
+
+
+/* ---------- HA events ---------- */
+
+void sas_hae_reset(void *data)
+{	/* HAE_RESET handler: only clears the event's pending bit. */
+	struct sas_ha_struct *ha = data;
+
+	sas_begin_event(HAE_RESET, &ha->event_lock,
+			&ha->pending);
+}
+
+int sas_register_ha(struct sas_ha_struct *sas_ha)
+{	/* Register phys, ports, the event machinery and (optionally) the queue; 0 or an error code. */
+	int error = 0;
+
+	spin_lock_init(&sas_ha->phy_port_lock);
+	sas_hash_addr(sas_ha->hashed_sas_addr, sas_ha->sas_addr);
+
+	if (sas_ha->lldd_queue_size == 0)	/* 0 is bumped to a queue of one */
+		sas_ha->lldd_queue_size = 1;
+	else if (sas_ha->lldd_queue_size == -1)
+		sas_ha->lldd_queue_size = 128; /* Sanity */
+
+	error = sas_register_phys(sas_ha);
+	if (error) {
+		printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
+		return error;
+	}
+
+	error = sas_register_ports(sas_ha);
+	if (error) {
+		printk(KERN_NOTICE "couldn't register sas ports:%d\n", error);
+		goto Undo_phys;
+	}
+
+	error = sas_init_events(sas_ha);
+	if (error) {
+		printk(KERN_NOTICE "couldn't start event thread:%d\n", error);
+		goto Undo_ports;
+	}
+
+	if (sas_ha->lldd_max_execute_num > 1) {
+		error = sas_init_queue(sas_ha);
+		if (error) {	/* not fatal: fall back to direct mode */
+			printk(KERN_NOTICE "couldn't start queue thread:%d, "
+			       "running in direct mode\n", error);
+			sas_ha->lldd_max_execute_num = 1;
+		}
+	}
+
+	return 0;
+
+Undo_ports:
+	sas_unregister_ports(sas_ha);
+Undo_phys:	/* NOTE(review): nothing is undone for the phys here -- confirm none is needed */
+
+	return error;
+}
+
+int sas_unregister_ha(struct sas_ha_struct *sas_ha)
+{	/* Shut down the queue if one is in use and unregister the ports; always returns 0. */
+	if (sas_ha->lldd_max_execute_num > 1) {	/* same condition under which sas_register_ha() tries to start the queue */
+		sas_shutdown_queue(sas_ha);
+	}
+
+	sas_unregister_ports(sas_ha);
+
+	return 0;
+}
+
+static int sas_get_linkerrors(struct sas_phy *phy)
+{	/* get_linkerrors hook: SMP query for non-local phys, -EINVAL for local ones. */
+	if (scsi_is_sas_phy_local(phy))
+		/* FIXME: we have no local phy stats
+		 * gathering at this time */
+		return -EINVAL;
+
+	return sas_smp_get_phy_events(phy);
+}
+
+static int sas_phy_reset(struct sas_phy *phy, int hard_reset)
+{	/* phy_reset hook: hard or link reset, via the LLDD for local phys or via SMP otherwise. */
+	int ret;
+	enum phy_func reset_type;
+
+	if (hard_reset)
+		reset_type = PHY_FUNC_HARD_RESET;
+	else
+		reset_type = PHY_FUNC_LINK_RESET;
+
+	if (scsi_is_sas_phy_local(phy)) {	/* local phy: ask the LLDD directly */
+		struct Scsi_Host *shost = dev_to_shost(phy->dev.parent);
+		struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
+		struct asd_sas_phy *asd_phy = sas_ha->sas_phy[phy->number];
+		struct sas_internal *i =
+			to_sas_internal(sas_ha->core.shost->transportt);
+
+		ret = i->dft->lldd_control_phy(asd_phy, reset_type);	/* NOTE(review): callback is not NULL-checked here */
+	} else {	/* non-local phy: send an SMP phy control request */
+		struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
+		struct domain_device *ddev = sas_find_dev_by_rphy(rphy);	/* NOTE(review): result used unchecked -- confirm it cannot be NULL */
+		ret = sas_smp_phy_control(ddev, phy->number, reset_type);
+	}
+	return ret;
+}
+
+static struct sas_function_template sft = {	/* callbacks handed to sas_attach_transport() */
+	.phy_reset = sas_phy_reset,
+	.get_linkerrors = sas_get_linkerrors,
+};
+
+struct scsi_transport_template *
+sas_domain_attach_transport(struct sas_domain_function_template *dft)
+{	/* Attach the SAS transport with libsas' callbacks and record the LLDD's dft. */
+	struct scsi_transport_template *stt = sas_attach_transport(&sft);
+	struct sas_internal *i;
+
+	if (!stt)	/* attach failed: NULL is passed straight back */
+		return stt;
+
+	i = to_sas_internal(stt);
+	i->dft = dft;	/* dft is stored by pointer, not copied */
+	stt->create_work_queue = 1;
+	stt->eh_timed_out = sas_scsi_timed_out;
+	stt->eh_strategy_handler = sas_scsi_recover_host;
+
+	return stt;
+}
+EXPORT_SYMBOL_GPL(sas_domain_attach_transport);
+
+
+void sas_domain_release_transport(struct scsi_transport_template *stt)
+{	/* Thin wrapper around sas_release_transport(). */
+	sas_release_transport(stt);
+}
+EXPORT_SYMBOL_GPL(sas_domain_release_transport);
+
+/* ---------- SAS Class register/unregister ---------- */
+
+static int __init sas_class_init(void)
+{	/* Module init: create the slab cache for struct sas_task. */
+	sas_task_cache = kmem_cache_create("sas_task", sizeof(struct sas_task),
+					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!sas_task_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit sas_class_exit(void)
+{	/* Module exit: destroy the cache created by sas_class_init(). */
+	kmem_cache_destroy(sas_task_cache);
+}
+
+MODULE_AUTHOR("Luben Tuikov <luben_tuikov@adaptec.com>");
+MODULE_DESCRIPTION("SAS Transport Layer");
+MODULE_LICENSE("GPL v2");
+
+module_init(sas_class_init);
+module_exit(sas_class_exit);
+
+EXPORT_SYMBOL_GPL(sas_register_ha);
+EXPORT_SYMBOL_GPL(sas_unregister_ha);
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
new file mode 100644
index 0000000000000..89c3976808466
--- /dev/null
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -0,0 +1,146 @@
+/*
+ * Serial Attached SCSI (SAS) class internal header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#ifndef _SAS_INTERNAL_H_
+#define _SAS_INTERNAL_H_
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport_sas.h>
+#include <scsi/libsas.h>
+
+#define sas_printk(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__)
+
+#ifdef SAS_DEBUG
+#define SAS_DPRINTK(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__)
+#else	/* !SAS_DEBUG: debug messages expand to nothing, arguments are not evaluated */
+#define SAS_DPRINTK(fmt, ...)
+#endif	/* SAS_DEBUG */
+
+void sas_scsi_recover_host(struct Scsi_Host *shost);
+
+int sas_show_class(enum sas_class class, char *buf);
+int sas_show_proto(enum sas_proto proto, char *buf);
+int sas_show_linkrate(enum sas_phy_linkrate linkrate, char *buf);
+int sas_show_oob_mode(enum sas_oob_mode oob_mode, char *buf);
+
+int  sas_register_phys(struct sas_ha_struct *sas_ha);
+void sas_unregister_phys(struct sas_ha_struct *sas_ha);
+
+int  sas_register_ports(struct sas_ha_struct *sas_ha);
+void sas_unregister_ports(struct sas_ha_struct *sas_ha);
+
+enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *);
+
+int  sas_init_queue(struct sas_ha_struct *sas_ha);
+int  sas_init_events(struct sas_ha_struct *sas_ha);
+void sas_shutdown_queue(struct sas_ha_struct *sas_ha);
+
+void sas_deform_port(struct asd_sas_phy *phy);
+
+void sas_porte_bytes_dmaed(void *);
+void sas_porte_broadcast_rcvd(void *);
+void sas_porte_link_reset_err(void *);
+void sas_porte_timer_event(void *);
+void sas_porte_hard_reset(void *);
+
+int sas_notify_lldd_dev_found(struct domain_device *);
+void sas_notify_lldd_dev_gone(struct domain_device *);
+
+int sas_smp_phy_control(struct domain_device *dev, int phy_id,
+			enum phy_func phy_func);
+int sas_smp_get_phy_events(struct sas_phy *phy);
+
+struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy);
+
+void sas_hae_reset(void *);
+
+static inline void sas_queue_event(int event, spinlock_t *lock,
+				   unsigned long *pending,
+				   struct work_struct *work,
+				   struct Scsi_Host *shost)
+{	/* Queue 'work' for 'event' unless that event is already marked pending. */
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	if (test_bit(event, pending)) {	/* already pending: do not queue it twice */
+		spin_unlock_irqrestore(lock, flags);
+		return;
+	}
+	__set_bit(event, pending);	/* non-atomic bit op, done under 'lock' */
+	spin_unlock_irqrestore(lock, flags);
+	scsi_queue_work(shost, work);	/* queued after the lock is dropped; result not checked */
+}
+
+static inline void sas_begin_event(int event, spinlock_t *lock,
+				   unsigned long *pending)
+{	/* Clear the pending bit that sas_queue_event() sets. */
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	__clear_bit(event, pending);	/* non-atomic bit op, done under 'lock' */
+	spin_unlock_irqrestore(lock, flags);
+}
+
+static inline void sas_fill_in_rphy(struct domain_device *dev,
+				    struct sas_rphy *rphy)
+{	/* Copy address, protocols and a mapped device type from dev into rphy->identify. */
+	rphy->identify.sas_address = SAS_ADDR(dev->sas_addr);
+	rphy->identify.initiator_port_protocols = dev->iproto;
+	rphy->identify.target_port_protocols = dev->tproto;
+	switch (dev->dev_type) {
+	case SATA_DEV:	/* falls through: SATA is reported as an end device */
+		/* FIXME: need sata device type */
+	case SAS_END_DEV:
+		rphy->identify.device_type = SAS_END_DEVICE;
+		break;
+	case EDGE_DEV:
+		rphy->identify.device_type = SAS_EDGE_EXPANDER_DEVICE;
+		break;
+	case FANOUT_DEV:
+		rphy->identify.device_type = SAS_FANOUT_EXPANDER_DEVICE;
+		break;
+	default:	/* any other type is reported as unused */
+		rphy->identify.device_type = SAS_PHY_UNUSED;
+		break;
+	}
+}
+
+static inline void sas_add_parent_port(struct domain_device *dev, int phy_id)
+{	/* Add phy_id to the expander's parent port, creating that port on first use. */
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *ex_phy = &ex->ex_phy[phy_id];
+
+	if (!ex->parent_port) {	/* first parent-facing phy: create the port */
+		ex->parent_port = sas_port_alloc(&dev->rphy->dev, phy_id);
+		/* FIXME: error handling */
+		BUG_ON(!ex->parent_port);
+		BUG_ON(sas_port_add(ex->parent_port));	/* NOTE(review): the add itself happens inside BUG_ON() */
+		sas_port_mark_backlink(ex->parent_port);
+	}
+	sas_port_add_phy(ex->parent_port, ex_phy->phy);
+}
+
+#endif /* _SAS_INTERNAL_H_ */
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
new file mode 100644
index 0000000000000..024ab00e70d2e
--- /dev/null
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -0,0 +1,157 @@
+/*
+ * Serial Attached SCSI (SAS) Phy class
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include "sas_internal.h"
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+/* ---------- Phy events ---------- */
+
+static void sas_phye_loss_of_signal(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	/* Clear the pending bit, reset the error count, drop phy from its port. */
+	sas_begin_event(PHYE_LOSS_OF_SIGNAL, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+	phy->error = 0;
+	sas_deform_port(phy);
+}
+
+static void sas_phye_oob_done(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	/* Clear the pending bit and reset the phy's OOB error count. */
+	sas_begin_event(PHYE_OOB_DONE, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+	phy->error = 0;
+}
+
+static void sas_phye_oob_error(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct asd_sas_port *port = phy->port;	/* sampled before deform */
+	struct sas_internal *i =
+		to_sas_internal(sas_ha->core.shost->transportt);
+	/* Clear the pending bit so this event can be queued again. */
+	sas_begin_event(PHYE_OOB_ERROR, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+
+	sas_deform_port(phy);
+	/* Phy had no port and LLDD can control it: escalate on repeat errors. */
+	if (!port && phy->enabled && i->dft->lldd_control_phy) {
+		phy->error++;
+		switch (phy->error) {
+		case 1:
+		case 2:	/* first two errors: try a hard reset */
+			i->dft->lldd_control_phy(phy, PHY_FUNC_HARD_RESET);
+			break;
+		case 3:
+		default:	/* third error onwards: give up and disable the phy */
+			phy->error = 0;
+			phy->enabled = 0;
+			i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE);
+			break;
+		}
+	}
+}
+
+static void sas_phye_spinup_hold(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct sas_internal *i =
+		to_sas_internal(sas_ha->core.shost->transportt);
+	/* Release spinup hold; lldd_control_phy is optional (cf. oob_error). */
+	sas_begin_event(PHYE_SPINUP_HOLD, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+	phy->error = 0;
+	if (i->dft->lldd_control_phy)
+		i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD);
+}
+
+/* ---------- Phy class registration ---------- */
+
+int sas_register_phys(struct sas_ha_struct *sas_ha)
+{
+	int i;
+	/* Work handlers, indexed by enum phy_event and enum port_event. */
+	static void (*sas_phy_event_fns[PHY_NUM_EVENTS])(void *) = {
+		[PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal,
+		[PHYE_OOB_DONE] = sas_phye_oob_done,
+		[PHYE_OOB_ERROR] = sas_phye_oob_error,
+		[PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
+	};
+
+	static void (*sas_port_event_fns[PORT_NUM_EVENTS])(void *) = {
+		[PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed,
+		[PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd,
+		[PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err,
+		[PORTE_TIMER_EVENT] = sas_porte_timer_event,
+		[PORTE_HARD_RESET] = sas_porte_hard_reset,
+	};
+
+	/* Now register the phys. */
+	for (i = 0; i < sas_ha->num_phys; i++) {
+		int k;
+		struct asd_sas_phy *phy = sas_ha->sas_phy[i];
+
+		phy->error = 0;
+		INIT_LIST_HEAD(&phy->port_phy_el);
+		for (k = 0; k < PORT_NUM_EVENTS; k++)
+			INIT_WORK(&phy->port_events[k], sas_port_event_fns[k],
+				  phy);
+
+		for (k = 0; k < PHY_NUM_EVENTS; k++)
+			INIT_WORK(&phy->phy_events[k], sas_phy_event_fns[k],
+				  phy);
+		phy->port = NULL;
+		phy->ha = sas_ha;
+		spin_lock_init(&phy->frame_rcvd_lock);
+		spin_lock_init(&phy->sas_prim_lock);
+		phy->frame_rcvd_size = 0;
+		/* Allocate the transport-class phy under the SCSI host device. */
+		phy->phy = sas_phy_alloc(&sas_ha->core.shost->shost_gendev,
+					 i);
+		if (!phy->phy)
+			return -ENOMEM;	/* NOTE(review): earlier phys stay added */
+		/* Seed identify data and the 1.5-3.0 Gbps link-rate limits. */
+		phy->phy->identify.initiator_port_protocols =
+			phy->iproto;
+		phy->phy->identify.target_port_protocols = phy->tproto;
+		phy->phy->identify.sas_address = SAS_ADDR(sas_ha->sas_addr);
+		phy->phy->identify.phy_identifier = i;
+		phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+		phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+		phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+		phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN;
+
+		sas_phy_add(phy->phy);
+	}
+
+	return 0;
+}
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
new file mode 100644
index 0000000000000..253cdcf306a25
--- /dev/null
+++ b/drivers/scsi/libsas/sas_port.c
@@ -0,0 +1,279 @@
+/*
+ * Serial Attached SCSI (SAS) Port class
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include "sas_internal.h"
+
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+/**
+ * sas_form_port -- add this phy to a port
+ * @phy: the phy of interest
+ *
+ * Joins @phy to the port already attached to the same SAS address (wide
+ * port) or claims a free port.  A phy whose port still matches is left alone.
+ */
+static void sas_form_port(struct asd_sas_phy *phy)
+{
+	int i;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct asd_sas_port *port = phy->port;
+	struct sas_internal *si =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	if (port) {
+		if (memcmp(port->attached_sas_addr, phy->attached_sas_addr,
+			   SAS_ADDR_SIZE) != 0)	/* attached device changed */
+			sas_deform_port(phy);
+		else {
+			SAS_DPRINTK("%s: phy%d belongs to port%d already(%d)!\n",
+				    __FUNCTION__, phy->id, phy->port->id,
+				    phy->port->num_phys);
+			return;
+		}
+	}
+
+	/* find a port */
+	spin_lock(&sas_ha->phy_port_lock);
+	for (i = 0; i < sas_ha->num_phys; i++) {
+		port = sas_ha->sas_port[i];
+		spin_lock(&port->phy_list_lock);
+		if (*(u64 *) port->sas_addr &&
+		    memcmp(port->attached_sas_addr,
+			   phy->attached_sas_addr, SAS_ADDR_SIZE) == 0 &&
+		    port->num_phys > 0) {
+			/* wide port */
+			SAS_DPRINTK("phy%d matched wide port%d\n", phy->id,
+				    port->id);
+			break;
+		} else if (*(u64 *) port->sas_addr == 0 && port->num_phys==0) {
+			memcpy(port->sas_addr, phy->sas_addr, SAS_ADDR_SIZE);
+			break;
+		}
+		spin_unlock(&port->phy_list_lock);
+	}
+	/* Leaving the loop via break keeps port->phy_list_lock held. */
+	if (i >= sas_ha->num_phys) {
+		printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n",
+		       __FUNCTION__);
+		spin_unlock(&sas_ha->phy_port_lock);
+		return;
+	}
+
+	/* add the phy to the port */
+	list_add_tail(&phy->port_phy_el, &port->phy_list);
+	phy->port = port;
+	port->num_phys++;
+	port->phy_mask |= (1U << phy->id);
+
+	if (!port->phy)
+		port->phy = phy->phy;
+
+	SAS_DPRINTK("phy%d added to port%d, phy_mask:0x%x\n", phy->id,
+		    port->id, port->phy_mask);
+	/* First phy defines the port's attributes; later phys only raise rate. */
+	if (*(u64 *)port->attached_sas_addr == 0) {
+		port->class = phy->class;
+		memcpy(port->attached_sas_addr, phy->attached_sas_addr,
+		       SAS_ADDR_SIZE);
+		port->iproto = phy->iproto;
+		port->tproto = phy->tproto;
+		port->oob_mode = phy->oob_mode;
+		port->linkrate = phy->linkrate;
+	} else
+		port->linkrate = max(port->linkrate, phy->linkrate);
+	spin_unlock(&port->phy_list_lock);
+	spin_unlock(&sas_ha->phy_port_lock);
+	/* Create the transport-class port on first use, then attach the phy. */
+	if (!port->port) {
+		port->port = sas_port_alloc(phy->phy->dev.parent, port->id);
+		BUG_ON(!port->port);
+		sas_port_add(port->port);
+	}
+	sas_port_add_phy(port->port, phy->phy);
+
+	if (port->port_dev)
+		port->port_dev->pathways = port->num_phys;
+
+	/* Tell the LLDD about this port formation. */
+	if (si->dft->lldd_port_formed)
+		si->dft->lldd_port_formed(phy);
+
+	sas_discover_event(phy->port, DISCE_DISCOVER_DOMAIN);
+}
+
+/**
+ * sas_deform_port -- remove this phy from the port it belongs to
+ * @phy: the phy of interest
+ *
+ * This is called when the physical link to the other phy has been
+ * lost (on this phy), in Event thread context. We cannot delay here.
+ */
+void sas_deform_port(struct asd_sas_phy *phy)
+{
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct asd_sas_port *port = phy->port;
+	struct sas_internal *si =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	if (!port)
+		return;		  /* done by a phy event */
+
+	if (port->port_dev)
+		port->port_dev->pathways--;
+	/* Last phy: drop the domain and the transport port; else just the phy. */
+	if (port->num_phys == 1) {
+		sas_unregister_domain_devices(port);
+		sas_port_delete(port->port);
+		port->port = NULL;
+	} else
+		sas_port_delete_phy(port->port, phy->phy);
+
+	/* Notify the LLDD (optional hook) before the phy is unlinked below. */
+	if (si->dft->lldd_port_deformed)
+		si->dft->lldd_port_deformed(phy);
+
+	spin_lock(&sas_ha->phy_port_lock);
+	spin_lock(&port->phy_list_lock);
+
+	list_del_init(&phy->port_phy_el);
+	phy->port = NULL;
+	port->num_phys--;
+	port->phy_mask &= ~(1U << phy->id);
+	/* Port is now empty: reset it so sas_form_port() can claim it again. */
+	if (port->num_phys == 0) {
+		INIT_LIST_HEAD(&port->phy_list);
+		memset(port->sas_addr, 0, SAS_ADDR_SIZE);
+		memset(port->attached_sas_addr, 0, SAS_ADDR_SIZE);
+		port->class = 0;
+		port->iproto = 0;
+		port->tproto = 0;
+		port->oob_mode = 0;
+		port->phy_mask = 0;
+	}
+	spin_unlock(&port->phy_list_lock);
+	spin_unlock(&sas_ha->phy_port_lock);
+
+	return;
+}
+
+/* ---------- SAS port events ---------- */
+
+void sas_porte_bytes_dmaed(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	/* Clear the pending bit, then (re)assign the phy to a port. */
+	sas_begin_event(PORTE_BYTES_DMAED, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_form_port(phy);
+}
+
+void sas_porte_broadcast_rcvd(void *data)
+{
+	unsigned long flags;
+	u32 prim;
+	struct asd_sas_phy *phy = data;
+	/* Clear the pending bit so another broadcast can be queued. */
+	sas_begin_event(PORTE_BROADCAST_RCVD, &phy->ha->event_lock,
+			&phy->port_events_pending);
+	/* Snapshot the received primitive under its lock (only logged below). */
+	spin_lock_irqsave(&phy->sas_prim_lock, flags);
+	prim = phy->sas_prim;
+	spin_unlock_irqrestore(&phy->sas_prim_lock, flags);
+
+	SAS_DPRINTK("broadcast received: %d\n", prim);
+	sas_discover_event(phy->port, DISCE_REVALIDATE_DOMAIN);
+}
+
+void sas_porte_link_reset_err(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	/* Clear the pending bit, then remove the phy from its port. */
+	sas_begin_event(PORTE_LINK_RESET_ERR, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_deform_port(phy);
+}
+
+void sas_porte_timer_event(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	/* Clear the pending bit, then remove the phy from its port. */
+	sas_begin_event(PORTE_TIMER_EVENT, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_deform_port(phy);
+}
+
+void sas_porte_hard_reset(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	/* Clear the pending bit, then remove the phy from its port. */
+	sas_begin_event(PORTE_HARD_RESET, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_deform_port(phy);
+}
+
+/* ---------- SAS port registration ---------- */
+
+static void sas_init_port(struct asd_sas_port *port,
+			  struct sas_ha_struct *sas_ha, int i)
+{
+	/* Start as port number @i of @sas_ha with no phys and no devices. */
+	port->ha = sas_ha;
+	port->id = i;
+	port->phy_mask = 0;
+	port->num_phys = 0;
+	INIT_LIST_HEAD(&port->phy_list);
+	spin_lock_init(&port->phy_list_lock);
+	INIT_LIST_HEAD(&port->dev_list);
+	spin_lock_init(&port->dev_list_lock);
+}
+
+int sas_register_ports(struct sas_ha_struct *sas_ha)
+{
+	int i;
+
+	/* initialize the ports and discovery; one port slot per phy */
+	for (i = 0; i < sas_ha->num_phys; i++) {
+		struct asd_sas_port *port = sas_ha->sas_port[i];
+
+		sas_init_port(port, sas_ha, i);
+		sas_init_disc(&port->disc, port);
+	}
+	return 0;	/* always succeeds */
+}
+
+void sas_unregister_ports(struct sas_ha_struct *sas_ha)
+{
+	int i;
+	/* Detach every phy that still belongs to a port. */
+	for (i = 0; i < sas_ha->num_phys; i++)
+		if (sas_ha->sas_phy[i]->port)
+			sas_deform_port(sas_ha->sas_phy[i]);
+
+}
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
new file mode 100644
index 0000000000000..43e0e4e369346
--- /dev/null
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -0,0 +1,786 @@
+/*
+ * Serial Attached SCSI (SAS) class SCSI Host glue.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include "sas_internal.h"
+
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+#include <linux/err.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+
+/* ---------- SCSI Host glue ---------- */
+
+#define TO_SAS_TASK(_scsi_cmd)  ((void *)(_scsi_cmd)->host_scribble) /* task is kept in host_scribble */
+#define ASSIGN_SAS_TASK(_sc, _t) do { (_sc)->host_scribble = (void *) _t; } while (0)
+
+static void sas_scsi_task_done(struct sas_task *task)
+{
+	struct task_status_struct *ts = &task->task_status;
+	struct scsi_cmnd *sc = task->uldd_task;
+	unsigned ts_flags = task->task_state_flags;
+	int hs = 0, stat = 0;
+	/* Translate the task status into sc->result, free the task, finish sc. */
+	if (unlikely(!sc)) {
+		SAS_DPRINTK("task_done called with non existing SCSI cmnd!\n");
+		list_del_init(&task->list);
+		sas_free_task(task);
+		return;
+	}
+
+	if (ts->resp == SAS_TASK_UNDELIVERED) {
+		/* transport error */
+		hs = DID_NO_CONNECT;
+	} else { /* ts->resp == SAS_TASK_COMPLETE */
+		/* task delivered, what happened afterwards? */
+		switch (ts->stat) {
+		case SAS_DEV_NO_RESPONSE:
+		case SAS_INTERRUPTED:
+		case SAS_PHY_DOWN:
+		case SAS_NAK_R_ERR:
+		case SAS_OPEN_TO:
+			hs = DID_NO_CONNECT;
+			break;
+		case SAS_DATA_UNDERRUN:
+			sc->resid = ts->residual;
+			if (sc->request_bufflen - sc->resid < sc->underflow)
+				hs = DID_ERROR;
+			break;
+		case SAS_DATA_OVERRUN:
+			hs = DID_ERROR;
+			break;
+		case SAS_QUEUE_FULL:
+			hs = DID_SOFT_ERROR; /* retry */
+			break;
+		case SAS_DEVICE_UNKNOWN:
+			hs = DID_BAD_TARGET;
+			break;
+		case SAS_SG_ERR:
+			hs = DID_PARITY;
+			break;
+		case SAS_OPEN_REJECT:
+			if (ts->open_rej_reason == SAS_OREJ_RSVD_RETRY)
+				hs = DID_SOFT_ERROR; /* retry */
+			else
+				hs = DID_ERROR;
+			break;
+		case SAS_PROTO_RESPONSE:
+			SAS_DPRINTK("LLDD:%s sent SAS_PROTO_RESP for an SSP "
+				    "task; please report this\n",
+				    task->dev->port->ha->sas_ha_name);
+			break;
+		case SAS_ABORTED_TASK:
+			hs = DID_ABORT;
+			break;
+		case SAM_CHECK_COND:	/* never copy more than the sense buffer holds */
+			memcpy(sc->sense_buffer, ts->buf,
+			       min(SCSI_SENSE_BUFFERSIZE, ts->buf_valid_size));
+			stat = SAM_CHECK_COND;
+			break;
+		default:
+			stat = ts->stat;
+			break;
+		}
+	}
+	ASSIGN_SAS_TASK(sc, NULL);
+	sc->result = (hs << 16) | stat;
+	list_del_init(&task->list);
+	sas_free_task(task);	/* task and ts are invalid from here on */
+	/* This is very ugly but this is how SCSI Core works. */
+	if (ts_flags & SAS_TASK_STATE_ABORTED)
+		scsi_finish_command(sc);
+	else
+		sc->scsi_done(sc);
+}
+
+static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd)
+{
+	/* HOQ only for a tagged hard-barrier request on an ordered-tag device. */
+	if (!cmd->request || !blk_rq_tagged(cmd->request))
+		return TASK_ATTR_SIMPLE;
+	if (cmd->device->ordered_tags &&
+	    (cmd->request->flags & REQ_HARDBARRIER))
+		return TASK_ATTR_HOQ;
+	return TASK_ATTR_SIMPLE;
+}
+
+static struct sas_task *sas_create_task(struct scsi_cmnd *cmd,
+					       struct domain_device *dev,
+					       unsigned long gfp_flags)
+{
+	struct sas_task *task = sas_alloc_task(gfp_flags);
+	struct scsi_lun lun;
+
+	if (!task)
+		return NULL;	/* allocation failed */
+	/* Zero the start of the sense buffer and cross-link cmd and task. */
+	*(u32 *)cmd->sense_buffer = 0;
+	task->uldd_task = cmd;
+	ASSIGN_SAS_TASK(cmd, task);
+
+	task->dev = dev;
+	task->task_proto = task->dev->tproto; /* BUG_ON(!SSP) */
+	/* SSP command: 8-byte LUN, task attribute and 16 bytes of CDB. */
+	task->ssp_task.retry_count = 1;
+	int_to_scsilun(cmd->device->lun, &lun);
+	memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8);
+	task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd);
+	memcpy(task->ssp_task.cdb, cmd->cmnd, 16);
+	/* Data buffer description is taken straight from the SCSI command. */
+	task->scatter = cmd->request_buffer;
+	task->num_scatter = cmd->use_sg;
+	task->total_xfer_len = cmd->request_bufflen;
+	task->data_dir = cmd->sc_data_direction;
+
+	task->task_done = sas_scsi_task_done;
+
+	return task;
+}
+
+static int sas_queue_up(struct sas_task *task)
+{
+	struct sas_ha_struct *sas_ha = task->dev->port->ha;
+	struct scsi_core *core = &sas_ha->core;
+	unsigned long flags;
+
+	/* Refuse the task if one more would exceed the LLDD queue size. */
+	spin_lock_irqsave(&core->task_queue_lock, flags);
+	if (sas_ha->lldd_queue_size < core->task_queue_size + 1) {
+		spin_unlock_irqrestore(&core->task_queue_lock, flags);
+		return -SAS_QUEUE_FULL;
+	}
+	list_add_tail(&task->list, &core->task_queue);
+	core->task_queue_size += 1;
+	spin_unlock_irqrestore(&core->task_queue_lock, flags);
+	up(&core->queue_thread_sema);	/* wake the task collector thread */
+
+	return 0;
+}
+
+/**
+ * sas_queuecommand -- Enqueue a command for processing
+ * @parameters: See SCSI Core documentation
+ *
+ * Note: XXX: Remove the host unlock/lock pair when SCSI Core can
+ * call us without holding an IRQ spinlock...
+ */
+int sas_queuecommand(struct scsi_cmnd *cmd,
+		     void (*scsi_done)(struct scsi_cmnd *))
+{
+	int res = 0;
+	struct domain_device *dev = cmd_to_domain_dev(cmd);
+	struct Scsi_Host *host = cmd->device->host;
+	struct sas_internal *i = to_sas_internal(host->transportt);
+	/* Entered with host_lock held; drop it here, retake it at "out". */
+	spin_unlock_irq(host->host_lock);
+
+	{
+		struct sas_ha_struct *sas_ha = dev->port->ha;
+		struct sas_task *task;
+
+		res = -ENOMEM;
+		task = sas_create_task(cmd, dev, GFP_ATOMIC);
+		if (!task)
+			goto out;
+
+		cmd->scsi_done = scsi_done;
+		/* Queue up, Direct Mode or Task Collector Mode. */
+		if (sas_ha->lldd_max_execute_num < 2)
+			res = i->dft->lldd_execute_task(task, 1, GFP_ATOMIC);
+		else
+			res = sas_queue_up(task);
+
+		/* Examine */
+		if (res) {
+			SAS_DPRINTK("lldd_execute_task returned: %d\n", res);
+			ASSIGN_SAS_TASK(cmd, NULL);
+			sas_free_task(task);
+			if (res == -SAS_QUEUE_FULL) {
+				cmd->result = DID_SOFT_ERROR << 16; /* retry */
+				res = 0;
+				scsi_done(cmd);
+			}
+			goto out;	/* any other error code is returned as is */
+		}
+	}
+out:
+	spin_lock_irq(host->host_lock);
+	return res;
+}
+
+static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
+{
+	struct scsi_cmnd *cmd, *n;
+	/* NOTE(review): unlinks only my_cmd itself, not other cmds of its LU. */
+	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
+		if (cmd == my_cmd)
+			list_del_init(&cmd->eh_entry);
+	}
+}
+
+static void sas_scsi_clear_queue_I_T(struct list_head *error_q,
+				     struct domain_device *dev)
+{
+	struct scsi_cmnd *cmd, *n;
+	/* Unlink every command on @error_q that is addressed to @dev. */
+	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
+		struct domain_device *x = cmd_to_domain_dev(cmd);
+
+		if (x == dev)
+			list_del_init(&cmd->eh_entry);
+	}
+}
+
+static void sas_scsi_clear_queue_port(struct list_head *error_q,
+				      struct asd_sas_port *port)
+{
+	struct scsi_cmnd *cmd, *n;
+	/* Unlink every command on @error_q whose device sits on @port. */
+	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
+		struct domain_device *dev = cmd_to_domain_dev(cmd);
+		struct asd_sas_port *x = dev->port;
+
+		if (x == port)
+			list_del_init(&cmd->eh_entry);
+	}
+}
+
+enum task_disposition {	/* result of sas_scsi_find_task() */
+	TASK_IS_DONE,		/* SAS_TASK_STATE_DONE was set on the task */
+	TASK_IS_ABORTED,	/* dequeued, or abort returned FUNC_COMPLETE */
+	TASK_IS_AT_LU,		/* lldd_query_task returned TMF_RESP_FUNC_SUCC */
+	TASK_IS_NOT_AT_LU,	/* lldd_query_task returned FUNC_COMPLETE */
+};
+
+static enum task_disposition sas_scsi_find_task(struct sas_task *task)
+{
+	struct sas_ha_struct *ha = task->dev->port->ha;
+	unsigned long flags;
+	int i, res;
+	struct sas_internal *si =
+		to_sas_internal(task->dev->port->ha->core.shost->transportt);
+	/* Task collector mode: the task may still be waiting in task_queue. */
+	if (ha->lldd_max_execute_num > 1) {
+		struct scsi_core *core = &ha->core;
+		struct sas_task *t, *n;
+
+		spin_lock_irqsave(&core->task_queue_lock, flags);
+		list_for_each_entry_safe(t, n, &core->task_queue, list) {
+			if (task == t) {
+				/* NOTE(review): task_queue_size is not adjusted */
+				list_del_init(&t->list);
+				spin_unlock_irqrestore(&core->task_queue_lock,
+						       flags);
+				SAS_DPRINTK("%s: task 0x%p aborted from "
+					    "task_queue\n",
+					    __FUNCTION__, task);
+				return TASK_IS_ABORTED;
+			}
+		}
+		spin_unlock_irqrestore(&core->task_queue_lock, flags);
+	}
+	/* Otherwise ask the LLDD to abort, then query, up to five times. */
+	for (i = 0; i < 5; i++) {
+		SAS_DPRINTK("%s: aborting task 0x%p\n", __FUNCTION__, task);
+		res = si->dft->lldd_abort_task(task);
+		/* The task may have completed while the abort was in flight. */
+		spin_lock_irqsave(&task->task_state_lock, flags);
+		if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+			spin_unlock_irqrestore(&task->task_state_lock, flags);
+			SAS_DPRINTK("%s: task 0x%p is done\n", __FUNCTION__,
+				    task);
+			return TASK_IS_DONE;
+		}
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+		if (res == TMF_RESP_FUNC_COMPLETE) {
+			SAS_DPRINTK("%s: task 0x%p is aborted\n",
+				    __FUNCTION__, task);
+			return TASK_IS_ABORTED;
+		} else if (si->dft->lldd_query_task) {
+			SAS_DPRINTK("%s: querying task 0x%p\n",
+				    __FUNCTION__, task);
+			res = si->dft->lldd_query_task(task);
+			if (res == TMF_RESP_FUNC_SUCC) {
+				SAS_DPRINTK("%s: task 0x%p at LU\n",
+					    __FUNCTION__, task);
+				return TASK_IS_AT_LU;
+			} else if (res == TMF_RESP_FUNC_COMPLETE) {
+				SAS_DPRINTK("%s: task 0x%p not at LU\n",
+					    __FUNCTION__, task);
+				return TASK_IS_NOT_AT_LU;
+			}
+		}
+	}
+	return res;	/* NOTE(review): a TMF response code, not a task_disposition */
+}
+
+static int sas_recover_lu(struct domain_device *dev, struct scsi_cmnd *cmd)
+{
+	int res = TMF_RESP_FUNC_FAILED;
+	struct scsi_lun lun;
+	struct sas_internal *i =
+		to_sas_internal(dev->port->ha->core.shost->transportt);
+
+	int_to_scsilun(cmd->device->lun, &lun);
+
+	SAS_DPRINTK("eh: device %llx LUN %x has the task\n",
+		    SAS_ADDR(dev->sas_addr),
+		    cmd->device->lun);
+	/* Escalate: abort task set, clear task set, LU reset (each optional). */
+	if (i->dft->lldd_abort_task_set)
+		res = i->dft->lldd_abort_task_set(dev, lun.scsi_lun);
+	/* Next step runs only while res is still TMF_RESP_FUNC_FAILED. */
+	if (res == TMF_RESP_FUNC_FAILED) {
+		if (i->dft->lldd_clear_task_set)
+			res = i->dft->lldd_clear_task_set(dev, lun.scsi_lun);
+	}
+
+	if (res == TMF_RESP_FUNC_FAILED) {
+		if (i->dft->lldd_lu_reset)
+			res = i->dft->lldd_lu_reset(dev, lun.scsi_lun);
+	}
+
+	return res;
+}
+
+static int sas_recover_I_T(struct domain_device *dev)
+{
+	int res = TMF_RESP_FUNC_FAILED;
+	struct sas_internal *i =
+		to_sas_internal(dev->port->ha->core.shost->transportt);
+
+	SAS_DPRINTK("I_T nexus reset for dev %016llx\n",
+		    SAS_ADDR(dev->sas_addr));
+	/* Without the optional LLDD hook this reports TMF_RESP_FUNC_FAILED. */
+	if (i->dft->lldd_I_T_nexus_reset)
+		res = i->dft->lldd_I_T_nexus_reset(dev);
+
+	return res;
+}
+
+void sas_scsi_recover_host(struct Scsi_Host *shost)
+{
+	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
+	unsigned long flags;
+	LIST_HEAD(error_q);
+	struct scsi_cmnd *cmd, *n;
+	enum task_disposition res = TASK_IS_DONE;
+	int tmf_resp;
+	struct sas_internal *i = to_sas_internal(shost->transportt);
+	/* Move all failed commands from the host's eh_cmd_q to a private list. */
+	spin_lock_irqsave(shost->host_lock, flags);
+	list_splice_init(&shost->eh_cmd_q, &error_q);
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	SAS_DPRINTK("Enter %s\n", __FUNCTION__);
+
+	/* All tasks on this list were marked SAS_TASK_STATE_ABORTED
+	 * by sas_scsi_timed_out() callback.
+	 */
+Again:
+	SAS_DPRINTK("going over list...\n");
+	list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
+		struct sas_task *task = TO_SAS_TASK(cmd);
+
+		SAS_DPRINTK("trying to find task 0x%p\n", task);
+		list_del_init(&cmd->eh_entry);
+		res = sas_scsi_find_task(task);
+
+		cmd->eh_eflags = 0;
+		shost->host_failed--;
+		/* Escalate: LU recovery, I_T reset, clear nexus port, then HA. */
+		switch (res) {
+		case TASK_IS_DONE:
+			SAS_DPRINTK("%s: task 0x%p is done\n", __FUNCTION__,
+				    task);
+			task->task_done(task);
+			continue;
+		case TASK_IS_ABORTED:
+			SAS_DPRINTK("%s: task 0x%p is aborted\n",
+				    __FUNCTION__, task);
+			task->task_done(task);
+			continue;
+		case TASK_IS_AT_LU:
+			SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
+			tmf_resp = sas_recover_lu(task->dev, cmd);
+			if (tmf_resp == TMF_RESP_FUNC_COMPLETE) {
+				SAS_DPRINTK("dev %016llx LU %x is "
+					    "recovered\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    cmd->device->lun);
+				task->task_done(task);
+				sas_scsi_clear_queue_lu(&error_q, cmd);
+				goto Again;
+			}
+			/* fallthrough */
+		case TASK_IS_NOT_AT_LU:
+			SAS_DPRINTK("task 0x%p is not at LU: I_T recover\n",
+				    task);
+			tmf_resp = sas_recover_I_T(task->dev);
+			if (tmf_resp == TMF_RESP_FUNC_COMPLETE) {
+				SAS_DPRINTK("I_T %016llx recovered\n",
+					    SAS_ADDR(task->dev->sas_addr));
+				sas_scsi_clear_queue_I_T(&error_q, task->dev);
+				task->task_done(task);	/* frees task: keep last */
+				goto Again;
+			}
+			/* Hammer time :-) */
+			if (i->dft->lldd_clear_nexus_port) {
+				struct asd_sas_port *port = task->dev->port;
+				SAS_DPRINTK("clearing nexus for port:%d\n",
+					    port->id);
+				tmf_resp = i->dft->lldd_clear_nexus_port(port);
+				if (tmf_resp == TMF_RESP_FUNC_COMPLETE) {
+					SAS_DPRINTK("clear nexus port:%d "
+						    "succeeded\n", port->id);
+					task->task_done(task);
+					sas_scsi_clear_queue_port(&error_q,
+								  port);
+					goto Again;
+				}
+			}
+			if (i->dft->lldd_clear_nexus_ha) {
+				SAS_DPRINTK("clear nexus ha\n");
+				tmf_resp = i->dft->lldd_clear_nexus_ha(ha);
+				if (tmf_resp == TMF_RESP_FUNC_COMPLETE) {
+					SAS_DPRINTK("clear nexus ha "
+						    "succeeded\n");
+					task->task_done(task);
+					goto out; /* NOTE(review): rest of error_q is left as is */
+				}
+			}
+			/* If we are here -- this means that no amount
+			 * of effort could recover from errors.  Quite
+			 * possibly the HA just disappeared.
+			 */
+			SAS_DPRINTK("error from  device %llx, LUN %x "
+				    "couldn't be recovered in any way\n",
+				    SAS_ADDR(task->dev->sas_addr),
+				    cmd->device->lun);
+
+			task->task_done(task);
+			goto clear_q;
+		}
+	}
+out:
+	SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
+	return;
+clear_q:
+	SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__);
+	list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
+		struct sas_task *task = TO_SAS_TASK(cmd);
+		list_del_init(&cmd->eh_entry);
+		task->task_done(task);
+	}
+}
+
+enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
+{
+	struct sas_task *task = TO_SAS_TASK(cmd);
+	unsigned long flags;
+	/* No task attached to the command any more: nothing left to abort. */
+	if (!task) {
+		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n",
+			    cmd, task);
+		return EH_HANDLED;
+	}
+	/* Task already done: handled.  Otherwise mark it aborted for the EH. */
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n",
+			    cmd, task);
+		return EH_HANDLED;
+	}
+	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n",
+		    cmd, task);
+
+	return EH_NOT_HANDLED;
+}
+
+struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy)
+{
+	struct Scsi_Host *shost = dev_to_shost(rphy->dev.parent);
+	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
+	struct domain_device *found_dev = NULL;
+	int i;
+	/* Scan each port's dev_list for the device whose rphy is @rphy. */
+	spin_lock(&ha->phy_port_lock);
+	for (i = 0; i < ha->num_phys; i++) {
+		struct asd_sas_port *port = ha->sas_port[i];
+		struct domain_device *dev;
+
+		spin_lock(&port->dev_list_lock);
+		list_for_each_entry(dev, &port->dev_list, dev_list_node) {
+			if (rphy == dev->rphy) {
+				found_dev = dev;
+				spin_unlock(&port->dev_list_lock);
+				goto found;
+			}
+		}
+		spin_unlock(&port->dev_list_lock);
+	}
+ found:
+	spin_unlock(&ha->phy_port_lock);
+
+	return found_dev;	/* NULL when no device matched */
+}
+
+static inline struct domain_device *sas_find_target(struct scsi_target *starget)
+{
+	struct sas_rphy *rphy = dev_to_rphy(starget->dev.parent);
+	/* The target's parent device is its rphy; look the device up by it. */
+	return sas_find_dev_by_rphy(rphy);
+}
+
+int sas_target_alloc(struct scsi_target *starget)
+{
+	struct domain_device *found_dev = sas_find_target(starget);
+
+	if (!found_dev)
+		return -ENODEV;	/* no domain device behind this target */
+	/* Store the domain device in the target's hostdata. */
+	starget->hostdata = found_dev;
+	return 0;
+}
+
+#define SAS_DEF_QD 32
+#define SAS_MAX_QD 64
+
+int sas_slave_configure(struct scsi_device *scsi_dev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
+	struct sas_ha_struct *sas_ha;
+	/* Only end devices are expected to get SCSI devices attached. */
+	BUG_ON(dev->rphy->identify.device_type != SAS_END_DEVICE);
+
+	sas_ha = dev->port->ha;	/* assigned but not used below */
+
+	sas_read_port_mode_page(scsi_dev);
+	/* Tagged queueing at SAS_DEF_QD if supported, else untagged, depth 1. */
+	if (scsi_dev->tagged_supported) {
+		scsi_set_tag_type(scsi_dev, MSG_SIMPLE_TAG);
+		scsi_activate_tcq(scsi_dev, SAS_DEF_QD);
+	} else {
+		SAS_DPRINTK("device %llx, LUN %x doesn't support "
+			    "TCQ\n", SAS_ADDR(dev->sas_addr),
+			    scsi_dev->lun);
+		scsi_dev->tagged_supported = 0;
+		scsi_set_tag_type(scsi_dev, 0);
+		scsi_deactivate_tcq(scsi_dev, 1);
+	}
+
+	return 0;
+}
+
+void sas_slave_destroy(struct scsi_device *scsi_dev)
+{	/* currently a no-op */
+}
+
+int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth)
+{
+	int res = min(new_depth, SAS_MAX_QD);	/* cap at SAS_MAX_QD */
+	/* Untagged devices are forced to depth 1; returns the depth applied. */
+	if (scsi_dev->tagged_supported)
+		scsi_adjust_queue_depth(scsi_dev, scsi_get_tag_type(scsi_dev),
+					res);
+	else {
+		struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
+		sas_printk("device %llx LUN %x queue depth changed to 1\n",
+			   SAS_ADDR(dev->sas_addr),
+			   scsi_dev->lun);
+		scsi_adjust_queue_depth(scsi_dev, 0, 1);
+		res = 1;
+	}
+
+	return res;
+}
+
+int sas_change_queue_type(struct scsi_device *scsi_dev, int qt)
+{
+	if (!scsi_dev->tagged_supported)
+		return 0;	/* untagged device: tag type stays unchanged */
+	/* Turn TCQ off, switch the tag type, re-enable at the current depth. */
+	scsi_deactivate_tcq(scsi_dev, 1);
+
+	scsi_set_tag_type(scsi_dev, qt);
+	scsi_activate_tcq(scsi_dev, scsi_dev->queue_depth);
+
+	return qt;
+}
+
+int sas_bios_param(struct scsi_device *scsi_dev,
+			  struct block_device *bdev,
+			  sector_t capacity, int *hsc)
+{
+	hsc[0] = 255;	/* NOTE(review): presumably heads -- confirm */
+	hsc[1] = 63;	/* NOTE(review): presumably sectors per track -- confirm */
+	sector_div(capacity, 255*63);
+	hsc[2] = capacity;	/* capacity as left by sector_div(), 255*63 units */
+
+	return 0;
+}
+
+/* ---------- Task Collector Thread implementation ---------- */
+
+static void sas_queue(struct sas_ha_struct *sas_ha)
+{
+	struct scsi_core *core = &sas_ha->core;
+	unsigned long flags;
+	LIST_HEAD(q);
+	int can_queue;
+	int res;
+	struct sas_internal *i = to_sas_internal(core->shost->transportt);
+	/* Hand queued tasks to the LLDD in batches of up to lldd_queue_size. */
+	spin_lock_irqsave(&core->task_queue_lock, flags);
+	while (!core->queue_thread_kill &&
+	       !list_empty(&core->task_queue)) {
+		/* Everything fits: take the whole queue; else take a full batch. */
+		can_queue = sas_ha->lldd_queue_size - core->task_queue_size;
+		if (can_queue >= 0) {
+			can_queue = core->task_queue_size;
+			list_splice_init(&core->task_queue, &q);
+		} else {
+			struct list_head *a, *n;
+
+			can_queue = sas_ha->lldd_queue_size;
+			list_for_each_safe(a, n, &core->task_queue) {
+				list_move_tail(a, &q);
+				if (--can_queue == 0)
+					break;
+			}
+			can_queue = sas_ha->lldd_queue_size;
+		}
+		core->task_queue_size -= can_queue;
+		spin_unlock_irqrestore(&core->task_queue_lock, flags);
+		{	/* lock dropped: lldd_execute_task is called with GFP_KERNEL */
+			struct sas_task *task = list_entry(q.next,
+							   struct sas_task,
+							   list);
+			list_del_init(&q);	/* detach head q; tasks stay chained */
+			res = i->dft->lldd_execute_task(task, can_queue,
+							GFP_KERNEL);
+			if (unlikely(res))	/* failed: put head q back before task */
+				__list_add(&q, task->list.prev, &task->list);
+		}
+		spin_lock_irqsave(&core->task_queue_lock, flags);
+		if (res) {	/* return the rejected batch and restore the count */
+			list_splice_init(&q, &core->task_queue); /*at head*/
+			core->task_queue_size += can_queue;
+		}
+	}
+	spin_unlock_irqrestore(&core->task_queue_lock, flags);
+}
+
+static DECLARE_COMPLETION(queue_th_comp);
+
+/**
+ * sas_queue_thread -- The Task Collector thread
+ * @_sas_ha: pointer to struct sas_ha_struct; completes queue_th_comp twice
+ */
+static int sas_queue_thread(void *_sas_ha)
+{
+	struct sas_ha_struct *sas_ha = _sas_ha;
+	struct scsi_core *core = &sas_ha->core;
+
+	daemonize("sas_queue_%d", core->shost->host_no);
+	current->flags |= PF_NOFREEZE;
+	/* First completion: sas_init_queue() waits for this one. */
+	complete(&queue_th_comp);
+	/* Sleep on the semaphore, drain the queue, stop once told to die. */
+	while (1) {
+		down_interruptible(&core->queue_thread_sema);
+		sas_queue(sas_ha);
+		if (core->queue_thread_kill)
+			break;
+	}
+	/* Second completion: sas_shutdown_queue() waits for this one. */
+	complete(&queue_th_comp);
+
+	return 0;
+}
+
+int sas_init_queue(struct sas_ha_struct *sas_ha)
+{
+	int res;
+	struct scsi_core *core = &sas_ha->core;
+	/* Empty queue; semaphore starts locked so the thread sleeps at first. */
+	spin_lock_init(&core->task_queue_lock);
+	core->task_queue_size = 0;
+	INIT_LIST_HEAD(&core->task_queue);
+	init_MUTEX_LOCKED(&core->queue_thread_sema);
+	/* Start the collector thread and wait until it signals queue_th_comp. */
+	res = kernel_thread(sas_queue_thread, sas_ha, 0);
+	if (res >= 0)
+		wait_for_completion(&queue_th_comp);
+
+	return res < 0 ? res : 0;	/* negative kernel_thread() result or 0 */
+}
+
+void sas_shutdown_queue(struct sas_ha_struct *sas_ha)
+{
+	unsigned long flags;
+	struct scsi_core *core = &sas_ha->core;
+	struct sas_task *task, *n;
+	/* Ask the collector thread to exit and wait for its final completion. */
+	init_completion(&queue_th_comp);
+	core->queue_thread_kill = 1;
+	up(&core->queue_thread_sema);
+	wait_for_completion(&queue_th_comp);
+
+	if (!list_empty(&core->task_queue))
+		SAS_DPRINTK("HA: %llx: scsi core task queue is NOT empty!?\n",
+			    SAS_ADDR(sas_ha->sas_addr));
+	/* Fail every task still queued back to its command with DID_ABORT. */
+	spin_lock_irqsave(&core->task_queue_lock, flags);
+	list_for_each_entry_safe(task, n, &core->task_queue, list) {
+		struct scsi_cmnd *cmd = task->uldd_task;
+
+		list_del_init(&task->list);
+
+		ASSIGN_SAS_TASK(cmd, NULL);
+		sas_free_task(task);
+		cmd->result = DID_ABORT << 16;
+		cmd->scsi_done(cmd);	/* called with task_queue_lock held */
+	}
+	spin_unlock_irqrestore(&core->task_queue_lock, flags);
+}
+
+EXPORT_SYMBOL_GPL(sas_queuecommand);
+EXPORT_SYMBOL_GPL(sas_target_alloc);
+EXPORT_SYMBOL_GPL(sas_slave_configure);
+EXPORT_SYMBOL_GPL(sas_slave_destroy);
+EXPORT_SYMBOL_GPL(sas_change_queue_depth);
+EXPORT_SYMBOL_GPL(sas_change_queue_type);
+EXPORT_SYMBOL_GPL(sas_bios_param);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
new file mode 100644
index 0000000000000..72acdabe7f807
--- /dev/null
+++ b/include/scsi/libsas.h
@@ -0,0 +1,627 @@
+/*
+ * SAS host prototypes and structures header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#ifndef _LIBSAS_H_
+#define _LIBSAS_H_
+
+
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <scsi/sas.h>
+#include <linux/list.h>
+#include <asm/semaphore.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_transport_sas.h>
+
+struct block_device;
+
+enum sas_class {
+	SAS,
+	EXPANDER
+};
+
+enum sas_phy_role {
+	PHY_ROLE_NONE = 0,
+	PHY_ROLE_TARGET = 0x40,
+	PHY_ROLE_INITIATOR = 0x80,
+};
+
+enum sas_phy_type {
+        PHY_TYPE_PHYSICAL,
+        PHY_TYPE_VIRTUAL
+};
+
+/* The events are mnemonically described in sas_dump.c
+ * so when updating/adding events here, please
+ * update that file too.
+ */
+enum ha_event {
+	HAE_RESET             = 0U,
+	HA_NUM_EVENTS         = 1,
+};
+
+enum port_event {
+	PORTE_BYTES_DMAED     = 0U,
+	PORTE_BROADCAST_RCVD  = 1,
+	PORTE_LINK_RESET_ERR  = 2,
+	PORTE_TIMER_EVENT     = 3,
+	PORTE_HARD_RESET      = 4,
+	PORT_NUM_EVENTS       = 5,
+};
+
+enum phy_event {
+	PHYE_LOSS_OF_SIGNAL   = 0U,
+	PHYE_OOB_DONE         = 1,
+	PHYE_OOB_ERROR        = 2,
+	PHYE_SPINUP_HOLD      = 3, /* hot plug SATA, no COMWAKE sent */
+	PHY_NUM_EVENTS        = 4,
+};
+
+enum discover_event {
+	DISCE_DISCOVER_DOMAIN   = 0U,
+	DISCE_REVALIDATE_DOMAIN = 1,
+	DISCE_PORT_GONE         = 2,
+	DISC_NUM_EVENTS 	= 3,
+};
+
+/* ---------- Expander Devices ---------- */
+
+#define ETASK 0xFA
+
+#define to_dom_device(_obj) container_of(_obj, struct domain_device, dev_obj)
+#define to_dev_attr(_attr)  container_of(_attr, struct domain_dev_attribute,\
+                                         attr)
+
+enum routing_attribute {
+	DIRECT_ROUTING,
+	SUBTRACTIVE_ROUTING,
+	TABLE_ROUTING,
+};
+
+enum ex_phy_state {
+	PHY_EMPTY,
+	PHY_VACANT,
+	PHY_NOT_PRESENT,
+	PHY_DEVICE_DISCOVERED
+};
+
+struct ex_phy {
+	int    phy_id;
+
+	enum ex_phy_state phy_state;
+
+	enum sas_dev_type attached_dev_type;
+	enum sas_phy_linkrate linkrate;
+
+	u8   attached_sata_host:1;
+	u8   attached_sata_dev:1;
+	u8   attached_sata_ps:1;
+
+	enum sas_proto attached_tproto;
+	enum sas_proto attached_iproto;
+
+	u8   attached_sas_addr[SAS_ADDR_SIZE];
+	u8   attached_phy_id;
+
+	u8   phy_change_count;
+	enum routing_attribute routing_attr;
+	u8   virtual:1;
+
+	int  last_da_index;
+
+	struct sas_phy *phy;
+	struct sas_port *port;
+};
+
+struct expander_device {
+	struct list_head children;
+
+	u16    ex_change_count;
+	u16    max_route_indexes;
+	u8     num_phys;
+	u8     configuring:1;
+	u8     conf_route_table:1;
+	u8     enclosure_logical_id[8];
+
+	struct ex_phy *ex_phy;
+	struct sas_port *parent_port;
+};
+
+/* ---------- SATA device ---------- */
+enum ata_command_set {
+        ATA_COMMAND_SET   = 0,
+        ATAPI_COMMAND_SET = 1,
+};
+
+struct sata_device {
+        enum   ata_command_set command_set;
+        struct smp_resp        rps_resp; /* report_phy_sata_resp */
+        __le16 *identify_device;
+        __le16 *identify_packet_device;
+
+        u8     port_no;        /* port number, if this is a PM (Port) */
+        struct list_head children; /* PM Ports if this is a PM */
+};
+
+/* ---------- Domain device ---------- */
+struct domain_device {
+        enum sas_dev_type dev_type;
+
+        enum sas_phy_linkrate linkrate;
+        enum sas_phy_linkrate min_linkrate;
+        enum sas_phy_linkrate max_linkrate;
+
+        int  pathways;
+
+        struct domain_device *parent;
+        struct list_head siblings; /* devices on the same level */
+        struct asd_sas_port *port;        /* shortcut to root of the tree */
+
+        struct list_head dev_list_node;
+
+        enum sas_proto    iproto;
+        enum sas_proto    tproto;
+
+        struct sas_rphy *rphy;
+
+        u8  sas_addr[SAS_ADDR_SIZE];
+        u8  hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
+
+        u8  frame_rcvd[32];
+
+        union {
+                struct expander_device ex_dev;
+                struct sata_device     sata_dev; /* STP & directly attached */
+        };
+
+        void *lldd_dev;
+};
+
+struct sas_discovery {
+	spinlock_t disc_event_lock;
+	struct work_struct disc_work[DISC_NUM_EVENTS];
+	unsigned long    pending;
+	u8     fanout_sas_addr[8];
+	u8     eeds_a[8];
+	u8     eeds_b[8];
+	int    max_level;
+};
+
+
+/* The port struct is Class:RW, driver:RO */
+struct asd_sas_port {
+/* private: */
+	struct completion port_gone_completion;
+
+	struct sas_discovery disc;
+	struct domain_device *port_dev;
+	spinlock_t dev_list_lock;
+	struct list_head dev_list;
+	enum   sas_phy_linkrate linkrate;
+
+	struct sas_phy *phy;
+	struct work_struct work;
+
+/* public: */
+	int id;
+
+	enum sas_class   class;
+	u8               sas_addr[SAS_ADDR_SIZE];
+	u8               attached_sas_addr[SAS_ADDR_SIZE];
+	enum sas_proto   iproto;
+	enum sas_proto   tproto;
+
+	enum sas_oob_mode oob_mode;
+
+	spinlock_t       phy_list_lock;
+	struct list_head phy_list;
+	int              num_phys;
+	u32              phy_mask;
+
+	struct sas_ha_struct *ha;
+
+	struct sas_port	*port;
+
+	void *lldd_port;	  /* not touched by the sas class code */
+};
+
+/* The phy pretty much is controlled by the LLDD.
+ * The class only reads those fields.
+ */
+struct asd_sas_phy {
+/* private: */
+	/* protected by ha->event_lock */
+	struct work_struct   port_events[PORT_NUM_EVENTS];
+	struct work_struct   phy_events[PHY_NUM_EVENTS];
+
+	unsigned long port_events_pending;
+	unsigned long phy_events_pending;
+
+	int error;
+
+	struct sas_phy *phy;
+
+/* public: */
+	/* The following are class:RO, driver:R/W */
+	int            enabled;	  /* must be set */
+
+	int            id;	  /* must be set */
+	enum sas_class class;
+	enum sas_proto iproto;
+	enum sas_proto tproto;
+
+	enum sas_phy_type  type;
+	enum sas_phy_role  role;
+	enum sas_oob_mode  oob_mode;
+	enum sas_phy_linkrate linkrate;
+
+	u8   *sas_addr;		  /* must be set */
+	u8   attached_sas_addr[SAS_ADDR_SIZE]; /* class:RO, driver: R/W */
+
+	spinlock_t     frame_rcvd_lock;
+	u8             *frame_rcvd; /* must be set */
+	int            frame_rcvd_size;
+
+	spinlock_t     sas_prim_lock;
+	u32            sas_prim;
+
+	struct list_head port_phy_el; /* driver:RO */
+	struct asd_sas_port      *port; /* Class:RW, driver: RO */
+
+	struct sas_ha_struct *ha; /* may be set; the class sets it anyway */
+
+	void *lldd_phy;		  /* not touched by the sas_class_code */
+};
+
+struct scsi_core {	/* embedded in sas_ha_struct; state of the task queue thread */
+	struct Scsi_Host *shost;
+
+	spinlock_t        task_queue_lock;	/* taken while walking task_queue */
+	struct list_head  task_queue;		/* sas_task entries linked via ->list */
+	int               task_queue_size;
+
+	struct semaphore  queue_thread_sema;	/* queue thread sleeps on this */
+	int               queue_thread_kill;	/* set to 1 to make the thread exit */
+};
+
+struct sas_ha_struct {
+/* private: */
+	spinlock_t       event_lock;
+	struct work_struct ha_events[HA_NUM_EVENTS];
+	unsigned long	 pending;
+
+	struct scsi_core core;
+
+/* public: */
+	char *sas_ha_name;
+	struct pci_dev *pcidev;	  /* should be set */
+	struct module *lldd_module; /* should be set */
+
+	u8 *sas_addr;		  /* must be set */
+	u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
+
+	spinlock_t      phy_port_lock;
+	struct asd_sas_phy  **sas_phy; /* array of valid pointers, must be set */
+	struct asd_sas_port **sas_port; /* array of valid pointers, must be set */
+	int             num_phys; /* must be set, gt 0, static */
+
+	/* The class calls this to send a task for execution. */
+	int lldd_max_execute_num;
+	int lldd_queue_size;
+
+	/* LLDD calls these to notify the class of an event. */
+	void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event);
+	void (*notify_port_event)(struct asd_sas_phy *, enum port_event);
+	void (*notify_phy_event)(struct asd_sas_phy *, enum phy_event);
+
+	void *lldd_ha;		  /* not touched by sas class code */
+};
+
+#define SHOST_TO_SAS_HA(_shost) (*(struct sas_ha_struct **)(_shost)->hostdata)
+
+static inline struct domain_device *
+starget_to_domain_dev(struct scsi_target *starget) {
+	return starget->hostdata;	/* hostdata is handed back as the domain device */
+}
+
+static inline struct domain_device *
+sdev_to_domain_dev(struct scsi_device *sdev) {
+	return starget_to_domain_dev(sdev->sdev_target); /* go via the device's target */
+}
+
+static inline struct domain_device *
+cmd_to_domain_dev(struct scsi_cmnd *cmd)
+{
+	return sdev_to_domain_dev(cmd->device);	/* go via the command's scsi_device */
+}
+
+void sas_hash_addr(u8 *hashed, const u8 *sas_addr);
+
+/* LLDD helper: mark a phy as disconnected.  Call it when the link is
+ * severed (possibly from the LLDD's tasklet), before raising the notify
+ * event.  The class only reads these fields while the LLDD may read and
+ * write them, which avoids a race.
+ */
+static inline void sas_phy_disconnected(struct asd_sas_phy *phy)
+{
+	phy->linkrate = PHY_LINKRATE_NONE;
+	phy->oob_mode = OOB_NOT_CONNECTED;
+}
+
+/* ---------- Tasks ---------- */
+/*
+      service_response |  SAS_TASK_COMPLETE  |  SAS_TASK_UNDELIVERED |
+  exec_status          |                     |                       |
+  ---------------------+---------------------+-----------------------+
+       SAM_...         |         X           |                       |
+       DEV_NO_RESPONSE |         X           |           X           |
+       INTERRUPTED     |         X           |                       |
+       QUEUE_FULL      |                     |           X           |
+       DEVICE_UNKNOWN  |                     |           X           |
+       SG_ERR          |                     |           X           |
+  ---------------------+---------------------+-----------------------+
+ */
+
+enum service_response {
+	SAS_TASK_COMPLETE,
+	SAS_TASK_UNDELIVERED = -1,
+};
+
+enum exec_status {
+	SAM_GOOD         = 0,
+	SAM_CHECK_COND   = 2,
+	SAM_COND_MET     = 4,
+	SAM_BUSY         = 8,
+	SAM_INTERMEDIATE = 0x10,
+	SAM_IM_COND_MET  = 0x12,
+	SAM_RESV_CONFLICT= 0x14,
+	SAM_TASK_SET_FULL= 0x28,
+	SAM_ACA_ACTIVE   = 0x30,
+	SAM_TASK_ABORTED = 0x40,
+
+	SAS_DEV_NO_RESPONSE = 0x80,
+	SAS_DATA_UNDERRUN,
+	SAS_DATA_OVERRUN,
+	SAS_INTERRUPTED,
+	SAS_QUEUE_FULL,
+	SAS_DEVICE_UNKNOWN,
+	SAS_SG_ERR,
+	SAS_OPEN_REJECT,
+	SAS_OPEN_TO,
+	SAS_PROTO_RESPONSE,
+	SAS_PHY_DOWN,
+	SAS_NAK_R_ERR,
+	SAS_PENDING,
+	SAS_ABORTED_TASK,
+};
+
+/* When a task finishes with a response, the LLDD examines the
+ * response:
+ * 	- For an ATA task task_status_struct::stat is set to
+ * SAS_PROTO_RESPONSE, and the task_status_struct::buf is set to the
+ * contents of struct ata_task_resp.
+ * 	- For SSP tasks, if no data is present or status/TMF response
+ * is valid, task_status_struct::stat is set.  If data is present
+ * (SENSE data), the LLDD copies up to SAS_STATUS_BUF_SIZE, sets
+ * task_status_struct::buf_valid_size, and task_status_struct::stat is
+ * set to SAM_CHECK_COND.
+ *
+ * "buf" has format SCSI Sense for SSP task, or struct ata_task_resp
+ * for ATA task.
+ *
+ * "frame_len" is the total frame length, which could be more or less
+ * than actually copied.
+ *
+ * Tasks ending with response, always set the residual field.
+ */
+struct ata_task_resp {
+	u16  frame_len;
+	u8   ending_fis[24];	  /* dev to host or data-in */
+	u32  sstatus;
+	u32  serror;
+	u32  scontrol;
+	u32  sactive;
+};
+
+#define SAS_STATUS_BUF_SIZE 96
+
+struct task_status_struct {	/* status of a finished task; embedded in sas_task */
+	enum service_response resp;
+	enum exec_status      stat;
+	int  buf_valid_size;	/* set by the LLDD after copying response data into buf */
+
+	u8   buf[SAS_STATUS_BUF_SIZE]; /* SCSI sense (SSP) or struct ata_task_resp (ATA) */
+
+	u32  residual;		/* always set by tasks that end with a response */
+	enum sas_open_rej_reason open_rej_reason;
+};
+
+/* ATA and ATAPI task queuable to a SAS LLDD.
+ */
+struct sas_ata_task {
+	struct host_to_dev_fis fis;
+	u8     atapi_packet[16];  /* 0 if not ATAPI task */
+
+	u8     retry_count;	  /* hardware retry, should be > 0 */
+
+	u8     dma_xfer:1;	  /* PIO:0 or DMA:1 */
+	u8     use_ncq:1;
+	u8     set_affil_pol:1;
+	u8     stp_affil_pol:1;
+
+	u8     device_control_reg_update:1;
+};
+
+struct sas_smp_task {
+	struct scatterlist smp_req;
+	struct scatterlist smp_resp;
+};
+
+enum task_attribute {
+	TASK_ATTR_SIMPLE = 0,
+	TASK_ATTR_HOQ    = 1,
+	TASK_ATTR_ORDERED= 2,
+	TASK_ATTR_ACA    = 4,
+};
+
+struct sas_ssp_task {
+	u8     retry_count;	  /* hardware retry, should be > 0 */
+
+	u8     LUN[8];
+	u8     enable_first_burst:1;
+	enum   task_attribute task_attr;
+	u8     task_prio;
+	u8     cdb[16];
+};
+
+struct sas_task {
+	struct domain_device *dev;
+	struct list_head      list;
+
+	spinlock_t   task_state_lock;
+	unsigned     task_state_flags;
+
+	enum   sas_proto      task_proto;
+
+	/* Used by the discovery code. */
+	struct timer_list     timer;
+	struct completion     completion;
+
+	union {
+		struct sas_ata_task ata_task;
+		struct sas_smp_task smp_task;
+		struct sas_ssp_task ssp_task;
+	};
+
+	struct scatterlist *scatter;
+	int    num_scatter;
+	u32    total_xfer_len;
+	u8     data_dir:2;	  /* Use PCI_DMA_... */
+
+	struct task_status_struct task_status;
+	void   (*task_done)(struct sas_task *);
+
+	void   *lldd_task;	  /* for use by LLDDs */
+	void   *uldd_task;
+};
+
+
+
+#define SAS_TASK_STATE_PENDING  1
+#define SAS_TASK_STATE_DONE     2
+#define SAS_TASK_STATE_ABORTED  4
+
+/* Allocate and initialise a sas_task from sas_task_cache; NULL on failure. */
+static inline struct sas_task *sas_alloc_task(unsigned long flags)
+{
+	extern kmem_cache_t *sas_task_cache;
+	struct sas_task *t = kmem_cache_alloc(sas_task_cache, flags);
+
+	if (!t)
+		return NULL;
+	memset(t, 0, sizeof(*t));	/* start from an all-zero task */
+	INIT_LIST_HEAD(&t->list);
+	spin_lock_init(&t->task_state_lock);
+	t->task_state_flags = SAS_TASK_STATE_PENDING;
+	init_timer(&t->timer);
+	init_completion(&t->completion);
+	return t;
+}
+
+static inline void sas_free_task(struct sas_task *task)
+{
+	extern kmem_cache_t *sas_task_cache;
+	if (!task)
+		return;				/* freeing NULL is a no-op */
+	BUG_ON(!list_empty(&task->list));	/* task must be off its list */
+	kmem_cache_free(sas_task_cache, task);
+}
+
+struct sas_domain_function_template {
+	/* The class calls these to notify the LLDD of an event. */
+	void (*lldd_port_formed)(struct asd_sas_phy *);
+	void (*lldd_port_deformed)(struct asd_sas_phy *);
+
+	/* The class calls these when a device is found or gone. */
+	int  (*lldd_dev_found)(struct domain_device *);
+	void (*lldd_dev_gone)(struct domain_device *);
+
+	int (*lldd_execute_task)(struct sas_task *, int num,
+				 unsigned long gfp_flags);
+
+	/* Task Management Functions. Must be called from process context. */
+	int (*lldd_abort_task)(struct sas_task *);
+	int (*lldd_abort_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_aca)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_I_T_nexus_reset)(struct domain_device *);
+	int (*lldd_lu_reset)(struct domain_device *, u8 *lun);
+	int (*lldd_query_task)(struct sas_task *);
+
+	/* Port and Adapter management */
+	int (*lldd_clear_nexus_port)(struct asd_sas_port *);
+	int (*lldd_clear_nexus_ha)(struct sas_ha_struct *);
+
+	/* Phy management */
+	int (*lldd_control_phy)(struct asd_sas_phy *, enum phy_func);
+};
+
+extern int sas_register_ha(struct sas_ha_struct *);
+extern int sas_unregister_ha(struct sas_ha_struct *);
+
+extern int sas_queuecommand(struct scsi_cmnd *,
+		     void (*scsi_done)(struct scsi_cmnd *));
+extern int sas_target_alloc(struct scsi_target *);
+extern int sas_slave_alloc(struct scsi_device *);
+extern int sas_slave_configure(struct scsi_device *);
+extern void sas_slave_destroy(struct scsi_device *);
+extern int sas_change_queue_depth(struct scsi_device *, int new_depth);
+extern int sas_change_queue_type(struct scsi_device *, int qt);
+extern int sas_bios_param(struct scsi_device *,
+			  struct block_device *,
+			  sector_t capacity, int *hsc);
+extern struct scsi_transport_template *
+sas_domain_attach_transport(struct sas_domain_function_template *);
+extern void sas_domain_release_transport(struct scsi_transport_template *);
+
+int  sas_discover_root_expander(struct domain_device *);
+
+void sas_init_ex_attr(void);
+
+int  sas_ex_revalidate_domain(struct domain_device *);
+
+void sas_unregister_domain_devices(struct asd_sas_port *port);
+void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *);
+int  sas_discover_event(struct asd_sas_port *, enum discover_event ev);
+
+int  sas_discover_sata(struct domain_device *);
+int  sas_discover_end_dev(struct domain_device *);
+
+void sas_unregister_dev(struct domain_device *);
+
+void sas_init_dev(struct domain_device *);
+
+#endif /* _LIBSAS_H_ */
diff --git a/include/scsi/sas.h b/include/scsi/sas.h
new file mode 100644
index 0000000000000..752853a113dc6
--- /dev/null
+++ b/include/scsi/sas.h
@@ -0,0 +1,644 @@
+/*
+ * SAS structures and definitions header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#ifndef _SAS_H_
+#define _SAS_H_
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#define SAS_ADDR_SIZE        8
+#define HASHED_SAS_ADDR_SIZE 3
+#define SAS_ADDR(_sa) ((unsigned long long) be64_to_cpu(*(__be64 *)(_sa)))
+
+#define SMP_REQUEST             0x40
+#define SMP_RESPONSE            0x41
+
+#define SSP_DATA                0x01
+#define SSP_XFER_RDY            0x05
+#define SSP_COMMAND             0x06
+#define SSP_RESPONSE            0x07
+#define SSP_TASK                0x16
+
+#define SMP_REPORT_GENERAL       0x00
+#define SMP_REPORT_MANUF_INFO    0x01
+#define SMP_READ_GPIO_REG        0x02
+#define SMP_DISCOVER             0x10
+#define SMP_REPORT_PHY_ERR_LOG   0x11
+#define SMP_REPORT_PHY_SATA      0x12
+#define SMP_REPORT_ROUTE_INFO    0x13
+#define SMP_WRITE_GPIO_REG       0x82
+#define SMP_CONF_ROUTE_INFO      0x90
+#define SMP_PHY_CONTROL          0x91
+#define SMP_PHY_TEST_FUNCTION    0x92
+
+#define SMP_RESP_FUNC_ACC        0x00
+#define SMP_RESP_FUNC_UNK        0x01
+#define SMP_RESP_FUNC_FAILED     0x02
+#define SMP_RESP_INV_FRM_LEN     0x03
+#define SMP_RESP_NO_PHY          0x10
+#define SMP_RESP_NO_INDEX        0x11
+#define SMP_RESP_PHY_NO_SATA     0x12
+#define SMP_RESP_PHY_UNK_OP      0x13
+#define SMP_RESP_PHY_UNK_TESTF   0x14
+#define SMP_RESP_PHY_TEST_INPROG 0x15
+#define SMP_RESP_PHY_VACANT      0x16
+
+/* SAM TMFs */
+#define TMF_ABORT_TASK      0x01
+#define TMF_ABORT_TASK_SET  0x02
+#define TMF_CLEAR_TASK_SET  0x04
+#define TMF_LU_RESET        0x08
+#define TMF_CLEAR_ACA       0x40
+#define TMF_QUERY_TASK      0x80
+
+/* SAS TMF responses */
+#define TMF_RESP_FUNC_COMPLETE   0x00
+#define TMF_RESP_INVALID_FRAME   0x02
+#define TMF_RESP_FUNC_ESUPP      0x04
+#define TMF_RESP_FUNC_FAILED     0x05
+#define TMF_RESP_FUNC_SUCC       0x08
+#define TMF_RESP_NO_LUN          0x09
+#define TMF_RESP_OVERLAPPED_TAG  0x0A
+
+enum sas_oob_mode {
+	OOB_NOT_CONNECTED,
+	SATA_OOB_MODE,
+	SAS_OOB_MODE
+};
+
+/* See sas_discover.c if you plan on changing these.
+ */
+enum sas_dev_type {
+	NO_DEVICE   = 0,	  /* protocol */
+	SAS_END_DEV = 1,	  /* protocol */
+	EDGE_DEV    = 2,	  /* protocol */
+	FANOUT_DEV  = 3,	  /* protocol */
+	SAS_HA      = 4,
+	SATA_DEV    = 5,
+	SATA_PM     = 7,
+	SATA_PM_PORT= 8,
+};
+
+enum sas_phy_linkrate {
+	PHY_LINKRATE_NONE = 0,
+	PHY_LINKRATE_UNKNOWN = 0,	/* alias: same value as PHY_LINKRATE_NONE */
+	PHY_DISABLED,			/* 1 (implicit) */
+	PHY_RESET_PROBLEM,		/* 2 (implicit) */
+	PHY_SPINUP_HOLD,		/* 3 (implicit) */
+	PHY_PORT_SELECTOR,		/* 4 (implicit) */
+	PHY_LINKRATE_1_5 = 0x08,
+	PHY_LINKRATE_G1  = PHY_LINKRATE_1_5,	/* alias of PHY_LINKRATE_1_5 */
+	PHY_LINKRATE_3   = 0x09,
+	PHY_LINKRATE_G2  = PHY_LINKRATE_3,	/* alias of PHY_LINKRATE_3 */
+	PHY_LINKRATE_6   = 0x0A,
+};
+
+/* Partly from IDENTIFY address frame. */
+enum sas_proto {
+	SATA_PROTO    = 1,
+	SAS_PROTO_SMP = 2,	  /* protocol */
+	SAS_PROTO_STP = 4,	  /* protocol */
+	SAS_PROTO_SSP = 8,	  /* protocol */
+	SAS_PROTO_ALL = 0xE,	  /* SMP | STP | SSP; SATA_PROTO is not included */
+};
+
+/* From the spec; local phys only */
+enum phy_func {
+	PHY_FUNC_NOP,
+	PHY_FUNC_LINK_RESET,		  /* Enables the phy */
+	PHY_FUNC_HARD_RESET,
+	PHY_FUNC_DISABLE,
+	PHY_FUNC_CLEAR_ERROR_LOG = 5,
+	PHY_FUNC_CLEAR_AFFIL,
+	PHY_FUNC_TX_SATA_PS_SIGNAL,
+	PHY_FUNC_RELEASE_SPINUP_HOLD = 0x10, /* LOCAL PORT ONLY! */
+};
+
+/* SAS LLDD would need to report only _very_few_ of those, like BROADCAST.
+ * Most of those are here for completeness.
+ */
+enum sas_prim {
+	SAS_PRIM_AIP_NORMAL = 1,
+	SAS_PRIM_AIP_R0     = 2,
+	SAS_PRIM_AIP_R1     = 3,
+	SAS_PRIM_AIP_R2     = 4,
+	SAS_PRIM_AIP_WC     = 5,
+	SAS_PRIM_AIP_WD     = 6,
+	SAS_PRIM_AIP_WP     = 7,
+	SAS_PRIM_AIP_RWP    = 8,
+
+	SAS_PRIM_BC_CH      = 9,
+	SAS_PRIM_BC_RCH0    = 10,
+	SAS_PRIM_BC_RCH1    = 11,
+	SAS_PRIM_BC_R0      = 12,
+	SAS_PRIM_BC_R1      = 13,
+	SAS_PRIM_BC_R2      = 14,
+	SAS_PRIM_BC_R3      = 15,
+	SAS_PRIM_BC_R4      = 16,
+
+	SAS_PRIM_NOTIFY_ENSP= 17,
+	SAS_PRIM_NOTIFY_R0  = 18,
+	SAS_PRIM_NOTIFY_R1  = 19,
+	SAS_PRIM_NOTIFY_R2  = 20,
+
+	SAS_PRIM_CLOSE_CLAF = 21,
+	SAS_PRIM_CLOSE_NORM = 22,
+	SAS_PRIM_CLOSE_R0   = 23,
+	SAS_PRIM_CLOSE_R1   = 24,
+
+	SAS_PRIM_OPEN_RTRY  = 25,
+	SAS_PRIM_OPEN_RJCT  = 26,
+	SAS_PRIM_OPEN_ACPT  = 27,
+
+	SAS_PRIM_DONE       = 28,
+	SAS_PRIM_BREAK      = 29,
+
+	SATA_PRIM_DMAT      = 33,
+	SATA_PRIM_PMNAK     = 34,
+	SATA_PRIM_PMACK     = 35,
+	SATA_PRIM_PMREQ_S   = 36,
+	SATA_PRIM_PMREQ_P   = 37,
+	SATA_SATA_R_ERR     = 38,
+};
+
+enum sas_open_rej_reason {
+	/* Abandon open */
+	SAS_OREJ_UNKNOWN   = 0,
+	SAS_OREJ_BAD_DEST  = 1,
+	SAS_OREJ_CONN_RATE = 2,
+	SAS_OREJ_EPROTO    = 3,
+	SAS_OREJ_RESV_AB0  = 4,
+	SAS_OREJ_RESV_AB1  = 5,
+	SAS_OREJ_RESV_AB2  = 6,
+	SAS_OREJ_RESV_AB3  = 7,
+	SAS_OREJ_WRONG_DEST= 8,
+	SAS_OREJ_STP_NORES = 9,
+
+	/* Retry open */
+	SAS_OREJ_NO_DEST   = 10,
+	SAS_OREJ_PATH_BLOCKED = 11,
+	SAS_OREJ_RSVD_CONT0 = 12,
+	SAS_OREJ_RSVD_CONT1 = 13,
+	SAS_OREJ_RSVD_INIT0 = 14,
+	SAS_OREJ_RSVD_INIT1 = 15,
+	SAS_OREJ_RSVD_STOP0 = 16,
+	SAS_OREJ_RSVD_STOP1 = 17,
+	SAS_OREJ_RSVD_RETRY = 18,
+};
+
+struct  dev_to_host_fis {
+	u8     fis_type;	  /* 0x34 */
+	u8     flags;
+	u8     status;
+	u8     error;
+
+	u8     lbal;
+	union { u8 lbam; u8 byte_count_low; };
+	union { u8 lbah; u8 byte_count_high; };
+	u8     device;
+
+	u8     lbal_exp;
+	u8     lbam_exp;
+	u8     lbah_exp;
+	u8     _r_a;
+
+	union { u8  sector_count; u8 interrupt_reason; };
+	u8     sector_count_exp;
+	u8     _r_b;
+	u8     _r_c;
+
+	u32    _r_d;
+} __attribute__ ((packed));
+
+struct host_to_dev_fis {
+	u8     fis_type;	  /* 0x27 */
+	u8     flags;
+	u8     command;
+	u8     features;
+
+	u8     lbal;
+	union { u8 lbam; u8 byte_count_low; };
+	union { u8 lbah; u8 byte_count_high; };
+	u8     device;
+
+	u8     lbal_exp;
+	u8     lbam_exp;
+	u8     lbah_exp;
+	u8     features_exp;
+
+	union { u8  sector_count; u8 interrupt_reason; };
+	u8     sector_count_exp;
+	u8     _r_a;
+	u8     control;
+
+	u32    _r_b;
+} __attribute__ ((packed));
+
+/* Prefer to have code clarity over header file clarity.
+ */
+#ifdef __LITTLE_ENDIAN_BITFIELD
+struct sas_identify_frame {
+	/* Byte 0 */
+	u8  frame_type:4;
+	u8  dev_type:3;
+	u8  _un0:1;
+
+	/* Byte 1 */
+	u8  _un1;
+
+	/* Byte 2 */
+	union {
+		struct {
+			u8  _un20:1;
+			u8  smp_iport:1;
+			u8  stp_iport:1;
+			u8  ssp_iport:1;
+			u8  _un247:4;
+		};
+		u8 initiator_bits;
+	};
+
+	/* Byte 3 */
+	union {
+		struct {
+			u8  _un30:1;
+			u8 smp_tport:1;
+			u8 stp_tport:1;
+			u8 ssp_tport:1;
+			u8 _un347:4;
+		};
+		u8 target_bits;
+	};
+
+	/* Byte 4 - 11 */
+	u8 _un4_11[8];
+
+	/* Byte 12 - 19 */
+	u8 sas_addr[SAS_ADDR_SIZE];
+
+	/* Byte 20 */
+	u8 phy_id;
+
+	u8 _un21_27[7];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct ssp_frame_hdr {
+	u8     frame_type;
+	u8     hashed_dest_addr[HASHED_SAS_ADDR_SIZE];
+	u8     _r_a;
+	u8     hashed_src_addr[HASHED_SAS_ADDR_SIZE];
+	__be16 _r_b;
+
+	u8     changing_data_ptr:1;
+	u8     retransmit:1;
+	u8     retry_data_frames:1;
+	u8     _r_c:5;
+
+	u8     num_fill_bytes:2;
+	u8     _r_d:6;
+
+	u32    _r_e;
+	__be16 tag;
+	__be16 tptt;
+	__be32 data_offs;
+} __attribute__ ((packed));
+
+struct ssp_response_iu {
+	u8     _r_a[10];
+
+	u8     datapres:2;
+	u8     _r_b:6;
+
+	u8     status;
+
+	u32    _r_c;
+
+	__be32 sense_data_len;
+	__be32 response_data_len;
+
+	u8     resp_data[0];
+	u8     sense_data[0];
+} __attribute__ ((packed));
+
+/* ---------- SMP ---------- */
+
+struct report_general_resp {
+	__be16  change_count;
+	__be16  route_indexes;
+	u8      _r_a;
+	u8      num_phys;
+
+	u8      conf_route_table:1;
+	u8      configuring:1;
+	u8      _r_b:6;
+
+	u8      _r_c;
+
+	u8      enclosure_logical_id[8];
+
+	u8      _r_d[12];
+} __attribute__ ((packed));
+
+struct discover_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	__be16 _r_b;
+
+	u8    _r_c:4;
+	u8    attached_dev_type:3;
+	u8    _r_d:1;
+
+	u8    linkrate:4;
+	u8    _r_e:4;
+
+	u8    attached_sata_host:1;
+	u8    iproto:3;
+	u8    _r_f:4;
+
+	u8    attached_sata_dev:1;
+	u8    tproto:3;
+	u8    _r_g:3;
+	u8    attached_sata_ps:1;
+
+	u8    sas_addr[8];
+	u8    attached_sas_addr[8];
+	u8    attached_phy_id;
+
+	u8    _r_h[7];
+
+	u8    hmin_linkrate:4;
+	u8    pmin_linkrate:4;
+	u8    hmax_linkrate:4;
+	u8    pmax_linkrate:4;
+
+	u8    change_count;
+
+	u8    pptv:4;
+	u8    _r_i:3;
+	u8    virtual:1;
+
+	u8    routing_attr:4;
+	u8    _r_j:4;
+
+	u8    conn_type;
+	u8    conn_el_index;
+	u8    conn_phy_link;
+
+	u8    _r_k[8];
+} __attribute__ ((packed));
+
+struct report_phy_sata_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	u8    _r_b;
+
+	u8    affil_valid:1;
+	u8    affil_supp:1;
+	u8    _r_c:6;
+
+	u32    _r_d;
+
+	u8    stp_sas_addr[8];
+
+	struct dev_to_host_fis fis;
+
+	u32   _r_e;
+
+	u8    affil_stp_ini_addr[8];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct smp_resp {
+	u8    frame_type;
+	u8    function;
+	u8    result;
+	u8    reserved;
+	union {
+		struct report_general_resp  rg;
+		struct discover_resp        disc;
+		struct report_phy_sata_resp rps;
+	};
+} __attribute__ ((packed));
+
+#elif defined(__BIG_ENDIAN_BITFIELD)
+struct sas_identify_frame {
+	/* Byte 0 */
+	u8  _un0:1;
+	u8  dev_type:3;
+	u8  frame_type:4;
+
+	/* Byte 1 */
+	u8  _un1;
+
+	/* Byte 2 */
+	union {
+		struct {
+			u8  _un247:4;
+			u8  ssp_iport:1;
+			u8  stp_iport:1;
+			u8  smp_iport:1;
+			u8  _un20:1;
+		};
+		u8 initiator_bits;
+	};
+
+	/* Byte 3 */
+	union {
+		struct {
+			u8 _un347:4;
+			u8 ssp_tport:1;
+			u8 stp_tport:1;
+			u8 smp_tport:1;
+			u8 _un30:1;
+		};
+		u8 target_bits;
+	};
+
+	/* Byte 4 - 11 */
+	u8 _un4_11[8];
+
+	/* Byte 12 - 19 */
+	u8 sas_addr[SAS_ADDR_SIZE];
+
+	/* Byte 20 */
+	u8 phy_id;
+
+	u8 _un21_27[7];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct ssp_frame_hdr {
+	u8     frame_type;
+	u8     hashed_dest_addr[HASHED_SAS_ADDR_SIZE];
+	u8     _r_a;
+	u8     hashed_src_addr[HASHED_SAS_ADDR_SIZE];
+	__be16 _r_b;
+
+	u8     _r_c:5;
+	u8     retry_data_frames:1;
+	u8     retransmit:1;
+	u8     changing_data_ptr:1;
+
+	u8     _r_d:6;
+	u8     num_fill_bytes:2;
+
+	u32    _r_e;
+	__be16 tag;
+	__be16 tptt;
+	__be32 data_offs;
+} __attribute__ ((packed));
+
+struct ssp_response_iu {
+	u8     _r_a[10];
+
+	u8     _r_b:6;
+	u8     datapres:2;
+
+	u8     status;
+
+	u32    _r_c;
+
+	__be32 sense_data_len;
+	__be32 response_data_len;
+
+	u8     resp_data[0];
+	u8     sense_data[0];
+} __attribute__ ((packed));
+
+/* ---------- SMP ---------- */
+
+struct report_general_resp {
+	__be16  change_count;
+	__be16  route_indexes;
+	u8      _r_a;
+	u8      num_phys;
+
+	u8      _r_b:6;
+	u8      configuring:1;
+	u8      conf_route_table:1;
+
+	u8      _r_c;
+
+	u8      enclosure_logical_id[8];
+
+	u8      _r_d[12];
+} __attribute__ ((packed));
+
+struct discover_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	__be16 _r_b;
+
+	u8    _r_d:1;
+	u8    attached_dev_type:3;
+	u8    _r_c:4;
+
+	u8    _r_e:4;
+	u8    linkrate:4;
+
+	u8    _r_f:4;
+	u8    iproto:3;
+	u8    attached_sata_host:1;
+
+	u8    attached_sata_ps:1;
+	u8    _r_g:3;
+	u8    tproto:3;
+	u8    attached_sata_dev:1;
+
+	u8    sas_addr[8];
+	u8    attached_sas_addr[8];
+	u8    attached_phy_id;
+
+	u8    _r_h[7];
+
+	u8    pmin_linkrate:4;
+	u8    hmin_linkrate:4;
+	u8    pmax_linkrate:4;
+	u8    hmax_linkrate:4;
+
+	u8    change_count;
+
+	u8    virtual:1;
+	u8    _r_i:3;
+	u8    pptv:4;
+
+	u8    _r_j:4;
+	u8    routing_attr:4;
+
+	u8    conn_type;
+	u8    conn_el_index;
+	u8    conn_phy_link;
+
+	u8    _r_k[8];
+} __attribute__ ((packed));
+
+struct report_phy_sata_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	u8    _r_b;
+
+	u8    _r_c:6;
+	u8    affil_supp:1;
+	u8    affil_valid:1;
+
+	u32   _r_d;
+
+	u8    stp_sas_addr[8];
+
+	struct dev_to_host_fis fis;
+
+	u32   _r_e;
+
+	u8    affil_stp_ini_addr[8];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct smp_resp {
+	u8    frame_type;
+	u8    function;
+	u8    result;
+	u8    reserved;
+	union {
+		struct report_general_resp  rg;
+		struct discover_resp        disc;
+		struct report_phy_sata_resp rps;
+	};
+} __attribute__ ((packed));
+
+#else
+#error "Bitfield order not defined!"
+#endif
+
+#endif /* _SAS_H_ */
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 1bc6752014139..84a6d5fe0920c 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -429,4 +429,10 @@ struct scsi_lun {
 /* Used to obtain the PCI location of a device */
 #define SCSI_IOCTL_GET_PCI		0x5387
 
+/* Pull a big-endian u32 out of a SCSI message; top byte is widened before shifting */
+static inline u32 scsi_to_u32(u8 *ptr)
+{
+	return ((u32)ptr[0] << 24) | (ptr[1] << 16) | (ptr[2] << 8) | ptr[3];
+}
+
 #endif /* _SCSI_SCSI_H */
-- 
GitLab


From 187afbed1814ea0851bf30bacbf807217dd7864b Mon Sep 17 00:00:00 2001
From: Jon Masters <jcm@redhat.com>
Date: Mon, 28 Aug 2006 17:08:21 -0500
Subject: [PATCH 0129/1063] [SCSI] MODULE_FIRMWARE for binary firmware(s)

Right now, various kernel modules are being migrated over to use
request_firmware in order to pull in binary firmware blobs from userland
when the module is loaded. This makes sense.

However, there is right now little mechanism in place to automatically
determine which binary firmware blobs must be included with a kernel in
order to satisfy the prerequisites of these drivers. This affects
vendors, but also regular users to a certain extent too.

The attached patch introduces MODULE_FIRMWARE as a mechanism for
advertising that a particular firmware file is to be loaded - it will
then show up via modinfo and could be used e.g. when packaging a kernel.

Signed-off-by: Jon Masters <jcm@redhat.com>

Comments added in line with all the other MODULE_ tag

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/module.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/module.h b/include/linux/module.h
index 0dfb794c52d3b..d4486cc2e7fe6 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -156,6 +156,11 @@ extern struct module __this_module;
 */
 #define MODULE_VERSION(_version) MODULE_INFO(version, _version)
 
+/* Optional firmware file (or files) needed by the module
+ * format is simply firmware file name.  Multiple firmware
+ * files require multiple MODULE_FIRMWARE() specifiers */
+#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
+
 /* Given an address, look for it in the exception tables */
 const struct exception_table_entry *search_exception_tables(unsigned long add);
 
-- 
GitLab


From bc229b3663dcd7d8f266cb13b0839efdee6d95b5 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@SteelEye.com>
Date: Mon, 28 Aug 2006 17:08:21 -0500
Subject: [PATCH 0130/1063] [SCSI] aic94xx: add MODULE_FIRMWARE tag

Add a tag which shows what the firmware file we're requesting is.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic94xx/aic94xx_seq.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/aic94xx/aic94xx_seq.c b/drivers/scsi/aic94xx/aic94xx_seq.c
index 9050c6f3f6bde..d9b6da5fd06ca 100644
--- a/drivers/scsi/aic94xx/aic94xx_seq.c
+++ b/drivers/scsi/aic94xx/aic94xx_seq.c
@@ -28,6 +28,7 @@
 
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/module.h>
 #include <linux/firmware.h>
 #include "aic94xx_reg.h"
 #include "aic94xx_hwi.h"
@@ -1399,3 +1400,5 @@ void asd_update_port_links(struct asd_sas_phy *sas_phy)
 	if (err)
 		asd_printk("couldn't update DDB 0:error:%d\n", err);
 }
+
+MODULE_FIRMWARE(SAS_RAZOR_SEQUENCER_FW_FILE);
-- 
GitLab


From f19eaa7f53736449a6eac89c3863eca2c64d5913 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Wed, 30 Aug 2006 14:18:33 -0700
Subject: [PATCH 0131/1063] [SCSI] aic94xx: Increase can_queue for better
 performance

This patch sets can_queue in the aic94xx driver's scsi_host to better
performing values than what's there currently.  It seems that
asd_ha->seq.can_queue reflects the number of requests that can be
queued per controller; so long as there's one scsi_host per
controller, it seems logical that the scsi_host ought to have the same
can_queue value.  To the best of my (still limited) knowledge, this
method provides the correct value.

The effect of leaving this value set to 1 is terrible performance in
the case of either (a) certain Maxtor SAS drives flying solo or (b)
flooding several disks with I/O simultaneously (md-raid).  There may be
more scenarios where we see similar problems that I haven't uncovered.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic94xx/aic94xx_init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 3ec2e46f80c61..69aa708875302 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -620,6 +620,8 @@ static int __devinit asd_pci_probe(struct pci_dev *dev,
 		   asd_ha->hw_prof.bios.present ? "build " : "not present",
 		   asd_ha->hw_prof.bios.bld);
 
+	shost->can_queue = asd_ha->seq.can_queue;
+
 	if (use_msi)
 		pci_enable_msi(asd_ha->pcidev);
 
-- 
GitLab


From 492dfb489658dfe4a755fa29dd0e34e9c8bd8fb8 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 30 Aug 2006 15:48:45 -0400
Subject: [PATCH 0132/1063] [SCSI] block: add support for shared tag maps

The current block queue implementation already contains most of the
machinery for shared tag maps.  The only remaining pieces are a way to
allocate and destroy a tag map independently of the queues (so that
the maps can be managed on the life cycle of the overseeing entity).

Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 block/ll_rw_blk.c      | 109 +++++++++++++++++++++++++++++++++--------
 include/linux/blkdev.h |   2 +
 2 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index ddd9253f9d55f..556a3d354eab5 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -848,21 +848,18 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag)
 EXPORT_SYMBOL(blk_queue_find_tag);
 
 /**
- * __blk_queue_free_tags - release tag maintenance info
- * @q:  the request queue for the device
+ * __blk_free_tags - release a given set of tag maintenance info
+ * @bqt:	the tag map to free
  *
- *  Notes:
- *    blk_cleanup_queue() will take care of calling this function, if tagging
- *    has been used. So there's no need to call this directly.
- **/
-static void __blk_queue_free_tags(request_queue_t *q)
+ * Tries to free the specified @bqt.  Returns true if it was
+ * actually freed and false if there are still references using it.
+ */
+static int __blk_free_tags(struct blk_queue_tag *bqt)
 {
-	struct blk_queue_tag *bqt = q->queue_tags;
-
-	if (!bqt)
-		return;
+	int retval;
 
-	if (atomic_dec_and_test(&bqt->refcnt)) {
+	retval = atomic_dec_and_test(&bqt->refcnt);
+	if (retval) {
 		BUG_ON(bqt->busy);
 		BUG_ON(!list_empty(&bqt->busy_list));
 
@@ -873,12 +870,49 @@ static void __blk_queue_free_tags(request_queue_t *q)
 		bqt->tag_map = NULL;
 
 		kfree(bqt);
+
 	}
 
+	return retval;
+}
+
+/**
+ * __blk_queue_free_tags - release tag maintenance info
+ * @q:  the request queue for the device
+ *
+ *  Notes:
+ *    blk_cleanup_queue() will take care of calling this function, if tagging
+ *    has been used. So there's no need to call this directly.
+ **/
+static void __blk_queue_free_tags(request_queue_t *q)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+
+	if (!bqt)
+		return;
+
+	__blk_free_tags(bqt);
+
 	q->queue_tags = NULL;
 	q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
 }
 
+
+/**
+ * blk_free_tags - release a given set of tag maintenance info
+ * @bqt:	the tag map to free
+ *
+ * For externally managed @bqt, frees the map.  Callers of this
+ * function must guarantee to have released all the queues that
+ * might have been using this tag map.
+ */
+void blk_free_tags(struct blk_queue_tag *bqt)
+{
+	if (unlikely(!__blk_free_tags(bqt)))
+		BUG();
+}
+EXPORT_SYMBOL(blk_free_tags);
+
 /**
  * blk_queue_free_tags - release tag maintenance info
  * @q:  the request queue for the device
@@ -901,7 +935,7 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
 	unsigned long *tag_map;
 	int nr_ulongs;
 
-	if (depth > q->nr_requests * 2) {
+	if (q && depth > q->nr_requests * 2) {
 		depth = q->nr_requests * 2;
 		printk(KERN_ERR "%s: adjusted depth to %d\n",
 				__FUNCTION__, depth);
@@ -927,6 +961,38 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
 	return -ENOMEM;
 }
 
+static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
+						   int depth)
+{
+	struct blk_queue_tag *tags;
+
+	tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
+	if (!tags)
+		goto fail;
+
+	if (init_tag_map(q, tags, depth))
+		goto fail;
+
+	INIT_LIST_HEAD(&tags->busy_list);
+	tags->busy = 0;
+	atomic_set(&tags->refcnt, 1);
+	return tags;
+fail:
+	kfree(tags);
+	return NULL;
+}
+
+/**
+ * blk_init_tags - initialize the tag info for an external tag map
+ * @depth:	the maximum queue depth supported
+ * Return: the new tag map, or NULL on failure
+ **/
+struct blk_queue_tag *blk_init_tags(int depth)
+{
+	return __blk_queue_init_tags(NULL, depth);
+}
+EXPORT_SYMBOL(blk_init_tags);
+
 /**
  * blk_queue_init_tags - initialize the queue tag info
  * @q:  the request queue for the device
@@ -941,16 +1007,10 @@ int blk_queue_init_tags(request_queue_t *q, int depth,
 	BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
 
 	if (!tags && !q->queue_tags) {
-		tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
-		if (!tags)
-			goto fail;
+		tags = __blk_queue_init_tags(q, depth);
 
-		if (init_tag_map(q, tags, depth))
+		if (!tags)
 			goto fail;
-
-		INIT_LIST_HEAD(&tags->busy_list);
-		tags->busy = 0;
-		atomic_set(&tags->refcnt, 1);
 	} else if (q->queue_tags) {
 		if ((rc = blk_queue_resize_tags(q, depth)))
 			return rc;
@@ -1001,6 +1061,13 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth)
 		return 0;
 	}
 
+	/*
+	 * Currently cannot replace a shared tag map with a new
+	 * one, so error out if this is the case
+	 */
+	if (atomic_read(&bqt->refcnt) != 1)
+		return -EBUSY;
+
 	/*
 	 * save the old state info, so we can copy it back
 	 */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aafe82788b4ef..427b0d61be6c0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -746,6 +746,8 @@ extern void blk_queue_free_tags(request_queue_t *);
 extern int blk_queue_resize_tags(request_queue_t *, int);
 extern void blk_queue_invalidate_tags(request_queue_t *);
 extern long blk_congestion_wait(int rw, long timeout);
+extern struct blk_queue_tag *blk_init_tags(int);
+extern void blk_free_tags(struct blk_queue_tag *);
 
 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
-- 
GitLab


From 86e33a296c2c9ed6eece0bfff4ac776f42040504 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 30 Aug 2006 09:45:51 -0400
Subject: [PATCH 0133/1063] [SCSI] add shared tag map helpers

This patch adds support for sharing tag maps at the host level
(i.e. either every queue [LUN] has its own tag map or there's a single
one for the entire host).  This formulation is primarily intended to
help single issue queue hardware, like the aic7xxx

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/hosts.c     |  3 +++
 include/scsi/scsi_host.h |  7 +++++++
 include/scsi/scsi_tcq.h  | 14 +++++++++++++-
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index f244d4f6597a4..68ef1636678df 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -265,6 +265,9 @@ static void scsi_host_dev_release(struct device *dev)
 		destroy_workqueue(shost->work_q);
 
 	scsi_destroy_command_freelist(shost);
+	if (shost->bqt)
+		blk_free_tags(shost->bqt);
+
 	kfree(shost->shost_data);
 
 	if (parent)
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index b3dd90f3e8586..39c6f8cc20c3c 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -16,6 +16,7 @@ struct scsi_target;
 struct Scsi_Host;
 struct scsi_host_cmd_pool;
 struct scsi_transport_template;
+struct blk_queue_tag;
 
 
 /*
@@ -465,6 +466,12 @@ struct Scsi_Host {
 	struct scsi_host_template *hostt;
 	struct scsi_transport_template *transportt;
 
+	/*
+	 * area to keep a shared tag map (if needed, will be
+	 * NULL if not)
+	 */
+	struct blk_queue_tag	*bqt;
+
 	/*
 	 * The following two fields are protected with host_lock;
 	 * however, eh routines can safely access during eh processing
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index e47e36a4ef49f..4eea254b1ce9e 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -4,6 +4,7 @@
 #include <linux/blkdev.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
 
 
 #define MSG_SIMPLE_TAG	0x20
@@ -66,7 +67,8 @@ static inline void scsi_activate_tcq(struct scsi_device *sdev, int depth)
 		return;
 
 	if (!blk_queue_tagged(sdev->request_queue))
-		blk_queue_init_tags(sdev->request_queue, depth, NULL);
+		blk_queue_init_tags(sdev->request_queue, depth,
+				    sdev->host->bqt);
 
 	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
 }
@@ -131,4 +133,14 @@ static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag)
 	return sdev->current_cmnd;
 }
 
+/**
+ * scsi_init_shared_tag_map - create a shared tag map
+ * @shost:	the host to share the tag map among all devices
+ * @depth:	the total depth of the map
+ */
+static inline void scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
+{
+	shost->bqt = blk_init_tags(depth);
+}
+
 #endif /* _SCSI_SCSI_TCQ_H */
-- 
GitLab


From 85b6c720b0931101c8bcc3a5abdc2b8514b0fb4b Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Thu, 31 Aug 2006 18:15:22 -0400
Subject: [PATCH 0134/1063] [SCSI] sd: fix cache flushing on module removal
 (and individual device removal)

The fix isn't actually in sd: it's in scsi_device_get().  I modified it
to allow devices to be returned in SDEV_CANCEL, but not SDEV_DEL.  This
means that the device_remove_driver, which occurs in device_del() in
scsi_remove_device() after the device has gone into SDEV_CANCEL is now
effective at flushing the cache.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 94df671d776a3..37843927e47ff 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -851,14 +851,14 @@ EXPORT_SYMBOL(scsi_track_queue_full);
  */
 int scsi_device_get(struct scsi_device *sdev)
 {
-	if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL)
+	if (sdev->sdev_state == SDEV_DEL)
 		return -ENXIO;
 	if (!get_device(&sdev->sdev_gendev))
 		return -ENXIO;
-	if (!try_module_get(sdev->host->hostt->module)) {
-		put_device(&sdev->sdev_gendev);
-		return -ENXIO;
-	}
+	/* We can fail this if we're doing SCSI operations
+	 * from module exit (like cache flush) */
+	try_module_get(sdev->host->hostt->module);
+
 	return 0;
 }
 EXPORT_SYMBOL(scsi_device_get);
@@ -873,7 +873,10 @@ EXPORT_SYMBOL(scsi_device_get);
  */
 void scsi_device_put(struct scsi_device *sdev)
 {
-	module_put(sdev->host->hostt->module);
+	/* The module refcount will be zero if scsi_device_get()
+	 * was called from a module removal routine */
+	if (likely(module_refcount(sdev->host->hostt->module) != 0))
+		module_put(sdev->host->hostt->module);
 	put_device(&sdev->sdev_gendev);
 }
 EXPORT_SYMBOL(scsi_device_put);
-- 
GitLab


From e5b3cd42960a10c1bc3701d4f00767463c88ec9d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 21 Aug 2006 15:53:25 -0400
Subject: [PATCH 0135/1063] [SCSI] SCSI: sanitize INQUIRY strings

Sanitize the Vendor, Product, and Revision strings contained in an
INQUIRY result by setting all non-graphic or non-ASCII characters to ' '.
Since the standard disallows such characters, this will affect
only non-compliant devices.

To help maintain backward compatibility, NUL characters are treated
specially.  They are taken as string terminators; they and all the
following characters are set to ' '.  If some valid characters get
erased as a result... well, we weren't seeing them before so we haven't
lost anything.

The primary purpose of this change is to allow blacklist entries to
match devices with illegal Vendor or Product strings.

In addition, the patch updates a couple of function prototypes, giving
inq_result its correct type (unsigned char *).

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_scan.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index a24d3461fc788..31d05ab0b2fc3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -396,6 +396,32 @@ void scsi_target_reap(struct scsi_target *starget)
 	return;
 }
 
+/**
+ * sanitize_inquiry_string - remove non-graphical chars from an INQUIRY result string
+ * @s: INQUIRY result string to sanitize
+ * @len: length of the string
+ *
+ * Description:
+ *	The SCSI spec says that INQUIRY vendor, product, and revision
+ *	strings must consist entirely of graphic ASCII characters,
+ *	padded on the right with spaces.  Since not all devices obey
+ *	this rule, we will replace non-graphic or non-ASCII characters
+ *	with spaces.  Exception: a NUL character is interpreted as a
+ *	string terminator, so all the following characters are set to
+ *	spaces.
+ **/
+static void sanitize_inquiry_string(unsigned char *s, int len)
+{
+	int terminated = 0;
+
+	for (; len > 0; (--len, ++s)) {
+		if (*s == 0)
+			terminated = 1;
+		if (terminated || *s < 0x20 || *s > 0x7e)
+			*s = ' ';
+	}
+}
+
 /**
  * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY
  * @sdev:	scsi_device to probe
@@ -410,7 +436,7 @@ void scsi_target_reap(struct scsi_target *starget)
  *     INQUIRY data is in @inq_result; the scsi_level and INQUIRY length
  *     are copied to the scsi_device any flags value is stored in *@bflags.
  **/
-static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result,
+static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
 			  int result_len, int *bflags)
 {
 	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -469,7 +495,11 @@ static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result,
 	}
 
 	if (result == 0) {
-		response_len = (unsigned char) inq_result[4] + 5;
+		sanitize_inquiry_string(&inq_result[8], 8);
+		sanitize_inquiry_string(&inq_result[16], 16);
+		sanitize_inquiry_string(&inq_result[32], 4);
+
+		response_len = inq_result[4] + 5;
 		if (response_len > 255)
 			response_len = first_inquiry_len;	/* sanity */
 
@@ -575,7 +605,8 @@ static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result,
  *     SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
  *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
-static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
+static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
+		int *bflags)
 {
 	/*
 	 * XXX do not save the inquiry, since it can change underneath us,
-- 
GitLab


From ffd0436ed2e5a741c8d30062b489b989acf0a526 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:24 -0400
Subject: [PATCH 0136/1063] [SCSI] libiscsi, iscsi_tcp, iscsi_iser: check that
 burst lengths are valid.

iSCSI RFC states that the first burst length must not exceed the
max burst length. We currently assume targets will be good, but that may
not be the case, so this patch adds a check.

This patch also moves the unsol data out offset to the lib so the LLDs
do not have to track it.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.c | 18 ++++----------
 drivers/infiniband/ulp/iser/iscsi_iser.h |  1 -
 drivers/scsi/iscsi_tcp.c                 | 30 ++++++------------------
 drivers/scsi/iscsi_tcp.h                 |  1 -
 drivers/scsi/libiscsi.c                  | 25 +++++++++++++-------
 include/scsi/libiscsi.h                  |  5 ++--
 6 files changed, 31 insertions(+), 49 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 1437d7ee3b190..101e407eaa43d 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -141,18 +141,11 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask)
 
 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
 		BUG_ON(ctask->total_length == 0);
-		/* bytes to be sent via RDMA operations */
-		iser_ctask->rdma_data_count = ctask->total_length -
-					 ctask->imm_count -
-					 ctask->unsol_count;
 
-		debug_scsi("cmd [itt %x total %d imm %d unsol_data %d "
-			   "rdma_data %d]\n",
+		debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n",
 			   ctask->itt, ctask->total_length, ctask->imm_count,
-			   ctask->unsol_count, iser_ctask->rdma_data_count);
-	} else
-		/* bytes to be sent via RDMA operations */
-		iser_ctask->rdma_data_count = ctask->total_length;
+			   ctask->unsol_count);
+	}
 
 	iser_ctask_rdma_init(iser_ctask);
 }
@@ -196,13 +189,10 @@ iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn,
 {
 	struct iscsi_data  hdr;
 	int error = 0;
-	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
 
 	/* Send data-out PDUs while there's still unsolicited data to send */
 	while (ctask->unsol_count > 0) {
-		iscsi_prep_unsolicit_data_pdu(ctask, &hdr,
-					      iser_ctask->rdma_data_count);
-
+		iscsi_prep_unsolicit_data_pdu(ctask, &hdr);
 		debug_scsi("Sending data-out: itt 0x%x, data count %d\n",
 			   hdr.itt, ctask->data_count);
 
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 3350ba690cfe1..7c3d0c96d889f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -257,7 +257,6 @@ struct iscsi_iser_conn {
 struct iscsi_iser_cmd_task {
 	struct iser_desc             desc;
 	struct iscsi_iser_conn	     *iser_conn;
-	int			     rdma_data_count;/* RDMA bytes           */
 	enum iser_task_status 	     status;
 	int                          command_sent;  /* set if command  sent  */
 	int                          dir[ISER_DIRS_NUM];      /* set if dir use*/
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 058f094f945ab..a97a3a4e99eb5 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -1264,19 +1264,6 @@ iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 			    r2t->data_count);
 }
 
-static void
-iscsi_unsolicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_data_task *dtask;
-
-	dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask;
-	iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr,
-				      tcp_ctask->r2t_data_count);
-	iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr,
-			   sizeof(struct iscsi_hdr));
-}
-
 /**
  * iscsi_tcp_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
  * @conn: iscsi connection
@@ -1326,14 +1313,11 @@ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
 		if (ctask->unsol_count)
 			tcp_ctask->xmstate |= XMSTATE_UNS_HDR |
 						XMSTATE_UNS_INIT;
-		tcp_ctask->r2t_data_count = ctask->total_length -
-				    ctask->imm_count -
-				    ctask->unsol_count;
 
-		debug_scsi("cmd [itt 0x%x total %d imm %d imm_data %d "
-			   "r2t_data %d]\n",
+		debug_scsi("cmd [itt 0x%x total %d imm_data %d "
+			   "unsol count %d, unsol offset %d]\n",
 			   ctask->itt, ctask->total_length, ctask->imm_count,
-			   ctask->unsol_count, tcp_ctask->r2t_data_count);
+			   ctask->unsol_count, ctask->unsol_offset);
 	} else
 		tcp_ctask->xmstate = XMSTATE_R_HDR;
 
@@ -1531,8 +1515,10 @@ handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 
 	tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
 	if (tcp_ctask->xmstate & XMSTATE_UNS_INIT) {
-		iscsi_unsolicit_data_init(conn, ctask);
-		dtask = tcp_ctask->dtask;
+		dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask;
+		iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr);
+		iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr,
+				   sizeof(struct iscsi_hdr));
 		if (conn->hdrdgst_en)
 			iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
 					(u8*)dtask->hdrext);
@@ -1720,7 +1706,6 @@ handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 	 * Done with this R2T. Check if there are more
 	 * outstanding R2Ts ready to be processed.
 	 */
-	BUG_ON(tcp_ctask->r2t_data_count - r2t->data_length < 0);
 	if (conn->datadgst_en) {
 		rc = iscsi_digest_final_send(conn, ctask, &dtask->digestbuf,
 					    &dtask->digest, 1);
@@ -1732,7 +1717,6 @@ handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		debug_tcp("r2t done dout digest 0x%x\n", dtask->digest);
 	}
 
-	tcp_ctask->r2t_data_count -= r2t->data_length;
 	tcp_ctask->r2t = NULL;
 	spin_lock_bh(&session->lock);
 	__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h
index 6a4ee704e46e8..aace8f70dfd76 100644
--- a/drivers/scsi/iscsi_tcp.h
+++ b/drivers/scsi/iscsi_tcp.h
@@ -157,7 +157,6 @@ struct iscsi_tcp_cmd_task {
 	struct scatterlist	*bad_sg;		/* assert statement */
 	int			sg_count;		/* SG's to process  */
 	uint32_t		exp_r2tsn;
-	int			r2t_data_count;		/* R2T Data-Out bytes */
 	int			data_offset;
 	struct iscsi_r2t_info	*r2t;			/* in progress R2T    */
 	struct iscsi_queue	r2tpool;
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 5884cd26d53af..a7c6e70f4ef85 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -68,8 +68,7 @@ iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
 EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn);
 
 void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask,
-				   struct iscsi_data *hdr,
-				   int transport_data_cnt)
+				   struct iscsi_data *hdr)
 {
 	struct iscsi_conn *conn = ctask->conn;
 
@@ -82,14 +81,12 @@ void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask,
 
 	hdr->itt = ctask->hdr->itt;
 	hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
-
-	hdr->offset = cpu_to_be32(ctask->total_length -
-				  transport_data_cnt -
-				  ctask->unsol_count);
+	hdr->offset = cpu_to_be32(ctask->unsol_offset);
 
 	if (ctask->unsol_count > conn->max_xmit_dlength) {
 		hton24(hdr->dlength, conn->max_xmit_dlength);
 		ctask->data_count = conn->max_xmit_dlength;
+		ctask->unsol_offset += ctask->data_count;
 		hdr->flags = 0;
 	} else {
 		hton24(hdr->dlength, ctask->unsol_count);
@@ -125,6 +122,7 @@ static void iscsi_prep_scsi_cmd_pdu(struct iscsi_cmd_task *ctask)
         memcpy(hdr->cdb, sc->cmnd, sc->cmd_len);
         memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len);
 
+	ctask->data_count = 0;
 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
 		hdr->flags |= ISCSI_FLAG_CMD_WRITE;
 		/*
@@ -143,6 +141,7 @@ static void iscsi_prep_scsi_cmd_pdu(struct iscsi_cmd_task *ctask)
 		 */
 		ctask->imm_count = 0;
 		ctask->unsol_count = 0;
+		ctask->unsol_offset = 0;
 		ctask->unsol_datasn = 0;
 
 		if (session->imm_data_en) {
@@ -156,9 +155,12 @@ static void iscsi_prep_scsi_cmd_pdu(struct iscsi_cmd_task *ctask)
 		} else
 			zero_data(ctask->hdr->dlength);
 
-		if (!session->initial_r2t_en)
+		if (!session->initial_r2t_en) {
 			ctask->unsol_count = min(session->first_burst,
 				ctask->total_length) - ctask->imm_count;
+			ctask->unsol_offset = ctask->imm_count;
+		}
+
 		if (!ctask->unsol_count)
 			/* No unsolicit Data-Out's */
 			ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL;
@@ -1520,11 +1522,18 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
 	struct iscsi_conn *conn = cls_conn->dd_data;
 	struct iscsi_session *session = conn->session;
 
-	if (session == NULL) {
+	if (!session) {
 		printk(KERN_ERR "iscsi: can't start unbound connection\n");
 		return -EPERM;
 	}
 
+	if (session->first_burst > session->max_burst) {
+		printk(KERN_ERR "iscsi: invalid burst lengths: "
+		       "first_burst %d max_burst %d\n",
+		       session->first_burst, session->max_burst);
+		return -EINVAL;
+	}
+
 	spin_lock_bh(&session->lock);
 	conn->c_stage = ISCSI_CONN_STARTED;
 	session->state = ISCSI_STATE_LOGGED_IN;
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 41904f611d122..4900650bd0812 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -102,6 +102,8 @@ struct iscsi_cmd_task {
 	uint32_t		unsol_datasn;
 	int			imm_count;	/* imm-data (bytes)   */
 	int			unsol_count;	/* unsolicited (bytes)*/
+	/* offset in unsolicited stream (bytes) */
+	int			unsol_offset;
 	int			data_count;	/* remaining Data-Out */
 	struct scsi_cmnd	*sc;		/* associated SCSI cmd*/
 	int			total_length;
@@ -290,8 +292,7 @@ extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
 extern int iscsi_check_assign_cmdsn(struct iscsi_session *,
 				    struct iscsi_nopin *);
 extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *,
-					struct iscsi_data *hdr,
-					int transport_data_cnt);
+					struct iscsi_data *hdr);
 extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *,
 				char *, uint32_t);
 extern int iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *,
-- 
GitLab


From 60ecebf5a10e42f5e2d6e07eb9e24bdee8500b81 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:25 -0400
Subject: [PATCH 0137/1063] [SCSI] add refcounting around ctask usage in main IO
 path

It is possible that a ctask could be completing and getting
cleaned up at the same time that we are finishing up the last
data transfer. This could then result in the data transfer
code using stale or invalid values. This patch adds a refcount
to the ctask. When the count goes to zero then we know the
transmit thread and recv thread or softirq are not touching
it and we can safely release it.

The eh should not need to grab a reference because it only cleans
up a task if it has both the xmit mutex and recv lock (or recv
side suspended).

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/libiscsi.c | 59 ++++++++++++++++++++++++++++++++++-------
 include/scsi/libiscsi.h |  1 +
 2 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index a7c6e70f4ef85..9584cbc082fee 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -179,16 +179,15 @@ EXPORT_SYMBOL_GPL(iscsi_prep_scsi_cmd_pdu);
 
 /**
  * iscsi_complete_command - return command back to scsi-ml
- * @session: iscsi session
  * @ctask: iscsi cmd task
  *
  * Must be called with session lock.
  * This function returns the scsi command to scsi-ml and returns
  * the cmd task to the pool of available cmd tasks.
  */
-static void iscsi_complete_command(struct iscsi_session *session,
-				   struct iscsi_cmd_task *ctask)
+static void iscsi_complete_command(struct iscsi_cmd_task *ctask)
 {
+	struct iscsi_session *session = ctask->conn->session;
 	struct scsi_cmnd *sc = ctask->sc;
 
 	ctask->state = ISCSI_TASK_COMPLETED;
@@ -198,6 +197,35 @@ static void iscsi_complete_command(struct iscsi_session *session,
 	sc->scsi_done(sc);
 }
 
+static void __iscsi_get_ctask(struct iscsi_cmd_task *ctask)
+{
+	atomic_inc(&ctask->refcount);
+}
+
+static void iscsi_get_ctask(struct iscsi_cmd_task *ctask)
+{
+	spin_lock_bh(&ctask->conn->session->lock);
+	__iscsi_get_ctask(ctask);
+	spin_unlock_bh(&ctask->conn->session->lock);
+}
+
+static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+{
+	struct iscsi_conn *conn = ctask->conn;
+
+	if (atomic_dec_and_test(&ctask->refcount)) {
+		conn->session->tt->cleanup_cmd_task(conn, ctask);
+		iscsi_complete_command(ctask);
+	}
+}
+
+static void iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+{
+	spin_lock_bh(&ctask->conn->session->lock);
+	__iscsi_put_ctask(ctask);
+	spin_unlock_bh(&ctask->conn->session->lock);
+}
+
 /**
  * iscsi_cmd_rsp - SCSI Command Response processing
  * @conn: iscsi connection
@@ -274,7 +302,7 @@ static int iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		   (long)sc, sc->result, ctask->itt);
 	conn->scsirsp_pdus_cnt++;
 
-	iscsi_complete_command(conn->session, ctask);
+	__iscsi_put_ctask(ctask);
 	return rc;
 }
 
@@ -338,7 +366,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
 			if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
 				conn->scsirsp_pdus_cnt++;
-				iscsi_complete_command(session, ctask);
+				__iscsi_put_ctask(ctask);
 			}
 			break;
 		case ISCSI_OP_R2T:
@@ -563,7 +591,9 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
 	BUG_ON(conn->ctask && conn->mtask);
 
 	if (conn->ctask) {
+		iscsi_get_ctask(conn->ctask);
 		rc = tt->xmit_cmd_task(conn, conn->ctask);
+		iscsi_put_ctask(conn->ctask);
 		if (rc)
 			goto again;
 		/* done with this in-progress ctask */
@@ -604,12 +634,19 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
 					 struct iscsi_cmd_task, running);
 		conn->ctask->state = ISCSI_TASK_RUNNING;
 		list_move_tail(conn->xmitqueue.next, &conn->run_list);
+		__iscsi_get_ctask(conn->ctask);
 		spin_unlock_bh(&conn->session->lock);
 
 		rc = tt->xmit_cmd_task(conn, conn->ctask);
 		if (rc)
 			goto again;
+
 		spin_lock_bh(&conn->session->lock);
+		__iscsi_put_ctask(conn->ctask);
+		if (rc) {
+			spin_unlock_bh(&conn->session->lock);
+			goto again;
+		}
 	}
 	spin_unlock_bh(&conn->session->lock);
 	/* done with this ctask */
@@ -659,6 +696,7 @@ enum {
 	FAILURE_SESSION_FAILED,
 	FAILURE_SESSION_FREED,
 	FAILURE_WINDOW_CLOSED,
+	FAILURE_OOM,
 	FAILURE_SESSION_TERMINATE,
 	FAILURE_SESSION_IN_RECOVERY,
 	FAILURE_SESSION_RECOVERY_TIMEOUT,
@@ -717,10 +755,15 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 	conn = session->leadconn;
 
-	__kfifo_get(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
+	if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask,
+			 sizeof(void*))) {
+		reason = FAILURE_OOM;
+		goto reject;
+	}
 	sc->SCp.phase = session->age;
 	sc->SCp.ptr = (char *)ctask;
 
+	atomic_set(&ctask->refcount, 1);
 	ctask->state = ISCSI_TASK_PENDING;
 	ctask->mtask = NULL;
 	ctask->conn = conn;
@@ -1057,13 +1100,11 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 	sc = ctask->sc;
 	if (!sc)
 		return;
-
-	conn->session->tt->cleanup_cmd_task(conn, ctask);
 	iscsi_ctask_mtask_cleanup(ctask);
 
 	sc->result = err;
 	sc->resid = sc->request_bufflen;
-	iscsi_complete_command(conn->session, ctask);
+	__iscsi_put_ctask(ctask);
 }
 
 int iscsi_eh_abort(struct scsi_cmnd *sc)
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 4900650bd0812..401192e56e502 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -112,6 +112,7 @@ struct iscsi_cmd_task {
 
 	/* state set/tested under session->lock */
 	int			state;
+	atomic_t		refcount;
 	struct list_head	running;	/* running cmd list */
 	void			*dd_data;	/* driver/transport data */
 };
-- 
GitLab


From 98a9416af08385f8497e9c1595113a81aefa5d49 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:26 -0400
Subject: [PATCH 0138/1063] [SCSI] attempt to complete r2t with data len
 greater than max burst

A couple targets like string bean and MDS, send r2ts with
a data len greater than the max burst we agreed to. We
were being strict in our enforcing of the iscsi rfc in that
code path, but there is no driver limitation that prevents
us from fulfilling the request. To allow those targets
to work we will ignore the max_burst length and send as
much data as the target asks for assuming it has consciously
decided to override its max burst length.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/iscsi_tcp.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index a97a3a4e99eb5..d6927f1a6b65d 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -358,8 +358,11 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 	int r2tsn = be32_to_cpu(rhdr->r2tsn);
 	int rc;
 
-	if (tcp_conn->in.datalen)
+	if (tcp_conn->in.datalen) {
+		printk(KERN_ERR "iscsi_tcp: invalid R2t with datalen %d\n",
+		       tcp_conn->in.datalen);
 		return ISCSI_ERR_DATALEN;
+	}
 
 	if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn)
 		return ISCSI_ERR_R2TSN;
@@ -385,15 +388,23 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 
 	r2t->exp_statsn = rhdr->statsn;
 	r2t->data_length = be32_to_cpu(rhdr->data_length);
-	if (r2t->data_length == 0 ||
-	    r2t->data_length > session->max_burst) {
+	if (r2t->data_length == 0) {
+		printk(KERN_ERR "iscsi_tcp: invalid R2T with zero data len\n");
 		spin_unlock(&session->lock);
 		return ISCSI_ERR_DATALEN;
 	}
 
+	if (r2t->data_length > session->max_burst)
+		debug_scsi("invalid R2T with data len %u and max burst %u."
+			   "Attempting to execute request.\n",
+			    r2t->data_length, session->max_burst);
+
 	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
 	if (r2t->data_offset + r2t->data_length > ctask->total_length) {
 		spin_unlock(&session->lock);
+		printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at "
+		       "offset %u and total length %d\n", r2t->data_length,
+		       r2t->data_offset, ctask->total_length);
 		return ISCSI_ERR_DATALEN;
 	}
 
-- 
GitLab


From 62f383003c22cd34920d0412465eddcb1223da0d Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:27 -0400
Subject: [PATCH 0139/1063] [SCSI] iscsi_tcp: fix padding, data digests, and IO
 at weird offsets

iscsi_tcp calculates padding by using the expected transfer length. This
has the problem where if we have immediate data = no and initial R2T =
yes, and the transfer length ended up needing padding then we send:

1. header
2. padding which should have gone after data
3. data

Besides this bug, we also assume the target will always ask for nice
transfer lengths and the first burst length will always be a nice value.
As far as I can tell from the RFC this is not a requirement. It would be
silly to do this, but if someone did it we will end up doing bad things.

Finally the last bug in that bit of code is in our handling of the
recalculation of data digests when we do not send a whole iscsi_buf in
one try. The bug here is that we call crypto_digest_final on an
iscsi_sendpage error, then when we send the rest of the iscsi_buf, we
do iscsi_data_digest_init and this causes the previous data digest to be
lost.

And to make matters worse, some of these bugs are replicated over and
over and over again for immediate data, solicited data and unsolicited
data. So the attached patch made over the iscsi git tree (see
kernel.org/git for details) which I updated today to include the patches
I said I merged, consolidates the sending of data, padding and digests
and calculation of data digests and fixes the above bugs.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/iscsi_tcp.c | 636 +++++++++++++++------------------------
 drivers/scsi/iscsi_tcp.h |  33 +-
 drivers/scsi/libiscsi.c  |  37 ++-
 3 files changed, 286 insertions(+), 420 deletions(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index d6927f1a6b65d..290c1d76cd409 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -281,7 +281,6 @@ iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 {
 	struct iscsi_data *hdr;
 	struct scsi_cmnd *sc = ctask->sc;
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 
 	hdr = &r2t->dtask.hdr;
 	memset(hdr, 0, sizeof(struct iscsi_data));
@@ -336,10 +335,12 @@ iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 			sg_count += sg->length;
 		}
 		BUG_ON(r2t->sg == NULL);
-	} else
-		iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+	} else {
+		iscsi_buf_init_iov(&r2t->sendbuf,
 			    (char*)sc->request_buffer + r2t->data_offset,
 			    r2t->data_count);
+		r2t->sg = NULL;
+	}
 }
 
 /**
@@ -503,7 +504,6 @@ iscsi_tcp_hdr_recv(struct iscsi_conn *conn)
 			goto copy_hdr;
 
 		spin_lock(&session->lock);
-		iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask);
 		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
 		spin_unlock(&session->lock);
 		break;
@@ -676,15 +676,15 @@ iscsi_tcp_copy(struct iscsi_conn *conn)
 }
 
 static inline void
-partial_sg_digest_update(struct iscsi_tcp_conn *tcp_conn,
-			 struct scatterlist *sg, int offset, int length)
+partial_sg_digest_update(struct crypto_tfm *tfm, struct scatterlist *sg,
+			 int offset, int length)
 {
 	struct scatterlist temp;
 
 	memcpy(&temp, sg, sizeof(struct scatterlist));
 	temp.offset = offset;
 	temp.length = length;
-	crypto_digest_update(tcp_conn->data_rx_tfm, &temp, 1);
+	crypto_digest_update(tfm, &temp, 1);
 }
 
 static void
@@ -751,7 +751,8 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 							tcp_conn->data_rx_tfm,
 							&sg[i], 1);
 				else
-					partial_sg_digest_update(tcp_conn,
+					partial_sg_digest_update(
+							tcp_conn->data_rx_tfm,
 							&sg[i],
 							sg[i].offset + offset,
 							sg[i].length - offset);
@@ -765,7 +766,8 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 				/*
 				 * data-in is complete, but buffer not...
 				 */
-				partial_sg_digest_update(tcp_conn, &sg[i],
+				partial_sg_digest_update(tcp_conn->data_rx_tfm,
+						&sg[i],
 						sg[i].offset, sg[i].length-rc);
 			rc = 0;
 			break;
@@ -783,7 +785,6 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 			   (long)sc, sc->result, ctask->itt,
 			   tcp_conn->in.hdr->flags);
 		spin_lock(&conn->session->lock);
-		iscsi_tcp_cleanup_ctask(conn, ctask);
 		__iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
 		spin_unlock(&conn->session->lock);
 	}
@@ -803,9 +804,6 @@ iscsi_data_recv(struct iscsi_conn *conn)
 		rc = iscsi_scsi_data_in(conn);
 		break;
 	case ISCSI_OP_SCSI_CMD_RSP:
-		spin_lock(&conn->session->lock);
-		iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask);
-		spin_unlock(&conn->session->lock);
 	case ISCSI_OP_TEXT_RSP:
 	case ISCSI_OP_LOGIN_RSP:
 	case ISCSI_OP_ASYNC_EVENT:
@@ -1188,37 +1186,12 @@ iscsi_sendpage(struct iscsi_conn *conn, struct iscsi_buf *buf,
 
 static inline void
 iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn,
-		      struct iscsi_cmd_task *ctask)
+		      struct iscsi_tcp_cmd_task *tcp_ctask)
 {
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-
-	BUG_ON(!tcp_conn->data_tx_tfm);
 	crypto_digest_init(tcp_conn->data_tx_tfm);
 	tcp_ctask->digest_count = 4;
 }
 
-static int
-iscsi_digest_final_send(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
-			struct iscsi_buf *buf, uint32_t *digest, int final)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	int rc = 0;
-	int sent = 0;
-
-	if (final)
-		crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest);
-
-	iscsi_buf_init_iov(buf, (char*)digest, 4);
-	rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent);
-	if (rc) {
-		tcp_ctask->datadigest = *digest;
-		tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST;
-	} else
-		tcp_ctask->digest_count = 4;
-	return rc;
-}
-
 /**
  * iscsi_solicit_data_cont - initialize next Data-Out
  * @conn: iscsi connection
@@ -1236,7 +1209,6 @@ static void
 iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 			struct iscsi_r2t_info *r2t, int left)
 {
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 	struct iscsi_data *hdr;
 	struct scsi_cmnd *sc = ctask->sc;
 	int new_offset;
@@ -1265,14 +1237,30 @@ iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 	iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
 			   sizeof(struct iscsi_hdr));
 
-	if (sc->use_sg && !iscsi_buf_left(&r2t->sendbuf)) {
-		BUG_ON(tcp_ctask->bad_sg == r2t->sg);
+	if (iscsi_buf_left(&r2t->sendbuf))
+		return;
+
+	if (sc->use_sg) {
 		iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
 		r2t->sg += 1;
-	} else
-		iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+	} else {
+		iscsi_buf_init_iov(&r2t->sendbuf,
 			    (char*)sc->request_buffer + new_offset,
 			    r2t->data_count);
+		r2t->sg = NULL;
+	}
+}
+
+static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask,
+			      unsigned long len)
+{
+	tcp_ctask->pad_count = len & (ISCSI_PAD_LEN - 1);
+	if (!tcp_ctask->pad_count)
+		return;
+
+	tcp_ctask->pad_count = ISCSI_PAD_LEN - tcp_ctask->pad_count;
+	debug_scsi("write padding %d bytes\n", tcp_ctask->pad_count);
+	tcp_ctask->xmstate |= XMSTATE_W_PAD;
 }
 
 /**
@@ -1300,31 +1288,16 @@ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
 		if (sc->use_sg) {
 			struct scatterlist *sg = sc->request_buffer;
 
-			iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-					  &sg[tcp_ctask->sg_count++]);
-			tcp_ctask->sg = sg;
+			iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg);
+			tcp_ctask->sg = sg + 1;
 			tcp_ctask->bad_sg = sg + sc->use_sg;
-		} else
+		} else {
 			iscsi_buf_init_iov(&tcp_ctask->sendbuf,
 					   sc->request_buffer,
 					   sc->request_bufflen);
-
-		if (ctask->imm_count)
-			tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
-
-		tcp_ctask->pad_count = ctask->total_length & (ISCSI_PAD_LEN-1);
-		if (tcp_ctask->pad_count) {
-			tcp_ctask->pad_count = ISCSI_PAD_LEN -
-							tcp_ctask->pad_count;
-			debug_scsi("write padding %d bytes\n",
-				   tcp_ctask->pad_count);
-			tcp_ctask->xmstate |= XMSTATE_W_PAD;
+			tcp_ctask->sg = NULL;
+			tcp_ctask->bad_sg = NULL;
 		}
-
-		if (ctask->unsol_count)
-			tcp_ctask->xmstate |= XMSTATE_UNS_HDR |
-						XMSTATE_UNS_INIT;
-
 		debug_scsi("cmd [itt 0x%x total %d imm_data %d "
 			   "unsol count %d, unsol offset %d]\n",
 			   ctask->itt, ctask->total_length, ctask->imm_count,
@@ -1410,8 +1383,8 @@ iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
 }
 
 static inline int
-handle_xmstate_r_hdr(struct iscsi_conn *conn,
-		     struct iscsi_tcp_cmd_task *tcp_ctask)
+iscsi_send_read_hdr(struct iscsi_conn *conn,
+		    struct iscsi_tcp_cmd_task *tcp_ctask)
 {
 	int rc;
 
@@ -1429,7 +1402,7 @@ handle_xmstate_r_hdr(struct iscsi_conn *conn,
 }
 
 static inline int
-handle_xmstate_w_hdr(struct iscsi_conn *conn,
+iscsi_send_write_hdr(struct iscsi_conn *conn,
 		     struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
@@ -1440,85 +1413,125 @@ handle_xmstate_w_hdr(struct iscsi_conn *conn,
 		iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
 				 (u8*)tcp_ctask->hdrext);
 	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count);
-	if (rc)
+	if (rc) {
 		tcp_ctask->xmstate |= XMSTATE_W_HDR;
-	return rc;
+		return rc;
+	}
+
+	if (ctask->imm_count) {
+		tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
+		iscsi_set_padding(tcp_ctask, ctask->imm_count);
+
+		if (ctask->conn->datadgst_en) {
+			iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
+			tcp_ctask->immdigest = 0;
+		}
+	}
+
+	if (ctask->unsol_count)
+		tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
+	return 0;
 }
 
-static inline int
-handle_xmstate_data_digest(struct iscsi_conn *conn,
-			   struct iscsi_cmd_task *ctask)
+static int
+iscsi_send_padding(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	int rc;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	int sent = 0, rc;
 
-	tcp_ctask->xmstate &= ~XMSTATE_DATA_DIGEST;
-	debug_tcp("resent data digest 0x%x\n", tcp_ctask->datadigest);
-	rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf,
-				    &tcp_ctask->datadigest, 0);
+	if (tcp_ctask->xmstate & XMSTATE_W_PAD) {
+		iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
+				   tcp_ctask->pad_count);
+		if (conn->datadgst_en)
+			crypto_digest_update(tcp_conn->data_tx_tfm,
+					     &tcp_ctask->sendbuf.sg, 1);
+	} else if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_PAD))
+		return 0;
+
+	tcp_ctask->xmstate &= ~XMSTATE_W_PAD;
+	tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_PAD;
+	debug_scsi("sending %d pad bytes for itt 0x%x\n",
+		   tcp_ctask->pad_count, ctask->itt);
+	rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count,
+			   &sent);
 	if (rc) {
-		tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST;
-		debug_tcp("resent data digest 0x%x fail!\n",
-			  tcp_ctask->datadigest);
+		debug_scsi("padding send failed %d\n", rc);
+		tcp_ctask->xmstate |= XMSTATE_W_RESEND_PAD;
 	}
-
 	return rc;
 }
 
-static inline int
-handle_xmstate_imm_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int
+iscsi_send_digest(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
+			struct iscsi_buf *buf, uint32_t *digest)
 {
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	int rc;
+	struct iscsi_tcp_cmd_task *tcp_ctask;
+	struct iscsi_tcp_conn *tcp_conn;
+	int rc, sent = 0;
 
-	BUG_ON(!ctask->imm_count);
-	tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA;
+	if (!conn->datadgst_en)
+		return 0;
 
-	if (conn->datadgst_en) {
-		iscsi_data_digest_init(tcp_conn, ctask);
-		tcp_ctask->immdigest = 0;
-	}
+	tcp_ctask = ctask->dd_data;
+	tcp_conn = conn->dd_data;
 
-	for (;;) {
-		rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf,
-				   &ctask->imm_count, &tcp_ctask->sent);
-		if (rc) {
-			tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
-			if (conn->datadgst_en) {
-				crypto_digest_final(tcp_conn->data_tx_tfm,
-						(u8*)&tcp_ctask->immdigest);
-				debug_tcp("tx imm sendpage fail 0x%x\n",
-					  tcp_ctask->datadigest);
-			}
-			return rc;
-		}
-		if (conn->datadgst_en)
-			crypto_digest_update(tcp_conn->data_tx_tfm,
-					     &tcp_ctask->sendbuf.sg, 1);
+	if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_DATA_DIGEST)) {
+		crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest);
+		iscsi_buf_init_iov(buf, (char*)digest, 4);
+	}
+	tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_DATA_DIGEST;
 
-		if (!ctask->imm_count)
-			break;
-		iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-				  &tcp_ctask->sg[tcp_ctask->sg_count++]);
+	rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent);
+	if (!rc)
+		debug_scsi("sent digest 0x%x for itt 0x%x\n", *digest,
+			  ctask->itt);
+	else {
+		debug_scsi("sending digest 0x%x failed for itt 0x%x!\n",
+			  *digest, ctask->itt);
+		tcp_ctask->xmstate |= XMSTATE_W_RESEND_DATA_DIGEST;
 	}
+	return rc;
+}
 
-	if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) {
-		rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf,
-				            &tcp_ctask->immdigest, 1);
-		if (rc) {
-			debug_tcp("sending imm digest 0x%x fail!\n",
-				  tcp_ctask->immdigest);
-			return rc;
+static int
+iscsi_send_data(struct iscsi_cmd_task *ctask, struct iscsi_buf *sendbuf,
+		struct scatterlist **sg, int *sent, int *count,
+		struct iscsi_buf *digestbuf, uint32_t *digest)
+{
+	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+	struct iscsi_conn *conn = ctask->conn;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	int rc, buf_sent, offset;
+
+	while (*count) {
+		buf_sent = 0;
+		offset = sendbuf->sent;
+
+		rc = iscsi_sendpage(conn, sendbuf, count, &buf_sent);
+		*sent = *sent + buf_sent;
+		if (buf_sent && conn->datadgst_en)
+			partial_sg_digest_update(tcp_conn->data_tx_tfm,
+				&sendbuf->sg, sendbuf->sg.offset + offset,
+				buf_sent);
+		if (!iscsi_buf_left(sendbuf) && *sg != tcp_ctask->bad_sg) {
+			iscsi_buf_init_sg(sendbuf, *sg);
+			*sg = *sg + 1;
 		}
-		debug_tcp("sending imm digest 0x%x\n", tcp_ctask->immdigest);
+
+		if (rc)
+			return rc;
 	}
 
-	return 0;
+	rc = iscsi_send_padding(conn, ctask);
+	if (rc)
+		return rc;
+
+	return iscsi_send_digest(conn, ctask, digestbuf, digest);
 }
 
-static inline int
-handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int
+iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 	struct iscsi_data_task *dtask;
@@ -1526,14 +1539,21 @@ handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 
 	tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
 	if (tcp_ctask->xmstate & XMSTATE_UNS_INIT) {
-		dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask;
+		dtask = &tcp_ctask->unsol_dtask;
+
 		iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr);
 		iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr,
 				   sizeof(struct iscsi_hdr));
 		if (conn->hdrdgst_en)
 			iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
 					(u8*)dtask->hdrext);
+		if (conn->datadgst_en) {
+			iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
+			dtask->digest = 0;
+		}
+
 		tcp_ctask->xmstate &= ~XMSTATE_UNS_INIT;
+		iscsi_set_padding(tcp_ctask, ctask->data_count);
 	}
 
 	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->data_count);
@@ -1548,247 +1568,128 @@ handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 	return 0;
 }
 
-static inline int
-handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int
+iscsi_send_unsol_pdu(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_data_task *dtask = tcp_ctask->dtask;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	int rc;
 
-	BUG_ON(!ctask->data_count);
-	tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
-
-	if (conn->datadgst_en) {
-		iscsi_data_digest_init(tcp_conn, ctask);
-		dtask->digest = 0;
+	if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) {
+		BUG_ON(!ctask->unsol_count);
+		tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR;
+send_hdr:
+		rc = iscsi_send_unsol_hdr(conn, ctask);
+		if (rc)
+			return rc;
 	}
 
-	for (;;) {
+	if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) {
+		struct iscsi_data_task *dtask = &tcp_ctask->unsol_dtask;
 		int start = tcp_ctask->sent;
 
-		rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf,
-				   &ctask->data_count, &tcp_ctask->sent);
-		if (rc) {
-			ctask->unsol_count -= tcp_ctask->sent - start;
-			tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
-			/* will continue with this ctask later.. */
-			if (conn->datadgst_en) {
-				crypto_digest_final(tcp_conn->data_tx_tfm,
-						(u8 *)&dtask->digest);
-				debug_tcp("tx uns data fail 0x%x\n",
-					  dtask->digest);
-			}
-			return rc;
-		}
-
-		BUG_ON(tcp_ctask->sent > ctask->total_length);
+		rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
+				     &tcp_ctask->sent, &ctask->data_count,
+				     &dtask->digestbuf, &dtask->digest);
 		ctask->unsol_count -= tcp_ctask->sent - start;
-
+		if (rc)
+			return rc;
+		tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
 		/*
-		 * XXX:we may run here with un-initial sendbuf.
-		 * so pass it
+		 * Done with the Data-Out. Next, check if we need
+		 * to send another unsolicited Data-Out.
 		 */
-		if (conn->datadgst_en && tcp_ctask->sent - start > 0)
-			crypto_digest_update(tcp_conn->data_tx_tfm,
-					     &tcp_ctask->sendbuf.sg, 1);
-
-		if (!ctask->data_count)
-			break;
-		iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-				  &tcp_ctask->sg[tcp_ctask->sg_count++]);
-	}
-	BUG_ON(ctask->unsol_count < 0);
-
-	/*
-	 * Done with the Data-Out. Next, check if we need
-	 * to send another unsolicited Data-Out.
-	 */
-	if (ctask->unsol_count) {
-		if (conn->datadgst_en) {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &dtask->digestbuf,
-						    &dtask->digest, 1);
-			if (rc) {
-				debug_tcp("send uns digest 0x%x fail\n",
-					  dtask->digest);
-				return rc;
-			}
-			debug_tcp("sending uns digest 0x%x, more uns\n",
-				  dtask->digest);
-		}
-		tcp_ctask->xmstate |= XMSTATE_UNS_INIT;
-		return 1;
-	}
-
-	if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) {
-		rc = iscsi_digest_final_send(conn, ctask,
-					    &dtask->digestbuf,
-					    &dtask->digest, 1);
-		if (rc) {
-			debug_tcp("send last uns digest 0x%x fail\n",
-				   dtask->digest);
-			return rc;
+		if (ctask->unsol_count) {
+			debug_scsi("sending more uns\n");
+			tcp_ctask->xmstate |= XMSTATE_UNS_INIT;
+			goto send_hdr;
 		}
-		debug_tcp("sending uns digest 0x%x\n",dtask->digest);
 	}
-
 	return 0;
 }
 
-static inline int
-handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int iscsi_send_sol_pdu(struct iscsi_conn *conn,
+			      struct iscsi_cmd_task *ctask)
 {
-	struct iscsi_session *session = conn->session;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_r2t_info *r2t = tcp_ctask->r2t;
-	struct iscsi_data_task *dtask = &r2t->dtask;
+	struct iscsi_session *session = conn->session;
+	struct iscsi_r2t_info *r2t;
+	struct iscsi_data_task *dtask;
 	int left, rc;
 
-	tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
-	tcp_ctask->dtask = dtask;
-
-	if (conn->datadgst_en) {
-		iscsi_data_digest_init(tcp_conn, ctask);
-		dtask->digest = 0;
-	}
-solicit_again:
-	/*
-	 * send Data-Out within this R2T sequence.
-	 */
-	if (!r2t->data_count)
-		goto data_out_done;
-
-	rc = iscsi_sendpage(conn, &r2t->sendbuf, &r2t->data_count, &r2t->sent);
-	if (rc) {
+	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
+		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
 		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-		/* will continue with this ctask later.. */
-		if (conn->datadgst_en) {
-			crypto_digest_final(tcp_conn->data_tx_tfm,
-					  (u8 *)&dtask->digest);
-			debug_tcp("r2t data send fail 0x%x\n", dtask->digest);
-		}
-		return rc;
-	}
+		if (!tcp_ctask->r2t)
+			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
+				    sizeof(void*));
+send_hdr:
+		r2t = tcp_ctask->r2t;
+		dtask = &r2t->dtask;
 
-	BUG_ON(r2t->data_count < 0);
-	if (conn->datadgst_en)
-		crypto_digest_update(tcp_conn->data_tx_tfm, &r2t->sendbuf.sg,
-				     1);
-
-	if (r2t->data_count) {
-		BUG_ON(ctask->sc->use_sg == 0);
-		if (!iscsi_buf_left(&r2t->sendbuf)) {
-			BUG_ON(tcp_ctask->bad_sg == r2t->sg);
-			iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
-			r2t->sg += 1;
-		}
-		goto solicit_again;
-	}
+		if (conn->hdrdgst_en)
+			iscsi_hdr_digest(conn, &r2t->headbuf,
+					(u8*)dtask->hdrext);
 
-data_out_done:
-	/*
-	 * Done with this Data-Out. Next, check if we have
-	 * to send another Data-Out for this R2T.
-	 */
-	BUG_ON(r2t->data_length - r2t->sent < 0);
-	left = r2t->data_length - r2t->sent;
-	if (left) {
 		if (conn->datadgst_en) {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &dtask->digestbuf,
-						    &dtask->digest, 1);
-			if (rc) {
-				debug_tcp("send r2t data digest 0x%x"
-					  "fail\n", dtask->digest);
-				return rc;
-			}
-			debug_tcp("r2t data send digest 0x%x\n",
-				  dtask->digest);
+			iscsi_data_digest_init(conn->dd_data, tcp_ctask);
+			dtask->digest = 0;
 		}
-		iscsi_solicit_data_cont(conn, ctask, r2t, left);
-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
-		return 1;
-	}
 
-	/*
-	 * Done with this R2T. Check if there are more
-	 * outstanding R2Ts ready to be processed.
-	 */
-	if (conn->datadgst_en) {
-		rc = iscsi_digest_final_send(conn, ctask, &dtask->digestbuf,
-					    &dtask->digest, 1);
+		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
 		if (rc) {
-			debug_tcp("send last r2t data digest 0x%x"
-				  "fail\n", dtask->digest);
+			tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
+			tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
 			return rc;
 		}
-		debug_tcp("r2t done dout digest 0x%x\n", dtask->digest);
-	}
 
-	tcp_ctask->r2t = NULL;
-	spin_lock_bh(&session->lock);
-	__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
-	spin_unlock_bh(&session->lock);
-	if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		tcp_ctask->r2t = r2t;
-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
-		return 1;
+		iscsi_set_padding(tcp_ctask, r2t->data_count);
+		debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n",
+			r2t->solicit_datasn - 1, ctask->itt, r2t->data_count,
+			r2t->sent);
 	}
 
-	return 0;
-}
+	if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) {
+		r2t = tcp_ctask->r2t;
+		dtask = &r2t->dtask;
 
-static inline int
-handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	struct iscsi_data_task *dtask = tcp_ctask->dtask;
-	int sent = 0, rc;
+		rc = iscsi_send_data(ctask, &r2t->sendbuf, &r2t->sg,
+				     &r2t->sent, &r2t->data_count,
+				     &dtask->digestbuf, &dtask->digest);
+		if (rc)
+			return rc;
+		tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
 
-	tcp_ctask->xmstate &= ~XMSTATE_W_PAD;
-	iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
-			    tcp_ctask->pad_count);
-	rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count,
-			   &sent);
-	if (rc) {
-		tcp_ctask->xmstate |= XMSTATE_W_PAD;
-		return rc;
-	}
+		/*
+		 * Done with this Data-Out. Next, check if we have
+		 * to send another Data-Out for this R2T.
+		 */
+		BUG_ON(r2t->data_length - r2t->sent < 0);
+		left = r2t->data_length - r2t->sent;
+		if (left) {
+			iscsi_solicit_data_cont(conn, ctask, r2t, left);
+			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+			goto send_hdr;
+		}
 
-	if (conn->datadgst_en) {
-		crypto_digest_update(tcp_conn->data_tx_tfm,
-				     &tcp_ctask->sendbuf.sg, 1);
-		/* imm data? */
-		if (!dtask) {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &tcp_ctask->immbuf,
-						    &tcp_ctask->immdigest, 1);
-			if (rc) {
-				debug_tcp("send padding digest 0x%x"
-					  "fail!\n", tcp_ctask->immdigest);
-				return rc;
-			}
-			debug_tcp("done with padding, digest 0x%x\n",
-				  tcp_ctask->datadigest);
-		} else {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &dtask->digestbuf,
-						    &dtask->digest, 1);
-			if (rc) {
-				debug_tcp("send padding digest 0x%x"
-				          "fail\n", dtask->digest);
-				return rc;
-			}
-			debug_tcp("done with padding, digest 0x%x\n",
-				  dtask->digest);
+		/*
+		 * Done with this R2T. Check if there are more
+		 * outstanding R2Ts ready to be processed.
+		 */
+		spin_lock_bh(&session->lock);
+		tcp_ctask->r2t = NULL;
+		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
+			    sizeof(void*));
+		if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t,
+				sizeof(void*))) {
+			tcp_ctask->r2t = r2t;
+			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+			spin_unlock_bh(&session->lock);
+			goto send_hdr;
 		}
+		spin_unlock_bh(&session->lock);
 	}
-
 	return 0;
 }
 
@@ -1808,85 +1709,30 @@ iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		return rc;
 
 	if (tcp_ctask->xmstate & XMSTATE_R_HDR)
-		return handle_xmstate_r_hdr(conn, tcp_ctask);
+		return iscsi_send_read_hdr(conn, tcp_ctask);
 
 	if (tcp_ctask->xmstate & XMSTATE_W_HDR) {
-		rc = handle_xmstate_w_hdr(conn, ctask);
-		if (rc)
-			return rc;
-	}
-
-	/* XXX: for data digest xmit recover */
-	if (tcp_ctask->xmstate & XMSTATE_DATA_DIGEST) {
-		rc = handle_xmstate_data_digest(conn, ctask);
+		rc = iscsi_send_write_hdr(conn, ctask);
 		if (rc)
 			return rc;
 	}
 
 	if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) {
-		rc = handle_xmstate_imm_data(conn, ctask);
+		rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
+				     &tcp_ctask->sent, &ctask->imm_count,
+				     &tcp_ctask->immbuf, &tcp_ctask->immdigest);
 		if (rc)
 			return rc;
+		tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA;
 	}
 
-	if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) {
-		BUG_ON(!ctask->unsol_count);
-		tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR;
-unsolicit_head_again:
-		rc = handle_xmstate_uns_hdr(conn, ctask);
-		if (rc)
-			return rc;
-	}
-
-	if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) {
-		rc = handle_xmstate_uns_data(conn, ctask);
-		if (rc == 1)
-			goto unsolicit_head_again;
-		else if (rc)
-			return rc;
-		goto done;
-	}
-
-	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
-		struct iscsi_r2t_info *r2t;
-
-		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-		if (!tcp_ctask->r2t)
-			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
-				    sizeof(void*));
-solicit_head_again:
-		r2t = tcp_ctask->r2t;
-		if (conn->hdrdgst_en)
-			iscsi_hdr_digest(conn, &r2t->headbuf,
-					(u8*)r2t->dtask.hdrext);
-		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
-		if (rc) {
-			tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
-			tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
-			return rc;
-		}
-
-		debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n",
-			r2t->solicit_datasn - 1, ctask->itt, r2t->data_count,
-			r2t->sent);
-	}
-
-	if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) {
-		rc = handle_xmstate_sol_data(conn, ctask);
-		if (rc == 1)
-			goto solicit_head_again;
-		if (rc)
-			return rc;
-	}
+	rc = iscsi_send_unsol_pdu(conn, ctask);
+	if (rc)
+		return rc;
 
-done:
-	/*
-	 * Last thing to check is whether we need to send write
-	 * padding. Note that we check for xmstate equality, not just the bit.
-	 */
-	if (tcp_ctask->xmstate == XMSTATE_W_PAD)
-		rc = handle_xmstate_w_pad(conn, ctask);
+	rc = iscsi_send_sol_pdu(conn, ctask);
+	if (rc)
+		return rc;
 
 	return rc;
 }
diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h
index aace8f70dfd76..7e40e94d9fdc4 100644
--- a/drivers/scsi/iscsi_tcp.h
+++ b/drivers/scsi/iscsi_tcp.h
@@ -31,23 +31,21 @@
 #define IN_PROGRESS_DDIGEST_RECV	0x3
 
 /* xmit state machine */
-#define	XMSTATE_IDLE			0x0
-#define	XMSTATE_R_HDR			0x1
-#define	XMSTATE_W_HDR			0x2
-#define	XMSTATE_IMM_HDR			0x4
-#define	XMSTATE_IMM_DATA		0x8
-#define	XMSTATE_UNS_INIT		0x10
-#define	XMSTATE_UNS_HDR			0x20
-#define	XMSTATE_UNS_DATA		0x40
-#define	XMSTATE_SOL_HDR			0x80
-#define	XMSTATE_SOL_DATA		0x100
-#define	XMSTATE_W_PAD			0x200
-#define XMSTATE_DATA_DIGEST		0x400
-
-#define ISCSI_CONN_RCVBUF_MIN		262144
-#define ISCSI_CONN_SNDBUF_MIN		262144
+#define XMSTATE_IDLE			0x0
+#define XMSTATE_R_HDR			0x1
+#define XMSTATE_W_HDR			0x2
+#define XMSTATE_IMM_HDR			0x4
+#define XMSTATE_IMM_DATA		0x8
+#define XMSTATE_UNS_INIT		0x10
+#define XMSTATE_UNS_HDR			0x20
+#define XMSTATE_UNS_DATA		0x40
+#define XMSTATE_SOL_HDR			0x80
+#define XMSTATE_SOL_DATA		0x100
+#define XMSTATE_W_PAD			0x200
+#define XMSTATE_W_RESEND_PAD		0x400
+#define XMSTATE_W_RESEND_DATA_DIGEST	0x800
+
 #define ISCSI_PAD_LEN			4
-#define ISCSI_R2T_MAX			16
 #define ISCSI_SG_TABLESIZE		SG_ALL
 #define ISCSI_TCP_MAX_CMD_LEN		16
 
@@ -162,13 +160,10 @@ struct iscsi_tcp_cmd_task {
 	struct iscsi_queue	r2tpool;
 	struct kfifo		*r2tqueue;
 	struct iscsi_r2t_info	**r2ts;
-	uint32_t		datadigest;		/* for recover digest */
 	int			digest_count;
 	uint32_t		immdigest;		/* for imm data */
 	struct iscsi_buf	immbuf;			/* for imm data digest */
-	struct iscsi_data_task	*dtask;		/* data task in progress*/
 	struct iscsi_data_task	unsol_dtask;	/* unsol data task */
-	int			digest_offset;	/* for partial buff digest */
 };
 
 #endif /* ISCSI_H */
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 9584cbc082fee..fb65311c81ddf 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -325,6 +325,30 @@ static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 	wake_up(&conn->ehwait);
 }
 
+static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+			       char *data, int datalen)
+{
+	struct iscsi_reject *reject = (struct iscsi_reject *)hdr;
+	struct iscsi_hdr rejected_pdu;
+	uint32_t itt;
+
+	conn->exp_statsn = be32_to_cpu(reject->statsn) + 1;
+
+	if (reject->reason == ISCSI_REASON_DATA_DIGEST_ERROR) {
+		if (ntoh24(reject->dlength) > datalen)
+			return ISCSI_ERR_PROTO;
+
+		if (ntoh24(reject->dlength) >= sizeof(struct iscsi_hdr)) {
+			memcpy(&rejected_pdu, data, sizeof(struct iscsi_hdr));
+			itt = rejected_pdu.itt & ISCSI_ITT_MASK;
+			printk(KERN_ERR "itt 0x%x had pdu (op 0x%x) rejected "
+				"due to DataDigest error.\n", itt,
+				rejected_pdu.opcode);
+		}
+	}
+	return 0;
+}
+
 /**
  * __iscsi_complete_pdu - complete pdu
  * @conn: iscsi conn
@@ -436,6 +460,11 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			break;
 		}
 	} else if (itt == ISCSI_RESERVED_TAG) {
+		rc = iscsi_check_assign_cmdsn(session,
+					     (struct iscsi_nopin*)hdr);
+		if (rc)
+			goto done;
+
 		switch(opcode) {
 		case ISCSI_OP_NOOP_IN:
 			if (datalen) {
@@ -443,11 +472,6 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 				break;
 			}
 
-			rc = iscsi_check_assign_cmdsn(session,
-						 (struct iscsi_nopin*)hdr);
-			if (rc)
-				break;
-
 			if (hdr->ttt == ISCSI_RESERVED_TAG)
 				break;
 
@@ -455,7 +479,8 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 				rc = ISCSI_ERR_CONN_FAILED;
 			break;
 		case ISCSI_OP_REJECT:
-			/* we need sth like iscsi_reject_rsp()*/
+			rc = iscsi_handle_reject(conn, hdr, data, datalen);
+			break;
 		case ISCSI_OP_ASYNC_EVENT:
 			conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
 			/* we need sth like iscsi_async_event_rsp() */
-- 
GitLab


From dd8c0d958621e3137f3e3302f7b8952041a4a1d7 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:28 -0400
Subject: [PATCH 0140/1063] [SCSI] scsi_tcp: rm data rx and tx tfms

We currently allocate separate tfms for data and header digests. There
is no reason for this since we can never calculate an rx header digest
and an rx data digest at the same time. Same for sends. So this patch
removes the data tfms and has the send and recv sides use the rx_tfm or
tx_tfm.

I also made the connection creation code preallocate the tfms because I
thought I hit a bug where I changed the digest settings during a
relogin but could not allocate the tfm and then we just failed.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/iscsi_tcp.c | 102 ++++++++++++++-------------------------
 drivers/scsi/iscsi_tcp.h |   8 +--
 2 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 290c1d76cd409..82399f71028d2 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -693,7 +693,7 @@ iscsi_recv_digest_update(struct iscsi_tcp_conn *tcp_conn, char* buf, int len)
 	struct scatterlist tmp;
 
 	sg_init_one(&tmp, buf, len);
-	crypto_digest_update(tcp_conn->data_rx_tfm, &tmp, 1);
+	crypto_digest_update(tcp_conn->rx_tfm, &tmp, 1);
 }
 
 static int iscsi_scsi_data_in(struct iscsi_conn *conn)
@@ -748,11 +748,11 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 			if (conn->datadgst_en) {
 				if (!offset)
 					crypto_digest_update(
-							tcp_conn->data_rx_tfm,
+							tcp_conn->rx_tfm,
 							&sg[i], 1);
 				else
 					partial_sg_digest_update(
-							tcp_conn->data_rx_tfm,
+							tcp_conn->rx_tfm,
 							&sg[i],
 							sg[i].offset + offset,
 							sg[i].length - offset);
@@ -766,7 +766,7 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 				/*
 				 * data-in is complete, but buffer not...
 				 */
-				partial_sg_digest_update(tcp_conn->data_rx_tfm,
+				partial_sg_digest_update(tcp_conn->rx_tfm,
 						&sg[i],
 						sg[i].offset, sg[i].length-rc);
 			rc = 0;
@@ -885,10 +885,8 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 		 */
 		rc = iscsi_tcp_hdr_recv(conn);
 		if (!rc && tcp_conn->in.datalen) {
-			if (conn->datadgst_en) {
-				BUG_ON(!tcp_conn->data_rx_tfm);
-				crypto_digest_init(tcp_conn->data_rx_tfm);
-			}
+			if (conn->datadgst_en)
+				crypto_digest_init(tcp_conn->rx_tfm);
 			tcp_conn->in_progress = IN_PROGRESS_DATA_RECV;
 		} else if (rc) {
 			iscsi_conn_failure(conn, rc);
@@ -940,10 +938,10 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 					  tcp_conn->in.padding);
 				memset(pad, 0, tcp_conn->in.padding);
 				sg_init_one(&sg, pad, tcp_conn->in.padding);
-				crypto_digest_update(tcp_conn->data_rx_tfm,
+				crypto_digest_update(tcp_conn->rx_tfm,
 						     &sg, 1);
 			}
-			crypto_digest_final(tcp_conn->data_rx_tfm,
+			crypto_digest_final(tcp_conn->rx_tfm,
 					    (u8 *) & tcp_conn->in.datadgst);
 			debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
 			tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
@@ -1188,7 +1186,7 @@ static inline void
 iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn,
 		      struct iscsi_tcp_cmd_task *tcp_ctask)
 {
-	crypto_digest_init(tcp_conn->data_tx_tfm);
+	crypto_digest_init(tcp_conn->tx_tfm);
 	tcp_ctask->digest_count = 4;
 }
 
@@ -1444,7 +1442,7 @@ iscsi_send_padding(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
 				   tcp_ctask->pad_count);
 		if (conn->datadgst_en)
-			crypto_digest_update(tcp_conn->data_tx_tfm,
+			crypto_digest_update(tcp_conn->tx_tfm,
 					     &tcp_ctask->sendbuf.sg, 1);
 	} else if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_PAD))
 		return 0;
@@ -1477,7 +1475,7 @@ iscsi_send_digest(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 	tcp_conn = conn->dd_data;
 
 	if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_DATA_DIGEST)) {
-		crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest);
+		crypto_digest_final(tcp_conn->tx_tfm, (u8*)digest);
 		iscsi_buf_init_iov(buf, (char*)digest, 4);
 	}
 	tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_DATA_DIGEST;
@@ -1511,7 +1509,7 @@ iscsi_send_data(struct iscsi_cmd_task *ctask, struct iscsi_buf *sendbuf,
 		rc = iscsi_sendpage(conn, sendbuf, count, &buf_sent);
 		*sent = *sent + buf_sent;
 		if (buf_sent && conn->datadgst_en)
-			partial_sg_digest_update(tcp_conn->data_tx_tfm,
+			partial_sg_digest_update(tcp_conn->tx_tfm,
 				&sendbuf->sg, sendbuf->sg.offset + offset,
 				buf_sent);
 		if (!iscsi_buf_left(sendbuf) && *sg != tcp_ctask->bad_sg) {
@@ -1547,10 +1545,6 @@ iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		if (conn->hdrdgst_en)
 			iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
 					(u8*)dtask->hdrext);
-		if (conn->datadgst_en) {
-			iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
-			dtask->digest = 0;
-		}
 
 		tcp_ctask->xmstate &= ~XMSTATE_UNS_INIT;
 		iscsi_set_padding(tcp_ctask, ctask->data_count);
@@ -1563,6 +1557,12 @@ iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		return rc;
 	}
 
+	if (conn->datadgst_en) {
+		dtask = &tcp_ctask->unsol_dtask;
+		iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
+		dtask->digest = 0;
+	}
+
 	debug_scsi("uns dout [itt 0x%x dlen %d sent %d]\n",
 		   ctask->itt, ctask->unsol_count, tcp_ctask->sent);
 	return 0;
@@ -1629,12 +1629,6 @@ static int iscsi_send_sol_pdu(struct iscsi_conn *conn,
 		if (conn->hdrdgst_en)
 			iscsi_hdr_digest(conn, &r2t->headbuf,
 					(u8*)dtask->hdrext);
-
-		if (conn->datadgst_en) {
-			iscsi_data_digest_init(conn->dd_data, tcp_ctask);
-			dtask->digest = 0;
-		}
-
 		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
 		if (rc) {
 			tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
@@ -1642,6 +1636,11 @@ static int iscsi_send_sol_pdu(struct iscsi_conn *conn,
 			return rc;
 		}
 
+		if (conn->datadgst_en) {
+			iscsi_data_digest_init(conn->dd_data, tcp_ctask);
+			dtask->digest = 0;
+		}
+
 		iscsi_set_padding(tcp_ctask, r2t->data_count);
 		debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n",
 			r2t->solicit_datasn - 1, ctask->itt, r2t->data_count,
@@ -1764,8 +1763,20 @@ iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 	/* initial operational parameters */
 	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
 
+	tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c", 0);
+	if (!tcp_conn->tx_tfm)
+		goto free_tcp_conn;
+
+	tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c", 0);
+	if (!tcp_conn->rx_tfm)
+		goto free_tx_tfm;
+
 	return cls_conn;
 
+free_tx_tfm:
+	crypto_free_tfm(tcp_conn->tx_tfm);
+free_tcp_conn:
+	kfree(tcp_conn);
 tcp_conn_alloc_fail:
 	iscsi_conn_teardown(cls_conn);
 	return NULL;
@@ -1807,10 +1818,6 @@ iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
 			crypto_free_tfm(tcp_conn->tx_tfm);
 		if (tcp_conn->rx_tfm)
 			crypto_free_tfm(tcp_conn->rx_tfm);
-		if (tcp_conn->data_tx_tfm)
-			crypto_free_tfm(tcp_conn->data_tx_tfm);
-		if (tcp_conn->data_rx_tfm)
-			crypto_free_tfm(tcp_conn->data_rx_tfm);
 	}
 
 	kfree(tcp_conn);
@@ -1968,48 +1975,11 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
 	case ISCSI_PARAM_HDRDGST_EN:
 		iscsi_set_param(cls_conn, param, buf, buflen);
 		tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
-		if (conn->hdrdgst_en) {
+		if (conn->hdrdgst_en)
 			tcp_conn->hdr_size += sizeof(__u32);
-			if (!tcp_conn->tx_tfm)
-				tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c",
-								    0);
-			if (!tcp_conn->tx_tfm)
-				return -ENOMEM;
-			if (!tcp_conn->rx_tfm)
-				tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c",
-								    0);
-			if (!tcp_conn->rx_tfm) {
-				crypto_free_tfm(tcp_conn->tx_tfm);
-				return -ENOMEM;
-			}
-		} else {
-			if (tcp_conn->tx_tfm)
-				crypto_free_tfm(tcp_conn->tx_tfm);
-			if (tcp_conn->rx_tfm)
-				crypto_free_tfm(tcp_conn->rx_tfm);
-		}
 		break;
 	case ISCSI_PARAM_DATADGST_EN:
 		iscsi_set_param(cls_conn, param, buf, buflen);
-		if (conn->datadgst_en) {
-			if (!tcp_conn->data_tx_tfm)
-				tcp_conn->data_tx_tfm =
-				    crypto_alloc_tfm("crc32c", 0);
-			if (!tcp_conn->data_tx_tfm)
-				return -ENOMEM;
-			if (!tcp_conn->data_rx_tfm)
-				tcp_conn->data_rx_tfm =
-				    crypto_alloc_tfm("crc32c", 0);
-			if (!tcp_conn->data_rx_tfm) {
-				crypto_free_tfm(tcp_conn->data_tx_tfm);
-				return -ENOMEM;
-			}
-		} else {
-			if (tcp_conn->data_tx_tfm)
-				crypto_free_tfm(tcp_conn->data_tx_tfm);
-			if (tcp_conn->data_rx_tfm)
-				crypto_free_tfm(tcp_conn->data_rx_tfm);
-		}
 		tcp_conn->sendpage = conn->datadgst_en ?
 			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
 		break;
diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h
index 7e40e94d9fdc4..609f4778d1254 100644
--- a/drivers/scsi/iscsi_tcp.h
+++ b/drivers/scsi/iscsi_tcp.h
@@ -81,10 +81,6 @@ struct iscsi_tcp_conn {
 						 * stop to terminate */
 	/* iSCSI connection-wide sequencing */
 	int			hdr_size;	/* PDU header size */
-
-	struct crypto_tfm	*rx_tfm;	/* CRC32C (Rx) */
-	struct crypto_tfm	*data_rx_tfm;	/* CRC32C (Rx) for data */
-
 	/* control data */
 	struct iscsi_tcp_recv	in;		/* TCP receive context */
 	int			in_progress;	/* connection state machine */
@@ -94,9 +90,9 @@ struct iscsi_tcp_conn {
 	void			(*old_state_change)(struct sock *);
 	void			(*old_write_space)(struct sock *);
 
-	/* xmit */
+	/* data and header digests */
 	struct crypto_tfm	*tx_tfm;	/* CRC32C (Tx) */
-	struct crypto_tfm	*data_tx_tfm;	/* CRC32C (Tx) for data */
+	struct crypto_tfm	*rx_tfm;	/* CRC32C (Rx) */
 
 	/* MIB custom statistics */
 	uint32_t		sendpage_failures_cnt;
-- 
GitLab


From 753e7d3866748799e4a8769cd27ea7202654211b Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:29 -0400
Subject: [PATCH 0141/1063] [SCSI] iscsi_tcp: fix header resend

This patch built over the last ones fixes a bug in the partial header
resend code, where we add on another 4 bytes to the send length on the resend.
We want just the header plus digest.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/iscsi_tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 82399f71028d2..541912a5b886c 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -109,7 +109,7 @@ iscsi_hdr_digest(struct iscsi_conn *conn, struct iscsi_buf *buf,
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 
 	crypto_digest_digest(tcp_conn->tx_tfm, &buf->sg, 1, crc);
-	buf->sg.length += sizeof(uint32_t);
+	buf->sg.length = tcp_conn->hdr_size;
 }
 
 static inline int
-- 
GitLab


From d5390f5f788f01788e9dfd41ad516a2908901610 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:30 -0400
Subject: [PATCH 0142/1063] [SCSI] iscsi_tcp: update header size during relogin

When we relogin to a target, we have not yet negotiated digests
so we must reset the hdr_size var.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/iscsi_tcp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 541912a5b886c..5d292d0b65ec1 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -1827,9 +1827,11 @@ static void
 iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 
 	iscsi_conn_stop(cls_conn, flag);
 	iscsi_tcp_release_conn(conn);
+	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
 }
 
 static int
-- 
GitLab


From db98ccde0881b8247acb52dece6d94ed770a7aa5 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:31 -0400
Subject: [PATCH 0143/1063] [SCSI] libiscsi: only check burst lengths when
 sending unsol data

The first burst length is only relevant if immediate data = Yes
or if Initial R2T is No.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/libiscsi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index fb65311c81ddf..864c6284e83c3 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1593,7 +1593,8 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
 		return -EPERM;
 	}
 
-	if (session->first_burst > session->max_burst) {
+	if ((session->imm_data_en || !session->initial_r2t_en) &&
+	     session->first_burst > session->max_burst) {
 		printk("iscsi: invalid burst lengths: "
 		       "first_burst %d max_burst %d\n",
 		       session->first_burst, session->max_burst);
-- 
GitLab


From ca5186842a6d85e982e3d572ecd407453d0c5116 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:32 -0400
Subject: [PATCH 0144/1063] [SCSI] iscsi_tcp: fix partial digest recv

When a digest is spread across two network buffers, we currently
ignore this and try to check the digest with the partial buffer.
Of course this fails. This patch has us use iscsi_tcp_copy to
copy the whole digest before testing it.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/iscsi_tcp.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 5d292d0b65ec1..d91e8949c71e4 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -648,10 +648,9 @@ iscsi_ctask_copy(struct iscsi_tcp_conn *tcp_conn, struct iscsi_cmd_task *ctask,
  *	byte counters.
  **/
 static inline int
-iscsi_tcp_copy(struct iscsi_conn *conn)
+iscsi_tcp_copy(struct iscsi_conn *conn, int buf_size)
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	int buf_size = tcp_conn->in.datalen;
 	int buf_left = buf_size - tcp_conn->data_copied;
 	int size = min(tcp_conn->in.copy, buf_left);
 	int rc;
@@ -812,7 +811,7 @@ iscsi_data_recv(struct iscsi_conn *conn)
 		 * Collect data segment to the connection's data
 		 * placeholder
 		 */
-		if (iscsi_tcp_copy(conn)) {
+		if (iscsi_tcp_copy(conn, tcp_conn->in.datalen)) {
 			rc = -EAGAIN;
 			goto exit;
 		}
@@ -899,10 +898,15 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 
 		debug_tcp("extra data_recv offset %d copy %d\n",
 			  tcp_conn->in.offset, tcp_conn->in.copy);
-		skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
-				&recv_digest, 4);
-		tcp_conn->in.offset += 4;
-		tcp_conn->in.copy -= 4;
+		rc = iscsi_tcp_copy(conn, sizeof(uint32_t));
+		if (rc) {
+			if (rc == -EAGAIN)
+				goto again;
+			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+			return 0;
+		}
+
+		memcpy(&recv_digest, conn->data, sizeof(uint32_t));
 		if (recv_digest != tcp_conn->in.datadgst) {
 			debug_tcp("iscsi_tcp: data digest error!"
 				  "0x%x != 0x%x\n", recv_digest,
@@ -942,9 +946,10 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 						     &sg, 1);
 			}
 			crypto_digest_final(tcp_conn->rx_tfm,
-					    (u8 *) & tcp_conn->in.datadgst);
+					    (u8 *) &tcp_conn->in.datadgst);
 			debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
 			tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
+			tcp_conn->data_copied = 0;
 		} else
 			tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
 	}
-- 
GitLab


From f47f2cf5d4acf929a3aaa6957c3fc4622c358703 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:33 -0400
Subject: [PATCH 0145/1063] [SCSI] libiscsi: check that command ptr is set
 before accessing it

If the scsi eh sends a TUR and the session is down we could
return SCSI_ML_HOST_BUSY. scsi eh will ignore this and
ask us to abort the command, and we blindly access the
command ptr.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/libiscsi.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 864c6284e83c3..12b5c1800740d 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -192,6 +192,8 @@ static void iscsi_complete_command(struct iscsi_cmd_task *ctask)
 
 	ctask->state = ISCSI_TASK_COMPLETED;
 	ctask->sc = NULL;
+	/* SCSI eh reuses commands to verify us */
+	sc->SCp.ptr = NULL;
 	list_del_init(&ctask->running);
 	__kfifo_put(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
 	sc->scsi_done(sc);
@@ -737,6 +739,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 	sc->scsi_done = done;
 	sc->result = 0;
+	sc->SCp.ptr = NULL;
 
 	host = sc->device->host;
 	session = iscsi_hostdata(host->hostdata);
@@ -801,9 +804,10 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 	list_add_tail(&ctask->running, &conn->xmitqueue);
 	debug_scsi(
-	       "ctask enq [%s cid %d sc %lx itt 0x%x len %d cmdsn %d win %d]\n",
+	       "ctask enq [%s cid %d sc %p cdb 0x%x itt 0x%x len %d cmdsn %d "
+		"win %d]\n",
 		sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
-		conn->id, (long)sc, ctask->itt, sc->request_bufflen,
+		conn->id, sc, sc->cmnd[0], ctask->itt, sc->request_bufflen,
 		session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
 	spin_unlock(&session->lock);
 
@@ -1134,11 +1138,24 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 
 int iscsi_eh_abort(struct scsi_cmnd *sc)
 {
-	struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
-	struct iscsi_conn *conn = ctask->conn;
-	struct iscsi_session *session = conn->session;
+	struct iscsi_cmd_task *ctask;
+	struct iscsi_conn *conn;
+	struct iscsi_session *session;
 	int rc;
 
+	/*
+	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+	 * got the command.
+	 */
+	if (!sc->SCp.ptr) {
+		debug_scsi("sc never reached iscsi layer or it completed.\n");
+		return SUCCESS;
+	}
+
+	ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
+	conn = ctask->conn;
+	session = conn->session;
+
 	conn->eh_abort_cnt++;
 	debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt);
 
-- 
GitLab


From e648f63c6520d6e572573149c16a64d2c5ad7ec5 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:34 -0400
Subject: [PATCH 0146/1063] [SCSI] libiscsi: don't call into lld to cleanup
 task

In the normal IO path we should not be calling back
into the LLD since the LLD will have cleaned up the
task before or after calling complete pdu.

For the fail_command path we still need to do this
to force the cleanup.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/libiscsi.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 12b5c1800740d..c542d0e95e682 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -213,12 +213,8 @@ static void iscsi_get_ctask(struct iscsi_cmd_task *ctask)
 
 static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask)
 {
-	struct iscsi_conn *conn = ctask->conn;
-
-	if (atomic_dec_and_test(&ctask->refcount)) {
-		conn->session->tt->cleanup_cmd_task(conn, ctask);
+	if (atomic_dec_and_test(&ctask->refcount))
 		iscsi_complete_command(ctask);
-	}
 }
 
 static void iscsi_put_ctask(struct iscsi_cmd_task *ctask)
@@ -1129,10 +1125,13 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 	sc = ctask->sc;
 	if (!sc)
 		return;
+
+	conn->session->tt->cleanup_cmd_task(conn, ctask);
 	iscsi_ctask_mtask_cleanup(ctask);
 
 	sc->result = err;
 	sc->resid = sc->request_bufflen;
+	/* release ref from queuecommand */
 	__iscsi_put_ctask(ctask);
 }
 
-- 
GitLab


From 01dfc7fc56f4b7ec0e5344ab44fcf673ebfbf7fa Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Thu, 31 Aug 2006 18:09:35 -0400
Subject: [PATCH 0147/1063] [SCSI] iscsi class: update version

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 2ecd14188574e..7b0019cccce33 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -34,7 +34,7 @@
 #define ISCSI_SESSION_ATTRS 11
 #define ISCSI_CONN_ATTRS 11
 #define ISCSI_HOST_ATTRS 0
-#define ISCSI_TRANSPORT_VERSION "1.1-646"
+#define ISCSI_TRANSPORT_VERSION "2.0-685"
 
 struct iscsi_internal {
 	int daemon_pid;
-- 
GitLab


From 69bdd88ca2670c321fef774e77059516f836c6f2 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 1 Sep 2006 15:50:23 +0200
Subject: [PATCH 0148/1063] [SCSI] Wrong size information for devices with
 disabled read access

When accessing a device with disabled read access the capacity is set
randomly to 1GB. This makes it impossible for userspace tools to detect
invalid device capacities.

Signed-off-by: Mike Anderson <andmike@us.ibm.com>
Acked-by: Chris Mason <mason@suse.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 98bd3aab97391..638cff41d4367 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1215,7 +1215,7 @@ sd_read_capacity(struct scsi_disk *sdkp, char *diskname,
 		/* Either no media are present but the drive didn't tell us,
 		   or they are present but the read capacity command fails */
 		/* sdkp->media_present = 0; -- not always correct */
-		sdkp->capacity = 0x200000; /* 1 GB - random */
+		sdkp->capacity = 0; /* unknown mapped to zero - as usual */
 
 		return;
 	} else if (the_result && longrc) {
-- 
GitLab


From 5a25ba1677ab8d63890016a8c1bca68a3e0fbc7d Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Fri, 1 Sep 2006 03:12:19 -0400
Subject: [PATCH 0149/1063] [SCSI] Add Promise SuperTrak driver

Add Promise SuperTrak 'stex' driver, supporting SuperTrak
EX8350/8300/16350/16300 controllers.  The controller's firmware accepts
SCSI commands, handing them to the underlying RAID or JBOD disks.

The driver consisted of the following cleanups and fixes, beyond its
initial submission:

Ed Lin:
      stex: cleanup and minor fixes
      stex: add new device ids
      stex: update internal copy code path
      stex: add hard reset function
      stex: adjust command timeout in slave_config routine
      stex: use more efficient method for unload/shutdown flush

Jeff Garzik:
      [SCSI] Add Promise SuperTrak 'shasta' driver.
      Rename drivers/scsi/shasta.c to stex.c ("SuperTrak EX").
      [SCSI] stex: update with community comments from 'Promise SuperTrak' thread
      [SCSI] stex: Fix warning, trim trailing whitespace.
      [SCSI] stex: remove last remnants of "shasta" project code name
      [SCSI] stex: removed 6-byte command emulation
      [SCSI] stex: minor cleanups
      [SCSI] stex: minor fixes: irq flag, error return value
      [SCSI] stex: use dma_alloc_coherent()

Signed-off-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/Kconfig  |    7 +
 drivers/scsi/Makefile |    1 +
 drivers/scsi/stex.c   | 1316 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1324 insertions(+)
 create mode 100644 drivers/scsi/stex.c

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 7de5fdfdab677..c8c606589ea63 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1070,6 +1070,13 @@ config 53C700_LE_ON_BE
 	depends on SCSI_LASI700
 	default y
 
+config SCSI_STEX
+	tristate "Promise SuperTrak EX Series support"
+	depends on PCI && SCSI
+	---help---
+	  This driver supports Promise SuperTrak EX8350/8300/16350/16300
+	  Storage controllers.
+
 config SCSI_SYM53C8XX_2
 	tristate "SYM53C8XX Version 2 SCSI support"
 	depends on PCI && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 83da70decdd15..fd9aeb1ba07f8 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_SATA_ULI)	+= libata.o sata_uli.o
 obj-$(CONFIG_SCSI_SATA_MV)	+= libata.o sata_mv.o
 obj-$(CONFIG_SCSI_PDC_ADMA)	+= libata.o pdc_adma.o
 obj-$(CONFIG_SCSI_HPTIOP)	+= hptiop.o
+obj-$(CONFIG_SCSI_STEX)		+= stex.o
 
 obj-$(CONFIG_ARM)		+= arm/
 
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
new file mode 100644
index 0000000000000..fd093302bf1a9
--- /dev/null
+++ b/drivers/scsi/stex.c
@@ -0,0 +1,1316 @@
+/*
+ * SuperTrak EX Series Storage Controller driver for Linux
+ *
+ *	Copyright (C) 2005, 2006 Promise Technology Inc.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Written By:
+ *		Ed Lin <promise_linux@promise.com>
+ *
+ *	Version: 2.9.0.13
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/pci.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_host.h>
+
+#define DRV_NAME "stex"
+#define ST_DRIVER_VERSION "2.9.0.13"
+#define ST_VER_MAJOR 		2
+#define ST_VER_MINOR 		9
+#define ST_OEM 			0
+#define ST_BUILD_VER 		13
+
+enum {
+	/* MU register offset */
+	IMR0	= 0x10,	/* MU_INBOUND_MESSAGE_REG0 */
+	IMR1	= 0x14,	/* MU_INBOUND_MESSAGE_REG1 */
+	OMR0	= 0x18,	/* MU_OUTBOUND_MESSAGE_REG0 */
+	OMR1	= 0x1c,	/* MU_OUTBOUND_MESSAGE_REG1 */
+	IDBL	= 0x20,	/* MU_INBOUND_DOORBELL */
+	IIS	= 0x24,	/* MU_INBOUND_INTERRUPT_STATUS */
+	IIM	= 0x28,	/* MU_INBOUND_INTERRUPT_MASK */
+	ODBL	= 0x2c,	/* MU_OUTBOUND_DOORBELL */
+	OIS	= 0x30,	/* MU_OUTBOUND_INTERRUPT_STATUS */
+	OIM	= 0x3c,	/* MU_OUTBOUND_INTERRUPT_MASK */
+
+	/* MU register value */
+	MU_INBOUND_DOORBELL_HANDSHAKE		= 1,
+	MU_INBOUND_DOORBELL_REQHEADCHANGED	= 2,
+	MU_INBOUND_DOORBELL_STATUSTAILCHANGED	= 4,
+	MU_INBOUND_DOORBELL_HMUSTOPPED		= 8,
+	MU_INBOUND_DOORBELL_RESET		= 16,
+
+	MU_OUTBOUND_DOORBELL_HANDSHAKE		= 1,
+	MU_OUTBOUND_DOORBELL_REQUESTTAILCHANGED	= 2,
+	MU_OUTBOUND_DOORBELL_STATUSHEADCHANGED	= 4,
+	MU_OUTBOUND_DOORBELL_BUSCHANGE		= 8,
+	MU_OUTBOUND_DOORBELL_HASEVENT		= 16,
+
+	/* MU status code */
+	MU_STATE_STARTING			= 1,
+	MU_STATE_FMU_READY_FOR_HANDSHAKE	= 2,
+	MU_STATE_SEND_HANDSHAKE_FRAME		= 3,
+	MU_STATE_STARTED			= 4,
+	MU_STATE_RESETTING			= 5,
+
+	MU_MAX_DELAY_TIME			= 240000,
+	MU_HANDSHAKE_SIGNATURE			= 0x55aaaa55,
+	HMU_PARTNER_TYPE			= 2,
+
+	/* firmware returned values */
+	SRB_STATUS_SUCCESS			= 0x01,
+	SRB_STATUS_ERROR			= 0x04,
+	SRB_STATUS_BUSY				= 0x05,
+	SRB_STATUS_INVALID_REQUEST		= 0x06,
+	SRB_STATUS_SELECTION_TIMEOUT		= 0x0A,
+	SRB_SEE_SENSE 				= 0x80,
+
+	/* task attribute */
+	TASK_ATTRIBUTE_SIMPLE			= 0x0,
+	TASK_ATTRIBUTE_HEADOFQUEUE		= 0x1,
+	TASK_ATTRIBUTE_ORDERED			= 0x2,
+	TASK_ATTRIBUTE_ACA			= 0x4,
+
+	/* request count, etc. */
+	MU_MAX_REQUEST				= 32,
+	TAG_BITMAP_LENGTH			= MU_MAX_REQUEST,
+
+	/* one message wasted, use MU_MAX_REQUEST+1
+		to handle MU_MAX_REQUEST messages */
+	MU_REQ_COUNT				= (MU_MAX_REQUEST + 1),
+	MU_STATUS_COUNT				= (MU_MAX_REQUEST + 1),
+
+	STEX_CDB_LENGTH				= MAX_COMMAND_SIZE,
+	REQ_VARIABLE_LEN			= 1024,
+	STATUS_VAR_LEN				= 128,
+	ST_CAN_QUEUE				= MU_MAX_REQUEST,
+	ST_CMD_PER_LUN				= MU_MAX_REQUEST,
+	ST_MAX_SG				= 32,
+
+	/* sg flags */
+	SG_CF_EOT				= 0x80,	/* end of table */
+	SG_CF_64B				= 0x40,	/* 64 bit item */
+	SG_CF_HOST				= 0x20,	/* sg in host memory */
+
+	ST_MAX_ARRAY_SUPPORTED			= 16,
+	ST_MAX_TARGET_NUM			= (ST_MAX_ARRAY_SUPPORTED+1),
+	ST_MAX_LUN_PER_TARGET			= 16,
+
+	st_shasta				= 0,
+	st_vsc					= 1,
+
+	PASSTHRU_REQ_TYPE			= 0x00000001,
+	PASSTHRU_REQ_NO_WAKEUP			= 0x00000100,
+	ST_INTERNAL_TIMEOUT			= 30,
+
+	/* vendor specific commands of Promise */
+	ARRAY_CMD				= 0xe0,
+	CONTROLLER_CMD				= 0xe1,
+	DEBUGGING_CMD				= 0xe2,
+	PASSTHRU_CMD				= 0xe3,
+
+	PASSTHRU_GET_ADAPTER			= 0x05,
+	PASSTHRU_GET_DRVVER			= 0x10,
+	CTLR_POWER_STATE_CHANGE			= 0x0e,
+	CTLR_POWER_SAVING			= 0x01,
+
+	PASSTHRU_SIGNATURE			= 0x4e415041,
+
+	INQUIRY_EVPD				= 0x01,
+};
+
+struct st_sgitem {
+	u8 ctrl;	/* SG_CF_xxx */
+	u8 reserved[3];
+	__le32 count;
+	__le32 addr;
+	__le32 addr_hi;
+};
+
+struct st_sgtable {
+	__le16 sg_count;
+	__le16 max_sg_count;
+	__le32 sz_in_byte;
+	struct st_sgitem table[ST_MAX_SG];
+};
+
+struct handshake_frame {
+	__le32 rb_phy;		/* request payload queue physical address */
+	__le32 rb_phy_hi;
+	__le16 req_sz;		/* size of each request payload */
+	__le16 req_cnt;		/* count of reqs the buffer can hold */
+	__le16 status_sz;	/* size of each status payload */
+	__le16 status_cnt;	/* count of status the buffer can hold */
+	__le32 hosttime;	/* seconds from Jan 1, 1970 (GMT) */
+	__le32 hosttime_hi;
+	u8 partner_type;	/* who sends this frame */
+	u8 reserved0[7];
+	__le32 partner_ver_major;
+	__le32 partner_ver_minor;
+	__le32 partner_ver_oem;
+	__le32 partner_ver_build;
+	u32 reserved1[4];
+};
+
+struct req_msg {
+	__le16 tag;
+	u8 lun;
+	u8 target;
+	u8 task_attr;
+	u8 task_manage;
+	u8 prd_entry;
+	u8 payload_sz;		/* payload size in 4-byte */
+	u8 cdb[STEX_CDB_LENGTH];
+	u8 variable[REQ_VARIABLE_LEN];
+};
+
+struct status_msg {
+	__le16 tag;
+	u8 lun;
+	u8 target;
+	u8 srb_status;
+	u8 scsi_status;
+	u8 reserved;
+	u8 payload_sz;		/* payload size in 4-byte */
+	u8 variable[STATUS_VAR_LEN];
+};
+
+struct ver_info {
+	u32 major;
+	u32 minor;
+	u32 oem;
+	u32 build;
+	u32 reserved[2];
+};
+
+struct st_frame {
+	u32 base[6];
+	u32 rom_addr;
+
+	struct ver_info drv_ver;
+	struct ver_info bios_ver;
+
+	u32 bus;
+	u32 slot;
+	u32 irq_level;
+	u32 irq_vec;
+	u32 id;
+	u32 subid;
+
+	u32 dimm_size;
+	u8 dimm_type;
+	u8 reserved[3];
+
+	u32 channel;
+	u32 reserved1;
+};
+
+struct st_drvver {
+	u32 major;
+	u32 minor;
+	u32 oem;
+	u32 build;
+	u32 signature[2];
+	u8 console_id;
+	u8 host_no;
+	u8 reserved0[2];
+	u32 reserved[3];
+};
+
+#define MU_REQ_BUFFER_SIZE	(MU_REQ_COUNT * sizeof(struct req_msg))
+#define MU_STATUS_BUFFER_SIZE	(MU_STATUS_COUNT * sizeof(struct status_msg))
+#define MU_BUFFER_SIZE		(MU_REQ_BUFFER_SIZE + MU_STATUS_BUFFER_SIZE)
+#define STEX_BUFFER_SIZE	(MU_BUFFER_SIZE + sizeof(struct st_frame))
+
+struct st_ccb {
+	struct req_msg *req;		/* request slot; NULLed by stex_abort() */
+	struct scsi_cmnd *cmd;		/* NULL for driver-internal requests */
+
+	void *sense_buffer;		/* gets the payload of a non-GOOD status */
+	unsigned int sense_bufflen;	/* bytes available at sense_buffer */
+	int sg_count;			/* mapped sg entries (see stex_map_sg) */
+
+	u32 req_type;			/* PASSTHRU_REQ_* flags, 0 = normal cmd */
+	u8 srb_status;			/* copied from the firmware status_msg */
+	u8 scsi_status;			/* copied from the firmware status_msg */
+};
+
+struct st_hba {
+	void __iomem *mmio_base;	/* iomapped PCI memory space */
+	void *dma_mem;		/* coherent area: requests, statuses, copy buf */
+	dma_addr_t dma_handle;	/* bus address of dma_mem */
+
+	struct Scsi_Host *host;
+	struct pci_dev *pdev;
+
+	u32 tag;
+	u32 req_head;
+	u32 req_tail;
+	u32 status_head;
+	u32 status_tail;	/* next status entry the driver will read */
+
+	struct status_msg *status_buffer; /* dma_mem + MU_REQ_BUFFER_SIZE */
+	void *copy_buffer; /* temp buffer for driver-handled commands */
+	struct st_ccb ccb[MU_MAX_REQUEST];	/* indexed by request tag */
+	struct st_ccb *wait_ccb;	/* ccb being aborted, else NULL */
+	wait_queue_head_t waitq;	/* woken when a passthru request ends */
+
+	unsigned int mu_status;		/* MU_STATE_xxx */
+	int out_req_cnt;		/* requests sent but not yet completed */
+
+	unsigned int cardtype;		/* st_shasta or st_vsc */
+};
+
+static const char console_inq_page[] =
+{
+	0x03,0x00,0x03,0x03,0xFA,0x00,0x00,0x30,
+	0x50,0x72,0x6F,0x6D,0x69,0x73,0x65,0x20,	/* "Promise " */
+	0x52,0x41,0x49,0x44,0x20,0x43,0x6F,0x6E,	/* "RAID Con" */
+	0x73,0x6F,0x6C,0x65,0x20,0x20,0x20,0x20,	/* "sole    " */
+	0x31,0x2E,0x30,0x30,0x20,0x20,0x20,0x20,	/* "1.00    " */
+	0x53,0x58,0x2F,0x52,0x53,0x41,0x46,0x2D,	/* "SX/RSAF-" */
+	0x54,0x45,0x31,0x2E,0x30,0x30,0x20,0x20,	/* "TE1.00  " */
+	0x0C,0x20,0x20,0x20,0x20,0x20,0x20,0x20
+};
+
+MODULE_AUTHOR("Ed Lin");
+MODULE_DESCRIPTION("Promise Technology SuperTrak EX Controllers");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(ST_DRIVER_VERSION);
+
+static void stex_gettime(__le32 *time)
+{
+	struct timeval tv;
+	do_gettimeofday(&tv);
+	/* time[0] = low 32 bits of the seconds count, time[1] = the bits above */
+	*time = cpu_to_le32(tv.tv_sec & 0xffffffff);
+	*(time + 1) = cpu_to_le32((tv.tv_sec >> 16) >> 16);
+}
+
+static u16 __stex_alloc_tag(unsigned long *bitmap)
+{
+	int i;
+	i = find_first_zero_bit(bitmap, TAG_BITMAP_LENGTH);
+	if (i < TAG_BITMAP_LENGTH)
+		__set_bit(i, bitmap);
+	return (u16)i;
+}
+
+static u16 stex_alloc_tag(struct st_hba *hba, unsigned long *bitmap)
+{
+	unsigned long flags;
+	u16 tag;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	tag = __stex_alloc_tag(bitmap);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	return tag;
+}
+
+static void __stex_free_tag(unsigned long *bitmap, u16 tag)
+{
+	__clear_bit((int)tag, bitmap);
+}
+
+static void stex_free_tag(struct st_hba *hba, unsigned long *bitmap, u16 tag)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	__stex_free_tag(bitmap, tag);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+}
+
+static struct status_msg *stex_get_status(struct st_hba *hba)
+{
+	struct status_msg *status =
+		hba->status_buffer + hba->status_tail;
+	/* consume the entry: step status_tail, wrapping at MU_STATUS_COUNT */
+	++hba->status_tail;
+	hba->status_tail %= MU_STATUS_COUNT;
+
+	return status;
+}
+
+static void stex_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
+{
+	cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+	/* sense bytes not assigned below keep their previous contents */
+	cmd->sense_buffer[0] = 0x70;    /* fixed format, current */
+	cmd->sense_buffer[2] = sk;
+	cmd->sense_buffer[7] = 18 - 8;  /* additional sense length */
+	cmd->sense_buffer[12] = asc;
+	cmd->sense_buffer[13] = ascq;
+}
+
+static void stex_invalid_field(struct scsi_cmnd *cmd,
+			       void (*done)(struct scsi_cmnd *))
+{
+	/* "Invalid field in cdb": ILLEGAL_REQUEST with ASC 0x24, ASCQ 0x0 */
+	stex_set_sense(cmd, ILLEGAL_REQUEST, 0x24, 0x0);
+	done(cmd);	/* complete the command right away via the callback */
+}
+
+static struct req_msg *stex_alloc_req(struct st_hba *hba)
+{
+	/* request slots form an array at the very start of dma_mem */
+	struct req_msg *ring = hba->dma_mem;
+	struct req_msg *slot = &ring[hba->req_head];
+
+	/* hand out the current head slot and step the head, with wrap */
+	hba->req_head = (hba->req_head + 1) % MU_REQ_COUNT;
+	return slot;
+}
+
+static int stex_map_sg(struct st_hba *hba,
+	struct req_msg *req, struct st_ccb *ccb)
+{
+	struct pci_dev *pdev = hba->pdev;
+	struct scsi_cmnd *cmd;
+	dma_addr_t dma_handle;
+	struct scatterlist *src;
+	struct st_sgtable *dst;
+	int i;
+	/* DMA-map ccb->cmd's data and describe it in req->variable */
+	cmd = ccb->cmd;
+	dst = (struct st_sgtable *)req->variable;
+	dst->max_sg_count = cpu_to_le16(ST_MAX_SG);
+	dst->sz_in_byte = cpu_to_le32(cmd->request_bufflen);
+
+	if (cmd->use_sg) {
+		int n_elem;
+
+		src = (struct scatterlist *) cmd->request_buffer;
+		n_elem = pci_map_sg(pdev, src,
+			cmd->use_sg, cmd->sc_data_direction);
+		if (n_elem <= 0)
+			return -EIO;	/* mapping failed, table is incomplete */
+
+		ccb->sg_count = n_elem;
+		dst->sg_count = cpu_to_le16((u16)n_elem);
+		/* one 64-bit host entry per mapped segment, address split lo/hi */
+		for (i = 0; i < n_elem; i++, src++) {
+			dst->table[i].count = cpu_to_le32((u32)sg_dma_len(src));
+			dst->table[i].addr =
+				cpu_to_le32(sg_dma_address(src) & 0xffffffff);
+			dst->table[i].addr_hi =
+				cpu_to_le32((sg_dma_address(src) >> 16) >> 16);
+			dst->table[i].ctrl = SG_CF_64B | SG_CF_HOST;
+		}
+		dst->table[--i].ctrl |= SG_CF_EOT;	/* flag the last entry */
+		return 0;
+	}
+	/* no scatterlist: map request_buffer as a single end-of-table entry */
+	dma_handle = pci_map_single(pdev, cmd->request_buffer,
+		cmd->request_bufflen, cmd->sc_data_direction);
+	cmd->SCp.dma_handle = dma_handle;	/* read back by stex_unmap_sg() */
+
+	ccb->sg_count = 1;
+	dst->sg_count = cpu_to_le16(1);
+	dst->table[0].addr = cpu_to_le32(dma_handle & 0xffffffff);
+	dst->table[0].addr_hi = cpu_to_le32((dma_handle >> 16) >> 16);
+	dst->table[0].count = cpu_to_le32((u32)cmd->request_bufflen);
+	dst->table[0].ctrl = SG_CF_EOT | SG_CF_64B | SG_CF_HOST;
+
+	return 0;
+}
+
+static void stex_internal_copy(struct scsi_cmnd *cmd,
+	const void *src, size_t *count, int sg_count)
+{
+	size_t lcount;
+	size_t len;
+	void *s, *d, *base = NULL;
+	if (*count > cmd->request_bufflen)
+		*count = cmd->request_bufflen;	/* clamp to the buffer size */
+	lcount = *count;	/* bytes still to copy */
+	while (lcount) {
+		len = lcount;
+		s = (void *)src;
+		if (cmd->use_sg) {
+			size_t offset = *count - lcount; /* bytes copied so far */
+			s += offset;
+			base = scsi_kmap_atomic_sg(cmd->request_buffer,
+				sg_count, &offset, &len);
+			if (base == NULL) {
+				*count -= lcount; /* report what was copied */
+				return;
+			}
+			d = base + offset;
+		} else
+			d = cmd->request_buffer;
+		/* in the sg case len was passed by address and may have changed */
+		memcpy(d, s, len);
+
+		lcount -= len;
+		if (cmd->use_sg)
+			scsi_kunmap_atomic_sg(base);
+	}
+}
+
+static int stex_direct_copy(struct scsi_cmnd *cmd,
+	const void *src, size_t count)
+{
+	struct st_hba *hba = (struct st_hba *) &cmd->device->host->hostdata[0];
+	size_t cp_len = count;
+	int n_elem = 0;
+	/* returns nonzero only if all count bytes reached the command buffer */
+	if (cmd->use_sg) {
+		n_elem = pci_map_sg(hba->pdev, cmd->request_buffer,
+			cmd->use_sg, cmd->sc_data_direction);
+		if (n_elem <= 0)
+			return 0;
+	}
+	/* cp_len comes back as the number of bytes actually copied */
+	stex_internal_copy(cmd, src, &cp_len, n_elem);
+
+	if (cmd->use_sg)
+		pci_unmap_sg(hba->pdev, cmd->request_buffer,
+			cmd->use_sg, cmd->sc_data_direction);
+	return cp_len == count;
+}
+
+static void stex_controller_info(struct st_hba *hba, struct st_ccb *ccb)
+{
+	struct st_frame *p;
+	size_t count = sizeof(struct st_frame);
+	/* fill PCI/driver fields of the st_frame in copy_buffer, then copy out */
+	p = hba->copy_buffer;
+	memset(p->base, 0, sizeof(u32)*6);
+	*(unsigned long *)(p->base) = pci_resource_start(hba->pdev, 0);
+	p->rom_addr = 0;
+
+	p->drv_ver.major = ST_VER_MAJOR;
+	p->drv_ver.minor = ST_VER_MINOR;
+	p->drv_ver.oem = ST_OEM;
+	p->drv_ver.build = ST_BUILD_VER;
+
+	p->bus = hba->pdev->bus->number;
+	p->slot = hba->pdev->devfn;
+	p->irq_level = 0;
+	p->irq_vec = hba->pdev->irq;
+	p->id = hba->pdev->vendor << 16 | hba->pdev->device;
+	p->subid =
+		hba->pdev->subsystem_vendor << 16 | hba->pdev->subsystem_device;
+	/* fields not assigned above keep whatever copy_buffer already held */
+	stex_internal_copy(ccb->cmd, p, &count, ccb->sg_count);
+}
+
+static void
+stex_send_cmd(struct st_hba *hba, struct req_msg *req, u16 tag)
+{
+	req->tag = cpu_to_le16(tag);
+	req->task_attr = TASK_ATTRIBUTE_SIMPLE;
+	req->task_manage = 0; /* not supported yet */
+	req->payload_sz = (u8)(sizeof(struct req_msg)/sizeof(u32));
+	/* NOTE(review): req_msg exceeds 1020 bytes, so the u8 cast truncates */
+	hba->ccb[tag].req = req;
+	hba->out_req_cnt++;
+	/* publish the new head index, then ring the inbound doorbell */
+	writel(hba->req_head, hba->mmio_base + IMR0);
+	writel(MU_INBOUND_DOORBELL_REQHEADCHANGED, hba->mmio_base + IDBL);
+	readl(hba->mmio_base + IDBL); /* flush */
+}
+
+static int
+stex_slave_config(struct scsi_device *sdev)
+{
+	sdev->use_10_for_rw = 1;
+	sdev->use_10_for_ms = 1;
+	sdev->timeout = 60 * HZ;
+	return 0;
+}
+
+static void
+stex_slave_destroy(struct scsi_device *sdev)
+{
+	struct st_hba *hba = (struct st_hba *) sdev->host->hostdata;
+	struct req_msg *req;
+	unsigned long flags;
+	unsigned long before;
+	u16 tag;
+
+	if (sdev->type != TYPE_DISK)
+		return;
+
+	before = jiffies;
+	while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag))
+		== TAG_BITMAP_LENGTH) {
+		if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ))
+			return;
+		msleep(10);
+	}
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	req = stex_alloc_req(hba);
+	memset(req->cdb, 0, STEX_CDB_LENGTH);
+
+	req->target = sdev->id;
+	req->lun = sdev->channel; /* firmware lun issue work around */
+	req->cdb[0] = SYNCHRONIZE_CACHE;
+
+	hba->ccb[tag].cmd = NULL;
+	hba->ccb[tag].sg_count = 0;
+	hba->ccb[tag].sense_bufflen = 0;
+	hba->ccb[tag].sense_buffer = NULL;
+	hba->ccb[tag].req_type |= PASSTHRU_REQ_TYPE;
+
+	stex_send_cmd(hba, req, tag);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	wait_event_timeout(hba->waitq,
+		!(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ);
+	if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE)
+		return;
+
+	stex_free_tag(hba, (unsigned long *)&hba->tag, tag);
+}
+
+static int
+stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
+{
+	struct st_hba *hba;
+	struct Scsi_Host *host;
+	unsigned int id,lun;
+	struct req_msg *req;
+	u16 tag;
+	host = cmd->device->host;
+	id = cmd->device->id;
+	lun = cmd->device->channel; /* firmware lun issue work around */
+	hba = (struct st_hba *) &host->hostdata[0];
+	/* commands answered by the driver itself complete here and return 0 */
+	switch (cmd->cmnd[0]) {
+	case MODE_SENSE_10:
+	{
+		static char ms10_caching_page[12] =
+			{ 0, 0x12, 0, 0, 0, 0, 0, 0, 0x8, 0xa, 0x4, 0 };
+		unsigned char page;
+		page = cmd->cmnd[2] & 0x3f;
+		if (page == 0x8 || page == 0x3f) {
+			stex_direct_copy(cmd, ms10_caching_page,
+					sizeof(ms10_caching_page));
+			cmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+			done(cmd);
+		} else
+			stex_invalid_field(cmd, done);
+		return 0;
+	}
+	case INQUIRY:
+		if (id != ST_MAX_ARRAY_SUPPORTED)
+			break;	/* not the console id: let firmware answer */
+		if (lun == 0 && (cmd->cmnd[1] & INQUIRY_EVPD) == 0) {
+			stex_direct_copy(cmd, console_inq_page,
+				sizeof(console_inq_page));
+			cmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+			done(cmd);
+		} else
+			stex_invalid_field(cmd, done);
+		return 0;
+	case PASSTHRU_CMD:
+		if (cmd->cmnd[1] == PASSTHRU_GET_DRVVER) {
+			struct st_drvver ver = { 0 }; /* don't leak stack bytes */
+			ver.major = ST_VER_MAJOR;
+			ver.minor = ST_VER_MINOR;
+			ver.oem = ST_OEM;
+			ver.build = ST_BUILD_VER;
+			ver.signature[0] = PASSTHRU_SIGNATURE;
+			ver.console_id = ST_MAX_ARRAY_SUPPORTED;
+			ver.host_no = hba->host->host_no;
+			cmd->result = stex_direct_copy(cmd, &ver, sizeof(ver)) ?
+				DID_OK << 16 | COMMAND_COMPLETE << 8 :
+				DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+			done(cmd);
+			return 0;
+		}	/* any other passthru opcode is sent to the firmware */
+	default:
+		break;
+	}
+
+	cmd->scsi_done = done;
+
+	if (unlikely((tag = __stex_alloc_tag((unsigned long *)&hba->tag))
+		== TAG_BITMAP_LENGTH))
+		return SCSI_MLQUEUE_HOST_BUSY;
+
+	req = stex_alloc_req(hba);
+	req->lun = lun;
+	req->target = id;
+
+	/* cdb */
+	memcpy(req->cdb, cmd->cmnd, STEX_CDB_LENGTH);
+
+	hba->ccb[tag].cmd = cmd;
+	hba->ccb[tag].sense_bufflen = SCSI_SENSE_BUFFERSIZE;
+	hba->ccb[tag].sense_buffer = cmd->sense_buffer;
+	hba->ccb[tag].req_type = 0;
+	/* NOTE(review): a stex_map_sg() failure (-EIO) is ignored here */
+	if (cmd->sc_data_direction != DMA_NONE)
+		stex_map_sg(hba, req, &hba->ccb[tag]);
+
+	stex_send_cmd(hba, req, tag);
+	return 0;
+}
+
+static void stex_unmap_sg(struct st_hba *hba, struct scsi_cmnd *cmd)
+{
+	if (cmd->sc_data_direction == DMA_NONE)
+		return;		/* no data transfer: nothing to unmap */
+	if (!cmd->use_sg)	/* single buffer, handle kept in SCp */
+		pci_unmap_single(hba->pdev, cmd->SCp.dma_handle,
+			cmd->request_bufflen, cmd->sc_data_direction);
+	else
+		pci_unmap_sg(hba->pdev, cmd->request_buffer,
+			cmd->use_sg, cmd->sc_data_direction);
+}
+
+static void stex_scsi_done(struct st_ccb *ccb)
+{
+	struct scsi_cmnd *cmd = ccb->cmd;
+	int result;
+	/* translate firmware srb/scsi status into cmd->result, then complete */
+	if (ccb->srb_status == SRB_STATUS_SUCCESS ||  ccb->srb_status == 0) {
+		result = ccb->scsi_status;
+		switch (ccb->scsi_status) {
+		case SAM_STAT_GOOD:
+			result |= DID_OK << 16 | COMMAND_COMPLETE << 8;
+			break;
+		case SAM_STAT_CHECK_CONDITION:
+			result |= DRIVER_SENSE << 24;
+			break;
+		case SAM_STAT_BUSY:
+			result |= DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8;
+			break;
+		default:
+			result |= DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+			break;
+		}
+	}
+	else if (ccb->srb_status & SRB_SEE_SENSE)
+		result = DRIVER_SENSE << 24 | SAM_STAT_CHECK_CONDITION;
+	else switch (ccb->srb_status) {
+		case SRB_STATUS_SELECTION_TIMEOUT:
+			result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+			break;
+		case SRB_STATUS_BUSY:
+			result = DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8;
+			break;
+		case SRB_STATUS_INVALID_REQUEST:
+		case SRB_STATUS_ERROR:
+		default:
+			result = DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+			break;
+	}
+	/* hand the command back through the callback saved at queue time */
+	cmd->result = result;
+	cmd->scsi_done(cmd);
+}
+
+static void stex_copy_data(struct st_ccb *ccb,
+	struct status_msg *resp, unsigned int variable)
+{
+	size_t nbytes;
+
+	if (resp->scsi_status == SAM_STAT_GOOD) {	/* payload is data */
+		if (ccb->cmd) {
+			nbytes = variable;
+			stex_internal_copy(ccb->cmd, resp->variable,
+				&nbytes, ccb->sg_count);
+		}
+	} else if (ccb->sense_buffer)	/* otherwise treat it as sense data */
+		memcpy(ccb->sense_buffer, resp->variable,
+			min(variable, ccb->sense_bufflen));
+}
+
+static void stex_mu_intr(struct st_hba *hba, u32 doorbell)
+{
+	void __iomem *base = hba->mmio_base;
+	struct status_msg *resp;
+	struct st_ccb *ccb;
+	unsigned int size;
+	u16 tag;
+	/* handle the status entries announced by an outbound doorbell value */
+	if (!(doorbell & MU_OUTBOUND_DOORBELL_STATUSHEADCHANGED))
+		return;
+
+	/* status payloads */
+	hba->status_head = readl(base + OMR1);
+	if (unlikely(hba->status_head >= MU_STATUS_COUNT)) {
+		printk(KERN_WARNING DRV_NAME "(%s): invalid status head\n",
+			pci_name(hba->pdev));
+		return;
+	}
+	/* not started, or nothing outstanding: drop every pending status */
+	if (unlikely(hba->mu_status != MU_STATE_STARTED ||
+		hba->out_req_cnt <= 0)) {
+		hba->status_tail = hba->status_head;
+		goto update_status;
+	}
+
+	while (hba->status_tail != hba->status_head) {
+		resp = stex_get_status(hba);
+		tag = le16_to_cpu(resp->tag);
+		if (unlikely(tag >= TAG_BITMAP_LENGTH)) {
+			printk(KERN_WARNING DRV_NAME
+				"(%s): invalid tag\n", pci_name(hba->pdev));
+			continue;
+		}
+		if (unlikely((hba->tag & (1 << tag)) == 0)) {
+			printk(KERN_WARNING DRV_NAME
+				"(%s): null tag\n", pci_name(hba->pdev));
+			continue;
+		}
+
+		hba->out_req_cnt--;
+		ccb = &hba->ccb[tag];
+		if (hba->wait_ccb == ccb)
+			hba->wait_ccb = NULL;
+		if (unlikely(ccb->req == NULL)) {
+			printk(KERN_WARNING DRV_NAME
+				"(%s): lagging req\n", pci_name(hba->pdev));
+			__stex_free_tag((unsigned long *)&hba->tag, tag);
+			stex_unmap_sg(hba, ccb->cmd); /* ??? */
+			continue;
+		}
+
+		size = resp->payload_sz * sizeof(u32); /* payload size */
+		if (unlikely(size < sizeof(*resp) - STATUS_VAR_LEN ||
+			size > sizeof(*resp))) {
+			printk(KERN_WARNING DRV_NAME "(%s): bad status size\n",
+				pci_name(hba->pdev));
+		} else {
+			size -= sizeof(*resp) - STATUS_VAR_LEN; /* copy size */
+			if (size)
+				stex_copy_data(ccb, resp, size);
+		}
+
+		ccb->srb_status = resp->srb_status;
+		ccb->scsi_status = resp->scsi_status;
+		/* internal requests: just clear req_type (and wake any waiter) */
+		if (ccb->req_type & PASSTHRU_REQ_TYPE) {
+			if (ccb->req_type & PASSTHRU_REQ_NO_WAKEUP) {
+				ccb->req_type = 0;
+				continue;
+			}
+			ccb->req_type = 0;
+			if (waitqueue_active(&hba->waitq))
+				wake_up(&hba->waitq);
+			continue;
+		}
+		if (ccb->cmd->cmnd[0] == PASSTHRU_CMD &&
+			ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER)
+			stex_controller_info(hba, ccb);
+		__stex_free_tag((unsigned long *)&hba->tag, tag);
+		stex_unmap_sg(hba, ccb->cmd);
+		stex_scsi_done(ccb);
+	}
+
+update_status:
+	writel(hba->status_head, base + IMR1);
+	readl(base + IMR1); /* flush */
+}
+
+static irqreturn_t stex_intr(int irq, void *__hba, struct pt_regs *regs)
+{
+	struct st_hba *hba = __hba;
+	void __iomem *odbl = hba->mmio_base + ODBL;
+	unsigned long flags;
+	int handled;
+	u32 pending;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+
+	/* a doorbell value of zero or all-ones is not treated as ours */
+	pending = readl(odbl);
+	handled = pending != 0 && pending != 0xffffffff;
+	if (handled) {
+		/* clear the interrupt by writing the bits back, then flush */
+		writel(pending, odbl);
+		readl(odbl);
+		stex_mu_intr(hba, pending);
+	}
+
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return IRQ_RETVAL(handled);
+}
+
+static int stex_handshake(struct st_hba *hba)
+{
+	void __iomem *base = hba->mmio_base;
+	struct handshake_frame *h;
+	dma_addr_t status_phys;
+	int i;
+	/* returns 0 once the signature reappears after the frame, else -1 */
+	if (readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE) {
+		writel(MU_INBOUND_DOORBELL_HANDSHAKE, base + IDBL);
+		readl(base + IDBL);
+		for (i = 0; readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE
+			&& i < MU_MAX_DELAY_TIME; i++) {
+			rmb();
+			msleep(1);
+		}
+		/* polled in 1 ms sleeps, MU_MAX_DELAY_TIME iterations at most */
+		if (i == MU_MAX_DELAY_TIME) {
+			printk(KERN_ERR DRV_NAME
+				"(%s): no handshake signature\n",
+				pci_name(hba->pdev));
+			return -1;
+		}
+	}
+
+	udelay(10);
+	/* the frame is built at dma_mem + MU_REQ_BUFFER_SIZE */
+	h = (struct handshake_frame *)(hba->dma_mem + MU_REQ_BUFFER_SIZE);
+	h->rb_phy = cpu_to_le32(hba->dma_handle);
+	h->rb_phy_hi = cpu_to_le32((hba->dma_handle >> 16) >> 16);
+	h->req_sz = cpu_to_le16(sizeof(struct req_msg));
+	h->req_cnt = cpu_to_le16(MU_REQ_COUNT);
+	h->status_sz = cpu_to_le16(sizeof(struct status_msg));
+	h->status_cnt = cpu_to_le16(MU_STATUS_COUNT);
+	stex_gettime(&h->hosttime);
+	h->partner_type = HMU_PARTNER_TYPE;
+	/* give the firmware the frame's bus address: low IMR0, high IMR1 */
+	status_phys = hba->dma_handle + MU_REQ_BUFFER_SIZE;
+	writel(status_phys, base + IMR0);
+	readl(base + IMR0);
+	writel((status_phys >> 16) >> 16, base + IMR1);
+	readl(base + IMR1);
+
+	writel((status_phys >> 16) >> 16, base + OMR0); /* old fw compatible */
+	readl(base + OMR0);
+	writel(MU_INBOUND_DOORBELL_HANDSHAKE, base + IDBL);
+	readl(base + IDBL); /* flush */
+
+	udelay(10);
+	for (i = 0; readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE
+		&& i < MU_MAX_DELAY_TIME; i++) {
+		rmb();
+		msleep(1);
+	}
+
+	if (i == MU_MAX_DELAY_TIME) {
+		printk(KERN_ERR DRV_NAME
+			"(%s): no signature after handshake frame\n",
+			pci_name(hba->pdev));
+		return -1;
+	}
+	/* success: zero the four message registers and mark the MU started */
+	writel(0, base + IMR0);
+	readl(base + IMR0);
+	writel(0, base + OMR0);
+	readl(base + OMR0);
+	writel(0, base + IMR1);
+	readl(base + IMR1);
+	writel(0, base + OMR1);
+	readl(base + OMR1); /* flush */
+	hba->mu_status = MU_STATE_STARTED;
+	return 0;
+}
+
+static int stex_abort(struct scsi_cmnd *cmd)
+{
+	struct Scsi_Host *host = cmd->device->host;
+	struct st_hba *hba = (struct st_hba *)host->hostdata;
+	u16 tag;
+	void __iomem *base;
+	u32 data;
+	int result = SUCCESS;	/* kept unless cmd is still outstanding */
+	unsigned long flags;
+	base = hba->mmio_base;
+	spin_lock_irqsave(host->host_lock, flags);
+
+	for (tag = 0; tag < MU_MAX_REQUEST; tag++)
+		if (hba->ccb[tag].cmd == cmd && (hba->tag & (1 << tag))) {
+			hba->wait_ccb = &(hba->ccb[tag]);
+			break;
+		}
+	if (tag >= MU_MAX_REQUEST)
+		goto out;
+
+	data = readl(base + ODBL);
+	if (data == 0 || data == 0xffffffff)
+		goto fail_out;
+
+	writel(data, base + ODBL);
+	readl(base + ODBL); /* flush */
+	/* process pending statuses now; wait_ccb is cleared if cmd completed */
+	stex_mu_intr(hba, data);
+
+	if (hba->wait_ccb == NULL) {
+		printk(KERN_WARNING DRV_NAME
+			"(%s): lost interrupt\n", pci_name(hba->pdev));
+		goto out;
+	}
+
+fail_out:
+	hba->wait_ccb->req = NULL; /* nullify the req's future return */
+	hba->wait_ccb = NULL;
+	result = FAILED;
+out:
+	spin_unlock_irqrestore(host->host_lock, flags);
+	return result;
+}
+
+static void stex_hard_reset(struct st_hba *hba)
+{
+	struct pci_bus *bus;
+	int i;
+	u16 pci_cmd;
+	u8 pci_bctl;
+	/* save the first 16 config dwords; they are written back at the end */
+	for (i = 0; i < 16; i++)
+		pci_read_config_dword(hba->pdev, i * 4,
+			&hba->pdev->saved_config_space[i]);
+
+	/* Reset secondary bus. Our controller(MU/ATU) is the only device on
+	   secondary bus. Consult Intel 80331/3 developer's manual for detail */
+	bus = hba->pdev->bus;
+	pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
+	pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
+	msleep(1);
+	pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
+	/* poll in 1 ms steps until PCI_COMMAND shows bus mastering enabled */
+	for (i = 0; i < MU_MAX_DELAY_TIME; i++) {
+		pci_read_config_word(hba->pdev, PCI_COMMAND, &pci_cmd);
+		if (pci_cmd & PCI_COMMAND_MASTER)
+			break;
+		msleep(1);
+	}
+
+	ssleep(5);
+	for (i = 0; i < 16; i++)
+		pci_write_config_dword(hba->pdev, i * 4,
+			hba->pdev->saved_config_space[i]);
+}
+
+static int stex_reset(struct scsi_cmnd *cmd)
+{
+	struct st_hba *hba;
+	unsigned long flags;
+	hba = (struct st_hba *) &cmd->device->host->hostdata[0];
+	/* while not MU_STATE_STARTED, stex_mu_intr() discards all statuses */
+	hba->mu_status = MU_STATE_RESETTING;
+	/* only st_shasta cards get the PCI secondary-bus reset */
+	if (hba->cardtype == st_shasta)
+		stex_hard_reset(hba);
+
+	if (stex_handshake(hba)) {
+		printk(KERN_WARNING DRV_NAME
+			"(%s): resetting: handshake failed\n",
+			pci_name(hba->pdev));
+		return FAILED;
+	}
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	hba->tag = 0;
+	hba->req_head = 0;
+	hba->req_tail = 0;
+	hba->status_head = 0;
+	hba->status_tail = 0;
+	hba->out_req_cnt = 0;
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return SUCCESS;
+}
+
+static int stex_biosparam(struct scsi_device *sdev,
+	struct block_device *bdev, sector_t capacity, int geom[])
+{
+	int heads = 255, sectors = 63;	/* geometry for larger disks */
+
+	if (capacity < 0x200000) {	/* small disk: 64 heads, 32 sectors */
+		heads = 64;
+		sectors = 32;
+	}
+
+	/* sector_div() returns the remainder; the quotient stays in capacity */
+	sector_div(capacity, heads * sectors);
+	geom[0] = heads;
+	geom[1] = sectors;
+	geom[2] = capacity;	/* cylinders */
+
+	return 0;
+}
+
+static struct scsi_host_template driver_template = {
+	.module				= THIS_MODULE,
+	.name				= DRV_NAME,
+	.proc_name			= DRV_NAME,
+	.bios_param			= stex_biosparam,
+	.queuecommand			= stex_queuecommand,
+	.slave_configure		= stex_slave_config,
+	.slave_destroy			= stex_slave_destroy,
+	.eh_abort_handler		= stex_abort,
+	.eh_host_reset_handler		= stex_reset,
+	.can_queue			= ST_CAN_QUEUE,
+	.this_id			= -1,
+	.sg_tablesize			= ST_MAX_SG,
+	.cmd_per_lun			= ST_CMD_PER_LUN,
+};
+
+static int stex_set_dma_mask(struct pci_dev * pdev)
+{
+	int err;
+	if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) == 0 &&
+	    pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK) == 0)
+		return 0;	/* both 64-bit masks were accepted */
+	err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);	/* 32-bit fallback */
+	if (err)
+		return err;
+	return pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+}
+
+static int __devinit
+stex_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct st_hba *hba;
+	struct Scsi_Host *host;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	host = scsi_host_alloc(&driver_template, sizeof(struct st_hba));
+
+	if (!host) {
+		printk(KERN_ERR DRV_NAME "(%s): scsi_host_alloc failed\n",
+			pci_name(pdev));
+		err = -ENOMEM;
+		goto out_disable;
+	}
+	/* the st_hba lives in the host's private hostdata area */
+	hba = (struct st_hba *)host->hostdata;
+	memset(hba, 0, sizeof(struct st_hba));
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err < 0) {
+		printk(KERN_ERR DRV_NAME "(%s): request regions failed\n",
+			pci_name(pdev));
+		goto out_scsi_host_put;
+	}
+
+	hba->mmio_base = ioremap(pci_resource_start(pdev, 0),
+		pci_resource_len(pdev, 0));
+	if ( !hba->mmio_base) {
+		printk(KERN_ERR DRV_NAME "(%s): memory map failed\n",
+			pci_name(pdev));
+		err = -ENOMEM;
+		goto out_release_regions;
+	}
+
+	err = stex_set_dma_mask(pdev);
+	if (err) {
+		printk(KERN_ERR DRV_NAME "(%s): set dma mask failed\n",
+			pci_name(pdev));
+		goto out_iounmap;
+	}
+
+	hba->dma_mem = dma_alloc_coherent(&pdev->dev,
+		STEX_BUFFER_SIZE, &hba->dma_handle, GFP_KERNEL);
+	if (!hba->dma_mem) {
+		err = -ENOMEM;
+		printk(KERN_ERR DRV_NAME "(%s): dma mem alloc failed\n",
+			pci_name(pdev));
+		goto out_iounmap;
+	}
+	/* carve dma_mem: request ring, then status ring, then copy buffer */
+	hba->status_buffer =
+		(struct status_msg *)(hba->dma_mem + MU_REQ_BUFFER_SIZE);
+	hba->copy_buffer = hba->dma_mem + MU_BUFFER_SIZE;
+	hba->mu_status = MU_STATE_STARTING;
+
+	hba->cardtype = (unsigned int) id->driver_data;
+
+	/* firmware uses id/lun pair for a logical drive, but lun would be
+	   always 0 if CONFIG_SCSI_MULTI_LUN not configured, so we use
+	   channel to map lun here */
+	host->max_channel = ST_MAX_LUN_PER_TARGET - 1;
+	host->max_id = ST_MAX_TARGET_NUM;
+	host->max_lun = 1;
+	host->unique_id = host->host_no;
+	host->max_cmd_len = STEX_CDB_LENGTH;
+
+	hba->host = host;
+	hba->pdev = pdev;
+	init_waitqueue_head(&hba->waitq);
+
+	err = request_irq(pdev->irq, stex_intr, IRQF_SHARED, DRV_NAME, hba);
+	if (err) {
+		printk(KERN_ERR DRV_NAME "(%s): request irq failed\n",
+			pci_name(pdev));
+		goto out_pci_free;
+	}
+
+	err = stex_handshake(hba);
+	if (err)
+		goto out_free_irq;
+
+	pci_set_drvdata(pdev, hba);
+
+	err = scsi_add_host(host, &pdev->dev);
+	if (err) {
+		printk(KERN_ERR DRV_NAME "(%s): scsi_add_host failed\n",
+			pci_name(pdev));
+		goto out_free_irq;
+	}
+
+	scsi_scan_host(host);
+
+	return 0;
+	/* error unwinding: each label undoes the step set up just before it */
+out_free_irq:
+	free_irq(pdev->irq, hba);
+out_pci_free:
+	dma_free_coherent(&pdev->dev, STEX_BUFFER_SIZE,
+			  hba->dma_mem, hba->dma_handle);
+out_iounmap:
+	iounmap(hba->mmio_base);
+out_release_regions:
+	pci_release_regions(pdev);
+out_scsi_host_put:
+	scsi_host_put(host);
+out_disable:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void stex_hba_stop(struct st_hba *hba)
+{
+	struct req_msg *req;
+	unsigned long flags;
+	unsigned long before;
+	u16 tag;
+
+	before = jiffies;
+	while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag))
+		== TAG_BITMAP_LENGTH) {
+		if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ))
+			return;
+		msleep(10);
+	}
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	req = stex_alloc_req(hba);
+	memset(req->cdb, 0, STEX_CDB_LENGTH);
+	/* vendor controller command: power state change to power saving */
+	req->cdb[0] = CONTROLLER_CMD;
+	req->cdb[1] = CTLR_POWER_STATE_CHANGE;
+	req->cdb[2] = CTLR_POWER_SAVING;
+	/* internal request: no scsi_cmnd, no data and no sense buffer */
+	hba->ccb[tag].cmd = NULL;
+	hba->ccb[tag].sg_count = 0;
+	hba->ccb[tag].sense_bufflen = 0;
+	hba->ccb[tag].sense_buffer = NULL;
+	hba->ccb[tag].req_type |= PASSTHRU_REQ_TYPE;
+
+	stex_send_cmd(hba, req, tag);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	/* stex_mu_intr() clears req_type and wakes waitq on completion */
+	wait_event_timeout(hba->waitq,
+		!(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ);
+	if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE)
+		return;	/* timed out: the tag stays allocated */
+
+	stex_free_tag(hba, (unsigned long *)&hba->tag, tag);
+}
+
+static void stex_hba_free(struct st_hba *hba)
+{
+	free_irq(hba->pdev->irq, hba);
+	/* handler released first, then the register mapping and regions */
+	iounmap(hba->mmio_base);
+
+	pci_release_regions(hba->pdev);
+	/* last, the coherent area allocated with STEX_BUFFER_SIZE in probe */
+	dma_free_coherent(&hba->pdev->dev, STEX_BUFFER_SIZE,
+			  hba->dma_mem, hba->dma_handle);
+}
+
+static void stex_remove(struct pci_dev *pdev)
+{
+	struct st_hba *hba = pci_get_drvdata(pdev);
+
+	scsi_remove_host(hba->host);
+
+	pci_set_drvdata(pdev, NULL);
+	/* send the power-state request while irq and mappings still exist */
+	stex_hba_stop(hba);
+
+	stex_hba_free(hba);
+	/* hba is the host's hostdata, so the host reference is dropped last */
+	scsi_host_put(hba->host);
+
+	pci_disable_device(pdev);
+}
+
+static void stex_shutdown(struct pci_dev *pdev)
+{
+	struct st_hba *hba = pci_get_drvdata(pdev);
+	/* only sends the power-state request; no resources are released */
+	stex_hba_stop(hba);
+}
+
+static struct pci_device_id stex_pci_tbl[] = {
+	{ 0x105a, 0x8350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0xc350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0xf350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x4301, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x4302, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x8301, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x8302, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x1725, 0x7250, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_vsc },
+	{ }	/* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, stex_pci_tbl);
+
+static struct pci_driver stex_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= stex_pci_tbl,
+	.probe		= stex_probe,
+	.remove		= __devexit_p(stex_remove),
+	.shutdown	= stex_shutdown,
+};
+
+static int __init stex_init(void)
+{
+	printk(KERN_INFO DRV_NAME
+		": Promise SuperTrak EX Driver version: %s\n",
+		 ST_DRIVER_VERSION);
+	/* module load: log the version, then register the PCI driver */
+	return pci_register_driver(&stex_pci_driver);
+}
+
+static void __exit stex_exit(void)
+{
+	pci_unregister_driver(&stex_pci_driver);	/* undoes stex_init() */
+}
+
+module_init(stex_init);
+module_exit(stex_exit);
-- 
GitLab


From cf355883f506051a8ce3ac4539752829320b6c8c Mon Sep 17 00:00:00 2001
From: Ed Lin <ed.lin@promise.com>
Date: Fri, 1 Sep 2006 14:31:51 +0800
Subject: [PATCH 0150/1063] [SCSI] stex: add shared tags from block

Use block shared tags entirely within the driver. In the case of
shutdown, assume that there are no other outstanding commands, so tag
0 is fine.

Signed-off-by: Ed Lin <ed.lin@promise.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/stex.c | 177 ++++++++++++++------------------------------
 1 file changed, 57 insertions(+), 120 deletions(-)

diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index fd093302bf1a9..15fb99f224ee1 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -34,6 +34,7 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
+#include <scsi/scsi_tcq.h>
 
 #define DRV_NAME "stex"
 #define ST_DRIVER_VERSION "2.9.0.13"
@@ -95,7 +96,6 @@ enum {
 
 	/* request count, etc. */
 	MU_MAX_REQUEST				= 32,
-	TAG_BITMAP_LENGTH			= MU_MAX_REQUEST,
 
 	/* one message wasted, use MU_MAX_REQUEST+1
 		to handle MU_MAX_REQUEST messages */
@@ -265,7 +265,6 @@ struct st_hba {
 	struct Scsi_Host *host;
 	struct pci_dev *pdev;
 
-	u32 tag;
 	u32 req_head;
 	u32 req_tail;
 	u32 status_head;
@@ -309,40 +308,6 @@ static void stex_gettime(__le32 *time)
 	*(time + 1) = cpu_to_le32((tv.tv_sec >> 16) >> 16);
 }
 
-static u16 __stex_alloc_tag(unsigned long *bitmap)
-{
-	int i;
-	i = find_first_zero_bit(bitmap, TAG_BITMAP_LENGTH);
-	if (i < TAG_BITMAP_LENGTH)
-		__set_bit(i, bitmap);
-	return (u16)i;
-}
-
-static u16 stex_alloc_tag(struct st_hba *hba, unsigned long *bitmap)
-{
-	unsigned long flags;
-	u16 tag;
-
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	tag = __stex_alloc_tag(bitmap);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	return tag;
-}
-
-static void __stex_free_tag(unsigned long *bitmap, u16 tag)
-{
-	__clear_bit((int)tag, bitmap);
-}
-
-static void stex_free_tag(struct st_hba *hba, unsigned long *bitmap, u16 tag)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	__stex_free_tag(bitmap, tag);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-}
-
 static struct status_msg *stex_get_status(struct st_hba *hba)
 {
 	struct status_msg *status =
@@ -534,58 +499,32 @@ stex_send_cmd(struct st_hba *hba, struct req_msg *req, u16 tag)
 	readl(hba->mmio_base + IDBL); /* flush */
 }
 
+static int
+stex_slave_alloc(struct scsi_device *sdev)
+{
+	/* Cheat: usually extracted from Inquiry data */
+	sdev->tagged_supported = 1;
+
+	scsi_activate_tcq(sdev, sdev->host->can_queue);
+
+	return 0;
+}
+
 static int
 stex_slave_config(struct scsi_device *sdev)
 {
 	sdev->use_10_for_rw = 1;
 	sdev->use_10_for_ms = 1;
 	sdev->timeout = 60 * HZ;
+	sdev->tagged_supported = 1;
+
 	return 0;
 }
 
 static void
 stex_slave_destroy(struct scsi_device *sdev)
 {
-	struct st_hba *hba = (struct st_hba *) sdev->host->hostdata;
-	struct req_msg *req;
-	unsigned long flags;
-	unsigned long before;
-	u16 tag;
-
-	if (sdev->type != TYPE_DISK)
-		return;
-
-	before = jiffies;
-	while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag))
-		== TAG_BITMAP_LENGTH) {
-		if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ))
-			return;
-		msleep(10);
-	}
-
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	req = stex_alloc_req(hba);
-	memset(req->cdb, 0, STEX_CDB_LENGTH);
-
-	req->target = sdev->id;
-	req->lun = sdev->channel; /* firmware lun issue work around */
-	req->cdb[0] = SYNCHRONIZE_CACHE;
-
-	hba->ccb[tag].cmd = NULL;
-	hba->ccb[tag].sg_count = 0;
-	hba->ccb[tag].sense_bufflen = 0;
-	hba->ccb[tag].sense_buffer = NULL;
-	hba->ccb[tag].req_type |= PASSTHRU_REQ_TYPE;
-
-	stex_send_cmd(hba, req, tag);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-	wait_event_timeout(hba->waitq,
-		!(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ);
-	if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE)
-		return;
-
-	stex_free_tag(hba, (unsigned long *)&hba->tag, tag);
+	scsi_deactivate_tcq(sdev, 1);
 }
 
 static int
@@ -650,8 +589,9 @@ stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
 
 	cmd->scsi_done = done;
 
-	if (unlikely((tag = __stex_alloc_tag((unsigned long *)&hba->tag))
-		== TAG_BITMAP_LENGTH))
+	tag = cmd->request->tag;
+
+	if (unlikely(tag >= host->can_queue))
 		return SCSI_MLQUEUE_HOST_BUSY;
 
 	req = stex_alloc_req(hba);
@@ -771,26 +711,18 @@ static void stex_mu_intr(struct st_hba *hba, u32 doorbell)
 	while (hba->status_tail != hba->status_head) {
 		resp = stex_get_status(hba);
 		tag = le16_to_cpu(resp->tag);
-		if (unlikely(tag >= TAG_BITMAP_LENGTH)) {
+		if (unlikely(tag >= hba->host->can_queue)) {
 			printk(KERN_WARNING DRV_NAME
 				"(%s): invalid tag\n", pci_name(hba->pdev));
 			continue;
 		}
-		if (unlikely((hba->tag & (1 << tag)) == 0)) {
-			printk(KERN_WARNING DRV_NAME
-				"(%s): null tag\n", pci_name(hba->pdev));
-			continue;
-		}
 
-		hba->out_req_cnt--;
 		ccb = &hba->ccb[tag];
 		if (hba->wait_ccb == ccb)
 			hba->wait_ccb = NULL;
 		if (unlikely(ccb->req == NULL)) {
 			printk(KERN_WARNING DRV_NAME
 				"(%s): lagging req\n", pci_name(hba->pdev));
-			__stex_free_tag((unsigned long *)&hba->tag, tag);
-			stex_unmap_sg(hba, ccb->cmd); /* ??? */
 			continue;
 		}
 
@@ -808,7 +740,15 @@ static void stex_mu_intr(struct st_hba *hba, u32 doorbell)
 		ccb->srb_status = resp->srb_status;
 		ccb->scsi_status = resp->scsi_status;
 
-		if (ccb->req_type & PASSTHRU_REQ_TYPE) {
+		if (likely(ccb->cmd != NULL)) {
+			if (unlikely(ccb->cmd->cmnd[0] == PASSTHRU_CMD &&
+				ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER))
+				stex_controller_info(hba, ccb);
+			stex_unmap_sg(hba, ccb->cmd);
+			stex_scsi_done(ccb);
+			hba->out_req_cnt--;
+		} else if (ccb->req_type & PASSTHRU_REQ_TYPE) {
+			hba->out_req_cnt--;
 			if (ccb->req_type & PASSTHRU_REQ_NO_WAKEUP) {
 				ccb->req_type = 0;
 				continue;
@@ -816,14 +756,7 @@ static void stex_mu_intr(struct st_hba *hba, u32 doorbell)
 			ccb->req_type = 0;
 			if (waitqueue_active(&hba->waitq))
 				wake_up(&hba->waitq);
-			continue;
 		}
-		if (ccb->cmd->cmnd[0] == PASSTHRU_CMD &&
-			ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER)
-			stex_controller_info(hba, ccb);
-		__stex_free_tag((unsigned long *)&hba->tag, tag);
-		stex_unmap_sg(hba, ccb->cmd);
-		stex_scsi_done(ccb);
 	}
 
 update_status:
@@ -933,21 +866,24 @@ static int stex_abort(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct st_hba *hba = (struct st_hba *)host->hostdata;
-	u16 tag;
+	u16 tag = cmd->request->tag;
 	void __iomem *base;
 	u32 data;
 	int result = SUCCESS;
 	unsigned long flags;
 	base = hba->mmio_base;
 	spin_lock_irqsave(host->host_lock, flags);
-
-	for (tag = 0; tag < MU_MAX_REQUEST; tag++)
-		if (hba->ccb[tag].cmd == cmd && (hba->tag & (1 << tag))) {
-			hba->wait_ccb = &(hba->ccb[tag]);
-			break;
-		}
-	if (tag >= MU_MAX_REQUEST)
-		goto out;
+	if (tag < host->can_queue && hba->ccb[tag].cmd == cmd)
+		hba->wait_ccb = &hba->ccb[tag];
+	else {
+		for (tag = 0; tag < host->can_queue; tag++)
+			if (hba->ccb[tag].cmd == cmd) {
+				hba->wait_ccb = &hba->ccb[tag];
+				break;
+			}
+		if (tag >= host->can_queue)
+			goto out;
+	}
 
 	data = readl(base + ODBL);
 	if (data == 0 || data == 0xffffffff)
@@ -965,6 +901,7 @@ static int stex_abort(struct scsi_cmnd *cmd)
 	}
 
 fail_out:
+	stex_unmap_sg(hba, cmd);
 	hba->wait_ccb->req = NULL; /* nullify the req's future return */
 	hba->wait_ccb = NULL;
 	result = FAILED;
@@ -1025,7 +962,6 @@ static int stex_reset(struct scsi_cmnd *cmd)
 		return FAILED;
 	}
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	hba->tag = 0;
 	hba->req_head = 0;
 	hba->req_tail = 0;
 	hba->status_head = 0;
@@ -1061,6 +997,7 @@ static struct scsi_host_template driver_template = {
 	.proc_name			= DRV_NAME,
 	.bios_param			= stex_biosparam,
 	.queuecommand			= stex_queuecommand,
+	.slave_alloc			= stex_slave_alloc,
 	.slave_configure		= stex_slave_config,
 	.slave_destroy			= stex_slave_destroy,
 	.eh_abort_handler		= stex_abort,
@@ -1171,6 +1108,14 @@ stex_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto out_free_irq;
 
+	scsi_init_shared_tag_map(host, ST_CAN_QUEUE);
+	if (host->bqt == NULL) {
+		err = -ENOMEM;
+		printk(KERN_ERR DRV_NAME "(%s): init shared queue failed\n",
+			pci_name(pdev));
+		goto out_free_irq;
+	}
+
 	pci_set_drvdata(pdev, hba);
 
 	err = scsi_add_host(host, &pdev->dev);
@@ -1206,15 +1151,7 @@ static void stex_hba_stop(struct st_hba *hba)
 	struct req_msg *req;
 	unsigned long flags;
 	unsigned long before;
-	u16 tag;
-
-	before = jiffies;
-	while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag))
-		== TAG_BITMAP_LENGTH) {
-		if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ))
-			return;
-		msleep(10);
-	}
+	u16 tag = 0;
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	req = stex_alloc_req(hba);
@@ -1233,12 +1170,12 @@ static void stex_hba_stop(struct st_hba *hba)
 	stex_send_cmd(hba, req, tag);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
-	wait_event_timeout(hba->waitq,
-		!(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ);
-	if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE)
-		return;
-
-	stex_free_tag(hba, (unsigned long *)&hba->tag, tag);
+	before = jiffies;
+	while (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE) {
+		if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ))
+			return;
+		msleep(10);
+	}
 }
 
 static void stex_hba_free(struct st_hba *hba)
-- 
GitLab


From deb81d80ba27da8dfabc29ccb5977db8f4942a0a Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Fri, 1 Sep 2006 09:28:48 -0400
Subject: [PATCH 0151/1063] [SCSI] add failure return to
 scsi_init_shared_tag_map()

And use it in the stex driver.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/stex.c     | 5 ++---
 include/scsi/scsi_tcq.h | 3 ++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index 15fb99f224ee1..3cf3106a29b8e 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -1108,9 +1108,8 @@ stex_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto out_free_irq;
 
-	scsi_init_shared_tag_map(host, ST_CAN_QUEUE);
-	if (host->bqt == NULL) {
-		err = -ENOMEM;
+	err = scsi_init_shared_tag_map(host, ST_CAN_QUEUE);
+	if (err) {
 		printk(KERN_ERR DRV_NAME "(%s): init shared queue failed\n",
 			pci_name(pdev));
 		goto out_free_irq;
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index 4eea254b1ce9e..d04d05adfa9b6 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -138,9 +138,10 @@ static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag)
  * @shost:	the host to share the tag map among all devices
  * @depth:	the total depth of the map
  */
-static inline void scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
+static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
 {
 	shost->bqt = blk_init_tags(depth);
+	return shost->bqt ? 0 : -ENOMEM;
 }
 
 #endif /* _SCSI_SCSI_TCQ_H */
-- 
GitLab


From 84314fd4740ad73550c76dee4a9578979d84af48 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 18 Aug 2006 17:30:09 -0400
Subject: [PATCH 0152/1063] [SCSI] SCSI and FC Transport: add netlink support
 for posting of transport events

This patch formally adds support for the posting of FC events via netlink.
It is a followup to the original RFC at:
  http://marc.theaimsgroup.com/?l=linux-scsi&m=114530667923464&w=2
and the initial posting at:
  http://marc.theaimsgroup.com/?l=linux-scsi&m=115507374832500&w=2

The patch has been updated to optimize the send path, per the discussions
in the initial posting.

Per discussions at the Storage Summit and at OLS, we are to use netlink for
async events from transports. Also per discussions, to avoid a netlink
protocol per transport, I've created a single NETLINK_SCSITRANSPORT protocol,
which can then be used by all transports.

This patch:
- Creates new files scsi_netlink.c and scsi_netlink.h, which contains the
  single and shared definitions for the SCSI Transport. It is tied into the
  base SCSI subsystem initialization.
  Contains a single interface routine, scsi_send_transport_event(), for a
  transport to send an event (via multicast to a protocol specific group).
- Creates a new scsi_netlink_fc.h file, which contains the FC netlink event
  messages
- Adds 3 new routines to the fc transport:
   fc_get_event_number() -  to get a FC event #
   fc_host_post_event()  -  to send a simple FC event (32 bits of data)
   fc_host_post_vendor_event() - to send a Vendor unique event, with
                                 arbitrary amounts of data.

   Note: the separation of event number allows for a LLD to send a standard
     event, followed by vendor-specific data for the event.

Note: This patch assumes 2 prior fc transport patches have been installed:
   http://marc.theaimsgroup.com/?l=linux-scsi&m=115555807316329&w=2
   http://marc.theaimsgroup.com/?l=linux-scsi&m=115581614930261&w=2

   Sorry - next time I'll do something like making these individual
   patches of the same posting when I know they'll be posted closely
   together.

Signed-off-by: James Smart <James.Smart@emulex.com>

Tidy up configuration not to make SCSI always select NET

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/Kconfig             |   6 +
 drivers/scsi/Makefile            |   1 +
 drivers/scsi/scsi.c              |   3 +
 drivers/scsi/scsi_netlink.c      | 199 ++++++++++++++++++++++++++++++
 drivers/scsi/scsi_priv.h         |  11 ++
 drivers/scsi/scsi_transport_fc.c | 200 ++++++++++++++++++++++++++++++-
 include/linux/netlink.h          |   2 +
 include/scsi/scsi_netlink.h      |  86 +++++++++++++
 include/scsi/scsi_netlink_fc.h   |  71 +++++++++++
 include/scsi/scsi_transport_fc.h |  34 ++++++
 10 files changed, 612 insertions(+), 1 deletion(-)
 create mode 100644 drivers/scsi/scsi_netlink.c
 create mode 100644 include/scsi/scsi_netlink.h
 create mode 100644 include/scsi/scsi_netlink_fc.h

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index c8c606589ea63..4d1998d23f0f7 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -27,6 +27,11 @@ config SCSI
 	  However, do not compile this as a module if your root file system
 	  (the one containing the directory /) is located on a SCSI device.
 
+config SCSI_NETLINK
+	tristate
+	default	n
+	select NET
+
 config SCSI_PROC_FS
 	bool "legacy /proc/scsi/ support"
 	depends on SCSI && PROC_FS
@@ -222,6 +227,7 @@ config SCSI_SPI_ATTRS
 config SCSI_FC_ATTRS
 	tristate "FiberChannel Transport Attributes"
 	depends on SCSI
+	select SCSI_NETLINK
 	help
 	  If you wish to export transport-specific information about
 	  each attached FiberChannel device to sysfs, say Y.
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index fd9aeb1ba07f8..8fc2c594b537b 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -159,6 +159,7 @@ scsi_mod-y			+= scsi.o hosts.o scsi_ioctl.o constants.o \
 				   scsicam.o scsi_error.o scsi_lib.o \
 				   scsi_scan.o scsi_sysfs.o \
 				   scsi_devinfo.o
+scsi_mod-$(CONFIG_SCSI_NETLINK)	+= scsi_netlink.o
 scsi_mod-$(CONFIG_SYSCTL)	+= scsi_sysctl.o
 scsi_mod-$(CONFIG_SCSI_PROC_FS)	+= scsi_proc.o
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 37843927e47ff..eedfd059b82b8 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1118,6 +1118,8 @@ static int __init init_scsi(void)
 	for_each_possible_cpu(i)
 		INIT_LIST_HEAD(&per_cpu(scsi_done_q, i));
 
+	scsi_netlink_init();
+
 	printk(KERN_NOTICE "SCSI subsystem initialized\n");
 	return 0;
 
@@ -1138,6 +1140,7 @@ static int __init init_scsi(void)
 
 static void __exit exit_scsi(void)
 {
+	scsi_netlink_exit();
 	scsi_sysfs_unregister();
 	scsi_exit_sysctl();
 	scsi_exit_hosts();
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
new file mode 100644
index 0000000000000..1b59b27e887fd
--- /dev/null
+++ b/drivers/scsi/scsi_netlink.c
@@ -0,0 +1,199 @@
+/*
+ *  scsi_netlink.c  - SCSI Transport Netlink Interface
+ *
+ *  Copyright (C) 2006   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <linux/time.h>
+#include <linux/jiffies.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+
+#include <scsi/scsi_netlink.h>
+#include "scsi_priv.h"
+
+struct sock *scsi_nl_sock = NULL;
+EXPORT_SYMBOL_GPL(scsi_nl_sock);
+
+
+/**
+ * scsi_nl_rcv_msg -
+ *    Receive message handler. Validates each message header in the buffer.
+ *    No transport handler exists yet: messages are only nak'ed or ack'ed.
+ *
+ * @skb:		socket receive buffer
+ *
+ **/
+static void
+scsi_nl_rcv_msg(struct sk_buff *skb)
+{
+	struct nlmsghdr *nlh;
+	struct scsi_nl_hdr *hdr;
+	uint32_t rlen;
+	int err;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		err = 0;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) ||
+		    (skb->len < nlh->nlmsg_len)) {
+			printk(KERN_WARNING "%s: discarding partial skb\n",
+				 __FUNCTION__);
+			return;
+		}
+		/* step by the aligned length, clamped to what the skb holds */
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+
+		if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) {
+			err = -EBADMSG;
+			goto next_msg;
+		}
+
+		hdr = NLMSG_DATA(nlh);
+		if ((hdr->version != SCSI_NL_VERSION) ||
+		    (hdr->magic != SCSI_NL_MAGIC)) {
+			err = -EPROTOTYPE;
+			goto next_msg;
+		}
+
+		if (security_netlink_recv(skb, CAP_SYS_ADMIN)) {
+			err = -EPERM;
+			goto next_msg;
+		}
+
+		if (nlh->nlmsg_len < (sizeof(*nlh) + hdr->msglen)) {
+			printk(KERN_WARNING "%s: discarding partial message\n",
+				 __FUNCTION__);
+			return;
+		}
+
+		/*
+		 * We currently don't support anyone sending us a message
+		 */
+
+next_msg:
+		if ((err) || (nlh->nlmsg_flags & NLM_F_ACK))
+			netlink_ack(skb, nlh, err);
+
+		skb_pull(skb, rlen);
+	}
+}
+
+
+/**
+ * scsi_nl_rcv -
+ *    Receive handler for a socket. Extracts a received message buffer from
+ *    the socket, and starts message processing.
+ *
+ * @sk:		socket
+ * @len:	unused
+ *
+ **/
+static void
+scsi_nl_rcv(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
+		scsi_nl_rcv_msg(skb);
+		kfree_skb(skb);
+	}
+}
+
+
+/**
+ * scsi_nl_rcv_event -
+ *    Event handler for a netlink socket.
+ *
+ * @this:		event notifier block
+ * @event:		event type
+ * @ptr:		event payload
+ *
+ **/
+static int
+scsi_nl_rcv_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct netlink_notify *n = ptr;
+
+	if (n->protocol != NETLINK_SCSITRANSPORT)
+		return NOTIFY_DONE;
+
+	/*
+	 * Currently, we are not tracking PID's, etc. There is nothing
+	 * to handle.
+	 */
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block scsi_netlink_notifier = {
+	.notifier_call  = scsi_nl_rcv_event,
+};
+
+
+/**
+ * scsi_netlink_init -
+ *    Called by SCSI subsystem to initialize the SCSI transport netlink
+ *    interface. On failure an error is logged and scsi_nl_sock stays NULL.
+ *
+ **/
+void
+scsi_netlink_init(void)
+{
+	int error;
+
+	error = netlink_register_notifier(&scsi_netlink_notifier);
+	if (error) {
+		printk(KERN_ERR "%s: register of event handler failed - %d\n",
+				__FUNCTION__, error);
+		return;
+	}
+
+	scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
+				SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE);
+	if (!scsi_nl_sock) {
+		printk(KERN_ERR "%s: register of recieve handler failed\n",
+				__FUNCTION__);
+		netlink_unregister_notifier(&scsi_netlink_notifier);
+	}
+
+	return;
+}
+
+
+/**
+ * scsi_netlink_exit -
+ *    Called by SCSI subsystem to disable the SCSI transport netlink
+ *    interface
+ *
+ **/
+void
+scsi_netlink_exit(void)
+{
+	if (scsi_nl_sock) {
+		sock_release(scsi_nl_sock->sk_socket);
+		netlink_unregister_notifier(&scsi_netlink_notifier);
+	}
+
+	return;
+}
+
+
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index ae24c85aaeea2..5d023d44e5e7f 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -8,6 +8,7 @@ struct scsi_cmnd;
 struct scsi_device;
 struct scsi_host_template;
 struct Scsi_Host;
+struct scsi_nl_hdr;
 
 
 /*
@@ -110,6 +111,16 @@ extern void __scsi_remove_device(struct scsi_device *);
 
 extern struct bus_type scsi_bus_type;
 
+/* scsi_netlink.c */
+#ifdef CONFIG_SCSI_NETLINK
+extern void scsi_netlink_init(void);
+extern void scsi_netlink_exit(void);
+extern struct sock *scsi_nl_sock;
+#else
+static inline void scsi_netlink_init(void) {}
+static inline void scsi_netlink_exit(void) {}
+#endif
+
 /* 
  * internal scsi timeout functions: for use by mid-layer and transport
  * classes.
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 79d31ca2b7416..05989f1305543 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -32,6 +32,9 @@
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_fc.h>
 #include <scsi/scsi_cmnd.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <scsi/scsi_netlink_fc.h>
 #include "scsi_priv.h"
 
 static int fc_queue_work(struct Scsi_Host *, struct work_struct *);
@@ -93,6 +96,29 @@ fc_enum_name_search(port_type, fc_port_type, fc_port_type_names)
 #define FC_PORTTYPE_MAX_NAMELEN		50
 
 
+/* Convert fc_host_event_code values to ascii string name */
+static const struct {
+	enum fc_host_event_code		value;
+	char				*name;
+} fc_host_event_code_names[] = {
+	{ FCH_EVT_LIP,			"lip" },
+	{ FCH_EVT_LINKUP,		"link_up" },
+	{ FCH_EVT_LINKDOWN,		"link_down" },
+	{ FCH_EVT_LIPRESET,		"lip_reset" },
+	{ FCH_EVT_RSCN,			"rscn" },
+	{ FCH_EVT_ADAPTER_CHANGE,	"adapter_chg" },
+	{ FCH_EVT_PORT_UNKNOWN,		"port_unknown" },
+	{ FCH_EVT_PORT_ONLINE,		"port_online" },
+	{ FCH_EVT_PORT_OFFLINE,		"port_offline" },
+	{ FCH_EVT_PORT_FABRIC,		"port_fabric" },
+	{ FCH_EVT_LINK_UNKNOWN,		"link_unknown" },
+	{ FCH_EVT_VENDOR_UNIQUE,	"vendor_unique" },
+};
+fc_enum_name_search(host_event_code, fc_host_event_code,
+		fc_host_event_code_names)
+#define FC_HOST_EVENT_CODE_MAX_NAMELEN	30
+
+
 /* Convert fc_port_state values to ascii string name */
 static struct {
 	enum fc_port_state	value;
@@ -377,10 +403,182 @@ MODULE_PARM_DESC(dev_loss_tmo,
 		 " exceeded, the scsi target is removed. Value should be"
 		 " between 1 and SCSI_DEVICE_BLOCK_MAX_TIMEOUT.");
 
+/**
+ * Netlink Infrastructure
+ **/
+
+static atomic_t fc_event_seq;
+
+/**
+ * fc_get_event_number - Obtain the next sequential FC event number
+ *
+ * Notes:
+ *   We could have inline'd this, but it would have required fc_event_seq to
+ *   be exposed. For now, live with the subroutine call.
+ *   Atomic used to avoid lock/unlock...
+ **/
+u32
+fc_get_event_number(void)
+{
+	return atomic_add_return(1, &fc_event_seq);
+}
+EXPORT_SYMBOL(fc_get_event_number);
+
+
+/**
+ * fc_host_post_event - called to post an event on an fc_host.
+ *
+ * @shost:		host the event occurred on
+ * @event_number:	fc event number obtained from fc_get_event_number()
+ * @event_code:		fc_host event being posted
+ * @event_data:		32bits of data for the event being posted
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ **/
+void
+fc_host_post_event(struct Scsi_Host *shost, u32 event_number,
+		enum fc_host_event_code event_code, u32 event_data)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr	*nlh;
+	struct fc_nl_event *event;
+	const char *name;
+	u32 len, skblen;
+	int err;
+
+	if (!scsi_nl_sock) {
+		err = -ENOENT;
+		goto send_fail;
+	}
+
+	len = FC_NL_MSGALIGN(sizeof(*event));
+	skblen = NLMSG_SPACE(len);
+
+	skb = alloc_skb(skblen, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto send_fail;
+	}
+
+	nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG,
+				skblen - sizeof(*nlh), 0);
+	if (!nlh) {
+		err = -ENOBUFS;
+		goto send_fail_skb;
+	}
+	event = NLMSG_DATA(nlh);
+
+	INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC,
+				FC_NL_ASYNC_EVENT, len);
+	event->seconds = get_seconds();
+	event->vendor_id = 0;
+	event->host_no = shost->host_no;
+	event->event_datalen = sizeof(u32);	/* bytes */
+	event->event_num = event_number;
+	event->event_code = event_code;
+	event->event_data = event_data;
+
+	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS);
+	if (err && (err != -ESRCH))	/* filter no recipient errors */
+		/* nlmsg_multicast already kfree_skb'd */
+		goto send_fail;
+
+	return;
+
+send_fail_skb:
+	kfree_skb(skb);
+send_fail:
+	name = get_fc_host_event_code_name(event_code);
+	printk(KERN_WARNING
+		"%s: Dropped Event : host %d %s data 0x%08x - err %d\n",
+		__FUNCTION__, shost->host_no,
+		(name) ? name : "<unknown>", event_data, err);
+	return;
+}
+EXPORT_SYMBOL(fc_host_post_event);
+
+
+/**
+ * fc_host_post_vendor_event - post a vendor unique event on an fc_host
+ *
+ * @shost:		host the event occurred on
+ * @event_number:	fc event number obtained from fc_get_event_number()
+ * @data_len:		amount, in bytes, of vendor unique data
+ * @data_buf:		pointer to vendor unique data
+ * @vendor_id:		vendor identifier (format described in scsi_netlink.h)
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ **/
+void
+fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number,
+		u32 data_len, char * data_buf, u32 vendor_id)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr	*nlh;
+	struct fc_nl_event *event;
+	u32 len, skblen;
+	int err;
+
+	if (!scsi_nl_sock) {
+		err = -ENOENT;
+		goto send_vendor_fail;
+	}
+
+	len = FC_NL_MSGALIGN(sizeof(*event) + data_len);
+	skblen = NLMSG_SPACE(len);
+
+	skb = alloc_skb(skblen, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto send_vendor_fail;
+	}
+
+	nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG,
+				skblen - sizeof(*nlh), 0);
+	if (!nlh) {
+		err = -ENOBUFS;
+		goto send_vendor_fail_skb;
+	}
+	event = NLMSG_DATA(nlh);
+
+	INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC,
+				FC_NL_ASYNC_EVENT, len);
+	event->seconds = get_seconds();
+	event->vendor_id = vendor_id;
+	event->host_no = shost->host_no;
+	event->event_datalen = data_len;	/* bytes */
+	event->event_num = event_number;
+	event->event_code = FCH_EVT_VENDOR_UNIQUE;
+	memcpy(&event->event_data, data_buf, data_len);
+
+	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS);
+	if (err && (err != -ESRCH))	/* filter no recipient errors */
+		/* nlmsg_multicast already kfree_skb'd */
+		goto send_vendor_fail;
+
+	return;
+
+send_vendor_fail_skb:
+	kfree_skb(skb);
+send_vendor_fail:
+	printk(KERN_WARNING
+		"%s: Dropped Event : host %d vendor_unique - err %d\n",
+		__FUNCTION__, shost->host_no, err);
+	return;
+}
+EXPORT_SYMBOL(fc_host_post_vendor_event);
+
+
 
 static __init int fc_transport_init(void)
 {
-	int error = transport_class_register(&fc_host_class);
+	int error;
+
+	atomic_set(&fc_event_seq, 0);
+
+	error = transport_class_register(&fc_host_class);
 	if (error)
 		return error;
 	error = transport_class_register(&fc_rport_class);
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 855b44668caae..66411622e06eb 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -21,6 +21,8 @@
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
 #define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
 #define NETLINK_GENERIC		16
+/* leave room for NETLINK_DM (DM Events) */
+#define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
 
 #define MAX_LINKS 32		
 
diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h
new file mode 100644
index 0000000000000..7a3a20e640c0b
--- /dev/null
+++ b/include/scsi/scsi_netlink.h
@@ -0,0 +1,86 @@
+/*
+ *  SCSI Transport Netlink Interface
+ *    Used for the posting of outbound SCSI transport events
+ *
+ *  Copyright (C) 2006   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef SCSI_NETLINK_H
+#define SCSI_NETLINK_H
+
+/*
+ * This file is intended to be included by both kernel and user space
+ */
+
+/* Single Netlink Message type to send all SCSI Transport messages */
+#define SCSI_TRANSPORT_MSG		(NLMSG_MIN_TYPE + 1)
+
+/* SCSI Transport Broadcast Groups */
+	/* leaving groups 0 and 1 unassigned */
+#define SCSI_NL_GRP_FC_EVENTS		(1<<2)		/* Group 2 */
+#define SCSI_NL_GRP_CNT			3
+
+
+/* SCSI_TRANSPORT_MSG event message header */
+struct scsi_nl_hdr {
+	uint8_t version;
+	uint8_t transport;
+	uint16_t magic;
+	uint16_t msgtype;
+	uint16_t msglen;
+} __attribute__((aligned(sizeof(uint64_t))));
+
+/* scsi_nl_hdr->version value */
+#define SCSI_NL_VERSION				1
+
+/* scsi_nl_hdr->magic value */
+#define SCSI_NL_MAGIC				0xA1B2
+
+/* scsi_nl_hdr->transport value */
+#define SCSI_NL_TRANSPORT			0
+#define SCSI_NL_TRANSPORT_FC			1
+#define SCSI_NL_MAX_TRANSPORTS			2
+
+/* scsi_nl_hdr->msgtype values are defined in each transport */
+
+
+/*
+ * Vendor ID:
+ *   If transports post vendor-unique events, they must pass a well-known
+ *   32-bit vendor identifier. This identifier consists of 8 bits indicating
+ *   the "type" of identifier contained, and 24 bits of id data.
+ *
+ *   Identifiers for each type:
+ *    PCI :  ID data is the 16 bit PCI Registered Vendor ID
+ */
+#define SCSI_NL_VID_ID_MASK			0x00FFFFFF
+#define SCSI_NL_VID_TYPE_MASK			0xFF000000
+#define SCSI_NL_VID_TYPE_PCI			0x01000000
+
+
+#define INIT_SCSI_NL_HDR(hdr, t, mtype, mlen)			\
+	{							\
+	(hdr)->version = SCSI_NL_VERSION;			\
+	(hdr)->transport = t;					\
+	(hdr)->magic = SCSI_NL_MAGIC;				\
+	(hdr)->msgtype = mtype;					\
+	(hdr)->msglen = mlen;					\
+	}
+
+
+#endif /* SCSI_NETLINK_H */
+
diff --git a/include/scsi/scsi_netlink_fc.h b/include/scsi/scsi_netlink_fc.h
new file mode 100644
index 0000000000000..b213d2909fed4
--- /dev/null
+++ b/include/scsi/scsi_netlink_fc.h
@@ -0,0 +1,71 @@
+/*
+ *  FC Transport Netlink Interface
+ *
+ *  Copyright (C) 2006   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef SCSI_NETLINK_FC_H
+#define SCSI_NETLINK_FC_H
+
+#include <scsi/scsi_netlink.h>
+
+/*
+ * This file is intended to be included by both kernel and user space
+ */
+
+/*
+ * FC Transport Message Types
+ */
+	/* kernel -> user */
+#define FC_NL_ASYNC_EVENT			0x0100
+	/* user -> kernel */
+/* none */
+
+
+/*
+ * Message Structures :
+ */
+
+/* macro to round up message lengths to 8byte boundary */
+#define FC_NL_MSGALIGN(len)		(((len) + 7) & ~7)
+
+
+/*
+ * FC Transport Broadcast Event Message :
+ *   FC_NL_ASYNC_EVENT
+ *
+ * Note: for a Vendor Unique message, &event_data is the start of the
+ *       vendor unique payload, and the length of the payload is
+ *       given by event_datalen
+ *
+ * Note: When specifying vendor_id, be sure to read the Vendor Type and ID
+ *   formatting requirements specified in scsi_netlink.h
+ */
+struct fc_nl_event {
+	struct scsi_nl_hdr snlh;		/* must be 1st element ! */
+	uint64_t seconds;
+	uint32_t vendor_id;
+	uint16_t host_no;
+	uint16_t event_datalen;
+	uint32_t event_num;
+	uint32_t event_code;
+	uint32_t event_data;
+} __attribute__((aligned(sizeof(uint64_t))));
+
+
+#endif /* SCSI_NETLINK_FC_H */
+
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index c74be5dabfebb..f91c5358af3a9 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -29,6 +29,7 @@
 
 #include <linux/sched.h>
 #include <scsi/scsi.h>
+#include <scsi/scsi_netlink.h>
 
 struct scsi_transport_template;
 
@@ -283,6 +284,30 @@ struct fc_host_statistics {
 };
 
 
+/*
+ * FC Event Codes - Polled and Async, following FC HBAAPI v2.0 guidelines
+ */
+
+/*
+ * fc_host_event_code: If you alter this, you also need to alter
+ * scsi_transport_fc.c (for the ascii descriptions).
+ */
+enum fc_host_event_code  {
+	FCH_EVT_LIP			= 0x1,
+	FCH_EVT_LINKUP			= 0x2,
+	FCH_EVT_LINKDOWN		= 0x3,
+	FCH_EVT_LIPRESET		= 0x4,
+	FCH_EVT_RSCN			= 0x5,
+	FCH_EVT_ADAPTER_CHANGE		= 0x103,
+	FCH_EVT_PORT_UNKNOWN		= 0x200,
+	FCH_EVT_PORT_OFFLINE		= 0x201,
+	FCH_EVT_PORT_ONLINE		= 0x202,
+	FCH_EVT_PORT_FABRIC		= 0x204,
+	FCH_EVT_LINK_UNKNOWN		= 0x500,
+	FCH_EVT_VENDOR_UNIQUE		= 0xffff,
+};
+
+
 /*
  * FC Local Port (Host) Attributes
  *
@@ -526,5 +551,14 @@ struct fc_rport *fc_remote_port_add(struct Scsi_Host *shost,
 void fc_remote_port_delete(struct fc_rport  *rport);
 void fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles);
 int scsi_is_fc_rport(const struct device *);
+u32 fc_get_event_number(void);
+void fc_host_post_event(struct Scsi_Host *shost, u32 event_number,
+		enum fc_host_event_code event_code, u32 event_data);
+void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number,
+		u32 data_len, char * data_buf, u32 vendor_id);
+	/* Note: when specifying vendor_id to fc_host_post_vendor_event()
+	 *   be sure to read the Vendor Type and ID formatting requirements
+	 *   specified in scsi_netlink.h
+	 */
 
 #endif /* SCSI_TRANSPORT_FC_H */
-- 
GitLab


From f14e2e29cdd07f80de6dec168dc2bb39de37eec3 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Tue, 22 Aug 2006 09:55:23 -0400
Subject: [PATCH 0153/1063] [SCSI] SCSI & FC transport: extend event vendor
 id's to 64bits

During discussions with Mike Christie, I became convinced that we needed
a larger vendor id. This patch extends the id from 32 to 64 bits.

This applies on top of the prior patches that add SCSI transport events
via netlink.

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_fc.c | 2 +-
 include/scsi/scsi_netlink.h      | 7 ++++---
 include/scsi/scsi_netlink_fc.h   | 2 +-
 include/scsi/scsi_transport_fc.h | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 05989f1305543..293188cbff8c0 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -513,7 +513,7 @@ EXPORT_SYMBOL(fc_host_post_event);
  **/
 void
 fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number,
-		u32 data_len, char * data_buf, u32 vendor_id)
+		u32 data_len, char * data_buf, u64 vendor_id)
 {
 	struct sk_buff *skb;
 	struct nlmsghdr	*nlh;
diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h
index 7a3a20e640c0b..8c1470cc82096 100644
--- a/include/scsi/scsi_netlink.h
+++ b/include/scsi/scsi_netlink.h
@@ -67,9 +67,10 @@ struct scsi_nl_hdr {
  *   Identifiers for each type:
  *    PCI :  ID data is the 16 bit PCI Registered Vendor ID
  */
-#define SCSI_NL_VID_ID_MASK			0x00FFFFFF
-#define SCSI_NL_VID_TYPE_MASK			0xFF000000
-#define SCSI_NL_VID_TYPE_PCI			0x01000000
+#define SCSI_NL_VID_TYPE_SHIFT		56
+#define SCSI_NL_VID_TYPE_MASK		((u64)0xFF << SCSI_NL_VID_TYPE_SHIFT)
+#define SCSI_NL_VID_TYPE_PCI		((u64)0x01 << SCSI_NL_VID_TYPE_SHIFT)
+#define SCSI_NL_VID_ID_MASK		(~ SCSI_NL_VID_TYPE_MASK)
 
 
 #define INIT_SCSI_NL_HDR(hdr, t, mtype, mlen)			\
diff --git a/include/scsi/scsi_netlink_fc.h b/include/scsi/scsi_netlink_fc.h
index b213d2909fed4..cbf76e479761c 100644
--- a/include/scsi/scsi_netlink_fc.h
+++ b/include/scsi/scsi_netlink_fc.h
@@ -58,7 +58,7 @@
 struct fc_nl_event {
 	struct scsi_nl_hdr snlh;		/* must be 1st element ! */
 	uint64_t seconds;
-	uint32_t vendor_id;
+	uint64_t vendor_id;
 	uint16_t host_no;
 	uint16_t event_datalen;
 	uint32_t event_num;
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index f91c5358af3a9..0b11eff989e04 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -555,7 +555,7 @@ u32 fc_get_event_number(void);
 void fc_host_post_event(struct Scsi_Host *shost, u32 event_number,
 		enum fc_host_event_code event_code, u32 event_data);
 void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number,
-		u32 data_len, char * data_buf, u32 vendor_id);
+		u32 data_len, char * data_buf, u64 vendor_id);
 	/* Note: when specifying vendor_id to fc_host_post_vendor_event()
 	 *   be sure to read the Vendor Type and ID formatting requirements
 	 *   specified in scsi_netlink.h
-- 
GitLab


From d2873e4c1ef293ee6d66456fb84448e258a487fa Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 18 Aug 2006 17:46:43 -0400
Subject: [PATCH 0154/1063] [SCSI] lpfc 8.1.10 : Add support to post events via
 new FC event interfaces

Add support to post events via new FC event interfaces

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/lpfc/lpfc.h         | 2 ++
 drivers/scsi/lpfc/lpfc_els.c     | 5 +++++
 drivers/scsi/lpfc/lpfc_hbadisc.c | 6 ++++++
 drivers/scsi/lpfc/lpfc_init.c    | 6 ++++++
 4 files changed, 19 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index d44f9aac6b8fe..4a4048d7ba915 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -391,3 +391,5 @@ struct rnidrsp {
 	struct list_head list;
 	uint32_t data;
 };
+
+#define FC_REG_DUMP_EVENT	0x10	/* Register for Dump events */
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 3567de6131621..71864cdc6c71a 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -2506,6 +2506,7 @@ lpfc_els_rcv_rscn(struct lpfc_hba * phba,
 	uint32_t *lp;
 	IOCB_t *icmd;
 	uint32_t payload_len, cmd;
+	int i;
 
 	icmd = &cmdiocb->iocb;
 	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
@@ -2524,6 +2525,10 @@ lpfc_els_rcv_rscn(struct lpfc_hba * phba,
 			phba->brd_no,
 			phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt);
 
+	for (i = 0; i < payload_len/sizeof(uint32_t); i++)
+		fc_host_post_event(phba->host, fc_get_event_number(),
+			FCH_EVT_RSCN, lp[i]);
+
 	/* If we are about to begin discovery, just ACC the RSCN.
 	 * Discovery processing will satisfy it.
 	 */
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index b2f1552f1848a..53821e5778b3a 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -340,6 +340,9 @@ lpfc_linkdown(struct lpfc_hba * phba)
 		spin_unlock_irq(phba->host->host_lock);
 	}
 
+	fc_host_post_event(phba->host, fc_get_event_number(),
+			FCH_EVT_LINKDOWN, 0);
+
 	/* Clean up any firmware default rpi's */
 	if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
 		lpfc_unreg_did(phba, 0xffffffff, mb);
@@ -427,6 +430,9 @@ lpfc_linkup(struct lpfc_hba * phba)
 	struct list_head *listp, *node_list[7];
 	int i;
 
+	fc_host_post_event(phba->host, fc_get_event_number(),
+			FCH_EVT_LINKUP, 0);
+
 	spin_lock_irq(phba->host->host_lock);
 	phba->hba_state = LPFC_LINK_UP;
 	phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY |
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index f6948ffe689ad..84e7fc595f5e3 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -511,6 +511,7 @@ lpfc_handle_eratt(struct lpfc_hba * phba)
 {
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring  *pring;
+	uint32_t event_data;
 
 	if (phba->work_hs & HS_FFER6) {
 		/* Re-establishing Link */
@@ -555,6 +556,11 @@ lpfc_handle_eratt(struct lpfc_hba * phba)
 				phba->brd_no, phba->work_hs,
 				phba->work_status[0], phba->work_status[1]);
 
+		event_data = FC_REG_DUMP_EVENT;
+		fc_host_post_vendor_event(phba->host, fc_get_event_number(),
+				sizeof(event_data), (char *) &event_data,
+				SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX);
+
 		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
 		lpfc_offline(phba);
 		phba->hba_state = LPFC_HBA_ERROR;
-- 
GitLab


From ae36764a230ff6a278ed93735acf5fcda08f2786 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 18 Aug 2006 17:46:53 -0400
Subject: [PATCH 0155/1063] [SCSI] lpfc 8.1.10 : Add support to return adapter
 symbolic name

Add support to return adapter symbolic name (now that attribute is dynamic)

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/lpfc/lpfc_attr.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index d384c16f4a87c..c6d683d86cff9 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1204,6 +1204,15 @@ lpfc_get_host_fabric_name (struct Scsi_Host *shost)
 	fc_host_fabric_name(shost) = node_name;
 }
 
+static void
+lpfc_get_host_symbolic_name (struct Scsi_Host *shost)
+{
+	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
+
+	spin_lock_irq(shost->host_lock);
+	lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost));
+	spin_unlock_irq(shost->host_lock);
+}
 
 static struct fc_host_statistics *
 lpfc_get_stats(struct Scsi_Host *shost)
@@ -1486,7 +1495,6 @@ struct fc_function_template lpfc_transport_functions = {
 	.show_host_port_name = 1,
 	.show_host_supported_classes = 1,
 	.show_host_supported_fc4s = 1,
-	.show_host_symbolic_name = 1,
 	.show_host_supported_speeds = 1,
 	.show_host_maxframe_size = 1,
 
@@ -1509,6 +1517,9 @@ struct fc_function_template lpfc_transport_functions = {
 	.get_host_fabric_name = lpfc_get_host_fabric_name,
 	.show_host_fabric_name = 1,
 
+	.get_host_symbolic_name = lpfc_get_host_symbolic_name,
+	.show_host_symbolic_name = 1,
+
 	/*
 	 * The LPFC driver treats linkdown handling as target loss events
 	 * so there are no sysfs handlers for link_down_tmo.
-- 
GitLab


From 0f29b966d60e9a4f5ecff9f3832257b38aea4f13 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 18 Aug 2006 17:33:29 -0400
Subject: [PATCH 0156/1063] [SCSI] FC transport: Add dev_loss_tmo callbacks,
 and new fast_io_fail_tmo w/ callback

This patch adds the following functionality to the FC transport:

- dev_loss_tmo LLDD callback :
  Called to essentially confirm the deletion of an rport. Thus, it is
  called whenever the dev_loss_tmo fires, or when the rport is deleted
  due to other circumstances (module unload, etc).  It is expected that
  the callback will initiate the termination of any outstanding i/o on
  the rport.

- fast_io_fail_tmo and LLD callback:
  There are some cases where it may take a long while to truly determine
  device loss, but the system is in a multipathing configuration where,
  if the i/o were failed quickly (faster than dev_loss_tmo), it could be
  redirected to a different path and completed sooner.

Many thanks to Mike Reed who cleaned up the initial RFC in support
of this post.

The original RFC is at:
http://marc.theaimsgroup.com/?l=linux-scsi&m=115505981027246&w=2

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_fc.c | 134 ++++++++++++++++++++++++++++---
 include/scsi/scsi_transport_fc.h |   5 ++
 2 files changed, 128 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 293188cbff8c0..4ab176ed480d1 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -242,6 +242,7 @@ fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names)
 
 
 static void fc_timeout_deleted_rport(void *data);
+static void fc_timeout_fail_rport_io(void *data);
 static void fc_scsi_scan_rport(void *data);
 
 /*
@@ -249,7 +250,7 @@ static void fc_scsi_scan_rport(void *data);
  * Increase these values if you add attributes
  */
 #define FC_STARGET_NUM_ATTRS 	3
-#define FC_RPORT_NUM_ATTRS	9
+#define FC_RPORT_NUM_ATTRS	10
 #define FC_HOST_NUM_ATTRS	17
 
 struct fc_internal {
@@ -622,11 +623,14 @@ store_fc_rport_##field(struct class_device *cdev, const char *buf,	\
 	struct fc_rport *rport = transport_class_to_rport(cdev);	\
 	struct Scsi_Host *shost = rport_to_shost(rport);		\
 	struct fc_internal *i = to_fc_internal(shost->transportt);	\
+	char *cp;							\
 	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||		\
 	    (rport->port_state == FC_PORTSTATE_DELETED) ||		\
 	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))		\
 		return -EBUSY;						\
-	val = simple_strtoul(buf, NULL, 0);				\
+	val = simple_strtoul(buf, &cp, 0);				\
+	if (*cp && (*cp != '\n'))					\
+		return -EINVAL;						\
 	i->f->set_rport_##field(rport, val);				\
 	return count;							\
 }
@@ -708,6 +712,13 @@ static FC_CLASS_DEVICE_ATTR(rport, title, S_IRUGO,			\
 	if (i->f->show_rport_##field)					\
 		count++
 
+#define SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(field)				\
+{									\
+	i->private_rport_attrs[count] = class_device_attr_rport_##field; \
+	i->rport_attrs[count] = &i->private_rport_attrs[count];		\
+	count++;							\
+}
+
 
 /* The FC Transport Remote Port Attributes: */
 
@@ -740,12 +751,14 @@ store_fc_rport_dev_loss_tmo(struct class_device *cdev, const char *buf,
 	struct fc_rport *rport = transport_class_to_rport(cdev);
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	struct fc_internal *i = to_fc_internal(shost->transportt);
+	char *cp;
 	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||
 	    (rport->port_state == FC_PORTSTATE_DELETED) ||
 	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))
 		return -EBUSY;
-	val = simple_strtoul(buf, NULL, 0);
-	if ((val < 0) || (val > SCSI_DEVICE_BLOCK_MAX_TIMEOUT))
+	val = simple_strtoul(buf, &cp, 0);
+	if ((*cp && (*cp != '\n')) ||
+	    (val < 0) || (val > SCSI_DEVICE_BLOCK_MAX_TIMEOUT))
 		return -EINVAL;
 	i->f->set_rport_dev_loss_tmo(rport, val);
 	return count;
@@ -795,6 +808,44 @@ static FC_CLASS_DEVICE_ATTR(rport, roles, S_IRUGO,
 fc_private_rport_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN);
 fc_private_rport_rd_attr(scsi_target_id, "%d\n", 20);
 
+/*
+ * fast_io_fail_tmo attribute
+ */
+static ssize_t
+show_fc_rport_fast_io_fail_tmo (struct class_device *cdev, char *buf)
+{
+	struct fc_rport *rport = transport_class_to_rport(cdev);
+
+	if (rport->fast_io_fail_tmo == -1)
+		return snprintf(buf, 5, "off\n");
+	return snprintf(buf, 20, "%d\n", rport->fast_io_fail_tmo);
+}
+
+static ssize_t
+store_fc_rport_fast_io_fail_tmo(struct class_device *cdev, const char *buf,
+			   size_t count)
+{
+	int val;
+	char *cp;
+	struct fc_rport *rport = transport_class_to_rport(cdev);
+
+	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||
+	    (rport->port_state == FC_PORTSTATE_DELETED) ||
+	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))
+		return -EBUSY;
+	if (strncmp(buf, "off", 3) == 0)
+		rport->fast_io_fail_tmo = -1;
+	else {
+		val = simple_strtoul(buf, &cp, 0);
+		if ((*cp && (*cp != '\n')) ||
+		    (val < 0) || (val >= rport->dev_loss_tmo))
+			return -EINVAL;
+		rport->fast_io_fail_tmo = val;
+	}
+	return count;
+}
+static FC_CLASS_DEVICE_ATTR(rport, fast_io_fail_tmo, S_IRUGO | S_IWUSR,
+	show_fc_rport_fast_io_fail_tmo, store_fc_rport_fast_io_fail_tmo);
 
 
 /*
@@ -880,8 +931,11 @@ store_fc_host_##field(struct class_device *cdev, const char *buf,	\
 	int val;							\
 	struct Scsi_Host *shost = transport_class_to_shost(cdev);	\
 	struct fc_internal *i = to_fc_internal(shost->transportt);	\
+	char *cp;							\
 									\
-	val = simple_strtoul(buf, NULL, 0);				\
+	val = simple_strtoul(buf, &cp, 0);				\
+	if (*cp && (*cp != '\n'))					\
+		return -EINVAL;						\
 	i->f->set_host_##field(shost, val);				\
 	return count;							\
 }
@@ -1481,6 +1535,8 @@ fc_attach_transport(struct fc_function_template *ft)
 	SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(roles);
 	SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(port_state);
 	SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(scsi_target_id);
+	if (ft->terminate_rport_io)
+		SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(fast_io_fail_tmo);
 
 	BUG_ON(count > FC_RPORT_NUM_ATTRS);
 
@@ -1552,7 +1608,7 @@ fc_flush_work(struct Scsi_Host *shost)
  * @delay:	jiffies to delay the work queuing
  *
  * Return value:
- * 	0 on success / != 0 for error
+ * 	1 on success / 0 already queued / < 0 for error
  **/
 static int
 fc_queue_devloss_work(struct Scsi_Host *shost, struct work_struct *work,
@@ -1567,6 +1623,9 @@ fc_queue_devloss_work(struct Scsi_Host *shost, struct work_struct *work,
 		return -EINVAL;
 	}
 
+	if (delay == 0)
+		return queue_work(fc_host_devloss_work_q(shost), work);
+
 	return queue_delayed_work(fc_host_devloss_work_q(shost), work, delay);
 }
 
@@ -1659,10 +1718,23 @@ fc_starget_delete(void *data)
 	struct fc_rport *rport = (struct fc_rport *)data;
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	unsigned long flags;
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+
+	/*
+	 * Involve the LLDD if possible. All io on the rport is to
+	 * be terminated, either as part of the dev_loss_tmo callback
+	 * processing, or via the terminate_rport_io function.
+	 */
+	if (i->f->dev_loss_tmo_callbk)
+		i->f->dev_loss_tmo_callbk(rport);
+	else if (i->f->terminate_rport_io)
+		i->f->terminate_rport_io(rport);
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
+		if (!cancel_delayed_work(&rport->fail_io_work))
+			fc_flush_devloss(shost);
 		if (!cancel_delayed_work(&rport->dev_loss_work))
 			fc_flush_devloss(shost);
 		spin_lock_irqsave(shost->host_lock, flags);
@@ -1685,10 +1757,7 @@ fc_rport_final_delete(void *data)
 	struct fc_rport *rport = (struct fc_rport *)data;
 	struct device *dev = &rport->dev;
 	struct Scsi_Host *shost = rport_to_shost(rport);
-
-	/* Delete SCSI target and sdevs */
-	if (rport->scsi_target_id != -1)
-		fc_starget_delete(data);
+	struct fc_internal *i = to_fc_internal(shost->transportt);
 
 	/*
 	 * if a scan is pending, flush the SCSI Host work_q so that 
@@ -1697,6 +1766,14 @@ fc_rport_final_delete(void *data)
 	if (rport->flags & FC_RPORT_SCAN_PENDING)
 		scsi_flush_work(shost);
 
+	/* Delete SCSI target and sdevs */
+	if (rport->scsi_target_id != -1)
+		fc_starget_delete(data);
+	else if (i->f->dev_loss_tmo_callbk)
+		i->f->dev_loss_tmo_callbk(rport);
+	else if (i->f->terminate_rport_io)
+		i->f->terminate_rport_io(rport);
+
 	transport_remove_device(dev);
 	device_del(dev);
 	transport_destroy_device(dev);
@@ -1748,8 +1825,10 @@ fc_rport_create(struct Scsi_Host *shost, int channel,
 	if (fci->f->dd_fcrport_size)
 		rport->dd_data = &rport[1];
 	rport->channel = channel;
+	rport->fast_io_fail_tmo = -1;
 
 	INIT_WORK(&rport->dev_loss_work, fc_timeout_deleted_rport, rport);
+	INIT_WORK(&rport->fail_io_work, fc_timeout_fail_rport_io, rport);
 	INIT_WORK(&rport->scan_work, fc_scsi_scan_rport, rport);
 	INIT_WORK(&rport->stgt_delete_work, fc_starget_delete, rport);
 	INIT_WORK(&rport->rport_delete_work, fc_rport_final_delete, rport);
@@ -1913,11 +1992,13 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 				/* restart the target */
 
 				/*
-				 * Stop the target timer first. Take no action
+				 * Stop the target timers first. Take no action
 				 * on the del_timer failure as the state
 				 * machine state change will validate the
 				 * transaction.
 				 */
+				if (!cancel_delayed_work(&rport->fail_io_work))
+					fc_flush_devloss(shost);
 				if (!cancel_delayed_work(work))
 					fc_flush_devloss(shost);
 
@@ -2061,6 +2142,7 @@ void
 fc_remote_port_delete(struct fc_rport  *rport)
 {
 	struct Scsi_Host *shost = rport_to_shost(rport);
+	struct fc_internal *i = to_fc_internal(shost->transportt);
 	int timeout = rport->dev_loss_tmo;
 	unsigned long flags;
 
@@ -2091,6 +2173,12 @@ fc_remote_port_delete(struct fc_rport  *rport)
 
 	scsi_target_block(&rport->dev);
 
+	/* see if we need to kill io faster than waiting for device loss */
+	if ((rport->fast_io_fail_tmo != -1) &&
+	    (rport->fast_io_fail_tmo < timeout) && (i->f->terminate_rport_io))
+		fc_queue_devloss_work(shost, &rport->fail_io_work,
+					rport->fast_io_fail_tmo * HZ);
+
 	/* cap the length the devices can be blocked until they are deleted */
 	fc_queue_devloss_work(shost, &rport->dev_loss_work, timeout * HZ);
 }
@@ -2150,6 +2238,8 @@ fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles)
 		 * machine state change will validate the
 		 * transaction.
 		 */
+		if (!cancel_delayed_work(&rport->fail_io_work))
+			fc_flush_devloss(shost);
 		if (!cancel_delayed_work(&rport->dev_loss_work))
 			fc_flush_devloss(shost);
 
@@ -2270,6 +2360,28 @@ fc_timeout_deleted_rport(void  *data)
 	fc_queue_work(shost, &rport->stgt_delete_work);
 }
 
+/**
+ * fc_timeout_fail_rport_io - Timeout handler for a fast io failing on a
+ *                       disconnected SCSI target.
+ *
+ * @data:	rport to terminate io on.
+ *
+ * Notes: Only requests the failure of the io, not that all are flushed
+ *    prior to returning.
+ **/
+static void
+fc_timeout_fail_rport_io(void  *data)
+{
+	struct fc_rport *rport = (struct fc_rport *)data;
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+
+	if (rport->port_state != FC_PORTSTATE_BLOCKED)
+		return;
+
+	i->f->terminate_rport_io(rport);
+}
+
 /**
  * fc_scsi_scan_rport - called to perform a scsi scan on a remote port.
  *
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 0b11eff989e04..fd352323378bd 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -195,6 +195,7 @@ struct fc_rport {	/* aka fc_starget_attrs */
 	u32 roles;
 	enum fc_port_state port_state;	/* Will only be ONLINE or UNKNOWN */
 	u32 scsi_target_id;
+	u32 fast_io_fail_tmo;
 
 	/* exported data */
 	void *dd_data;			/* Used for driver-specific storage */
@@ -207,6 +208,7 @@ struct fc_rport {	/* aka fc_starget_attrs */
 	struct device dev;
  	struct work_struct dev_loss_work;
  	struct work_struct scan_work;
+ 	struct work_struct fail_io_work;
  	struct work_struct stgt_delete_work;
 	struct work_struct rport_delete_work;
 } __attribute__((aligned(sizeof(unsigned long))));
@@ -445,6 +447,9 @@ struct fc_function_template {
 
 	int	(*issue_fc_host_lip)(struct Scsi_Host *);
 
+	void    (*dev_loss_tmo_callbk)(struct fc_rport *);
+	void	(*terminate_rport_io)(struct fc_rport *);
+
 	/* allocation lengths for host-specific data */
 	u32	 			dd_fcrport_size;
 
-- 
GitLab


From c01f32087960edd60a302ad62ad6b8b525e4aeec Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 18 Aug 2006 17:47:08 -0400
Subject: [PATCH 0157/1063] [SCSI] lpfc 8.1.10 : Add support for
 dev_loss_tmo_callbk and fast_io_fail_tmo_callbk

Add support for new dev_loss_tmo callback
  A side benefit is that it removes the code for a parallel nodev timer
  that existed in the driver
Add support for the new fast_io_fail callback

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/lpfc/lpfc.h           |   3 +
 drivers/scsi/lpfc/lpfc_attr.c      | 155 ++++++++++++++++++++-----
 drivers/scsi/lpfc/lpfc_crtn.h      |   3 +
 drivers/scsi/lpfc/lpfc_ct.c        |  25 ----
 drivers/scsi/lpfc/lpfc_disc.h      |   6 +-
 drivers/scsi/lpfc/lpfc_hbadisc.c   | 178 +++++++++++------------------
 drivers/scsi/lpfc/lpfc_nportdisc.c |   2 +-
 drivers/scsi/lpfc/lpfc_scsi.c      |  10 +-
 8 files changed, 209 insertions(+), 173 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 4a4048d7ba915..efec44d267c7c 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -285,6 +285,7 @@ struct lpfc_hba {
 	uint32_t cfg_log_verbose;
 	uint32_t cfg_lun_queue_depth;
 	uint32_t cfg_nodev_tmo;
+	uint32_t cfg_devloss_tmo;
 	uint32_t cfg_hba_queue_depth;
 	uint32_t cfg_fcp_class;
 	uint32_t cfg_use_adisc;
@@ -303,6 +304,8 @@ struct lpfc_hba {
 	uint32_t cfg_sg_seg_cnt;
 	uint32_t cfg_sg_dma_buf_size;
 
+	uint32_t dev_loss_tmo_changed;
+
 	lpfc_vpd_t vpd;		/* vital product data */
 
 	struct Scsi_Host *host;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index c6d683d86cff9..0de69324212e7 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -39,6 +39,9 @@
 #include "lpfc_compat.h"
 #include "lpfc_crtn.h"
 
+#define LPFC_DEF_DEVLOSS_TMO 30
+#define LPFC_MIN_DEVLOSS_TMO 1
+#define LPFC_MAX_DEVLOSS_TMO 255
 
 static void
 lpfc_jedec_to_ascii(int incr, char hdw[])
@@ -558,6 +561,123 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
 static CLASS_DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
 			 lpfc_poll_show, lpfc_poll_store);
 
+/*
+# lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
+# until the timer expires. Value range is [1,255]. Default value is 30.
+*/
+static int lpfc_nodev_tmo = LPFC_DEF_DEVLOSS_TMO;
+static int lpfc_devloss_tmo = LPFC_DEF_DEVLOSS_TMO;
+module_param(lpfc_nodev_tmo, int, 0);
+MODULE_PARM_DESC(lpfc_nodev_tmo,
+		 "Seconds driver will hold I/O waiting "
+		 "for a device to come back");
+static ssize_t
+lpfc_nodev_tmo_show(struct class_device *cdev, char *buf)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+	int val = 0;
+	val = phba->cfg_devloss_tmo;
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			phba->cfg_devloss_tmo);
+}
+
+static int
+lpfc_nodev_tmo_init(struct lpfc_hba *phba, int val)
+{
+	static int warned;
+	if (phba->cfg_devloss_tmo !=  LPFC_DEF_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = phba->cfg_devloss_tmo;
+		if (!warned && val != LPFC_DEF_DEVLOSS_TMO) {
+			warned = 1;
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"%d:0402 Ignoring nodev_tmo module "
+					"parameter because devloss_tmo is"
+					" set.\n",
+					phba->brd_no);
+		}
+		return 0;
+	}
+
+	if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = val;
+		phba->cfg_devloss_tmo = val;
+		return 0;
+	}
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0400 lpfc_nodev_tmo attribute cannot be set to %d, "
+			"allowed range is [%d, %d]\n",
+			phba->brd_no, val,
+			LPFC_MIN_DEVLOSS_TMO, LPFC_MAX_DEVLOSS_TMO);
+	phba->cfg_nodev_tmo = LPFC_DEF_DEVLOSS_TMO;
+	return -EINVAL;
+}
+
+static int
+lpfc_nodev_tmo_set(struct lpfc_hba *phba, int val)
+{
+	if (phba->dev_loss_tmo_changed ||
+		(lpfc_devloss_tmo != LPFC_DEF_DEVLOSS_TMO)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"%d:0401 Ignoring change to nodev_tmo "
+				"because devloss_tmo is set.\n",
+				phba->brd_no);
+		return 0;
+	}
+
+	if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = val;
+		phba->cfg_devloss_tmo = val;
+		return 0;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0403 lpfc_nodev_tmo attribute cannot be set to %d, "
+			"allowed range is [%d, %d]\n",
+			phba->brd_no, val, LPFC_MIN_DEVLOSS_TMO,
+			LPFC_MAX_DEVLOSS_TMO);
+	return -EINVAL;
+}
+
+lpfc_param_store(nodev_tmo)
+
+static CLASS_DEVICE_ATTR(lpfc_nodev_tmo, S_IRUGO | S_IWUSR,
+			 lpfc_nodev_tmo_show, lpfc_nodev_tmo_store);
+
+/*
+# lpfc_devloss_tmo: If set, it will hold all I/O errors on devices that
+# disappear until the timer expires. Value range is [1,255]. Default
+# value is 30.
+*/
+module_param(lpfc_devloss_tmo, int, 0);
+MODULE_PARM_DESC(lpfc_devloss_tmo,
+		 "Seconds driver will hold I/O waiting "
+		 "for a device to come back");
+lpfc_param_init(devloss_tmo, LPFC_DEF_DEVLOSS_TMO,
+		LPFC_MIN_DEVLOSS_TMO, LPFC_MAX_DEVLOSS_TMO)
+lpfc_param_show(devloss_tmo)
+static int
+lpfc_devloss_tmo_set(struct lpfc_hba *phba, int val)
+{
+	if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = val;
+		phba->cfg_devloss_tmo = val;
+		phba->dev_loss_tmo_changed = 1;
+		return 0;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0404 lpfc_devloss_tmo attribute cannot be set to"
+			" %d, allowed range is [%d, %d]\n",
+			phba->brd_no, val, LPFC_MIN_DEVLOSS_TMO,
+			LPFC_MAX_DEVLOSS_TMO);
+	return -EINVAL;
+}
+
+lpfc_param_store(devloss_tmo)
+static CLASS_DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR,
+	lpfc_devloss_tmo_show, lpfc_devloss_tmo_store);
+
 /*
 # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
 # deluged with LOTS of information.
@@ -616,14 +736,6 @@ LPFC_ATTR_R(hba_queue_depth, 8192, 32, 8192,
 LPFC_ATTR_R(scan_down, 1, 0, 1,
 	     "Start scanning for devices from highest ALPA to lowest");
 
-/*
-# lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
-# until the timer expires. Value range is [0,255]. Default value is 30.
-# NOTE: this MUST be less then the SCSI Layer command timeout - 1.
-*/
-LPFC_ATTR_RW(nodev_tmo, 30, 0, 255,
-	     "Seconds driver will hold I/O waiting for a device to come back");
-
 /*
 # lpfc_topology:  link topology for init link
 #            0x0  = attempt loop mode then point-to-point
@@ -737,6 +849,7 @@ struct class_device_attribute *lpfc_host_attrs[] = {
 	&class_device_attr_lpfc_lun_queue_depth,
 	&class_device_attr_lpfc_hba_queue_depth,
 	&class_device_attr_lpfc_nodev_tmo,
+	&class_device_attr_lpfc_devloss_tmo,
 	&class_device_attr_lpfc_fcp_class,
 	&class_device_attr_lpfc_use_adisc,
 	&class_device_attr_lpfc_ack0,
@@ -1449,28 +1562,13 @@ lpfc_get_starget_port_name(struct scsi_target *starget)
 	fc_starget_port_name(starget) = port_name;
 }
 
-static void
-lpfc_get_rport_loss_tmo(struct fc_rport *rport)
-{
-	/*
-	 * Return the driver's global value for device loss timeout plus
-	 * five seconds to allow the driver's nodev timer to run.
-	 */
-	rport->dev_loss_tmo = lpfc_nodev_tmo + 5;
-}
-
 static void
 lpfc_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout)
 {
-	/*
-	 * The driver doesn't have a per-target timeout setting.  Set
-	 * this value globally. lpfc_nodev_tmo should be greater then 0.
-	 */
 	if (timeout)
-		lpfc_nodev_tmo = timeout;
+		rport->dev_loss_tmo = timeout;
 	else
-		lpfc_nodev_tmo = 1;
-	rport->dev_loss_tmo = lpfc_nodev_tmo + 5;
+		rport->dev_loss_tmo = 1;
 }
 
 
@@ -1532,7 +1630,6 @@ struct fc_function_template lpfc_transport_functions = {
 	.show_rport_maxframe_size = 1,
 	.show_rport_supported_classes = 1,
 
-	.get_rport_dev_loss_tmo = lpfc_get_rport_loss_tmo,
 	.set_rport_dev_loss_tmo = lpfc_set_rport_loss_tmo,
 	.show_rport_dev_loss_tmo = 1,
 
@@ -1546,6 +1643,8 @@ struct fc_function_template lpfc_transport_functions = {
 	.show_starget_port_name = 1,
 
 	.issue_fc_host_lip = lpfc_issue_lip,
+	.dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk,
+	.terminate_rport_io = lpfc_terminate_rport_io,
 };
 
 void
@@ -1561,13 +1660,13 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_ack0_init(phba, lpfc_ack0);
 	lpfc_topology_init(phba, lpfc_topology);
 	lpfc_scan_down_init(phba, lpfc_scan_down);
-	lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo);
 	lpfc_link_speed_init(phba, lpfc_link_speed);
 	lpfc_fdmi_on_init(phba, lpfc_fdmi_on);
 	lpfc_discovery_threads_init(phba, lpfc_discovery_threads);
 	lpfc_max_luns_init(phba, lpfc_max_luns);
 	lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
-
+	lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo);
+	lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo);
 	phba->cfg_poll = lpfc_poll;
 
 	/*
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 2a176467f71ba..3d684496acde1 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -18,6 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
+struct fc_rport;
 void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
 void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *);
 int lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb,
@@ -200,6 +201,8 @@ extern struct scsi_host_template lpfc_template;
 extern struct fc_function_template lpfc_transport_functions;
 
 void lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp);
+void lpfc_terminate_rport_io(struct fc_rport *);
+void lpfc_dev_loss_tmo_callbk(struct fc_rport *rport);
 
 #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code)
 #define HBA_EVENT_RSCN                   5
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index bbb7310210b00..ae41064589918 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -324,7 +324,6 @@ lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size)
 	struct lpfc_sli_ct_request *Response =
 		(struct lpfc_sli_ct_request *) mp->virt;
 	struct lpfc_nodelist *ndlp = NULL;
-	struct lpfc_nodelist *next_ndlp;
 	struct lpfc_dmabuf *mlast, *next_mp;
 	uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType;
 	uint32_t Did;
@@ -399,30 +398,6 @@ lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size)
  	 * current driver state.
  	 */
 	if (phba->hba_state == LPFC_HBA_READY) {
-
-		/*
-		 * Switch ports that connect a loop of multiple targets need
-		 * special consideration.  The driver wants to unregister the
-		 * rpi only on the target that was pulled from the loop.  On
-		 * RSCN, the driver wants to rediscover an NPort only if the
-		 * driver flagged it as NLP_NPR_2B_DISC.  Provided adisc is
-		 * not enabled and the NPort is not capable of retransmissions
-		 * (FC Tape) prevent timing races with the scsi error handler by
-		 * unregistering the Nport's RPI.  This action causes all
-		 * outstanding IO to flush back to the midlayer.
-		 */
-		list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list,
-					 nlp_listp) {
-			if (!(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
-			    (lpfc_rscn_payload_check(phba, ndlp->nlp_DID))) {
-				if ((phba->cfg_use_adisc == 0) &&
-				    !(ndlp->nlp_fcp_info &
-				      NLP_FCP_2_DEVICE)) {
-					lpfc_unreg_rpi(phba, ndlp);
-					ndlp->nlp_flag &= ~NLP_NPR_ADISC;
-				}
-			}
-		}
 		lpfc_els_flush_rscn(phba);
 		spin_lock_irq(phba->host->host_lock);
 		phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 41cf5d3ea6ce5..9766f909c9c69 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -30,7 +30,6 @@
 
 /* worker thread events */
 enum lpfc_work_type {
-	LPFC_EVT_NODEV_TMO,
 	LPFC_EVT_ONLINE,
 	LPFC_EVT_OFFLINE,
 	LPFC_EVT_WARM_START,
@@ -74,11 +73,9 @@ struct lpfc_nodelist {
 #define NLP_FCP_2_DEVICE   0x10			/* FCP-2 device */
 
 	struct timer_list   nlp_delayfunc;	/* Used for delayed ELS cmds */
-	struct timer_list   nlp_tmofunc;	/* Used for nodev tmo */
 	struct fc_rport *rport;			/* Corresponding FC transport
 						   port structure */
 	struct lpfc_hba      *nlp_phba;
-	struct lpfc_work_evt nodev_timeout_evt;
 	struct lpfc_work_evt els_retry_evt;
 	unsigned long last_ramp_up_time;        /* jiffy of last ramp up */
 	unsigned long last_q_full_time;		/* jiffy of last queue full */
@@ -102,7 +99,6 @@ struct lpfc_nodelist {
 #define NLP_LOGO_SND       0x100	/* sent LOGO request for this entry */
 #define NLP_RNID_SND       0x400	/* sent RNID request for this entry */
 #define NLP_ELS_SND_MASK   0x7e0	/* sent ELS request for this entry */
-#define NLP_NODEV_TMO      0x10000	/* nodev timeout is running for node */
 #define NLP_DELAY_TMO      0x20000	/* delay timeout is running for node */
 #define NLP_NPR_2B_DISC    0x40000	/* node is included in num_disc_nodes */
 #define NLP_RCV_PLOGI      0x80000	/* Rcv'ed PLOGI from remote system */
@@ -169,7 +165,7 @@ struct lpfc_nodelist {
  */
 /*
  * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped
- * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers
+ * lists will receive a DEVICE_RECOVERY event. If the linkdown or devloss timers
  * expire, all effected nodes will receive a DEVICE_RM event.
  */
 /*
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 53821e5778b3a..97973af980a07 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -56,28 +56,63 @@ static uint8_t lpfcAlpaArray[] = {
 
 static void lpfc_disc_timeout_handler(struct lpfc_hba *);
 
-static void
-lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+void
+lpfc_terminate_rport_io(struct fc_rport *rport)
 {
-	uint8_t *name = (uint8_t *)&ndlp->nlp_portname;
-	int warn_on = 0;
+	struct lpfc_rport_data *rdata;
+	struct lpfc_nodelist * ndlp;
+	struct lpfc_hba *phba;
 
-	spin_lock_irq(phba->host->host_lock);
-	if (!(ndlp->nlp_flag & NLP_NODEV_TMO)) {
-		spin_unlock_irq(phba->host->host_lock);
+	rdata = rport->dd_data;
+	ndlp = rdata->pnode;
+
+	if (!ndlp) {
+		if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
+			printk(KERN_ERR "Cannot find remote node"
+			" to terminate I/O Data x%x\n",
+			rport->port_id);
 		return;
 	}
 
-	/*
-	 * If a discovery event readded nodev_timer after timer
-	 * firing and before processing the timer, cancel the
-	 * nlp_tmofunc.
-	 */
-	spin_unlock_irq(phba->host->host_lock);
-	del_timer_sync(&ndlp->nlp_tmofunc);
+	phba = ndlp->nlp_phba;
+
 	spin_lock_irq(phba->host->host_lock);
+	if (ndlp->nlp_sid != NLP_NO_SID) {
+		lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+			ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+	}
+	spin_unlock_irq(phba->host->host_lock);
+
+	return;
+}
+
+/*
+ * This function will be called when dev_loss_tmo fire.
+ */
+void
+lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
+{
+	struct lpfc_rport_data *rdata;
+	struct lpfc_nodelist * ndlp;
+	uint8_t *name;
+	int warn_on = 0;
+	struct lpfc_hba *phba;
+
+	rdata = rport->dd_data;
+	ndlp = rdata->pnode;
 
-	ndlp->nlp_flag &= ~NLP_NODEV_TMO;
+	if (!ndlp) {
+		if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
+			printk(KERN_ERR "Cannot find remote node"
+			" for rport in dev_loss_tmo_callbk x%x\n",
+			rport->port_id);
+		return;
+	}
+
+	name = (uint8_t *)&ndlp->nlp_portname;
+	phba = ndlp->nlp_phba;
+
+	spin_lock_irq(phba->host->host_lock);
 
 	if (ndlp->nlp_sid != NLP_NO_SID) {
 		warn_on = 1;
@@ -85,11 +120,14 @@ lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 		lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
 			ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
 	}
+	if (phba->fc_flag & FC_UNLOADING)
+		warn_on = 0;
+
 	spin_unlock_irq(phba->host->host_lock);
 
 	if (warn_on) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
-				"%d:0203 Nodev timeout on "
+				"%d:0203 Devloss timeout on "
 				"WWPN %x:%x:%x:%x:%x:%x:%x:%x "
 				"NPort x%x Data: x%x x%x x%x\n",
 				phba->brd_no,
@@ -99,7 +137,7 @@ lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 				ndlp->nlp_state, ndlp->nlp_rpi);
 	} else {
 		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
-				"%d:0204 Nodev timeout on "
+				"%d:0204 Devloss timeout on "
 				"WWPN %x:%x:%x:%x:%x:%x:%x:%x "
 				"NPort x%x Data: x%x x%x x%x\n",
 				phba->brd_no,
@@ -109,7 +147,12 @@ lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 				ndlp->nlp_state, ndlp->nlp_rpi);
 	}
 
-	lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM);
+	ndlp->rport = NULL;
+	rdata->pnode = NULL;
+
+	if (!(phba->fc_flag & FC_UNLOADING))
+		lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM);
+
 	return;
 }
 
@@ -127,11 +170,6 @@ lpfc_work_list_done(struct lpfc_hba * phba)
 		spin_unlock_irq(phba->host->host_lock);
 		free_evt = 1;
 		switch (evtp->evt) {
-		case LPFC_EVT_NODEV_TMO:
-			ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
-			lpfc_process_nodev_timeout(phba, ndlp);
-			free_evt = 0;
-			break;
 		case LPFC_EVT_ELS_RETRY:
 			ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
 			lpfc_els_retry_delay_handler(ndlp);
@@ -377,16 +415,6 @@ lpfc_linkdown(struct lpfc_hba * phba)
 			rc = lpfc_disc_state_machine(phba, ndlp, NULL,
 					     NLP_EVT_DEVICE_RECOVERY);
 
-			/* Check config parameter use-adisc or FCP-2 */
-			if ((rc != NLP_STE_FREED_NODE) &&
-				(phba->cfg_use_adisc == 0) &&
-				!(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE)) {
-				/* We know we will have to relogin, so
-				 * unreglogin the rpi right now to fail
-				 * any outstanding I/Os quickly.
-				 */
-				lpfc_unreg_rpi(phba, ndlp);
-			}
 		}
 	}
 
@@ -1104,8 +1132,11 @@ lpfc_unregister_remote_port(struct lpfc_hba * phba,
 	struct fc_rport *rport = ndlp->rport;
 	struct lpfc_rport_data *rdata = rport->dd_data;
 
-	ndlp->rport = NULL;
-	rdata->pnode = NULL;
+	if (rport->scsi_target_id == -1) {
+		ndlp->rport = NULL;
+		rdata->pnode = NULL;
+	}
+
 	fc_remote_port_delete(rport);
 
 	return;
@@ -1233,17 +1264,6 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list)
 		list_add_tail(&nlp->nlp_listp, &phba->fc_nlpunmap_list);
 		phba->fc_unmap_cnt++;
 		phba->nport_event_cnt++;
-		/* stop nodev tmo if running */
-		if (nlp->nlp_flag & NLP_NODEV_TMO) {
-			nlp->nlp_flag &= ~NLP_NODEV_TMO;
-			spin_unlock_irq(phba->host->host_lock);
-			del_timer_sync(&nlp->nlp_tmofunc);
-			spin_lock_irq(phba->host->host_lock);
-			if (!list_empty(&nlp->nodev_timeout_evt.evt_listp))
-				list_del_init(&nlp->nodev_timeout_evt.
-						evt_listp);
-
-		}
 		nlp->nlp_flag &= ~NLP_NODEV_REMOVE;
 		nlp->nlp_type |= NLP_FC_NODE;
 		break;
@@ -1254,17 +1274,6 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list)
 		list_add_tail(&nlp->nlp_listp, &phba->fc_nlpmap_list);
 		phba->fc_map_cnt++;
 		phba->nport_event_cnt++;
-		/* stop nodev tmo if running */
-		if (nlp->nlp_flag & NLP_NODEV_TMO) {
-			nlp->nlp_flag &= ~NLP_NODEV_TMO;
-			spin_unlock_irq(phba->host->host_lock);
-			del_timer_sync(&nlp->nlp_tmofunc);
-			spin_lock_irq(phba->host->host_lock);
-			if (!list_empty(&nlp->nodev_timeout_evt.evt_listp))
-				list_del_init(&nlp->nodev_timeout_evt.
-						evt_listp);
-
-		}
 		nlp->nlp_flag &= ~NLP_NODEV_REMOVE;
 		break;
 	case NLP_NPR_LIST:
@@ -1273,11 +1282,6 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list)
 		list_add_tail(&nlp->nlp_listp, &phba->fc_npr_list);
 		phba->fc_npr_cnt++;
 
-		if (!(nlp->nlp_flag & NLP_NODEV_TMO))
-			mod_timer(&nlp->nlp_tmofunc,
-		 			jiffies + HZ * phba->cfg_nodev_tmo);
-
-		nlp->nlp_flag |= NLP_NODEV_TMO;
 		nlp->nlp_flag &= ~NLP_RCV_PLOGI;
 		break;
 	case NLP_JUST_DQ:
@@ -1307,7 +1311,8 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list)
 			 * already. If we have, and it's a scsi entity, be
 			 * sure to unblock any attached scsi devices
 			 */
-			if (!nlp->rport)
+			if ((!nlp->rport) || (nlp->rport->port_state ==
+					FC_PORTSTATE_BLOCKED))
 				lpfc_register_remote_port(phba, nlp);
 
 			/*
@@ -1581,15 +1586,12 @@ lpfc_freenode(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
 
 	lpfc_els_abort(phba,ndlp,0);
 	spin_lock_irq(phba->host->host_lock);
-	ndlp->nlp_flag &= ~(NLP_NODEV_TMO|NLP_DELAY_TMO);
+	ndlp->nlp_flag &= ~NLP_DELAY_TMO;
 	spin_unlock_irq(phba->host->host_lock);
-	del_timer_sync(&ndlp->nlp_tmofunc);
 
 	ndlp->nlp_last_elscmd = 0;
 	del_timer_sync(&ndlp->nlp_delayfunc);
 
-	if (!list_empty(&ndlp->nodev_timeout_evt.evt_listp))
-		list_del_init(&ndlp->nodev_timeout_evt.evt_listp);
 	if (!list_empty(&ndlp->els_retry_evt.evt_listp))
 		list_del_init(&ndlp->els_retry_evt.evt_listp);
 
@@ -1606,16 +1608,6 @@ lpfc_freenode(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
 int
 lpfc_nlp_remove(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
 {
-	if (ndlp->nlp_flag & NLP_NODEV_TMO) {
-		spin_lock_irq(phba->host->host_lock);
-		ndlp->nlp_flag &= ~NLP_NODEV_TMO;
-		spin_unlock_irq(phba->host->host_lock);
-		del_timer_sync(&ndlp->nlp_tmofunc);
-		if (!list_empty(&ndlp->nodev_timeout_evt.evt_listp))
-			list_del_init(&ndlp->nodev_timeout_evt.evt_listp);
-
-	}
-
 
 	if (ndlp->nlp_flag & NLP_DELAY_TMO) {
 		lpfc_cancel_retry_delay_tmo(phba, ndlp);
@@ -2430,34 +2422,6 @@ lpfc_disc_timeout_handler(struct lpfc_hba *phba)
 	return;
 }
 
-static void
-lpfc_nodev_timeout(unsigned long ptr)
-{
-	struct lpfc_hba *phba;
-	struct lpfc_nodelist *ndlp;
-	unsigned long iflag;
-	struct lpfc_work_evt  *evtp;
-
-	ndlp = (struct lpfc_nodelist *)ptr;
-	phba = ndlp->nlp_phba;
-	evtp = &ndlp->nodev_timeout_evt;
-	spin_lock_irqsave(phba->host->host_lock, iflag);
-
-	if (!list_empty(&evtp->evt_listp)) {
-		spin_unlock_irqrestore(phba->host->host_lock, iflag);
-		return;
-	}
-	evtp->evt_arg1  = ndlp;
-	evtp->evt       = LPFC_EVT_NODEV_TMO;
-	list_add_tail(&evtp->evt_listp, &phba->work_list);
-	if (phba->work_wait)
-		wake_up(phba->work_wait);
-
-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
-	return;
-}
-
-
 /*
  * This routine handles processing a NameServer REG_LOGIN mailbox
  * command upon completion. It is setup in the LPFC_MBOXQ
@@ -2581,11 +2545,7 @@ lpfc_nlp_init(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
 		 uint32_t did)
 {
 	memset(ndlp, 0, sizeof (struct lpfc_nodelist));
-	INIT_LIST_HEAD(&ndlp->nodev_timeout_evt.evt_listp);
 	INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
-	init_timer(&ndlp->nlp_tmofunc);
-	ndlp->nlp_tmofunc.function = lpfc_nodev_timeout;
-	ndlp->nlp_tmofunc.data = (unsigned long)ndlp;
 	init_timer(&ndlp->nlp_delayfunc);
 	ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
 	ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 20449a8dd53da..d5f415007db29 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1813,7 +1813,7 @@ lpfc_device_recov_npr_node(struct lpfc_hba * phba,
  */
 /*
  * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped
- * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers
+ * lists will receive a DEVICE_RECOVERY event. If the linkdown or devloss timers
  * expire, all effected nodes will receive a DEVICE_RM event.
  */
 /*
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index a8816a8738f88..97ae98dc95d04 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -935,7 +935,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 			schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ);
 		spin_lock_irq(phba->host->host_lock);
 		if (++loop_count
-		    > (2 * phba->cfg_nodev_tmo)/LPFC_ABORT_WAIT)
+		    > (2 * phba->cfg_devloss_tmo)/LPFC_ABORT_WAIT)
 			break;
 	}
 
@@ -978,7 +978,7 @@ lpfc_reset_lun_handler(struct scsi_cmnd *cmnd)
 	spin_lock_irq(shost->host_lock);
 	/*
 	 * If target is not in a MAPPED state, delay the reset until
-	 * target is rediscovered or nodev timeout expires.
+	 * target is rediscovered or devloss timeout expires.
 	 */
 	while ( 1 ) {
 		if (!pnode)
@@ -1050,7 +1050,7 @@ lpfc_reset_lun_handler(struct scsi_cmnd *cmnd)
 		spin_lock_irq(phba->host->host_lock);
 
 		if (++loopcnt
-		    > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT)
+		    > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
 			break;
 
 		cnt = lpfc_sli_sum_iocb(phba,
@@ -1151,7 +1151,7 @@ lpfc_reset_bus_handler(struct scsi_cmnd *cmnd)
 		spin_lock_irq(phba->host->host_lock);
 
 		if (++loopcnt
-		    > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT)
+		    > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
 			break;
 
 		cnt = lpfc_sli_sum_iocb(phba,
@@ -1249,7 +1249,7 @@ lpfc_slave_configure(struct scsi_device *sdev)
 	 * target pointer is stored in the starget_data for the
 	 * driver's sysfs entry point functions.
 	 */
-	rport->dev_loss_tmo = phba->cfg_nodev_tmo + 5;
+	rport->dev_loss_tmo = phba->cfg_devloss_tmo;
 
 	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
 		lpfc_sli_poll_fcp_ring(phba);
-- 
GitLab


From c3f28afa61343e3e010e3014aa0d6eba271c1558 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 18 Aug 2006 17:47:18 -0400
Subject: [PATCH 0158/1063] [SCSI] lpfc 8.1.10 : Add support for new lpfc
 soft_wwpn attribute

Add support for a new lpfc soft_wwpn sysfs attribute

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/lpfc/lpfc.h         |   3 +
 drivers/scsi/lpfc/lpfc_attr.c    | 117 +++++++++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_hbadisc.c |   2 +
 drivers/scsi/lpfc/lpfc_init.c    |   2 +
 4 files changed, 124 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index efec44d267c7c..3f7f5f8abd751 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -303,6 +303,7 @@ struct lpfc_hba {
 	uint32_t cfg_poll_tmo;
 	uint32_t cfg_sg_seg_cnt;
 	uint32_t cfg_sg_dma_buf_size;
+	uint64_t cfg_soft_wwpn;
 
 	uint32_t dev_loss_tmo_changed;
 
@@ -354,6 +355,8 @@ struct lpfc_hba {
 #define VPD_PORT            0x8         /* valid vpd port data */
 #define VPD_MASK            0xf         /* mask for any vpd data */
 
+	uint8_t soft_wwpn_enable;
+
 	struct timer_list fcp_poll_timer;
 	struct timer_list els_tmofunc;
 
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 0de69324212e7..9496e87c135ea 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -551,6 +551,119 @@ static CLASS_DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR,
 			 lpfc_board_mode_show, lpfc_board_mode_store);
 static CLASS_DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset);
 
+
+static char *lpfc_soft_wwpn_key = "C99G71SL8032A";
+
+static ssize_t
+lpfc_soft_wwpn_enable_store(struct class_device *cdev, const char *buf,
+				size_t count)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+	unsigned int cnt = count;
+
+	/*
+	 * We're doing a simple sanity check for soft_wwpn setting.
+	 * We require that the user write a specific key to enable
+	 * the soft_wwpn attribute to be settable. Once the attribute
+	 * is written, the enable key resets. If further updates are
+	 * desired, the key must be written again to re-enable the
+	 * attribute.
+	 *
+	 * The "key" is not secret - it is a hardcoded string shown
+	 * here. The intent is to protect against the random user or
+	 * application that is just writing attributes.
+	 */
+
+	/* count may include a LF at end of string */
+	if (buf[cnt-1] == '\n')
+		cnt--;
+
+	if ((cnt != strlen(lpfc_soft_wwpn_key)) ||
+	    (strncmp(buf, lpfc_soft_wwpn_key, strlen(lpfc_soft_wwpn_key)) != 0))
+		return -EINVAL;
+
+	phba->soft_wwpn_enable = 1;
+	return count;
+}
+static CLASS_DEVICE_ATTR(lpfc_soft_wwpn_enable, S_IWUSR, NULL,
+				lpfc_soft_wwpn_enable_store);
+
+static ssize_t
+lpfc_soft_wwpn_show(struct class_device *cdev, char *buf)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+	return snprintf(buf, PAGE_SIZE, "0x%llx\n", phba->cfg_soft_wwpn);
+}
+
+
+static ssize_t
+lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+	struct completion online_compl;
+	int stat1=0, stat2=0;
+	unsigned int i, j, cnt=count;
+	u8 wwpn[8];
+
+	/* count may include a LF at end of string */
+	if (buf[cnt-1] == '\n')
+		cnt--;
+
+	if (!phba->soft_wwpn_enable || (cnt < 16) || (cnt > 18) ||
+	    ((cnt == 17) && (*buf++ != 'x')) ||
+	    ((cnt == 18) && ((*buf++ != '0') || (*buf++ != 'x'))))
+		return -EINVAL;
+
+	phba->soft_wwpn_enable = 0;
+
+	memset(wwpn, 0, sizeof(wwpn));
+
+	/* Validate and store the new name */
+	for (i=0, j=0; i < 16; i++) {
+		if ((*buf >= 'a') && (*buf <= 'f'))
+			j = ((j << 4) | ((*buf++ -'a') + 10));
+		else if ((*buf >= 'A') && (*buf <= 'F'))
+			j = ((j << 4) | ((*buf++ -'A') + 10));
+		else if ((*buf >= '0') && (*buf <= '9'))
+			j = ((j << 4) | (*buf++ -'0'));
+		else
+			return -EINVAL;
+		if (i % 2) {
+			wwpn[i/2] = j & 0xff;
+			j = 0;
+		}
+	}
+	phba->cfg_soft_wwpn = wwn_to_u64(wwpn);
+	fc_host_port_name(host) = phba->cfg_soft_wwpn;
+
+	dev_printk(KERN_NOTICE, &phba->pcidev->dev,
+		   "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no);
+
+	init_completion(&online_compl);
+	lpfc_workq_post_event(phba, &stat1, &online_compl, LPFC_EVT_OFFLINE);
+	wait_for_completion(&online_compl);
+	if (stat1)
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0463 lpfc_soft_wwpn attribute set failed to reinit "
+			"adapter - %d\n", phba->brd_no, stat1);
+
+	init_completion(&online_compl);
+	lpfc_workq_post_event(phba, &stat2, &online_compl, LPFC_EVT_ONLINE);
+	wait_for_completion(&online_compl);
+	if (stat2)
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0464 lpfc_soft_wwpn attribute set failed to reinit "
+			"adapter - %d\n", phba->brd_no, stat2);
+
+	return (stat1 || stat2) ? -EIO : count;
+}
+static CLASS_DEVICE_ATTR(lpfc_soft_wwpn, S_IRUGO | S_IWUSR,\
+			 lpfc_soft_wwpn_show, lpfc_soft_wwpn_store);
+
+
 static int lpfc_poll = 0;
 module_param(lpfc_poll, int, 0);
 MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
@@ -832,6 +945,7 @@ LPFC_ATTR_R(max_luns, 255, 0, 65535,
 LPFC_ATTR_RW(poll_tmo, 10, 1, 255,
 	     "Milliseconds driver will wait between polling FCP ring");
 
+
 struct class_device_attribute *lpfc_host_attrs[] = {
 	&class_device_attr_info,
 	&class_device_attr_serialnum,
@@ -867,6 +981,8 @@ struct class_device_attribute *lpfc_host_attrs[] = {
 	&class_device_attr_issue_reset,
 	&class_device_attr_lpfc_poll,
 	&class_device_attr_lpfc_poll_tmo,
+	&class_device_attr_lpfc_soft_wwpn,
+	&class_device_attr_lpfc_soft_wwpn_enable,
 	NULL,
 };
 
@@ -1668,6 +1784,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo);
 	lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo);
 	phba->cfg_poll = lpfc_poll;
+	phba->cfg_soft_wwpn = 0L;
 
 	/*
 	 * The total number of segments is the configuration value plus 2
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 97973af980a07..d586c3d3b0d0d 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -672,6 +672,8 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 
 	memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt,
 	       sizeof (struct serv_parm));
+	if (phba->cfg_soft_wwpn)
+		u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
 	memcpy((uint8_t *) & phba->fc_nodename,
 	       (uint8_t *) & phba->fc_sparam.nodeName,
 	       sizeof (struct lpfc_name));
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 84e7fc595f5e3..4cdf3464267fd 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -268,6 +268,8 @@ lpfc_config_port_post(struct lpfc_hba * phba)
 	kfree(mp);
 	pmb->context1 = NULL;
 
+	if (phba->cfg_soft_wwpn)
+		u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
 	memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName,
 	       sizeof (struct lpfc_name));
 	memcpy(&phba->fc_portname, &phba->fc_sparam.portName,
-- 
GitLab


From 26dacd0c9b2dc1dc987c376aeee4e80691a7dd0b Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 18 Aug 2006 17:47:24 -0400
Subject: [PATCH 0159/1063] [SCSI] lpfc 8.1.10 : Change version number to
 8.1.10

Change version number to 8.1.10

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index c7091ea29f3f2..ac417908b4071 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.1.9"
+#define LPFC_DRIVER_VERSION "8.1.10"
 
 #define LPFC_DRIVER_NAME "lpfc"
 
-- 
GitLab


From fa6c220a7f01257b1c2c4203d48aaaedb0c4416f Mon Sep 17 00:00:00 2001
From: Aubrey Lee <aubreylee@gmail.com>
Date: Tue, 5 Sep 2006 05:55:07 -0500
Subject: [PATCH 0160/1063] [PATCH] [MTD] DEVICES: Fill more device IDs in the
 structure of m25p80

The flash_info structure has a bunch of missing fields, which causes problems
when actually trying to use some ST parts, as they get detected incorrectly.

Signed-off-by: Aubrey L1 <aubreylee@gmail.com>
Signed-off-by: Josh Boyer <jwboyer@gmail.com>
---
 drivers/mtd/devices/m25p80.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index a8466141e914d..ef4a731ca5c2d 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -406,13 +406,13 @@ struct flash_info {
 
 static struct flash_info __devinitdata m25p_data [] = {
 	/* REVISIT: fill in JEDEC ids, for parts that have them */
-	{ "m25p05", 0x05, 0x0000, 32 * 1024, 2 },
-	{ "m25p10", 0x10, 0x0000, 32 * 1024, 4 },
-	{ "m25p20", 0x11, 0x0000, 64 * 1024, 4 },
-	{ "m25p40", 0x12, 0x0000, 64 * 1024, 8 },
+	{ "m25p05", 0x05, 0x2010, 32 * 1024, 2 },
+	{ "m25p10", 0x10, 0x2011, 32 * 1024, 4 },
+	{ "m25p20", 0x11, 0x2012, 64 * 1024, 4 },
+	{ "m25p40", 0x12, 0x2013, 64 * 1024, 8 },
 	{ "m25p80", 0x13, 0x0000, 64 * 1024, 16 },
-	{ "m25p16", 0x14, 0x0000, 64 * 1024, 32 },
-	{ "m25p32", 0x15, 0x0000, 64 * 1024, 64 },
+	{ "m25p16", 0x14, 0x2015, 64 * 1024, 32 },
+	{ "m25p32", 0x15, 0x2016, 64 * 1024, 64 },
 	{ "m25p64", 0x16, 0x2017, 64 * 1024, 128 },
 };
 
-- 
GitLab


From 3906f4edeef976c081c4e7bd92164d2f59c325ae Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Tue, 5 Sep 2006 17:15:47 -0400
Subject: [PATCH 0161/1063] [CPUFREQ] Fix sparse warning in ondemand

drivers/cpufreq/cpufreq_ondemand.c:323:2: warning: Using plain integer as NULL pointer

Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq_ondemand.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 5ca2fd5d1ed1c..bf8aa45d4f019 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -449,7 +449,7 @@ static inline void dbs_timer_init(unsigned int cpu)
 	delay -= jiffies % delay;
 
 	ondemand_powersave_bias_init();
-	INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
+	INIT_WORK(&dbs_info->work, do_dbs_timer, NULL);
 	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
-- 
GitLab


From 8eb7925f93af75e66a240d148efdec212f95bcb7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 20 Aug 2006 18:48:13 +0400
Subject: [PATCH 0162/1063] [AGPGART] agp.h: constify struct
 agp_bridge_data::version

drivers/char/agp/backend.c: In function `agp_backend_initialize':
drivers/char/agp/backend.c:141: warning: assignment discards qualifiers from pointer target type

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/agp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 3c623b67ea1cd..8b3317fd46c9a 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -117,7 +117,7 @@ struct agp_bridge_driver {
 };
 
 struct agp_bridge_data {
-	struct agp_version *version;
+	const struct agp_version *version;
 	struct agp_bridge_driver *driver;
 	struct vm_operations_struct *vm_ops;
 	void *previous_size;
-- 
GitLab


From db44aaf3a2f599163c53ce96658aca688b3466f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Bilski?= <rafalbilski@interia.pl>
Date: Wed, 16 Aug 2006 01:07:33 +0200
Subject: [PATCH 0163/1063] [CPUFREQ] Longhaul - Add voltage scaling to driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the option "dont_scale_voltage" to "scale_voltage", because not
scaling the voltage will be the default.
Use "pos" for calculating the voltage. This way the driver doesn't need
to know the mV value or the low-level value. Simply, min U is one pos and
max U is a second pos. All pos values between these two are used.
Assume that min U is for min f and max U is for max f. For a frequency
between min and max, calculate pos based on the difference between the
current frequency and min f.
Values in the mobile VRM table were changed to the values from the
C3-M datasheet.

Signed-off-by: Rafał Bilski <rafalbilski@interia.pl>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/i386/kernel/cpu/cpufreq/longhaul.c | 97 ++++++++++++++-----------
 arch/i386/kernel/cpu/cpufreq/longhaul.h | 48 +++++++++---
 2 files changed, 94 insertions(+), 51 deletions(-)

diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 43bbf948d45d5..f5cc9f5c9bab3 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -53,19 +53,26 @@
 #define	CPU_NEHEMIAH	5
 
 static int cpu_model;
-static unsigned int numscales=16, numvscales;
+static unsigned int numscales=16;
 static unsigned int fsb;
-static int minvid, maxvid;
+
+static struct mV_pos *vrm_mV_table;
+static unsigned char *mV_vrm_table;
+struct f_msr {
+	unsigned char vrm;
+};
+static struct f_msr f_msr_table[32];
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
 static unsigned int minmult, maxmult;
 static int can_scale_voltage;
-static int vrmrev;
 static struct acpi_processor *pr = NULL;
 static struct acpi_processor_cx *cx = NULL;
-static int port22_en = 0;
+static int port22_en;
 
 /* Module parameters */
-static int dont_scale_voltage;
-static int ignore_latency = 0;
+static int scale_voltage;
+static int ignore_latency;
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
@@ -73,7 +80,6 @@ static int ignore_latency = 0;
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
-static int voltage_table[32];
 static unsigned int highest_speed, lowest_speed; /* kHz */
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
@@ -163,6 +169,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
 	longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
 	longhaul.bits.EnableSoftBusRatio = 1;
 
+	if (can_scale_voltage) {
+		longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
+		longhaul.bits.EnableSoftVID = 1;
+	}
+
 	/* Sync to timer tick */
 	safe_halt();
 	/* Change frequency on next halt or sleep */
@@ -454,53 +465,57 @@ static int __init longhaul_get_ranges(void)
 static void __init longhaul_setup_voltagescaling(void)
 {
 	union msr_longhaul longhaul;
+	struct mV_pos minvid, maxvid;
+	unsigned int j, speed, pos, kHz_step, numvscales;
 
-	rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-	if (!(longhaul.bits.RevisionID & 1))
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	if (!(longhaul.bits.RevisionID & 1)) {
+		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
 		return;
+	}
+
+	if (!longhaul.bits.VRMRev) {
+		printk (KERN_INFO PFX "VRM 8.5\n");
+		vrm_mV_table = &vrm85_mV[0];
+		mV_vrm_table = &mV_vrm85[0];
+	} else {
+		printk (KERN_INFO PFX "Mobile VRM\n");
+		vrm_mV_table = &mobilevrm_mV[0];
+		mV_vrm_table = &mV_mobilevrm[0];
+	}
 
-	minvid = longhaul.bits.MinimumVID;
-	maxvid = longhaul.bits.MaximumVID;
-	vrmrev = longhaul.bits.VRMRev;
+	minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+	numvscales = maxvid.pos - minvid.pos + 1;
+	kHz_step = (highest_speed - lowest_speed) / numvscales;
 
-	if (minvid == 0 || maxvid == 0) {
+	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
 		printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
 					"Voltage scaling disabled.\n",
-					minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
+					minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
-	if (minvid == maxvid) {
+	if (minvid.mV == maxvid.mV) {
 		printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
 				"both %d.%03d. Voltage scaling disabled\n",
-				maxvid/1000, maxvid%1000);
+				maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
-	if (vrmrev==0) {
-		dprintk ("VRM 8.5\n");
-		memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
-		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
-	} else {
-		dprintk ("Mobile VRM\n");
-		memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
-		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
+	printk(KERN_INFO PFX "Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
+		maxvid.mV/1000, maxvid.mV%1000,
+		minvid.mV/1000, minvid.mV%1000,
+		numvscales);
+	
+	j = 0;
+	while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+		speed = longhaul_table[j].frequency;
+		pos = (speed - lowest_speed) / kHz_step + minvid.pos;
+		f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
+		j++;
 	}
 
-	/* Current voltage isn't readable at first, so we need to
-	   set it to a known value. The spec says to use maxvid */
-	longhaul.bits.RevisionKey = longhaul.bits.RevisionID;	/* FIXME: This is bad. */
-	longhaul.bits.EnableSoftVID = 1;
-	longhaul.bits.SoftVID = maxvid;
-	wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-	minvid = voltage_table[minvid];
-	maxvid = voltage_table[maxvid];
-
-	dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
-		maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
-
 	can_scale_voltage = 1;
 }
 
@@ -685,7 +700,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 		return ret;
 
 	if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
-		 (dont_scale_voltage==0))
+		 (scale_voltage != 0))
 		longhaul_setup_voltagescaling();
 
 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -773,8 +788,8 @@ static void __exit longhaul_exit(void)
 	kfree(longhaul_table);
 }
 
-module_param (dont_scale_voltage, int, 0644);
-MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
+module_param (scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 module_param(ignore_latency, int, 0644);
 MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
 
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index d3a95d77ee850..bc4682aad69b5 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -450,17 +450,45 @@ static int __initdata nehemiah_c_eblcr[32] = {
  * Voltage scales. Div/Mod by 1000 to get actual voltage.
  * Which scale to use depends on the VRM type in use.
  */
-static int __initdata vrm85scales[32] = {
-	1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
-	1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
-	1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
-	1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
+
+struct mV_pos {
+	unsigned short mV;
+	unsigned short pos;
+};
+
+static struct mV_pos __initdata vrm85_mV[32] = {
+	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},
+	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},
+	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18},
+	{1450, 16},	{1400, 14},	{1350, 12},	{1300, 10},
+	{1275, 9},	{1225, 7},	{1175, 5},	{1125, 3},
+	{1075, 1},	{1825, 31},	{1775, 29},	{1725, 27},
+	{1675, 25},	{1625, 23},	{1575, 21},	{1525, 19},
+	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}
+};
+
+static unsigned char __initdata mV_vrm85[32] = {
+	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,
+	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,
+	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,
+	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15
+};
+
+static struct mV_pos __initdata mobilevrm_mV[32] = {
+	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},
+	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},
+	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20},
+	{1150, 19},	{1100, 18},	{1050, 17},	{1000, 16},
+	{975, 15},	{950, 14},	{925, 13},	{900, 12},
+	{875, 11},	{850, 10},	{825, 9},	{800, 8},
+	{775, 7},	{750, 6},	{725, 5},	{700, 4},
+	{675, 3},	{650, 2},	{625, 1},	{600, 0}
 };
 
-static int __initdata mobilevrmscales[32] = {
-	2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
-	1600, 1550, 1500, 1450, 1500, 1350, 1300, -1,
-	1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
-	1075, 1050, 1025, 1000, 975, 950, 925, -1,
+static unsigned char __initdata mV_mobilevrm[32] = {
+	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,
+	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,
+	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08,
+	0x07,	0x06,	0x05,	0x04,	0x03,	0x02,	0x01,	0x00
 };
 
-- 
GitLab


From 8adcc0c674004c0f9467031a93dc639c2b01411f Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Fri, 1 Sep 2006 14:02:24 -0700
Subject: [PATCH 0164/1063] [CPUFREQ] Workaround for BIOS bug in software
 coordination of frequency

Some buggy BIOSes do a "software any" kind of coordination without telling
the OS about it. So, when the OS sets the frequency on one CPU on these
platforms, it also impacts all the other logical CPUs that are in the same
power domain. The attached patch is a workaround for those buggy BIOSes.
The patch should be a no-op on normal, non-buggy platforms.

Applies over previously sent acpi-cpufreq and software coordination
bug fix patch

Signed-off-by: Denis Sadykov <denis.m.sadykov@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c   | 39 ++++++++++++++++-
 .../kernel/cpu/cpufreq/speedstep-centrino.c   | 42 ++++++++++++++++++-
 2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index e6ea00edcb544..ea19d091fd41e 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/compiler.h>
 #include <linux/sched.h>	/* current */
+#include <linux/dmi.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/uaccess.h>
@@ -387,6 +388,33 @@ static int acpi_cpufreq_early_init_acpi(void)
 	return acpi_processor_preregister_performance(acpi_perf_data);
 }
 
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+
 static int
 acpi_cpufreq_cpu_init (
 	struct cpufreq_policy   *policy)
@@ -422,8 +450,17 @@ acpi_cpufreq_cpu_init (
 	 * coordination is required.
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
 		policy->cpus = perf->shared_cpu_map;
+	}
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		policy->cpus = cpu_core_map[cpu];
+	}
+#endif
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index b77f1358bd79e..dba6bb28d2981 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -23,6 +23,7 @@
 
 #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <acpi/processor.h>
 #endif
 
@@ -377,6 +378,35 @@ static int centrino_cpu_early_init_acpi(void)
 	return 0;
 }
 
+
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+
+static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+
+
 /*
  * centrino_cpu_init_acpi - register with ACPI P-States library
  *
@@ -398,14 +428,24 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
 		dprintk(PFX "obtaining ACPI data failed\n");
 		return -EIO;
 	}
+
 	policy->shared_type = p->shared_type;
 	/*
 	 * Will let policy->cpus know about dependency only when software 
 	 * coordination is required.
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
 		policy->cpus = p->shared_cpu_map;
+	}
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		policy->cpus = cpu_core_map[cpu];
+	}
+#endif
 
 	/* verify the acpi_data */
 	if (p->state_count <= 1) {
-- 
GitLab


From 65c25aadfa4e917060e99fe459f33a6a07db53cc Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 6 Sep 2006 11:57:18 -0400
Subject: [PATCH 0165/1063] [AGPGART] Intel 965 Express support.

From: Alan Hourihane <alanh@tungstengraphics.com>
From: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/intel-agp.c | 163 ++++++++++++++++++++++++++++++++---
 1 file changed, 152 insertions(+), 11 deletions(-)

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 42a1cb871992a..a425f27af9ea2 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -2,14 +2,6 @@
  * Intel AGPGART routines.
  */
 
-/*
- * Intel(R) 855GM/852GM and 865G support added by David Dawes
- * <dawes@tungstengraphics.com>.
- *
- * Intel(R) 915G/915GM support added by Alan Hourihane
- * <alanh@tungstengraphics.com>.
- */
-
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/init.h>
@@ -17,6 +9,21 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
+#define PCI_DEVICE_ID_INTEL_82946GZ_HB      0x2970
+#define PCI_DEVICE_ID_INTEL_82946GZ_IG      0x2972
+#define PCI_DEVICE_ID_INTEL_82965G_1_HB     0x2980
+#define PCI_DEVICE_ID_INTEL_82965G_1_IG     0x2982
+#define PCI_DEVICE_ID_INTEL_82965Q_HB       0x2990
+#define PCI_DEVICE_ID_INTEL_82965Q_IG       0x2992
+#define PCI_DEVICE_ID_INTEL_82965G_HB       0x29A0
+#define PCI_DEVICE_ID_INTEL_82965G_IG       0x29A2
+
+#define IS_I965 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82946GZ_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_1_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965Q_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB)
+
+
 /* Intel 815 register */
 #define INTEL_815_APCONT	0x51
 #define INTEL_815_ATTBASE_MASK	~0x1FFFFFFF
@@ -40,6 +47,8 @@
 #define I915_GMCH_GMS_STOLEN_48M	(0x6 << 4)
 #define I915_GMCH_GMS_STOLEN_64M	(0x7 << 4)
 
+/* Intel 965G registers */
+#define I965_MSAC 0x62
 
 /* Intel 7505 registers */
 #define INTEL_I7505_APSIZE	0x74
@@ -354,6 +363,7 @@ static struct aper_size_info_fixed intel_i830_sizes[] =
 	/* The 64M mode still requires a 128k gatt */
 	{64, 16384, 5},
 	{256, 65536, 6},
+	{512, 131072, 7},
 };
 
 static struct _intel_i830_private {
@@ -377,7 +387,11 @@ static void intel_i830_init_gtt_entries(void)
 	/* We obtain the size of the GTT, which is also stored (for some
 	 * reason) at the top of stolen memory. Then we add 4KB to that
 	 * for the video BIOS popup, which is also stored in there. */
-	size = agp_bridge->driver->fetch_size() + 4;
+
+	if (IS_I965)
+		size = 512 + 4;
+	else
+		size = agp_bridge->driver->fetch_size() + 4;
 
 	if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82830_HB ||
 	    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) {
@@ -423,7 +437,7 @@ static void intel_i830_init_gtt_entries(void)
 			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965 )
 				gtt_entries = MB(48) - KB(size);
 			else
 				gtt_entries = 0;
@@ -433,7 +447,7 @@ static void intel_i830_init_gtt_entries(void)
 			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965)
 				gtt_entries = MB(64) - KB(size);
 			else
 				gtt_entries = 0;
@@ -791,6 +805,77 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge)
 
 	return 0;
 }
+static int intel_i965_fetch_size(void)
+{
+       struct aper_size_info_fixed *values;
+       u32 offset = 0;
+       u8 temp;
+
+#define I965_512MB_ADDRESS_MASK (3<<1)
+
+       values = A_SIZE_FIX(agp_bridge->driver->aperture_sizes);
+
+       pci_read_config_byte(intel_i830_private.i830_dev, I965_MSAC, &temp);
+       temp &= I965_512MB_ADDRESS_MASK;
+       switch (temp) {
+       case 0x00:
+               offset = 0; /* 128MB */
+               break;
+       case 0x06:
+               offset = 3; /* 512MB */
+               break;
+       default:
+       case 0x02:
+               offset = 2; /* 256MB */
+               break;
+       }
+
+       agp_bridge->previous_size = agp_bridge->current_size = (void *)(values + offset);
+
+       return values[offset].size;
+}
+
+/* The intel i965 automatically initializes the agp aperture during POST.
++ * Use the memory already set aside for in the GTT.
++ */
+static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge)
+{
+       int page_order;
+       struct aper_size_info_fixed *size;
+       int num_entries;
+       u32 temp;
+
+       size = agp_bridge->current_size;
+       page_order = size->page_order;
+       num_entries = size->num_entries;
+       agp_bridge->gatt_table_real = NULL;
+
+       pci_read_config_dword(intel_i830_private.i830_dev, I915_MMADDR, &temp);
+
+       temp &= 0xfff00000;
+       intel_i830_private.gtt = ioremap((temp + (512 * 1024)) , 512 * 1024);
+
+       if (!intel_i830_private.gtt)
+               return -ENOMEM;
+
+
+       intel_i830_private.registers = ioremap(temp,128 * 4096);
+       if (!intel_i830_private.registers)
+               return -ENOMEM;
+
+       temp = readl(intel_i830_private.registers+I810_PGETBL_CTL) & 0xfffff000;
+       global_cache_flush();   /* FIXME: ? */
+
+       /* we have to call this as early as possible after the MMIO base address is known */
+       intel_i830_init_gtt_entries();
+
+       agp_bridge->gatt_table = NULL;
+
+       agp_bridge->gatt_bus_addr = temp;
+
+       return 0;
+}
+
 
 static int intel_fetch_size(void)
 {
@@ -1489,6 +1574,29 @@ static struct agp_bridge_driver intel_915_driver = {
 	.agp_destroy_page	= agp_generic_destroy_page,
 };
 
+static struct agp_bridge_driver intel_i965_driver = {
+       .owner                  = THIS_MODULE,
+       .aperture_sizes         = intel_i830_sizes,
+       .size_type              = FIXED_APER_SIZE,
+       .num_aperture_sizes     = 4,
+       .needs_scratch_page     = TRUE,
+       .configure              = intel_i915_configure,
+       .fetch_size             = intel_i965_fetch_size,
+       .cleanup                = intel_i915_cleanup,
+       .tlb_flush              = intel_i810_tlbflush,
+       .mask_memory            = intel_i810_mask_memory,
+       .masks                  = intel_i810_masks,
+       .agp_enable             = intel_i810_agp_enable,
+       .cache_flush            = global_cache_flush,
+       .create_gatt_table      = intel_i965_create_gatt_table,
+       .free_gatt_table        = intel_i830_free_gatt_table,
+       .insert_memory          = intel_i915_insert_entries,
+       .remove_memory          = intel_i915_remove_entries,
+       .alloc_by_type          = intel_i830_alloc_by_type,
+       .free_by_type           = intel_i810_free_by_type,
+       .agp_alloc_page         = agp_generic_alloc_page,
+       .agp_destroy_page       = agp_generic_destroy_page,
+};
 
 static struct agp_bridge_driver intel_7505_driver = {
 	.owner			= THIS_MODULE,
@@ -1684,6 +1792,35 @@ static int __devinit agp_intel_probe(struct pci_dev *pdev,
 			bridge->driver = &intel_845_driver;
 		name = "945GM";
 		break;
+	case PCI_DEVICE_ID_INTEL_82946GZ_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82946GZ_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "946GZ";
+		break;
+	case PCI_DEVICE_ID_INTEL_82965G_1_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82965G_1_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "965G";
+		break;
+	case PCI_DEVICE_ID_INTEL_82965Q_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82965Q_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "965Q";
+		break;
+	case PCI_DEVICE_ID_INTEL_82965G_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82965G_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "965G";
+		break;
+
 	case PCI_DEVICE_ID_INTEL_7505_0:
 		bridge->driver = &intel_7505_driver;
 		name = "E7505";
@@ -1827,6 +1964,10 @@ static struct pci_device_id agp_intel_pci_table[] = {
 	ID(PCI_DEVICE_ID_INTEL_82915GM_HB),
 	ID(PCI_DEVICE_ID_INTEL_82945G_HB),
 	ID(PCI_DEVICE_ID_INTEL_82945GM_HB),
+	ID(PCI_DEVICE_ID_INTEL_82946GZ_HB),
+	ID(PCI_DEVICE_ID_INTEL_82965G_1_HB),
+	ID(PCI_DEVICE_ID_INTEL_82965Q_HB),
+	ID(PCI_DEVICE_ID_INTEL_82965G_HB),
 	{ }
 };
 
-- 
GitLab


From c14635eb4e591c61e419c065df1fdacf9ff90c00 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Wed, 6 Sep 2006 11:59:35 -0400
Subject: [PATCH 0166/1063] [AGPGART] Fix number of aperture sizes in 830 gart
 structs.

Spotted by Eric Anholt.
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/intel-agp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index a425f27af9ea2..42c7d8dec6350 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -1392,7 +1392,7 @@ static struct agp_bridge_driver intel_830_driver = {
 	.owner			= THIS_MODULE,
 	.aperture_sizes		= intel_i830_sizes,
 	.size_type		= FIXED_APER_SIZE,
-	.num_aperture_sizes	= 3,
+	.num_aperture_sizes	= 4,
 	.needs_scratch_page	= TRUE,
 	.configure		= intel_i830_configure,
 	.fetch_size		= intel_i830_fetch_size,
@@ -1554,7 +1554,7 @@ static struct agp_bridge_driver intel_915_driver = {
 	.owner			= THIS_MODULE,
 	.aperture_sizes		= intel_i830_sizes,
 	.size_type		= FIXED_APER_SIZE,
-	.num_aperture_sizes	= 3,
+	.num_aperture_sizes	= 4,
 	.needs_scratch_page	= TRUE,
 	.configure		= intel_i915_configure,
 	.fetch_size		= intel_i915_fetch_size,
-- 
GitLab


From 884d25cc4fda20908fd4ef93dbb41d817984b68b Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Tue, 5 Sep 2006 16:26:41 -0500
Subject: [PATCH 0167/1063] [SCSI] Fix refcount breakage with 'echo "1" > scan'
 when target already present

Spotted by: Dan Aloni <da-xx@monatomic.org>

The problem is that the locking semantics of scsi_alloc_target() are used
inconsistently.  Two callers assume the target comes back with its
reference unincremented and the third assumes it's incremented.  Fix by
always making the reference incremented on return.  Also fix a path in
target alloc that could consistently increment the parent lock.
Finally, document scsi_alloc_target() so its callers know what the
expectations are.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_scan.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 31d05ab0b2fc3..fd9e281c3bfee 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -266,6 +266,18 @@ static struct scsi_target *__scsi_find_target(struct device *parent,
 	return found_starget;
 }
 
+/**
+ * scsi_alloc_target - allocate a new or find an existing target
+ * @parent:	parent of the target (need not be a scsi host)
+ * @channel:	target channel number (zero if no channels)
+ * @id:		target id number
+ *
+ * Return an existing target if one exists, provided it hasn't already
+ * gone into STARGET_DEL state, otherwise allocate a new target.
+ *
+ * The target is returned with an incremented reference, so the caller
+ * is responsible for both reaping and doing a last put
+ */
 static struct scsi_target *scsi_alloc_target(struct device *parent,
 					     int channel, uint id)
 {
@@ -331,14 +343,15 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 			return NULL;
 		}
 	}
+	get_device(dev);
 
 	return starget;
 
  found:
 	found_target->reap_ref++;
 	spin_unlock_irqrestore(shost->host_lock, flags);
-	put_device(parent);
 	if (found_target->state != STARGET_DEL) {
+		put_device(parent);
 		kfree(starget);
 		return found_target;
 	}
@@ -1341,7 +1354,6 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
 	if (!starget)
 		return ERR_PTR(-ENOMEM);
 
-	get_device(&starget->dev);
 	mutex_lock(&shost->scan_mutex);
 	if (scsi_host_scan_allowed(shost))
 		scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata);
@@ -1400,7 +1412,6 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
 	if (!starget)
 		return;
 
-	get_device(&starget->dev);
 	if (lun != SCAN_WILD_CARD) {
 		/*
 		 * Scan for a specific host/chan/id/lun.
@@ -1582,7 +1593,8 @@ struct scsi_device *scsi_get_host_dev(struct Scsi_Host *shost)
 	if (sdev) {
 		sdev->sdev_gendev.parent = get_device(&starget->dev);
 		sdev->borken = 0;
-	}
+	} else
+		scsi_target_reap(starget);
 	put_device(&starget->dev);
  out:
 	mutex_unlock(&shost->scan_mutex);
-- 
GitLab


From f479ab87936563a286b8aa0e39003c40fa31c6da Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 6 Sep 2006 09:00:29 -0500
Subject: [PATCH 0168/1063] [SCSI] fix up non-modular SCSI

The recent change to the way scsi_device_get()/put() work broke the
non-modular build (we do a module_refcount() on a NULL).  Fix this by
checking for non-NULL before checking module_refcount().

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index eedfd059b82b8..c35f5fc0d6680 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -873,10 +873,12 @@ EXPORT_SYMBOL(scsi_device_get);
  */
 void scsi_device_put(struct scsi_device *sdev)
 {
+	struct module *module = sdev->host->hostt->module;
+
 	/* The module refcount will be zero if scsi_device_get()
 	 * was called from a module removal routine */
-	if (likely(module_refcount(sdev->host->hostt->module) != 0))
-		module_put(sdev->host->hostt->module);
+	if (module && module_refcount(module) != 0)
+		module_put(module);
 	put_device(&sdev->sdev_gendev);
 }
 EXPORT_SYMBOL(scsi_device_put);
-- 
GitLab


From b4620233d6a3510564c561a5a2a365a1d8a34b68 Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwindheim.de>
Date: Wed, 6 Sep 2006 10:49:48 +0200
Subject: [PATCH 0169/1063] [SCSI] scsi-driver ultrastore replace Scsi_Cmnd
 with struct scsi_cmnd

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/ultrastor.c | 23 ++++++++++++-----------
 drivers/scsi/ultrastor.h | 12 +++++++-----
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c
index e681681ab7a2c..0372aa9fa1903 100644
--- a/drivers/scsi/ultrastor.c
+++ b/drivers/scsi/ultrastor.c
@@ -196,8 +196,8 @@ struct mscp {
   u32 sense_data PACKED;
   /* The following fields are for software only.  They are included in
      the MSCP structure because they are associated with SCSI requests.  */
-  void (*done)(Scsi_Cmnd *);
-  Scsi_Cmnd *SCint;
+  void (*done) (struct scsi_cmnd *);
+  struct scsi_cmnd *SCint;
   ultrastor_sg_list sglist[ULTRASTOR_24F_MAX_SG]; /* use larger size for 24F */
 };
 
@@ -289,7 +289,7 @@ static const unsigned short ultrastor_ports_14f[] = {
 
 static void ultrastor_interrupt(int, void *, struct pt_regs *);
 static irqreturn_t do_ultrastor_interrupt(int, void *, struct pt_regs *);
-static inline void build_sg_list(struct mscp *, Scsi_Cmnd *SCpnt);
+static inline void build_sg_list(struct mscp *, struct scsi_cmnd *SCpnt);
 
 
 /* Always called with host lock held */
@@ -673,7 +673,7 @@ static const char *ultrastor_info(struct Scsi_Host * shpnt)
     return buf;
 }
 
-static inline void build_sg_list(struct mscp *mscp, Scsi_Cmnd *SCpnt)
+static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt)
 {
 	struct scatterlist *sl;
 	long transfer_length = 0;
@@ -694,7 +694,8 @@ static inline void build_sg_list(struct mscp *mscp, Scsi_Cmnd *SCpnt)
 	mscp->transfer_data_length = transfer_length;
 }
 
-static int ultrastor_queuecommand(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *))
+static int ultrastor_queuecommand(struct scsi_cmnd *SCpnt,
+				void (*done) (struct scsi_cmnd *))
 {
     struct mscp *my_mscp;
 #if ULTRASTOR_MAX_CMDS > 1
@@ -833,7 +834,7 @@ static int ultrastor_queuecommand(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *))
 
  */
 
-static int ultrastor_abort(Scsi_Cmnd *SCpnt)
+static int ultrastor_abort(struct scsi_cmnd *SCpnt)
 {
 #if ULTRASTOR_DEBUG & UD_ABORT
     char out[108];
@@ -843,7 +844,7 @@ static int ultrastor_abort(Scsi_Cmnd *SCpnt)
     unsigned int mscp_index;
     unsigned char old_aborted;
     unsigned long flags;
-    void (*done)(Scsi_Cmnd *);
+    void (*done)(struct scsi_cmnd *);
     struct Scsi_Host *host = SCpnt->device->host;
 
     if(config.slot) 
@@ -960,7 +961,7 @@ static int ultrastor_abort(Scsi_Cmnd *SCpnt)
     return SUCCESS;
 }
 
-static int ultrastor_host_reset(Scsi_Cmnd * SCpnt)
+static int ultrastor_host_reset(struct scsi_cmnd * SCpnt)
 {
     unsigned long flags;
     int i;
@@ -1045,8 +1046,8 @@ static void ultrastor_interrupt(int irq, void *dev_id, struct pt_regs *regs)
     unsigned int mscp_index;
 #endif
     struct mscp *mscp;
-    void (*done)(Scsi_Cmnd *);
-    Scsi_Cmnd *SCtmp;
+    void (*done) (struct scsi_cmnd *);
+    struct scsi_cmnd *SCtmp;
 
 #if ULTRASTOR_MAX_CMDS == 1
     mscp = &config.mscp[0];
@@ -1079,7 +1080,7 @@ static void ultrastor_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	    return;
 	}
 	if (icm_status == 3) {
-	    void (*done)(Scsi_Cmnd *) = mscp->done;
+	    void (*done)(struct scsi_cmnd *) = mscp->done;
 	    if (done) {
 		mscp->done = NULL;
 		mscp->SCint->result = DID_ABORT << 16;
diff --git a/drivers/scsi/ultrastor.h b/drivers/scsi/ultrastor.h
index da759a11deffe..a692905f95f76 100644
--- a/drivers/scsi/ultrastor.h
+++ b/drivers/scsi/ultrastor.h
@@ -14,11 +14,13 @@
 #define _ULTRASTOR_H
 
 static int ultrastor_detect(struct scsi_host_template *);
-static const char *ultrastor_info(struct Scsi_Host * shpnt);
-static int ultrastor_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-static int ultrastor_abort(Scsi_Cmnd *);
-static int ultrastor_host_reset(Scsi_Cmnd *);
-static int ultrastor_biosparam(struct scsi_device *, struct block_device *, sector_t, int *);
+static const char *ultrastor_info(struct Scsi_Host *shpnt);
+static int ultrastor_queuecommand(struct scsi_cmnd *,
+				void (*done)(struct scsi_cmnd *));
+static int ultrastor_abort(struct scsi_cmnd *);
+static int ultrastor_host_reset(struct scsi_cmnd *);
+static int ultrastor_biosparam(struct scsi_device *, struct block_device *,
+				sector_t, int *);
 
 
 #define ULTRASTOR_14F_MAX_SG 16
-- 
GitLab


From 88edf74610bd894b93438f389688bc8b4a2d3414 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 6 Sep 2006 17:36:13 -0500
Subject: [PATCH 0170/1063] [SCSI] SAS: consolidate linkspeed definitions

At the moment we have two separate linkspeed enumerations covering
roughly the same values.  This patch consolidates on a single enum,
sas_linkspeed, in scsi_transport_sas.h and uses it everywhere in the
aic94xx driver.  Eventually I'll get around to removing the duplicated
fields in asd_sas_phy and sas_phy ...

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic94xx/aic94xx_hwi.c  |  2 +-
 drivers/scsi/aic94xx/aic94xx_init.c | 12 ++++++++----
 drivers/scsi/aic94xx/aic94xx_scb.c  | 26 +++++++++++++-------------
 drivers/scsi/libsas/sas_expander.c  | 27 +++++++--------------------
 drivers/scsi/libsas/sas_internal.h  |  2 +-
 include/scsi/libsas.h               | 14 +++++++-------
 include/scsi/sas.h                  | 14 --------------
 include/scsi/scsi_transport_sas.h   | 26 +++++++++++++++++---------
 8 files changed, 54 insertions(+), 69 deletions(-)

diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c
index 075cea85b56b6..a242013511081 100644
--- a/drivers/scsi/aic94xx/aic94xx_hwi.c
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.c
@@ -96,7 +96,7 @@ static int asd_init_phy(struct asd_phy *phy)
 	sas_phy->type = PHY_TYPE_PHYSICAL;
 	sas_phy->role = PHY_ROLE_INITIATOR;
 	sas_phy->oob_mode = OOB_NOT_CONNECTED;
-	sas_phy->linkrate = PHY_LINKRATE_NONE;
+	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
 
 	phy->id_frm_tok = asd_alloc_coherent(asd_ha,
 					     sizeof(*phy->identify_frame),
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 69aa708875302..302b54fddf3cf 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -240,10 +240,14 @@ static int __devinit asd_common_setup(struct asd_ha_struct *asd_ha)
 	/* All phys are enabled, by default. */
 	asd_ha->hw_prof.enabled_phys = 0xFF;
 	for (i = 0; i < ASD_MAX_PHYS; i++) {
-		asd_ha->hw_prof.phy_desc[i].max_sas_lrate = PHY_LINKRATE_3;
-		asd_ha->hw_prof.phy_desc[i].min_sas_lrate = PHY_LINKRATE_1_5;
-		asd_ha->hw_prof.phy_desc[i].max_sata_lrate= PHY_LINKRATE_1_5;
-		asd_ha->hw_prof.phy_desc[i].min_sata_lrate= PHY_LINKRATE_1_5;
+		asd_ha->hw_prof.phy_desc[i].max_sas_lrate =
+			SAS_LINK_RATE_3_0_GBPS;
+		asd_ha->hw_prof.phy_desc[i].min_sas_lrate =
+			SAS_LINK_RATE_1_5_GBPS;
+		asd_ha->hw_prof.phy_desc[i].max_sata_lrate =
+			SAS_LINK_RATE_1_5_GBPS;
+		asd_ha->hw_prof.phy_desc[i].min_sata_lrate =
+			SAS_LINK_RATE_1_5_GBPS;
 	}
 
 	return 0;
diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c
index fc1b7438a9132..ef8ca08b545fd 100644
--- a/drivers/scsi/aic94xx/aic94xx_scb.c
+++ b/drivers/scsi/aic94xx/aic94xx_scb.c
@@ -55,15 +55,15 @@ static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode)
 	switch (oob_mode & 7) {
 	case PHY_SPEED_60:
 		/* FIXME: sas transport class doesn't have this */
-		phy->sas_phy.linkrate = PHY_LINKRATE_6;
+		phy->sas_phy.linkrate = SAS_LINK_RATE_6_0_GBPS;
 		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS;
 		break;
 	case PHY_SPEED_30:
-		phy->sas_phy.linkrate = PHY_LINKRATE_3;
+		phy->sas_phy.linkrate = SAS_LINK_RATE_3_0_GBPS;
 		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
 		break;
 	case PHY_SPEED_15:
-		phy->sas_phy.linkrate = PHY_LINKRATE_1_5;
+		phy->sas_phy.linkrate = SAS_LINK_RATE_1_5_GBPS;
 		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
 		break;
 	}
@@ -540,39 +540,39 @@ static inline void set_speed_mask(u8 *speed_mask, struct asd_phy_desc *pd)
 		| SATA_SPEED_30_DIS | SATA_SPEED_15_DIS;
 
 	switch (pd->max_sas_lrate) {
-	case PHY_LINKRATE_6:
+	case SAS_LINK_RATE_6_0_GBPS:
 		*speed_mask &= ~SAS_SPEED_60_DIS;
 	default:
-	case PHY_LINKRATE_3:
+	case SAS_LINK_RATE_3_0_GBPS:
 		*speed_mask &= ~SAS_SPEED_30_DIS;
-	case PHY_LINKRATE_1_5:
+	case SAS_LINK_RATE_1_5_GBPS:
 		*speed_mask &= ~SAS_SPEED_15_DIS;
 	}
 
 	switch (pd->min_sas_lrate) {
-	case PHY_LINKRATE_6:
+	case SAS_LINK_RATE_6_0_GBPS:
 		*speed_mask |= SAS_SPEED_30_DIS;
-	case PHY_LINKRATE_3:
+	case SAS_LINK_RATE_3_0_GBPS:
 		*speed_mask |= SAS_SPEED_15_DIS;
 	default:
-	case PHY_LINKRATE_1_5:
+	case SAS_LINK_RATE_1_5_GBPS:
 		/* nothing to do */
 		;
 	}
 
 	switch (pd->max_sata_lrate) {
-	case PHY_LINKRATE_3:
+	case SAS_LINK_RATE_3_0_GBPS:
 		*speed_mask &= ~SATA_SPEED_30_DIS;
 	default:
-	case PHY_LINKRATE_1_5:
+	case SAS_LINK_RATE_1_5_GBPS:
 		*speed_mask &= ~SATA_SPEED_15_DIS;
 	}
 
 	switch (pd->min_sata_lrate) {
-	case PHY_LINKRATE_3:
+	case SAS_LINK_RATE_3_0_GBPS:
 		*speed_mask |= SATA_SPEED_15_DIS;
 	default:
-	case PHY_LINKRATE_1_5:
+	case SAS_LINK_RATE_1_5_GBPS:
 		/* nothing to do */
 		;
 	}
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index b653a263f76a7..02e796ee027ee 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -191,20 +191,7 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id,
 	phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
 	phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
 	phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS;
-	switch (phy->linkrate) {
-	case PHY_LINKRATE_1_5:
-		phy->phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
-		break;
-	case PHY_LINKRATE_3:
-		phy->phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
-		break;
-	case PHY_LINKRATE_6:
-		phy->phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS;
-		break;
-	default:
-		phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN;
-		break;
-	}
+	phy->phy->negotiated_linkrate = phy->linkrate;
 
 	if (!rediscover)
 		sas_phy_add(phy->phy);
@@ -450,7 +437,7 @@ static void sas_ex_disable_phy(struct domain_device *dev, int phy_id)
 	struct ex_phy *phy = &ex->ex_phy[phy_id];
 
 	sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE);
-	phy->linkrate = PHY_DISABLED;
+	phy->linkrate = SAS_PHY_DISABLED;
 }
 
 static void sas_ex_disable_port(struct domain_device *dev, u8 *sas_addr)
@@ -743,7 +730,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
 	int res = 0;
 
 	/* Phy state */
-	if (ex_phy->linkrate == PHY_SPINUP_HOLD) {
+	if (ex_phy->linkrate == SAS_SATA_SPINUP_HOLD) {
 		if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET))
 			res = sas_ex_phy_discover(dev, phy_id);
 		if (res)
@@ -773,7 +760,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
 			sas_configure_routing(dev, ex_phy->attached_sas_addr);
 		}
 		return 0;
-	} else if (ex_phy->linkrate == PHY_LINKRATE_UNKNOWN)
+	} else if (ex_phy->linkrate == SAS_LINK_RATE_UNKNOWN)
 		return 0;
 
 	if (ex_phy->attached_dev_type != SAS_END_DEV &&
@@ -922,9 +909,9 @@ static int sas_ex_discover_devices(struct domain_device *dev, int single)
 			continue;
 
 		switch (ex_phy->linkrate) {
-		case PHY_DISABLED:
-		case PHY_RESET_PROBLEM:
-		case PHY_PORT_SELECTOR:
+		case SAS_PHY_DISABLED:
+		case SAS_PHY_RESET_PROBLEM:
+		case SAS_SATA_PORT_SELECTOR:
 			continue;
 		default:
 			res = sas_ex_discover_dev(dev, i);
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 89c3976808466..0d69ede4b9448 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -43,7 +43,7 @@ void sas_scsi_recover_host(struct Scsi_Host *shost);
 
 int sas_show_class(enum sas_class class, char *buf);
 int sas_show_proto(enum sas_proto proto, char *buf);
-int sas_show_linkrate(enum sas_phy_linkrate linkrate, char *buf);
+int sas_show_linkrate(enum sas_linkrate linkrate, char *buf);
 int sas_show_oob_mode(enum sas_oob_mode oob_mode, char *buf);
 
 int  sas_register_phys(struct sas_ha_struct *sas_ha);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 72acdabe7f807..8d91313dd888b 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -114,7 +114,7 @@ struct ex_phy {
 	enum ex_phy_state phy_state;
 
 	enum sas_dev_type attached_dev_type;
-	enum sas_phy_linkrate linkrate;
+	enum sas_linkrate linkrate;
 
 	u8   attached_sata_host:1;
 	u8   attached_sata_dev:1;
@@ -170,9 +170,9 @@ struct sata_device {
 struct domain_device {
         enum sas_dev_type dev_type;
 
-        enum sas_phy_linkrate linkrate;
-        enum sas_phy_linkrate min_linkrate;
-        enum sas_phy_linkrate max_linkrate;
+        enum sas_linkrate linkrate;
+        enum sas_linkrate min_linkrate;
+        enum sas_linkrate max_linkrate;
 
         int  pathways;
 
@@ -220,7 +220,7 @@ struct asd_sas_port {
 	struct domain_device *port_dev;
 	spinlock_t dev_list_lock;
 	struct list_head dev_list;
-	enum   sas_phy_linkrate linkrate;
+	enum   sas_linkrate linkrate;
 
 	struct sas_phy *phy;
 	struct work_struct work;
@@ -276,7 +276,7 @@ struct asd_sas_phy {
 	enum sas_phy_type  type;
 	enum sas_phy_role  role;
 	enum sas_oob_mode  oob_mode;
-	enum sas_phy_linkrate linkrate;
+	enum sas_linkrate linkrate;
 
 	u8   *sas_addr;		  /* must be set */
 	u8   attached_sas_addr[SAS_ADDR_SIZE]; /* class:RO, driver: R/W */
@@ -368,7 +368,7 @@ void sas_hash_addr(u8 *hashed, const u8 *sas_addr);
 static inline void sas_phy_disconnected(struct asd_sas_phy *phy)
 {
 	phy->oob_mode = OOB_NOT_CONNECTED;
-	phy->linkrate = PHY_LINKRATE_NONE;
+	phy->linkrate = SAS_LINK_RATE_UNKNOWN;
 }
 
 /* ---------- Tasks ---------- */
diff --git a/include/scsi/sas.h b/include/scsi/sas.h
index 752853a113dc6..9c8a5b91ae64d 100644
--- a/include/scsi/sas.h
+++ b/include/scsi/sas.h
@@ -102,20 +102,6 @@ enum sas_dev_type {
 	SATA_PM_PORT= 8,
 };
 
-enum sas_phy_linkrate {
-	PHY_LINKRATE_NONE = 0,
-	PHY_LINKRATE_UNKNOWN = 0,
-	PHY_DISABLED,
-	PHY_RESET_PROBLEM,
-	PHY_SPINUP_HOLD,
-	PHY_PORT_SELECTOR,
-	PHY_LINKRATE_1_5 = 0x08,
-	PHY_LINKRATE_G1  = PHY_LINKRATE_1_5,
-	PHY_LINKRATE_3   = 0x09,
-	PHY_LINKRATE_G2  = PHY_LINKRATE_3,
-	PHY_LINKRATE_6   = 0x0A,
-};
-
 /* Partly from IDENTIFY address frame. */
 enum sas_proto {
 	SATA_PROTO    = 1,
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index eeb2200de8555..87de518960c17 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -24,15 +24,23 @@ enum sas_protocol {
 };
 
 enum sas_linkrate {
-	SAS_LINK_RATE_UNKNOWN,
-	SAS_PHY_DISABLED,
-	SAS_LINK_RATE_FAILED,
-	SAS_SATA_SPINUP_HOLD,
-	SAS_SATA_PORT_SELECTOR,
-	SAS_LINK_RATE_1_5_GBPS,
-	SAS_LINK_RATE_3_0_GBPS,
-	SAS_LINK_RATE_6_0_GBPS,
-	SAS_LINK_VIRTUAL,
+	/* These Values are defined in the SAS standard */
+	SAS_LINK_RATE_UNKNOWN = 0,
+	SAS_PHY_DISABLED = 1,
+	SAS_PHY_RESET_PROBLEM = 2,
+	SAS_SATA_SPINUP_HOLD = 3,
+	SAS_SATA_PORT_SELECTOR = 4,
+	SAS_PHY_RESET_IN_PROGRESS = 5,
+	SAS_LINK_RATE_1_5_GBPS = 8,
+	SAS_LINK_RATE_G1 = SAS_LINK_RATE_1_5_GBPS,
+	SAS_LINK_RATE_3_0_GBPS = 9,
+	SAS_LINK_RATE_G2 = SAS_LINK_RATE_3_0_GBPS,
+	SAS_LINK_RATE_6_0_GBPS = 10,
+	/* These are virtual to the transport class and may never
+	 * be signalled normally since the standard defined field
+	 * is only 4 bits */
+	SAS_LINK_RATE_FAILED = 0x10,
+	SAS_PHY_VIRTUAL = 0x11,
 };
 
 struct sas_identify {
-- 
GitLab


From d24e1eeb3a16e4944288c2f3bf082e1513f4b425 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 6 Sep 2006 19:25:22 -0500
Subject: [PATCH 0171/1063] [SCSI] scsi_transport_sas: make minimum and maximum
 linkrate settable quantities

According to SPEC, the minimum_linkrate and maximum_linkrate should be
settable by the user.  This patch introduces a callback that allows the
sas class to pass these settings on to the driver.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_sas.c | 73 ++++++++++++++++++++++++++++---
 include/scsi/scsi_transport_sas.h |  6 +++
 2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index d518c1207fb43..b5b0c2cba96b4 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -77,6 +77,24 @@ get_sas_##title##_names(u32 table_key, char *buf)		\
 	return len;						\
 }
 
+#define sas_bitfield_name_set(title, table)			\
+static ssize_t							\
+set_sas_##title##_names(u32 *table_key, const char *buf)	\
+{								\
+	ssize_t len = 0;					\
+	int i;							\
+								\
+	for (i = 0; i < ARRAY_SIZE(table); i++) {		\
+		len = strlen(table[i].name);			\
+		if (strncmp(buf, table[i].name, len) == 0 &&	\
+		    (buf[len] == '\n' || buf[len] == '\0')) {	\
+			*table_key = table[i].value;		\
+			return 0;				\
+		}						\
+	}							\
+	return -EINVAL;						\
+}
+
 #define sas_bitfield_name_search(title, table)			\
 static ssize_t							\
 get_sas_##title##_names(u32 table_key, char *buf)		\
@@ -131,7 +149,7 @@ static struct {
 	{ SAS_LINK_RATE_6_0_GBPS,	"6.0 Gbit" },
 };
 sas_bitfield_name_search(linkspeed, sas_linkspeed_names)
-
+sas_bitfield_name_set(linkspeed, sas_linkspeed_names)
 
 /*
  * SAS host attributes
@@ -253,10 +271,39 @@ show_sas_phy_##field(struct class_device *cdev, char *buf)		\
 	return get_sas_linkspeed_names(phy->field, buf);		\
 }
 
+/* Fudge to tell if we're minimum or maximum */
+#define sas_phy_store_linkspeed(field)					\
+static ssize_t								\
+store_sas_phy_##field(struct class_device *cdev, const char *buf,	\
+		      size_t count)					\
+{									\
+	struct sas_phy *phy = transport_class_to_phy(cdev);		\
+	struct Scsi_Host *shost = dev_to_shost(phy->dev.parent);	\
+	struct sas_internal *i = to_sas_internal(shost->transportt);	\
+	u32 value;							\
+	struct sas_phy_linkrates rates = {0};				\
+	int error;							\
+									\
+	error = set_sas_linkspeed_names(&value, buf);			\
+	if (error)							\
+		return error;						\
+	rates.field = value;						\
+	error = i->f->set_phy_speed(phy, &rates);			\
+									\
+	return error ? error : count;					\
+}
+
+#define sas_phy_linkspeed_rw_attr(field)				\
+	sas_phy_show_linkspeed(field)					\
+	sas_phy_store_linkspeed(field)					\
+static CLASS_DEVICE_ATTR(field, S_IRUGO, show_sas_phy_##field,		\
+	store_sas_phy_##field)
+
 #define sas_phy_linkspeed_attr(field)					\
 	sas_phy_show_linkspeed(field)					\
 static CLASS_DEVICE_ATTR(field, S_IRUGO, show_sas_phy_##field, NULL)
 
+
 #define sas_phy_show_linkerror(field)					\
 static ssize_t								\
 show_sas_phy_##field(struct class_device *cdev, char *buf)		\
@@ -326,9 +373,9 @@ sas_phy_simple_attr(identify.phy_identifier, phy_identifier, "%d\n", u8);
 //sas_phy_simple_attr(port_identifier, port_identifier, "%d\n", int);
 sas_phy_linkspeed_attr(negotiated_linkrate);
 sas_phy_linkspeed_attr(minimum_linkrate_hw);
-sas_phy_linkspeed_attr(minimum_linkrate);
+sas_phy_linkspeed_rw_attr(minimum_linkrate);
 sas_phy_linkspeed_attr(maximum_linkrate_hw);
-sas_phy_linkspeed_attr(maximum_linkrate);
+sas_phy_linkspeed_rw_attr(maximum_linkrate);
 sas_phy_linkerror_attr(invalid_dword_count);
 sas_phy_linkerror_attr(running_disparity_error_count);
 sas_phy_linkerror_attr(loss_of_dword_sync_count);
@@ -1310,13 +1357,23 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel,
  * Setup / Teardown code
  */
 
-#define SETUP_TEMPLATE(attrb, field, perm, test)				\
+#define SETUP_TEMPLATE(attrb, field, perm, test)			\
 	i->private_##attrb[count] = class_device_attr_##field;		\
 	i->private_##attrb[count].attr.mode = perm;			\
 	i->attrb[count] = &i->private_##attrb[count];			\
 	if (test)							\
 		count++
 
+#define SETUP_TEMPLATE_RW(attrb, field, perm, test, ro_test, ro_perm)	\
+	i->private_##attrb[count] = class_device_attr_##field;		\
+	i->private_##attrb[count].attr.mode = perm;			\
+	if (ro_test) {							\
+		i->private_##attrb[count].attr.mode = ro_perm;		\
+		i->private_##attrb[count].store = NULL;			\
+	}								\
+	i->attrb[count] = &i->private_##attrb[count];			\
+	if (test)							\
+		count++
 
 #define SETUP_RPORT_ATTRIBUTE(field) 					\
 	SETUP_TEMPLATE(rphy_attrs, field, S_IRUGO, 1)
@@ -1327,6 +1384,10 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel,
 #define SETUP_PHY_ATTRIBUTE(field)					\
 	SETUP_TEMPLATE(phy_attrs, field, S_IRUGO, 1)
 
+#define SETUP_PHY_ATTRIBUTE_RW(field)					\
+	SETUP_TEMPLATE_RW(phy_attrs, field, S_IRUGO | S_IWUSR, 1,	\
+			!i->f->set_phy_speed, S_IRUGO)
+
 #define SETUP_PORT_ATTRIBUTE(field)					\
 	SETUP_TEMPLATE(port_attrs, field, S_IRUGO, 1)
 
@@ -1407,9 +1468,9 @@ sas_attach_transport(struct sas_function_template *ft)
 	//SETUP_PHY_ATTRIBUTE(port_identifier);
 	SETUP_PHY_ATTRIBUTE(negotiated_linkrate);
 	SETUP_PHY_ATTRIBUTE(minimum_linkrate_hw);
-	SETUP_PHY_ATTRIBUTE(minimum_linkrate);
+	SETUP_PHY_ATTRIBUTE_RW(minimum_linkrate);
 	SETUP_PHY_ATTRIBUTE(maximum_linkrate_hw);
-	SETUP_PHY_ATTRIBUTE(maximum_linkrate);
+	SETUP_PHY_ATTRIBUTE_RW(maximum_linkrate);
 
 	SETUP_PHY_ATTRIBUTE(invalid_dword_count);
 	SETUP_PHY_ATTRIBUTE(running_disparity_error_count);
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 87de518960c17..53024377f3b84 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -150,12 +150,18 @@ struct sas_port {
 #define transport_class_to_sas_port(cdev) \
 	dev_to_sas_port((cdev)->dev)
 
+struct sas_phy_linkrates {
+	enum sas_linkrate maximum_linkrate;
+	enum sas_linkrate minimum_linkrate;
+};
+
 /* The functions by which the transport class and the driver communicate */
 struct sas_function_template {
 	int (*get_linkerrors)(struct sas_phy *);
 	int (*get_enclosure_identifier)(struct sas_rphy *, u64 *);
 	int (*get_bay_identifier)(struct sas_rphy *);
 	int (*phy_reset)(struct sas_phy *, int);
+	int (*set_phy_speed)(struct sas_phy *, struct sas_phy_linkrates *);
 };
 
 
-- 
GitLab


From a01e70e570a72b8a8c9a58062e4f5bdcd3986222 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 6 Sep 2006 19:28:07 -0500
Subject: [PATCH 0172/1063] [SCSI] aic94xx: implement link rate setting

This patch implements the ability to set the minimum and maximum
linkrates for both libsas (for expanders) and aic94xx (for the host
phys).  It also tidies up the setting of the hardware min and max to
make sure they're updated when the expander emits a change broadcast.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic94xx/aic94xx.h     |  2 +-
 drivers/scsi/aic94xx/aic94xx_scb.c | 30 ++++++++++++++++++--
 drivers/scsi/libsas/sas_expander.c | 20 +++++++++-----
 drivers/scsi/libsas/sas_init.c     | 44 ++++++++++++++++++++++++++++--
 drivers/scsi/libsas/sas_internal.h |  2 +-
 drivers/scsi/libsas/sas_phy.c      | 15 +++++-----
 include/scsi/libsas.h              |  2 +-
 include/scsi/sas.h                 |  1 +
 8 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h
index cb7caf1c9ce15..1bd5b4ecf3d59 100644
--- a/drivers/scsi/aic94xx/aic94xx.h
+++ b/drivers/scsi/aic94xx/aic94xx.h
@@ -109,6 +109,6 @@ int  asd_clear_nexus_port(struct asd_sas_port *port);
 int  asd_clear_nexus_ha(struct sas_ha_struct *sas_ha);
 
 /* ---------- Phy Management ---------- */
-int  asd_control_phy(struct asd_sas_phy *phy, enum phy_func func);
+int  asd_control_phy(struct asd_sas_phy *phy, enum phy_func func, void *arg);
 
 #endif
diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c
index ef8ca08b545fd..7ee49b51b7241 100644
--- a/drivers/scsi/aic94xx/aic94xx_scb.c
+++ b/drivers/scsi/aic94xx/aic94xx_scb.c
@@ -52,6 +52,8 @@
 
 static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode)
 {
+	struct sas_phy *sas_phy = phy->sas_phy.phy;
+
 	switch (oob_mode & 7) {
 	case PHY_SPEED_60:
 		/* FIXME: sas transport class doesn't have this */
@@ -67,6 +69,12 @@ static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode)
 		phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
 		break;
 	}
+	sas_phy->negotiated_linkrate = phy->sas_phy.linkrate;
+	sas_phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+	sas_phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+	sas_phy->maximum_linkrate = phy->phy_desc->max_sas_lrate;
+	sas_phy->minimum_linkrate = phy->phy_desc->min_sas_lrate;
+
 	if (oob_mode & SAS_MODE)
 		phy->sas_phy.oob_mode = SAS_OOB_MODE;
 	else if (oob_mode & SATA_MODE)
@@ -710,14 +718,32 @@ static const int phy_func_table[] = {
 	[PHY_FUNC_RELEASE_SPINUP_HOLD] = RELEASE_SPINUP_HOLD,
 };
 
-int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func)
+int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func, void *arg)
 {
 	struct asd_ha_struct *asd_ha = phy->ha->lldd_ha;
+	struct asd_phy_desc *pd = asd_ha->phys[phy->id].phy_desc;
 	struct asd_ascb *ascb;
+	struct sas_phy_linkrates *rates;
 	int res = 1;
 
-	if (func == PHY_FUNC_CLEAR_ERROR_LOG)
+	switch (func) {
+	case PHY_FUNC_CLEAR_ERROR_LOG:
 		return -ENOSYS;
+	case PHY_FUNC_SET_LINK_RATE:
+		rates = arg;
+		if (rates->minimum_linkrate) {
+			pd->min_sas_lrate = rates->minimum_linkrate;
+			pd->min_sata_lrate = rates->minimum_linkrate;
+		}
+		if (rates->maximum_linkrate) {
+			pd->max_sas_lrate = rates->maximum_linkrate;
+			pd->max_sata_lrate = rates->maximum_linkrate;
+		}
+		func = PHY_FUNC_LINK_RESET;
+		break;
+	default:
+		break;
+	}
 
 	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL);
 	if (!ascb)
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 02e796ee027ee..30b8014bcc7a5 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -187,10 +187,10 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id,
 	phy->phy->identify.initiator_port_protocols = phy->attached_iproto;
 	phy->phy->identify.target_port_protocols = phy->attached_tproto;
 	phy->phy->identify.phy_identifier = phy_id;
-	phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
-	phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
-	phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
-	phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS;
+	phy->phy->minimum_linkrate_hw = dr->hmin_linkrate;
+	phy->phy->maximum_linkrate_hw = dr->hmax_linkrate;
+	phy->phy->minimum_linkrate = dr->pmin_linkrate;
+	phy->phy->maximum_linkrate = dr->pmax_linkrate;
 	phy->phy->negotiated_linkrate = phy->linkrate;
 
 	if (!rediscover)
@@ -404,7 +404,8 @@ static int sas_ex_manuf_info(struct domain_device *dev)
 #define PC_RESP_SIZE 8
 
 int sas_smp_phy_control(struct domain_device *dev, int phy_id,
-			enum phy_func phy_func)
+			enum phy_func phy_func,
+			struct sas_phy_linkrates *rates)
 {
 	u8 *pc_req;
 	u8 *pc_resp;
@@ -423,6 +424,10 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id,
 	pc_req[1] = SMP_PHY_CONTROL;
 	pc_req[9] = phy_id;
 	pc_req[10]= phy_func;
+	if (rates) {
+		pc_req[32] = rates->minimum_linkrate << 4;
+		pc_req[33] = rates->maximum_linkrate << 4;
+	}
 
 	res = smp_execute_task(dev, pc_req, PC_REQ_SIZE, pc_resp,PC_RESP_SIZE);
 
@@ -436,7 +441,7 @@ static void sas_ex_disable_phy(struct domain_device *dev, int phy_id)
 	struct expander_device *ex = &dev->ex_dev;
 	struct ex_phy *phy = &ex->ex_phy[phy_id];
 
-	sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE);
+	sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE, NULL);
 	phy->linkrate = SAS_PHY_DISABLED;
 }
 
@@ -731,7 +736,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
 
 	/* Phy state */
 	if (ex_phy->linkrate == SAS_SATA_SPINUP_HOLD) {
-		if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET))
+		if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET, NULL))
 			res = sas_ex_phy_discover(dev, phy_id);
 		if (res)
 			return res;
@@ -1706,6 +1711,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id)
 		   SAS_ADDR(phy->attached_sas_addr)) {
 		SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n",
 			    SAS_ADDR(dev->sas_addr), phy_id);
+		sas_ex_phy_discover(dev, phy_id);
 	} else
 		res = sas_discover_new(dev, phy_id);
 out:
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index b961664b81060..c836a237fb795 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -159,17 +159,57 @@ static int sas_phy_reset(struct sas_phy *phy, int hard_reset)
 		struct sas_internal *i =
 			to_sas_internal(sas_ha->core.shost->transportt);
 
-		ret = i->dft->lldd_control_phy(asd_phy, reset_type);
+		ret = i->dft->lldd_control_phy(asd_phy, reset_type, NULL);
 	} else {
 		struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
 		struct domain_device *ddev = sas_find_dev_by_rphy(rphy);
-		ret = sas_smp_phy_control(ddev, phy->number, reset_type);
+		ret = sas_smp_phy_control(ddev, phy->number, reset_type, NULL);
 	}
 	return ret;
 }
 
+static int sas_set_phy_speed(struct sas_phy *phy,
+			     struct sas_phy_linkrates *rates)
+{
+	int ret;
+
+	if ((rates->minimum_linkrate &&
+	     rates->minimum_linkrate > phy->maximum_linkrate) ||
+	    (rates->maximum_linkrate &&
+	     rates->maximum_linkrate < phy->minimum_linkrate))
+		return -EINVAL;
+
+	if (rates->minimum_linkrate &&
+	    rates->minimum_linkrate < phy->minimum_linkrate_hw)
+		rates->minimum_linkrate = phy->minimum_linkrate_hw;
+
+	if (rates->maximum_linkrate &&
+	    rates->maximum_linkrate > phy->maximum_linkrate_hw)
+		rates->maximum_linkrate = phy->maximum_linkrate_hw;
+
+	if (scsi_is_sas_phy_local(phy)) {
+		struct Scsi_Host *shost = dev_to_shost(phy->dev.parent);
+		struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
+		struct asd_sas_phy *asd_phy = sas_ha->sas_phy[phy->number];
+		struct sas_internal *i =
+			to_sas_internal(sas_ha->core.shost->transportt);
+
+		ret = i->dft->lldd_control_phy(asd_phy, PHY_FUNC_SET_LINK_RATE,
+					       rates);
+	} else {
+		struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
+		struct domain_device *ddev = sas_find_dev_by_rphy(rphy);
+		ret = sas_smp_phy_control(ddev, phy->number,
+					  PHY_FUNC_LINK_RESET, rates);
+
+	}
+
+	return ret;
+}
+
 static struct sas_function_template sft = {
 	.phy_reset = sas_phy_reset,
+	.set_phy_speed = sas_set_phy_speed,
 	.get_linkerrors = sas_get_linkerrors,
 };
 
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 0d69ede4b9448..bffcee4749215 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -70,7 +70,7 @@ int sas_notify_lldd_dev_found(struct domain_device *);
 void sas_notify_lldd_dev_gone(struct domain_device *);
 
 int sas_smp_phy_control(struct domain_device *dev, int phy_id,
-			enum phy_func phy_func);
+			enum phy_func phy_func, struct sas_phy_linkrates *);
 int sas_smp_get_phy_events(struct sas_phy *phy);
 
 struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy);
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index 024ab00e70d2e..9340cdbae4a3a 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -67,13 +67,14 @@ static void sas_phye_oob_error(void *data)
 		switch (phy->error) {
 		case 1:
 		case 2:
-			i->dft->lldd_control_phy(phy, PHY_FUNC_HARD_RESET);
+			i->dft->lldd_control_phy(phy, PHY_FUNC_HARD_RESET,
+						 NULL);
 			break;
 		case 3:
 		default:
 			phy->error = 0;
 			phy->enabled = 0;
-			i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE);
+			i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL);
 			break;
 		}
 	}
@@ -90,7 +91,7 @@ static void sas_phye_spinup_hold(void *data)
 			&phy->phy_events_pending);
 
 	phy->error = 0;
-	i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD);
+	i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD, NULL);
 }
 
 /* ---------- Phy class registration ---------- */
@@ -144,10 +145,10 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
 		phy->phy->identify.target_port_protocols = phy->tproto;
 		phy->phy->identify.sas_address = SAS_ADDR(sas_ha->sas_addr);
 		phy->phy->identify.phy_identifier = i;
-		phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
-		phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
-		phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
-		phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_UNKNOWN;
+		phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_UNKNOWN;
+		phy->phy->minimum_linkrate = SAS_LINK_RATE_UNKNOWN;
+		phy->phy->maximum_linkrate = SAS_LINK_RATE_UNKNOWN;
 		phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN;
 
 		sas_phy_add(phy->phy);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 8d91313dd888b..8e39982fc3db2 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -586,7 +586,7 @@ struct sas_domain_function_template {
 	int (*lldd_clear_nexus_ha)(struct sas_ha_struct *);
 
 	/* Phy management */
-	int (*lldd_control_phy)(struct asd_sas_phy *, enum phy_func);
+	int (*lldd_control_phy)(struct asd_sas_phy *, enum phy_func, void *);
 };
 
 extern int sas_register_ha(struct sas_ha_struct *);
diff --git a/include/scsi/sas.h b/include/scsi/sas.h
index 9c8a5b91ae64d..2f4b6afa34fc3 100644
--- a/include/scsi/sas.h
+++ b/include/scsi/sas.h
@@ -121,6 +121,7 @@ enum phy_func {
 	PHY_FUNC_CLEAR_AFFIL,
 	PHY_FUNC_TX_SATA_PS_SIGNAL,
 	PHY_FUNC_RELEASE_SPINUP_HOLD = 0x10, /* LOCAL PORT ONLY! */
+	PHY_FUNC_SET_LINK_RATE,
 };
 
 /* SAS LLDD would need to report only _very_few_ of those, like BROADCAST.
-- 
GitLab


From 2b7cbe20174695bca1afe2a8f755e1eb299f4768 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Thu, 7 Sep 2006 15:14:46 -0500
Subject: [PATCH 0173/1063] [SCSI] fix up SCSI netlink build

CONFIG_SCSI_NETLINK can become a bool since the item it's
selecting (CONFIG_NET) cannot be a module.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 4d1998d23f0f7..a6f920d218a02 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -28,7 +28,7 @@ config SCSI
 	  (the one containing the directory /) is located on a SCSI device.
 
 config SCSI_NETLINK
-	tristate
+	bool
 	default	n
 	select NET
 
-- 
GitLab


From 08da3f413f6aa3eb48cfc5331c68e57393167fe5 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Sun, 10 Sep 2006 21:09:26 -0400
Subject: [PATCH 0174/1063] [AGPGART] Add suspend callback for i965

Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/intel-agp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 42c7d8dec6350..d1ede7db5a12f 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -1924,6 +1924,8 @@ static int agp_intel_resume(struct pci_dev *pdev)
 		intel_i830_configure();
 	else if (bridge->driver == &intel_810_driver)
 		intel_i810_configure();
+	else if (bridge->driver == &intel_i965_driver)
+		intel_i915_configure();
 
 	return 0;
 }
-- 
GitLab


From edf03fb0575cbee2595a63374b17dc0921f2094a Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Sun, 10 Sep 2006 21:12:20 -0400
Subject: [PATCH 0175/1063] [AGPGART] Rework AGPv3 modesetting fallback.

Sometimes the logic to handle AGPx8->AGPx4 fallback failed, as can
be seen in https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=197346

The failures occurred if the bridge was in AGPx8 mode, but the
user hadn't specified a mode in their X config.  We weren't
setting the mode to the highest mode capable by the video card+bridge
(as we do in the AGPv2 case), which was leading to all kinds of
mayhem including us believing that after falling back from AGPx8, that
we couldn't do x4 mode (which is disastrous in AGPv3, as those are
the only two modes possible).

Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/generic.c | 39 +++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index cc5ea347a8a78..0dcdb363923fe 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -568,25 +568,34 @@ static void agp_v3_parse_one(u32 *requested_mode, u32 *bridge_agpstat, u32 *vga_
 		*bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
 		goto done;
 
+	} else if (*requested_mode & AGPSTAT3_4X) {
+		*bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+		*bridge_agpstat |= AGPSTAT3_4X;
+		goto done;
+
 	} else {
 
 		/*
-		 * If we didn't specify AGPx8, we can only do x4.
-		 * If the hardware can't do x4, we're up shit creek, and never
-		 *  should have got this far.
+		 * If we didn't specify an AGP mode, we see if both
+		 * the graphics card, and the bridge can do x8, and use if so.
+		 * If not, we fall back to x4 mode.
 		 */
-		*bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
-		if ((*bridge_agpstat & AGPSTAT3_4X) && (*vga_agpstat & AGPSTAT3_4X))
-			*bridge_agpstat |= AGPSTAT3_4X;
-		else {
-			printk(KERN_INFO PFX "Badness. Don't know which AGP mode to set. "
-							"[bridge_agpstat:%x vga_agpstat:%x fell back to:- bridge_agpstat:%x vga_agpstat:%x]\n",
-							origbridge, origvga, *bridge_agpstat, *vga_agpstat);
-			if (!(*bridge_agpstat & AGPSTAT3_4X))
-				printk(KERN_INFO PFX "Bridge couldn't do AGP x4.\n");
-			if (!(*vga_agpstat & AGPSTAT3_4X))
-				printk(KERN_INFO PFX "Graphic card couldn't do AGP x4.\n");
-			return;
+		if ((*bridge_agpstat & AGPSTAT3_8X) && (*vga_agpstat & AGPSTAT3_8X)) {
+			printk(KERN_INFO PFX "No AGP mode specified. Setting to highest mode supported by bridge & card (x8).\n");
+			*bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+			*vga_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+		} else {
+			printk(KERN_INFO PFX "Fell back to AGPx4 mode because");
+			if (!(*bridge_agpstat & AGPSTAT3_8X)) {
+				printk("bridge couldn't do x8. bridge_agpstat:%x (orig=%x)\n", *bridge_agpstat, origbridge);
+				*bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+				*bridge_agpstat |= AGPSTAT3_4X;
+			}
+			if (!(*vga_agpstat & AGPSTAT3_8X)) {
+				printk("graphics card couldn't do x8. vga_agpstat:%x (orig=%x)\n", *vga_agpstat, origvga);
+				*vga_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+				*vga_agpstat |= AGPSTAT3_4X;
+			}
 		}
 	}
 
-- 
GitLab


From a506b44bb5000b2652490a906c3e58beb2a8f6bb Mon Sep 17 00:00:00 2001
From: Daniel Walker <dwalker@mvista.com>
Date: Sat, 9 Sep 2006 09:31:03 -0700
Subject: [PATCH 0176/1063] [SCSI] fix compile error on module_refcount

  LD      .tmp_vmlinux1
drivers/built-in.o(.text+0x8e1f9): In function `scsi_device_put':
drivers/scsi/scsi.c:887: undefined reference to `module_refcount'
make: *** [.tmp_vmlinux1] Error 1

There are only two users of module_refcount() outside of kernel/module.c
and the other one uses ifdef's similar to this.

Signed-Off-By: Daniel Walker <dwalker@mvista.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index c35f5fc0d6680..c51b5769eac82 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -875,10 +875,12 @@ void scsi_device_put(struct scsi_device *sdev)
 {
 	struct module *module = sdev->host->hostt->module;
 
+#ifdef CONFIG_MODULE_UNLOAD
 	/* The module refcount will be zero if scsi_device_get()
 	 * was called from a module removal routine */
 	if (module && module_refcount(module) != 0)
 		module_put(module);
+#endif
 	put_device(&sdev->sdev_gendev);
 }
 EXPORT_SYMBOL(scsi_device_put);
-- 
GitLab


From 65396410af63db90d6428c678ff84aa652c3c1ec Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwindheim.de>
Date: Tue, 12 Sep 2006 23:49:33 +0200
Subject: [PATCH 0177/1063] [SCSI] wd33c93: Scsi_Cmnd conversion

Changes obsolete typedef'd Scsi_Cmnd to struct scsi_cmnd.

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/a2091.c   | 6 +++---
 drivers/scsi/a2091.h   | 4 ----
 drivers/scsi/a3000.c   | 8 ++++----
 drivers/scsi/a3000.h   | 4 ----
 drivers/scsi/gvp11.c   | 8 ++++----
 drivers/scsi/gvp11.h   | 4 ----
 drivers/scsi/mvme147.c | 6 +++---
 drivers/scsi/mvme147.h | 4 ----
 drivers/scsi/sgiwd93.c | 8 ++++----
 9 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/drivers/scsi/a2091.c b/drivers/scsi/a2091.c
index fddfa2ebcd707..085406928605f 100644
--- a/drivers/scsi/a2091.c
+++ b/drivers/scsi/a2091.c
@@ -40,7 +40,7 @@ static irqreturn_t a2091_intr (int irq, void *_instance, struct pt_regs *fp)
     return IRQ_HANDLED;
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned short cntr = CNTR_PDMD | CNTR_INTEN;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -115,7 +115,7 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt, 
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
 		      int status)
 {
     /* disable SCSI interrupts */
@@ -217,7 +217,7 @@ int __init a2091_detect(struct scsi_host_template *tpnt)
     return num_a2091;
 }
 
-static int a2091_bus_reset(Scsi_Cmnd *cmd)
+static int a2091_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
diff --git a/drivers/scsi/a2091.h b/drivers/scsi/a2091.h
index 22d6a13dd8be5..fe809bc88d73b 100644
--- a/drivers/scsi/a2091.h
+++ b/drivers/scsi/a2091.h
@@ -13,10 +13,6 @@
 
 int a2091_detect(struct scsi_host_template *);
 int a2091_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/a3000.c b/drivers/scsi/a3000.c
index ae9ab4b136ac3..7bf46d40b5610 100644
--- a/drivers/scsi/a3000.c
+++ b/drivers/scsi/a3000.c
@@ -44,7 +44,7 @@ static irqreturn_t a3000_intr (int irq, void *dummy, struct pt_regs *fp)
 	return IRQ_NONE;
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned short cntr = CNTR_PDMD | CNTR_INTEN;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -110,8 +110,8 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
-		      int status)
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
+		     int status)
 {
     /* disable SCSI interrupts */
     unsigned short cntr = CNTR_PDMD;
@@ -205,7 +205,7 @@ int __init a3000_detect(struct scsi_host_template *tpnt)
     return 0;
 }
 
-static int a3000_bus_reset(Scsi_Cmnd *cmd)
+static int a3000_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 	
diff --git a/drivers/scsi/a3000.h b/drivers/scsi/a3000.h
index 5535a65150a45..44a4ec7b46503 100644
--- a/drivers/scsi/a3000.h
+++ b/drivers/scsi/a3000.h
@@ -13,10 +13,6 @@
 
 int a3000_detect(struct scsi_host_template *);
 int a3000_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/gvp11.c b/drivers/scsi/gvp11.c
index a0d831b1bada7..18dbe5c27dac3 100644
--- a/drivers/scsi/gvp11.c
+++ b/drivers/scsi/gvp11.c
@@ -47,7 +47,7 @@ void gvp11_setup (char *str, int *ints)
     gvp11_xfer_mask = ints[1];
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned short cntr = GVP11_DMAC_INT_ENABLE;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -142,8 +142,8 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
-		      int status)
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
+		     int status)
 {
     /* stop DMA */
     DMA(instance)->SP_DMA = 1;
@@ -341,7 +341,7 @@ int __init gvp11_detect(struct scsi_host_template *tpnt)
     return num_gvp11;
 }
 
-static int gvp11_bus_reset(Scsi_Cmnd *cmd)
+static int gvp11_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
diff --git a/drivers/scsi/gvp11.h b/drivers/scsi/gvp11.h
index 575d219d14ba3..bf22859a50355 100644
--- a/drivers/scsi/gvp11.h
+++ b/drivers/scsi/gvp11.h
@@ -13,10 +13,6 @@
 
 int gvp11_detect(struct scsi_host_template *);
 int gvp11_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/mvme147.c b/drivers/scsi/mvme147.c
index cb367c2c5c78a..9b991b746d1ef 100644
--- a/drivers/scsi/mvme147.c
+++ b/drivers/scsi/mvme147.c
@@ -29,7 +29,7 @@ static irqreturn_t mvme147_intr (int irq, void *dummy, struct pt_regs *fp)
     return IRQ_HANDLED;
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned char flags = 0x01;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -57,7 +57,7 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
 		      int status)
 {
     m147_pcc->dma_cntrl = 0;
@@ -112,7 +112,7 @@ int mvme147_detect(struct scsi_host_template *tpnt)
     return 0;
 }
 
-static int mvme147_bus_reset(Scsi_Cmnd *cmd)
+static int mvme147_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
diff --git a/drivers/scsi/mvme147.h b/drivers/scsi/mvme147.h
index 2f56d69bd1802..32aee85434d82 100644
--- a/drivers/scsi/mvme147.h
+++ b/drivers/scsi/mvme147.h
@@ -12,10 +12,6 @@
 
 int mvme147_detect(struct scsi_host_template *);
 int mvme147_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c
index 7cd366fcc5711..4f1db6f2aae88 100644
--- a/drivers/scsi/sgiwd93.c
+++ b/drivers/scsi/sgiwd93.c
@@ -97,7 +97,7 @@ static irqreturn_t sgiwd93_intr(int irq, void *dev_id, struct pt_regs *regs)
 }
 
 static inline
-void fill_hpc_entries(struct hpc_chunk *hcp, Scsi_Cmnd *cmd, int datainp)
+void fill_hpc_entries(struct hpc_chunk *hcp, struct scsi_cmnd *cmd, int datainp)
 {
 	unsigned long len = cmd->SCp.this_residual;
 	void *addr = cmd->SCp.ptr;
@@ -129,7 +129,7 @@ void fill_hpc_entries(struct hpc_chunk *hcp, Scsi_Cmnd *cmd, int datainp)
 	hcp->desc.cntinfo = HPCDMA_EOX;
 }
 
-static int dma_setup(Scsi_Cmnd *cmd, int datainp)
+static int dma_setup(struct scsi_cmnd *cmd, int datainp)
 {
 	struct ip22_hostdata *hdata = HDATA(cmd->device->host);
 	struct hpc3_scsiregs *hregs =
@@ -163,7 +163,7 @@ static int dma_setup(Scsi_Cmnd *cmd, int datainp)
 	return 0;
 }
 
-static void dma_stop(struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
 		     int status)
 {
 	struct ip22_hostdata *hdata = HDATA(instance);
@@ -305,7 +305,7 @@ static int sgiwd93_release(struct Scsi_Host *instance)
 	return 1;
 }
 
-static int sgiwd93_bus_reset(Scsi_Cmnd *cmd)
+static int sgiwd93_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
-- 
GitLab


From f50d4cfc98d70f919afb2924b1b53c36b2f4e62f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 24 Aug 2006 16:54:08 +1000
Subject: [PATCH 0178/1063] [POWERPC] Split out vpa unregister logic from
 pseries_kexec_cpu_down_xics()

As part of the new irq code pseries_kexec_cpu_down() was split into a
xics and mpic version. The vpa unregister logic is now only done in the
xics routine, and although that's ok in practice (we don't have SPLPAR
machines with mpic), I'd rather have the two concepts stay separate.

So move the vpa unregister into pseries_kexec_cpu_down(), which gets called
by both the xics and mpic routines. This also gives us an obvious place to
put any new kexec-down logic needed in future.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/setup.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1587efc510577..a6398fbe530dc 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -223,12 +223,7 @@ static void pseries_lpar_enable_pmcs(void)
 }
 
 #ifdef CONFIG_KEXEC
-static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary)
-{
-	mpic_teardown_this_cpu(secondary);
-}
-
-static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
+static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 {
 	/* Don't risk a hypervisor call if we're crashing */
 	if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
@@ -248,6 +243,17 @@ static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
 					hard_smp_processor_id());
 		}
 	}
+}
+
+static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary)
+{
+	pseries_kexec_cpu_down(crash_shutdown, secondary);
+	mpic_teardown_this_cpu(secondary);
+}
+
+static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
+{
+	pseries_kexec_cpu_down(crash_shutdown, secondary);
 	xics_teardown_cpu(secondary);
 }
 #endif /* CONFIG_KEXEC */
-- 
GitLab


From c3412dcb75ff4d64b44bedc72761d5707d19edf7 Mon Sep 17 00:00:00 2001
From: Will Schmidt <will_schmidt@vnet.ibm.com>
Date: Wed, 30 Aug 2006 13:11:38 -0500
Subject: [PATCH 0179/1063] [POWERPC] Emulate power5 popcntb instruction

In an attempt to make it easier for a power5 optimized app to run on a
power4 or a 970 or random earlier machine, this provides emulation of
the popcntb instruction.

Signed-off-by: Will Schmidt <will_schmidt@vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/traps.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 9b352bd0a4601..d9f10f2fc372b 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -598,6 +598,9 @@ static void parse_fpe(struct pt_regs *regs)
 #define INST_STSWI		0x7c0005aa
 #define INST_STSWX		0x7c00052a
 
+#define INST_POPCNTB		0x7c0000f4
+#define INST_POPCNTB_MASK	0xfc0007fe
+
 static int emulate_string_inst(struct pt_regs *regs, u32 instword)
 {
 	u8 rT = (instword >> 21) & 0x1f;
@@ -666,6 +669,23 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword)
 	return 0;
 }
 
+static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
+{
+	u32 ra,rs;
+	unsigned long tmp;
+
+	ra = (instword >> 16) & 0x1f;
+	rs = (instword >> 21) & 0x1f;
+
+	tmp = regs->gpr[rs];
+	tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
+	tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
+	tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+	regs->gpr[ra] = tmp;
+
+	return 0;
+}
+
 static int emulate_instruction(struct pt_regs *regs)
 {
 	u32 instword;
@@ -703,6 +723,11 @@ static int emulate_instruction(struct pt_regs *regs)
 	if ((instword & INST_STRING_GEN_MASK) == INST_STRING)
 		return emulate_string_inst(regs, instword);
 
+	/* Emulate the popcntb (Population Count Bytes) instruction. */
+	if ((instword & INST_POPCNTB_MASK) == INST_POPCNTB) {
+		return emulate_popcntb_inst(regs, instword);
+	}
+
 	return -EINVAL;
 }
 
-- 
GitLab


From 477bcae4c289a60f2303fbd4a3a875dcca647cf8 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Wed, 6 Sep 2006 09:02:53 -0500
Subject: [PATCH 0180/1063] [POWERPC] Make function of pm_power_off consistent
 with x86

Allow the pm_power_off function variable in PPC to work as an override.
This makes the function consistent with the other architectures and it
allows generic poweroff operations (like those provided in IPMI
systems) to work properly on PPC.

Signed-off-by: Corey Minyard <minyard@acm.org>
Cc: Joseph Barnett <jbarnett@motorola.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/ppc/kernel/setup.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
index a74f46d9826fc..5458ac5da7c3f 100644
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -127,11 +127,8 @@ void machine_restart(char *cmd)
 	ppc_md.restart(cmd);
 }
 
-void machine_power_off(void)
+static void ppc_generic_power_off(void)
 {
-#ifdef CONFIG_NVRAM
-	nvram_sync();
-#endif
 	ppc_md.power_off();
 }
 
@@ -143,7 +140,17 @@ void machine_halt(void)
 	ppc_md.halt();
 }
 
-void (*pm_power_off)(void) = machine_power_off;
+void (*pm_power_off)(void) = ppc_generic_power_off;
+
+void machine_power_off(void)
+{
+#ifdef CONFIG_NVRAM
+	nvram_sync();
+#endif
+	if (pm_power_off)
+		pm_power_off();
+	ppc_generic_power_off();
+}
 
 #ifdef CONFIG_TAU
 extern u32 cpu_temp(unsigned long cpu);
-- 
GitLab


From b7e89214aadf82fa5eaff28f50f2078fa6ae773c Mon Sep 17 00:00:00 2001
From: Josh Boyer <jdub@us.ibm.com>
Date: Thu, 7 Sep 2006 13:27:58 -0500
Subject: [PATCH 0181/1063] [POWERPC] PPC 4xx: Enable XMON on PPC 4xx boards

The following patch allows XMON to run on the 4xx platform.  Tested on
Walnut, Ebony, and Nova (440GX based) eval boards.  440EP, 440SP, and
440SPE boards should work as well.  Patch is against 2.6.18-rc6.

Signed-off-by: Josh Boyer <jdub@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/ppc/xmon/start.c | 28 ++++++++++++++++++----------
 arch/ppc/xmon/xmon.c  | 26 ++++++++++++++++++++------
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/arch/ppc/xmon/start.c b/arch/ppc/xmon/start.c
index f7e92986952a2..d74a883e5bdea 100644
--- a/arch/ppc/xmon/start.c
+++ b/arch/ppc/xmon/start.c
@@ -15,6 +15,7 @@
 #include <asm/processor.h>
 #include <asm/delay.h>
 #include <asm/btext.h>
+#include <asm/ibm4xx.h>
 
 static volatile unsigned char *sccc, *sccd;
 unsigned int TXRDY, RXRDY, DLAB;
@@ -57,23 +58,30 @@ static struct sysrq_key_op sysrq_xmon_op =
 void
 xmon_map_scc(void)
 {
-#ifdef CONFIG_PPC_PREP
-	volatile unsigned char *base;
-
-#elif defined(CONFIG_GEMINI)
+#if defined(CONFIG_GEMINI)
 	/* should already be mapped by the kernel boot */
-	sccc = (volatile unsigned char *) 0xffeffb0d;
 	sccd = (volatile unsigned char *) 0xffeffb08;
-	TXRDY = 0x20;
-	RXRDY = 1;
-	DLAB = 0x80;
 #elif defined(CONFIG_405GP)
-	sccc = (volatile unsigned char *)0xef600305;
 	sccd = (volatile unsigned char *)0xef600300;
+#elif defined(CONFIG_440EP)
+	sccd = (volatile unsigned char *) ioremap(PPC440EP_UART0_ADDR, 8);
+#elif defined(CONFIG_440SP)
+	sccd = (volatile unsigned char *) ioremap64(PPC440SP_UART0_ADDR, 8);
+#elif defined(CONFIG_440SPE)
+	sccd = (volatile unsigned char *) ioremap64(PPC440SPE_UART0_ADDR, 8);
+#elif defined(CONFIG_44x)
+	/* This is the default for 44x platforms.  Any boards that have a
+	   different UART address need to be put in cases before this or the
+	   port will be mapped incorrectly */
+	sccd = (volatile unsigned char *) ioremap64(PPC440GP_UART0_ADDR, 8);
+#endif /* platform */
+
+#ifndef CONFIG_PPC_PREP
+	sccc = sccd + 5;
 	TXRDY = 0x20;
 	RXRDY = 1;
 	DLAB = 0x80;
-#endif /* platform */
+#endif
 
 	register_sysrq_key('x', &sysrq_xmon_op);
 }
diff --git a/arch/ppc/xmon/xmon.c b/arch/ppc/xmon/xmon.c
index 37d234f93394a..25d032b2aec7e 100644
--- a/arch/ppc/xmon/xmon.c
+++ b/arch/ppc/xmon/xmon.c
@@ -153,6 +153,12 @@ static int xmon_trace[NR_CPUS];
 #define SSTEP	1		/* stepping because of 's' command */
 #define BRSTEP	2		/* stepping over breakpoint */
 
+#ifdef CONFIG_4xx
+#define MSR_SSTEP_ENABLE 0x200
+#else
+#define MSR_SSTEP_ENABLE 0x400
+#endif
+
 static struct pt_regs *xmon_regs[NR_CPUS];
 
 extern inline void sync(void)
@@ -211,6 +217,14 @@ static void get_tb(unsigned *p)
 	p[1] = lo;
 }
 
+static inline void xmon_enable_sstep(struct pt_regs *regs)
+{
+	regs->msr |= MSR_SSTEP_ENABLE;
+#ifdef CONFIG_4xx
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+#endif
+}
+
 int xmon(struct pt_regs *excp)
 {
 	struct pt_regs regs;
@@ -254,10 +268,10 @@ int xmon(struct pt_regs *excp)
 	cmd = cmds(excp);
 	if (cmd == 's') {
 		xmon_trace[smp_processor_id()] = SSTEP;
-		excp->msr |= 0x400;
+		xmon_enable_sstep(excp);
 	} else if (at_breakpoint(excp->nip)) {
 		xmon_trace[smp_processor_id()] = BRSTEP;
-		excp->msr |= 0x400;
+		xmon_enable_sstep(excp);
 	} else {
 		xmon_trace[smp_processor_id()] = 0;
 		insert_bpts();
@@ -298,7 +312,7 @@ xmon_bpt(struct pt_regs *regs)
 		remove_bpts();
 		excprint(regs);
 		xmon_trace[smp_processor_id()] = BRSTEP;
-		regs->msr |= 0x400;
+		xmon_enable_sstep(regs);
 	} else {
 		xmon(regs);
 	}
@@ -385,7 +399,7 @@ insert_bpts(void)
 		}
 		store_inst((void *) bp->address);
 	}
-#if !defined(CONFIG_8xx)
+#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx))
 	if (dabr.enabled)
 		set_dabr(dabr.address);
 	if (iabr.enabled)
@@ -400,7 +414,7 @@ remove_bpts(void)
 	struct bpt *bp;
 	unsigned instr;
 
-#if !defined(CONFIG_8xx)
+#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx))
 	set_dabr(0);
 	set_iabr(0);
 #endif
@@ -677,7 +691,7 @@ bpt_cmds(void)
 
 	cmd = inchar();
 	switch (cmd) {
-#if !defined(CONFIG_8xx)
+#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx))
 	case 'd':
 		mode = 7;
 		cmd = inchar();
-- 
GitLab


From 5a2fe38d2844ba2f2dd8f4946d795e09d8f7e095 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 6 Sep 2006 14:34:41 -0500
Subject: [PATCH 0182/1063] [POWERPC] powerpc: Reduce default cacheline size to
 64 bytes

Reduce default cacheline size on 64-bit powerpc from 128 bytes to 64.
This is the architected minimum. In most cases we'll still end up using
cache line information from the device tree, but defaults are used during
early boot and doing a few dcbst/icbi's too many there won't do any harm.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/head_64.S  | 2 +-
 arch/powerpc/kernel/setup_64.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index e9963d9f335af..3065b472b95db 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -1748,7 +1748,7 @@ _STATIC(__after_prom_start)
 _GLOBAL(copy_and_flush)
 	addi	r5,r5,-8
 	addi	r6,r6,-8
-4:	li	r0,16			/* Use the least common		*/
+4:	li	r0,8			/* Use the smallest common	*/
 					/* denominator cache line	*/
 					/* size.  This results in	*/
 					/* extra cache line flushes	*/
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 77efe19ccd2c8..00d6b8addd788 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -78,10 +78,10 @@ u64 ppc64_pft_size;
  * before we've read this from the device tree.
  */
 struct ppc64_caches ppc64_caches = {
-	.dline_size = 0x80,
-	.log_dline_size = 7,
-	.iline_size = 0x80,
-	.log_iline_size = 7
+	.dline_size = 0x40,
+	.log_dline_size = 6,
+	.iline_size = 0x40,
+	.log_iline_size = 6
 };
 EXPORT_SYMBOL_GPL(ppc64_caches);
 
-- 
GitLab


From 0024300000769eadcb4a4fcdff531d45ee7735d4 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 6 Sep 2006 14:35:19 -0500
Subject: [PATCH 0183/1063] [POWERPC] powerpc: Divorce CPU_FTR_CTRL from
 CPU_FTR_PPCAS_ARCH_V2_BASE

The performance monitor implementation (including CTRL register behaviour)
is just included in PPC v2 as an example; it's not truly part of the base.

It's actually a somewhat misleading feature, but I'll leave that be for
now: The presence of the register is not what the feature bit is used
for, but instead it's used to determine if it contains the runlatch
bit for idle reporting of the performance monitor. For alternative
implementations, the register might still exist but the bit might have
different meaning (or no meaning at all).

For now, split it off and don't include it in CPU_FTR_PPCAS_ARCH_V2_BASE.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/cputable.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 748bc1805da90..3608259c49cfc 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -148,7 +148,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
 
 #define CPU_FTR_PPCAS_ARCH_V2_BASE (CPU_FTR_SLB | \
 					CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
-					CPU_FTR_NODSISRALIGN | CPU_FTR_CTRL)
+					CPU_FTR_NODSISRALIGN)
 
 /* iSeries doesn't support large pages */
 #ifdef CONFIG_PPC_ISERIES
@@ -313,24 +313,25 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | \
 	    CPU_FTR_MMCRA | CPU_FTR_CTRL)
 #define CPU_FTRS_POWER4	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA)
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_MMCRA)
 #define CPU_FTRS_PPC970	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA)
 #define CPU_FTRS_POWER5	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
 	    CPU_FTR_PURR)
 #define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
 	    CPU_FTR_PURR | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_REAL_LE)
 #define CPU_FTRS_CELL	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_CTRL | CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE)
+	    CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2)
 #endif
-- 
GitLab


From b3ebd1d862d6c23caa58e40d341eefc426f835e1 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 6 Sep 2006 14:35:57 -0500
Subject: [PATCH 0184/1063] [POWERPC] powerpc: PA6T cputable entry, PVR value

Introduce PWRficient PA6T cputable entries and feature bits.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/cputable.c | 14 ++++++++++++++
 include/asm-powerpc/cputable.h |  9 +++++++--
 include/asm-powerpc/reg.h      |  1 +
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 306da4cd37a0f..db65c9f6559ae 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -58,6 +58,9 @@ extern void __restore_cpu_ppc970(void);
 #define COMMON_USER_POWER6	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
 				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 				 PPC_FEATURE_TRUE_LE)
+#define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_HAS_ALTIVEC_COMP)
 #define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
 				 PPC_FEATURE_BOOKE)
 
@@ -286,6 +289,17 @@ struct cpu_spec	cpu_specs[] = {
 		.dcache_bsize		= 128,
 		.platform		= "ppc-cell-be",
 	},
+	{	/* PA Semi PA6T */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00900000,
+		.cpu_name		= "PA6T",
+		.cpu_features		= CPU_FTRS_PA6T,
+		.cpu_user_features	= COMMON_USER_PA6T,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 6,
+		.platform		= "pa6t",
+	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 3608259c49cfc..12707ab9dc98d 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -23,6 +23,7 @@
 #define PPC_FEATURE_SMT			0x00004000
 #define PPC_FEATURE_ICACHE_SNOOP	0x00002000
 #define PPC_FEATURE_ARCH_2_05		0x00001000
+#define PPC_FEATURE_PA6T		0x00000800
 
 #define PPC_FEATURE_TRUE_LE		0x00000002
 #define PPC_FEATURE_PPC_LE		0x00000001
@@ -332,6 +333,10 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE)
+#define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
+	    CPU_FTR_PURR | CPU_FTR_REAL_LE)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2)
 #endif
@@ -340,7 +345,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
-	    CPU_FTRS_CELL | CPU_FTR_CI_LARGE_PAGE)
+	    CPU_FTRS_CELL | CPU_FTRS_PA6T)
 #else
 enum {
 	CPU_FTRS_POSSIBLE =
@@ -379,7 +384,7 @@ enum {
 #define CPU_FTRS_ALWAYS		\
 	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
 	    CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 &	\
-	    CPU_FTRS_CELL & CPU_FTRS_POSSIBLE)
+	    CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
 #else
 enum {
 	CPU_FTRS_ALWAYS =
diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h
index cf73475a0c69f..3a9fcc15811b4 100644
--- a/include/asm-powerpc/reg.h
+++ b/include/asm-powerpc/reg.h
@@ -592,6 +592,7 @@
 #define PV_630p	0x0041
 #define PV_970MP	0x0044
 #define PV_BE		0x0070
+#define PV_PA6T		0x0090
 
 /*
  * Number of entries in the SLB. If this ever changes we should handle
-- 
GitLab


From 1e76875e51266a5c43f601ecf08a92be5769228c Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 6 Sep 2006 14:42:08 -0500
Subject: [PATCH 0185/1063] [POWERPC] powerpc: PA Semi PWRficient platform
 support

Base patch for PA6T and PA6T-1682M. This introduces the
arch/powerpc/platforms/pasemi directory, together with basic
implementations of the various setup routines.

Much of this was based on other platform code, i.e. Maple, etc.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig                   |  11 ++
 arch/powerpc/platforms/Makefile        |   1 +
 arch/powerpc/platforms/pasemi/Makefile |   1 +
 arch/powerpc/platforms/pasemi/pasemi.h |   8 +
 arch/powerpc/platforms/pasemi/pci.c    | 198 +++++++++++++++++++++++++
 arch/powerpc/platforms/pasemi/setup.c  | 188 +++++++++++++++++++++++
 arch/powerpc/platforms/pasemi/time.c   |  29 ++++
 7 files changed, 436 insertions(+)
 create mode 100644 arch/powerpc/platforms/pasemi/Makefile
 create mode 100644 arch/powerpc/platforms/pasemi/pasemi.h
 create mode 100644 arch/powerpc/platforms/pasemi/pci.c
 create mode 100644 arch/powerpc/platforms/pasemi/setup.c
 create mode 100644 arch/powerpc/platforms/pasemi/time.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 904798fd4e74b..c9dcec7f3c614 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -413,6 +413,17 @@ config PPC_MAPLE
           This option enables support for the Maple 970FX Evaluation Board.
 	  For more informations, refer to <http://www.970eval.com>
 
+config PPC_PASEMI
+	depends on PPC_MULTIPLATFORM && PPC64
+	bool "PA Semi SoC-based platforms"
+	default n
+	select MPIC
+	select PPC_UDBG_16550
+	select GENERIC_TBSYNC
+	help
+	  This option enables support for PA Semi's PWRficient line
+	  of SoC processors, including PA6T-1682M
+
 config PPC_CELL
 	bool
 	default n
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 5cf46dc578956..e58fa953a50bf 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -13,5 +13,6 @@ obj-$(CONFIG_PPC_86xx)		+= 86xx/
 obj-$(CONFIG_PPC_PSERIES)	+= pseries/
 obj-$(CONFIG_PPC_ISERIES)	+= iseries/
 obj-$(CONFIG_PPC_MAPLE)		+= maple/
+obj-$(CONFIG_PPC_PASEMI)		+= pasemi/
 obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
new file mode 100644
index 0000000000000..1be1a993c5f52
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -0,0 +1 @@
+obj-y	+= setup.o pci.o time.o
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
new file mode 100644
index 0000000000000..fd71d72736b27
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -0,0 +1,8 @@
+#ifndef _PASEMI_PASEMI_H
+#define _PASEMI_PASEMI_H
+
+extern unsigned long pas_get_boot_time(void);
+extern void pas_pci_init(void);
+extern void pas_pcibios_fixup(void);
+
+#endif /* _PASEMI_PASEMI_H */
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
new file mode 100644
index 0000000000000..4679c52304133
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ *	    Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+#include <asm/ppc-pci.h>
+
+#define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off))
+
+#define CONFIG_OFFSET_VALID(off) ((off) < 4096)
+
+static unsigned long pa_pxp_cfg_addr(struct pci_controller *hose,
+				       u8 bus, u8 devfn, int offset)
+{
+	return ((unsigned long)hose->cfg_data) + PA_PXP_CFA(bus, devfn, offset);
+}
+
+static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	unsigned long addr;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!CONFIG_OFFSET_VALID(offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8((u8 *)addr);
+		break;
+	case 2:
+		*val = in_le16((u16 *)addr);
+		break;
+	default:
+		*val = in_le32((u32 *)addr);
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 val)
+{
+	struct pci_controller *hose;
+	unsigned long addr;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!CONFIG_OFFSET_VALID(offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		out_8((u8 *)addr, val);
+		(void) in_8((u8 *)addr);
+		break;
+	case 2:
+		out_le16((u16 *)addr, val);
+		(void) in_le16((u16 *)addr);
+		break;
+	default:
+		out_le32((u32 *)addr, val);
+		(void) in_le32((u32 *)addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops pa_pxp_ops = {
+	pa_pxp_read_config,
+	pa_pxp_write_config,
+};
+
+static void __init setup_pa_pxp(struct pci_controller *hose)
+{
+	hose->ops = &pa_pxp_ops;
+	hose->cfg_data = ioremap(0xe0000000, 0x10000000);
+}
+
+static int __init add_bridge(struct device_node *dev)
+{
+	struct pci_controller *hose;
+
+	pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+
+	hose->first_busno = 0;
+	hose->last_busno = 0xff;
+
+	setup_pa_pxp(hose);
+
+	printk(KERN_INFO "Found PA-PXP PCI host bridge.\n");
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+	pci_setup_phb_io(hose, 1);
+
+	return 0;
+}
+
+
+void __init pas_pcibios_fixup(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
+}
+
+static void __init pas_fixup_phb_resources(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
+		hose->io_resource.start += offset;
+		hose->io_resource.end += offset;
+		printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
+		       hose->global_number,
+		       hose->io_resource.start, hose->io_resource.end);
+	}
+}
+
+
+void __init pas_pci_init(void)
+{
+	struct device_node *np, *root;
+
+	root = of_find_node_by_path("/");
+	if (!root) {
+		printk(KERN_CRIT "pas_pci_init: can't find root "
+			"of device tree\n");
+		return;
+	}
+
+	for (np = NULL; (np = of_get_next_child(root, np)) != NULL;)
+		if (np->name && !strcmp(np->name, "pxp") && !add_bridge(np))
+			of_node_get(np);
+
+	of_node_put(root);
+
+	pas_fixup_phb_resources();
+
+	/* Setup the linkage between OF nodes and PHBs */
+	pci_devs_phb_init();
+
+	/* Use the common resource allocation mechanism */
+	pci_probe_only = 1;
+}
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
new file mode 100644
index 0000000000000..628482671c154
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ *	    Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/setup.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+
+#include <asm/prom.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+
+#include "pasemi.h"
+
+static void pas_restart(char *cmd)
+{
+	printk("restart unimplemented, looping...\n");
+	for (;;) ;
+}
+
+static void pas_power_off(void)
+{
+	printk("power off unimplemented, looping...\n");
+	for (;;) ;
+}
+
+static void pas_halt(void)
+{
+	pas_power_off();
+}
+
+#ifdef CONFIG_SMP
+struct smp_ops_t pas_smp_ops = {
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.kick_cpu	= smp_generic_kick_cpu,
+	.setup_cpu	= smp_mpic_setup_cpu,
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
+};
+#endif /* CONFIG_SMP */
+
+void __init pas_setup_arch(void)
+{
+#ifdef CONFIG_SMP
+	/* Setup SMP callback */
+	smp_ops = &pas_smp_ops;
+#endif
+	/* Lookup PCI hosts */
+	pas_pci_init();
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	printk(KERN_DEBUG "Using default idle loop\n");
+}
+
+static void iommu_dev_setup_null(struct pci_dev *dev) { }
+static void iommu_bus_setup_null(struct pci_bus *bus) { }
+
+static void __init pas_init_early(void)
+{
+	/* No iommu code yet */
+	ppc_md.iommu_dev_setup = iommu_dev_setup_null;
+	ppc_md.iommu_bus_setup = iommu_bus_setup_null;
+	pci_direct_iommu_init();
+}
+
+/* No legacy IO on our parts */
+static int pas_check_legacy_ioport(unsigned int baseport)
+{
+	return -ENODEV;
+}
+
+/* Locate the MPIC in the device tree and set it up as the primary PIC. */
+static __init void pas_init_IRQ(void)
+{
+	struct device_node *np, *root, *mpic_node = NULL;
+	unsigned long openpic_addr;
+	const unsigned int *opprop;
+	int naddr, opplen;
+	struct mpic *mpic;
+
+	/* Prefer an "interrupt-controller" node compatible with "open-pic" */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "open-pic")) {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node)
+		for_each_node_by_type(np, "open-pic") {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node) {
+		printk(KERN_ERR
+			"Failed to locate the MPIC interrupt controller\n");
+		return;
+	}
+
+	/* Find address list in /platform-open-pic */
+	root = of_find_node_by_path("/");
+	naddr = prom_n_addr_cells(root);
+	opprop = get_property(root, "platform-open-pic", &opplen);
+	if (!opprop) {
+		printk(KERN_ERR "No platform-open-pic property.\n");
+		of_node_put(mpic_node);
+		of_node_put(root);
+		return;
+	}
+	openpic_addr = of_read_number(opprop, naddr);
+	printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
+
+	mpic = mpic_alloc(mpic_node, openpic_addr, MPIC_PRIMARY, 0, 0,
+			  " PAS-OPIC  ");
+	BUG_ON(!mpic);
+
+	mpic_assign_isu(mpic, 0, openpic_addr + 0x10000);
+	mpic_init(mpic);
+	/* Drop each node reference exactly once */
+	of_node_put(mpic_node);
+	of_node_put(root);
+}
+
+static void __init pas_progress(char *s, unsigned short hex)
+{
+	printk("[%04x] : %s\n", hex, s ? s : "");
+}
+
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init pas_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "PA6T-1682M"))
+		return 0;
+
+	hpte_init_native();
+
+	return 1;
+}
+
+define_machine(pas) {
+	.name			= "PA Semi PA6T-1682M",
+	.probe			= pas_probe,
+	.setup_arch		= pas_setup_arch,
+	.init_early		= pas_init_early,
+	.init_IRQ		= pas_init_IRQ,
+	.get_irq		= mpic_get_irq,
+	.pcibios_fixup		= pas_pcibios_fixup,
+	.restart		= pas_restart,
+	.power_off		= pas_power_off,
+	.halt			= pas_halt,
+	.get_boot_time		= pas_get_boot_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.check_legacy_ioport    = pas_check_legacy_ioport,
+	.progress		= pas_progress,
+};
diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c
new file mode 100644
index 0000000000000..9bd410b8fec6f
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/time.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/time.h>
+
+#include <asm/time.h>
+
+unsigned long __init pas_get_boot_time(void)
+{
+	/* Let's just return a fake date right now */
+	return mktime(2006, 1, 1, 12, 0, 0);
+}
-- 
GitLab


From ab06ff3af34a6288b314862abfebd86ad918c5d9 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 6 Sep 2006 14:44:54 -0500
Subject: [PATCH 0186/1063] [POWERPC] powerpc: PA Semi PWRficient MAINTAINER
 entry

Maintainer entry for PWRficient

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 25cd7073a20bb..7e86286ddf1e1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1783,6 +1783,13 @@ W:     http://www.penguinppc.org/
 L:     linuxppc-embedded@ozlabs.org
 S:     Maintained
 
+LINUX FOR POWERPC PA SEMI PWRFICIENT
+P:	Olof Johansson
+M:	olof@lixom.net
+W:	http://www.pasemi.com/
+L:	linuxppc-dev@ozlabs.org
+S:	Supported
+
 LLC (802.2)
 P:	Arnaldo Carvalho de Melo
 M:	acme@conectiva.com.br
-- 
GitLab


From 57852a853b0d6761f270be0961d5d8387e98c8bb Mon Sep 17 00:00:00 2001
From: Mike Kravetz <kravetz@us.ibm.com>
Date: Wed, 6 Sep 2006 16:23:12 -0700
Subject: [PATCH 0187/1063] [POWERPC] powerpc: Instrument Hypervisor Calls

Add instrumentation for hypervisor calls on pseries.  Call statistics
include number of calls, wall time and cpu cycles (if available) and
are made available via debugfs.  Instrumentation code is behind the
HCALL_STATS config option and has no impact if not enabled.

Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig.debug                   |  14 ++
 arch/powerpc/kernel/asm-offsets.c            |   7 +
 arch/powerpc/platforms/pseries/Makefile      |   1 +
 arch/powerpc/platforms/pseries/hvCall.S      |  72 +++++++++++
 arch/powerpc/platforms/pseries/hvCall_inst.c | 129 +++++++++++++++++++
 include/asm-powerpc/hvcall.h                 |  12 +-
 6 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/pseries/hvCall_inst.c

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index e29ef77d3b001..d7b2aedd89aa9 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -18,6 +18,20 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
+config HCALL_STATS
+	bool "Hypervisor call instrumentation"
+	depends on PPC_PSERIES && DEBUG_FS
+	help
+	  Adds code to keep track of the number of hypervisor calls made and
+	  the amount of time spent in hypervisor calls.  Wall time spent in
+	  each call is always calculated, and if available CPU cycles spent
+	  are also calculated.  A directory named hcall_inst is added at the
+	  root of the debugfs filesystem.  Within the hcall_inst directory
+	  are files that contain CPU specific call statistics.
+
+	  This option will add a small amount of overhead to all hypervisor
+	  calls.
+
 config DEBUGGER
 	bool "Enable debugger hooks"
 	depends on DEBUG_KERNEL
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c53acd2a6dfcd..c578e7ab81732 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -137,6 +137,7 @@ int main(void)
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 	DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
+	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 
 	DEFINE(SLBSHADOW_STACKVSID,
 	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
@@ -165,6 +166,12 @@ int main(void)
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+	/* hcall statistics */
+	DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
+	DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
+	DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
+	DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
 #endif /* CONFIG_PPC64 */
 	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
 	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index e5e0ff4669048..997243a91be87 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
+obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 9a99b056bd272..c00cfed7af2c0 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -10,9 +10,69 @@
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
 	
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
+#ifdef CONFIG_HCALL_STATS
+/*
+ * precall must preserve all argument registers (r0 is scratch).  Use unused
+ * STK_PARM() areas for opcode/snapshots; PURR only where CPU_FTR_PURR is set.
+ */
+#define HCALL_INST_PRECALL					\
+	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
+	mftb	r0;			/* get timebase and */	\
+	std     r0,STK_PARM(r5)(r1);	/* save for later */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
+	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);
+	
+/*
+ * postcall is performed immediately before function return which allows
+ * liberal use of volatile registers.  PURR sections need CPU_FTR_PURR set.
+ */
+#define HCALL_INST_POSTCALL					\
+	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
+	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
+	bgt-	cr7,1f;						\
+								\
+	/* get time and PURR snapshots after hcall */		\
+	mftb	r7;			/* timebase after */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r8,SPRN_PURR;		/* PURR after */	\
+	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
+	subf	r6,r6,r8;		/* delta */		\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
+	subf	r5,r5,r7;		/* time delta */	\
+								\
+	/* calculate address of stat structure r4 = opcode */	\
+	srdi	r4,r4,2;		/* index into array */	\
+	mulli	r4,r4,HCALL_STAT_SIZE;				\
+	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
+	add	r4,r4,r7;					\
+	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
+	add	r4,r4,r7;					\
+								\
+	/* update stats	*/					\
+	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
+	addi	r7,r7,1;					\
+	std	r7,HCALL_STAT_CALLS(r4);			\
+	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
+	add	r7,r7,r5;					\
+	std	r7,HCALL_STAT_TB(r4);				\
+BEGIN_FTR_SECTION;						\
+	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
+	add	r7,r7,r6;					\
+	std	r7,HCALL_STAT_PURR(r4);				\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+1:
+#else
+#define HCALL_INST_PRECALL
+#define HCALL_INST_POSTCALL
+#endif
+
 	.text
 
 _GLOBAL(plpar_hcall_norets)
@@ -21,8 +81,12 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	HVSC				/* invoke the hypervisor */
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
@@ -33,6 +97,8 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -50,6 +116,8 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -61,6 +129,8 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -86,6 +156,8 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r12,64(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
new file mode 100644
index 0000000000000..641e6511cf066
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+
+DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+	/* Cookie is *pos + 1 (0 would read as NULL); keep it inside the array */
+	if ((int)*pos < HCALL_STAT_ARRAY_SIZE - 1)
+		return (void *)(unsigned long)(*pos + 1);
+	return NULL;
+}
+
+static void *hc_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	++*pos;
+
+	return hc_start(m, pos);
+}
+
+static void hc_stop(struct seq_file *m, void *p)
+{
+}
+
+static int hc_show(struct seq_file *m, void *p)
+{
+	unsigned long h_num = (unsigned long)p;
+	struct hcall_stats *hs = (struct hcall_stats *)m->private;
+
+	/* One line per used opcode: opcode, calls, tb total [, PURR total] */
+	if (hs[h_num].num_calls) {
+		if (cpu_has_feature(CPU_FTR_PURR))
+			seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls,
+				   hs[h_num].tb_total,
+				   hs[h_num].purr_total);
+		else
+			seq_printf(m, "%lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls,
+				   hs[h_num].tb_total);
+	}
+	return 0;
+}
+
+static struct seq_operations hcall_inst_seq_ops = {
+        .start = hc_start,
+        .next  = hc_next,
+        .stop  = hc_stop,
+        .show  = hc_show
+};
+
+static int hcall_inst_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = seq_open(file, &hcall_inst_seq_ops);
+
+	if (rc)		/* open failed: private_data may not be a seq_file */
+		return rc;
+	seq = file->private_data;
+	seq->private = file->f_dentry->d_inode->u.generic_ip;
+	return 0;
+}
+
+static struct file_operations hcall_inst_seq_fops = {
+	.open = hcall_inst_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#define	HCALL_ROOT_DIR		"hcall_inst"
+#define CPU_NAME_BUF_SIZE	32
+
+static int __init hcall_inst_init(void)
+{
+	struct dentry *hcall_root;
+	struct dentry *hcall_file;
+	char cpu_name_buf[CPU_NAME_BUF_SIZE];
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+	if (!hcall_root)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
+		hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO,
+						 hcall_root,
+						 per_cpu(hcall_stats, cpu),
+						 &hcall_inst_seq_fops);
+		if (!hcall_file)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+__initcall(hcall_inst_init);
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index 63ce1ac8c1f48..257d1cecb8c96 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -208,7 +208,7 @@
 #define H_JOIN			0x298
 #define H_VASI_STATE            0x2A4
 #define H_ENABLE_CRQ		0x2B0
-#define MAX_HCALL_OPCODES	(H_ENABLE_CRQ >> 2)
+#define MAX_HCALL_OPCODE	H_ENABLE_CRQ
 
 #ifndef __ASSEMBLY__
 
@@ -246,6 +246,16 @@ long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
 #define PLPAR_HCALL9_BUFSIZE 9
 long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
 
+/* For hcall instrumentation.  One structure per-hcall, per-CPU */
+struct hcall_stats {
+	unsigned long	num_calls;	/* number of calls (on this CPU) */
+	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
+	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+};
+void update_hcall_stats(unsigned long opcode, unsigned long tb_delta,
+			unsigned long purr_delta);
+#define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
-- 
GitLab


From 06e6d290ac7a9fb6fcec3a2207988163709f06aa Mon Sep 17 00:00:00 2001
From: Josh Boyer <jdub@us.ibm.com>
Date: Thu, 7 Sep 2006 08:25:40 -0500
Subject: [PATCH 0188/1063] [POWERPC] PPC: Fix Kconfig whitespace warnings

Fix the following whitespace warnings when compiling with ARCH=ppc

arch/ppc/Kconfig:1207:warning: leading whitespace ignored
arch/ppc/Kconfig:1226:warning: leading whitespace ignored
arch/ppc/Kconfig:1231:warning: leading whitespace ignored

Also fix a typo ("Supprt").

Signed-off-by: Josh Boyer <jdub@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/ppc/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index a04cdf01596b1..8fa10cf661a80 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -1204,7 +1204,7 @@ config PCI_DOMAINS
 	default PCI
 
 config MPC83xx_PCI2
-	bool "  Supprt for 2nd PCI host controller"
+	bool "Support for 2nd PCI host controller"
 	depends on PCI && MPC834x
 	default y if MPC834x_SYS
 
@@ -1223,12 +1223,12 @@ config PCI_8260
 	default y
 
 config 8260_PCI9
-	bool "  Enable workaround for MPC826x erratum PCI 9"
+	bool "Enable workaround for MPC826x erratum PCI 9"
 	depends on PCI_8260 && !ADS8272
 	default y
 
 choice
-	prompt "  IDMA channel for PCI 9 workaround"
+	prompt "IDMA channel for PCI 9 workaround"
 	depends on 8260_PCI9
 
 config 8260_PCI9_IDMA1
-- 
GitLab


From 87fd7724d4022913ae8dbee3ed55cd04f2c316a6 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Thu, 7 Sep 2006 15:18:08 -0500
Subject: [PATCH 0189/1063] [POWERPC] Quiet hvc_console console output on
 failed opens

No other tty driver will print on the console when the open of it fails.

On systems that happen to be configured for both ttyS0 and hvc0 console,
this will keep flooding the console output.  This is most likely to
happen with systems booted alternately with and without a hypervisor from
the same filesystem.

Let's just remove it. When it's really needed (i.e. when the open fails
and someone is trying to debug it), no one will see the output anyway. And
init will report the opens failing in due time through the syslog.

Signed-off-by: Olof Johansson <olof@lixom.net>
Acked-by: Ryan S. Arnold <rsa@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/char/hvc_console.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index dbee8bed05307..1d1bd34b7f127 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -320,10 +320,8 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 	struct kobject *kobjp;
 
 	/* Auto increments kobject reference if found. */
-	if (!(hp = hvc_get_by_index(tty->index))) {
-		printk(KERN_WARNING "hvc_console: tty open failed, no vty associated with tty.\n");
+	if (!(hp = hvc_get_by_index(tty->index)))
 		return -ENODEV;
-	}
 
 	spin_lock_irqsave(&hp->lock, flags);
 	/* Check and then increment for fast path open. */
-- 
GitLab


From 26c8af5f01dfb91f709cc2ba07fb650949aae13e Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Fri, 8 Sep 2006 16:29:21 +0200
Subject: [PATCH 0190/1063] [POWERPC] print backtrace when entering xmon

xmon does not print a backtrace by default. This is bad on systems with
USB keyboard, the most needed info about the crash is lost.
Print a backtrace during the very first xmon entry.

Booting with xmon=nobt disables the autobacktrace functionality.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig.debug         |  2 ++
 arch/powerpc/kernel/setup-common.c |  4 ++++
 arch/powerpc/xmon/xmon.c           | 10 ++++++++++
 3 files changed, 16 insertions(+)

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index d7b2aedd89aa9..5ad149b47e340 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -88,6 +88,8 @@ config XMON
 	  very early during boot. 'xmon=on' will just enable the xmon
 	  debugger hooks.  'xmon=off' will disable the debugger hooks
 	  if CONFIG_XMON_DEFAULT is set.
+	  xmon will print a backtrace on the very first invocation.
+	  'xmon=nobt' will disable this autobacktrace.
 
 config XMON_DEFAULT
 	bool "Enable xmon by default"
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d57930d86faaa..465e7435efbcd 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -441,6 +441,8 @@ void __init smp_setup_cpu_maps(void)
 
 int __initdata do_early_xmon;
 #ifdef CONFIG_XMON
+extern int xmon_no_auto_backtrace;
+
 static int __init early_xmon(char *p)
 {
 	/* ensure xmon is enabled */
@@ -449,6 +451,8 @@ static int __init early_xmon(char *p)
 			xmon_init(1);
 		if (strncmp(p, "off", 3) == 0)
 			xmon_init(0);
+		if (strncmp(p, "nobt", 4) == 0)
+			xmon_no_auto_backtrace = 1;
 		if (strncmp(p, "early", 5) != 0)
 			return 0;
 	}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 179b10ced8c77..8adad1444a51f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -137,10 +137,14 @@ static void bootcmds(void);
 static void proccall(void);
 void dump_segments(void);
 static void symbol_lookup(void);
+static void xmon_show_stack(unsigned long sp, unsigned long lr,
+			    unsigned long pc);
 static void xmon_print_symbol(unsigned long address, const char *mid,
 			      const char *after);
 static const char *getvecname(unsigned long vec);
 
+int xmon_no_auto_backtrace;
+
 extern int print_insn_powerpc(unsigned long, unsigned long, int);
 
 extern void xmon_enter(void);
@@ -736,6 +740,12 @@ cmds(struct pt_regs *excp)
 
 	last_cmd = NULL;
 	xmon_regs = excp;
+
+	if (!xmon_no_auto_backtrace) {
+		xmon_no_auto_backtrace = 1;
+		xmon_show_stack(excp->gpr[1], excp->link, excp->nip);
+	}
+
 	for(;;) {
 #ifdef CONFIG_SMP
 		printf("%x:", smp_processor_id());
-- 
GitLab


From 3dd836a56de0d4f049438412959b905e1db4666e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 12 Sep 2006 16:04:25 +0100
Subject: [PATCH 0191/1063] [POWERPC] Export copy_4K_page()

Export copy_4K_page() for use by modules via copy_page() (such as
CacheFiles).

Signed-Off-By: David Howells <dhowells@redhat.com>

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/ppc_ksyms.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 39d3bfcabcd26..b2edac8ddf0a5 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -91,6 +91,9 @@ EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(copy_4K_page);
+#endif
 
 #ifndef  __powerpc64__
 EXPORT_SYMBOL(__ide_mm_insl);
-- 
GitLab


From f04da0bc36566ad17cf21e4ac8dbae377ca1dc75 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 13 Sep 2006 13:32:39 -0500
Subject: [PATCH 0192/1063] [POWERPC] Fix non-smp build

This fixes a compile error that only surfaces on CONFIG_SMP=n builds;
<asm/hvcall.h> seems to get pulled in through another header file for
SMP builds.  This problem was introduced by the hvcall stats patch.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/asm-offsets.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c578e7ab81732..d06f378597bb4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -43,6 +43,7 @@
 #include <asm/cache.h>
 #include <asm/compat.h>
 #include <asm/mmu.h>
+#include <asm/hvcall.h>
 #endif
 
 #define DEFINE(sym, val) \
-- 
GitLab


From 7dcd86e14319f4ceab883787ab2e00a5f860d14d Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Wed, 13 Sep 2006 17:41:55 -0500
Subject: [PATCH 0193/1063] [POWERPC] Fix MPC8349EMDS dts PCI interrupt-map
 values for IDSEL 0x18

Fix MPC8349EMDS dts PCI interrupt-map values for IDSEL 0x18 per
Tanya's catch.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Tanya Jiang <tanya.jiang@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/boot/dts/mpc8349emds.dts | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/boot/dts/mpc8349emds.dts b/arch/powerpc/boot/dts/mpc8349emds.dts
index 12f5dbf3055f2..efceb34326535 100644
--- a/arch/powerpc/boot/dts/mpc8349emds.dts
+++ b/arch/powerpc/boot/dts/mpc8349emds.dts
@@ -214,10 +214,10 @@ b800 0 0 3 700 14 8
 					 b800 0 0 4 700 15 8
 
 					/* IDSEL 0x18 */
-					 b000 0 0 1 700 15 8
-					 b000 0 0 2 700 16 8
-					 b000 0 0 3 700 17 8
-					 b000 0 0 4 700 14 8>;
+					 c000 0 0 1 700 15 8
+					 c000 0 0 2 700 16 8
+					 c000 0 0 3 700 17 8
+					 c000 0 0 4 700 14 8>;
 			interrupt-parent = <700>;
 			interrupts = <42 8>;
 			bus-range = <0 0>;
@@ -274,10 +274,10 @@ b800 0 0 3 700 14 8
 					 b800 0 0 4 700 15 8
 
 					/* IDSEL 0x18 */
-					 b000 0 0 1 700 15 8
-					 b000 0 0 2 700 16 8
-					 b000 0 0 3 700 17 8
-					 b000 0 0 4 700 14 8>;
+					 c000 0 0 1 700 15 8
+					 c000 0 0 2 700 16 8
+					 c000 0 0 3 700 17 8
+					 c000 0 0 4 700 14 8>;
 			interrupt-parent = <700>;
 			interrupts = <42 8>;
 			bus-range = <0 0>;
-- 
GitLab


From d882687c51b52424a56992578ce7636b3f3c8d41 Mon Sep 17 00:00:00 2001
From: Havasi Ferenc <haf@kpax.(none)>
Date: Tue, 5 Sep 2006 16:08:58 +0200
Subject: [PATCH 0194/1063] [JFFS2][SUMMARY] Fix a summary collecting bug.

In some special cases (padding because of sync
or umount) it is possible that the summary
information does not fit at the end of the erase
block. In these cases the collecting of summary
is disabled for this erase block.

The problem was that this was not respected
by jffs2_sum_add_kvec(). This patch fixes this
bug.

From: Zoltan Sogor <weth@inf.u-szeged.hu>
Signed-off-by: Ferenc Havasi <havasi@inf.u-szeged.hu>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 fs/jffs2/summary.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index c19bd476e8ec7..e52cef526d90e 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -252,6 +252,11 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
 	union jffs2_node_union *node;
 	struct jffs2_eraseblock *jeb;
 
+	if (c->summary->sum_size == JFFS2_SUMMARY_NOSUM_SIZE) {
+		dbg_summary("Summary is disabled for this jeb! Skipping summary info!\n");
+		return 0;
+	}
+
 	node = invecs[0].iov_base;
 	jeb = &c->blocks[ofs / c->sector_size];
 	ofs -= jeb->offset;
-- 
GitLab


From de591dacf3034977b3fb94b61d08240c8b35c39d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5vard=20Skinnemoen?= <hskinnemoen@atmel.com>
Date: Fri, 15 Sep 2006 17:19:31 +0200
Subject: [PATCH 0195/1063] MTD: Fix bug in fixup_convert_atmel_pri
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The memset() in fixup_convert_atmel_pri is supposed to zero out
everything except the first 5 bytes in *extp, but it ends up zeroing
out something way outside the struct instead. Fix this potentially
dangerous code by casting the pointer to char * before doing
arithmetic.

Signed-off-by: Håvard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index ddc5bd7833546..a482e8922de10 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -175,7 +175,7 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
 	struct cfi_pri_atmel atmel_pri;
 
 	memcpy(&atmel_pri, extp, sizeof(atmel_pri));
-	memset(extp + 5, 0, sizeof(*extp) - 5);
+	memset((char *)extp + 5, 0, sizeof(*extp) - 5);
 
 	if (atmel_pri.Features & 0x02)
 		extp->EraseSuspend = 2;
-- 
GitLab


From ea59830db01b6b3d6bda9f84e3d272a346115e8e Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Date: Sat, 16 Sep 2006 21:09:29 -0400
Subject: [PATCH 0196/1063] [MTD] Use SEEK_{SET,CUR,END} instead of hardcoded
 values in mtdchar lseek()

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/mtdchar.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index fb8b4f7e48d3a..5b6acfcb2b880 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -62,15 +62,12 @@ static loff_t mtd_lseek (struct file *file, loff_t offset, int orig)
 	struct mtd_info *mtd = mfi->mtd;
 
 	switch (orig) {
-	case 0:
-		/* SEEK_SET */
+	case SEEK_SET:
 		break;
-	case 1:
-		/* SEEK_CUR */
+	case SEEK_CUR:
 		offset += file->f_pos;
 		break;
-	case 2:
-		/* SEEK_END */
+	case SEEK_END:
 		offset += mtd->size;
 		break;
 	default:
-- 
GitLab


From fadcfa33b6319a5faf8af2287f08bf93a7f926b6 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Tue, 19 Sep 2006 12:43:58 +0100
Subject: [PATCH 0197/1063] [HEADERS] One line per header in Kbuild files to
 reduce conflicts

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/Kbuild                        |  11 +-
 include/asm-alpha/Kbuild              |  10 +-
 include/asm-generic/Kbuild            |  15 +-
 include/asm-generic/Kbuild.asm        |  38 ++-
 include/asm-i386/Kbuild               |   9 +-
 include/asm-ia64/Kbuild               |  18 +-
 include/asm-powerpc/Kbuild            |  45 ++-
 include/asm-s390/Kbuild               |  11 +-
 include/asm-sparc/Kbuild              |  24 +-
 include/asm-sparc64/Kbuild            |  27 +-
 include/asm-x86_64/Kbuild             |  18 +-
 include/linux/Kbuild                  | 400 ++++++++++++++++++++++----
 include/linux/byteorder/Kbuild        |   9 +-
 include/linux/dvb/Kbuild              |  11 +-
 include/linux/netfilter/Kbuild        |  47 ++-
 include/linux/netfilter_arp/Kbuild    |   5 +-
 include/linux/netfilter_bridge/Kbuild |  21 +-
 include/linux/netfilter_ipv4/Kbuild   |  82 ++++--
 include/linux/netfilter_ipv6/Kbuild   |  27 +-
 include/linux/nfsd/Kbuild             |   9 +-
 include/linux/raid/Kbuild             |   3 +-
 include/linux/sunrpc/Kbuild           |   2 +-
 include/linux/tc_act/Kbuild           |   5 +-
 include/linux/tc_ematch/Kbuild        |   5 +-
 include/mtd/Kbuild                    |   8 +-
 include/rdma/Kbuild                   |   2 +-
 include/scsi/Kbuild                   |   4 +-
 include/sound/Kbuild                  |  12 +-
 include/video/Kbuild                  |   2 +-
 29 files changed, 721 insertions(+), 159 deletions(-)

diff --git a/include/Kbuild b/include/Kbuild
index cb2534800b197..2d03f995865ff 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -1,2 +1,9 @@
-header-y += asm-generic/ linux/ scsi/ sound/ mtd/ rdma/ video/
-header-y += asm-$(ARCH)/ 
+header-y += asm-generic/
+header-y += linux/
+header-y += scsi/
+header-y += sound/
+header-y += mtd/
+header-y += rdma/
+header-y += video/
+
+header-y += asm-$(ARCH)/
diff --git a/include/asm-alpha/Kbuild b/include/asm-alpha/Kbuild
index 2b06b3bad5ffc..b7c8f188b313b 100644
--- a/include/asm-alpha/Kbuild
+++ b/include/asm-alpha/Kbuild
@@ -1,5 +1,11 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += console.h fpu.h sysinfo.h compiler.h
+header-y += gentrap.h
+header-y += regdef.h
+header-y += pal.h
+header-y += reg.h
 
-header-y += gentrap.h regdef.h pal.h reg.h
+unifdef-y += console.h
+unifdef-y += fpu.h
+unifdef-y += sysinfo.h
+unifdef-y += compiler.h
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 70594b275a6e6..3c06be3817011 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -1,3 +1,12 @@
-header-y += atomic.h errno-base.h errno.h fcntl.h ioctl.h ipc.h mman.h \
-	signal.h statfs.h
-unifdef-y := resource.h siginfo.h
+header-y += atomic.h
+header-y += errno-base.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ipc.h
+header-y += mman.h
+header-y += signal.h
+header-y += statfs.h
+
+unifdef-y += resource.h
+unifdef-y += siginfo.h
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index c00de6028fa81..a84c3d88a1891 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -1,8 +1,34 @@
-unifdef-y += a.out.h auxvec.h byteorder.h errno.h fcntl.h ioctl.h	\
-	ioctls.h ipcbuf.h mman.h msgbuf.h param.h poll.h		\
-	posix_types.h ptrace.h resource.h sembuf.h shmbuf.h shmparam.h	\
-	sigcontext.h siginfo.h signal.h socket.h sockios.h stat.h	\
-	statfs.h termbits.h termios.h types.h unistd.h user.h
+unifdef-y += a.out.h
+unifdef-y += auxvec.h
+unifdef-y += byteorder.h
+unifdef-y += errno.h
+unifdef-y += fcntl.h
+unifdef-y += ioctl.h
+unifdef-y += ioctls.h
+unifdef-y += ipcbuf.h
+unifdef-y += mman.h
+unifdef-y += msgbuf.h
+unifdef-y += param.h
+unifdef-y += poll.h
+unifdef-y += posix_types.h
+unifdef-y += ptrace.h
+unifdef-y += resource.h
+unifdef-y += sembuf.h
+unifdef-y += shmbuf.h
+unifdef-y += sigcontext.h
+unifdef-y += siginfo.h
+unifdef-y += signal.h
+unifdef-y += socket.h
+unifdef-y += sockios.h
+unifdef-y += stat.h
+unifdef-y += statfs.h
+unifdef-y += termbits.h
+unifdef-y += termios.h
+unifdef-y += types.h
+unifdef-y += unistd.h
+unifdef-y += user.h
 
 # These probably shouldn't be exported
-unifdef-y += elf.h page.h
+unifdef-y += shmparam.h
+unifdef-y += elf.h
+unifdef-y += page.h
diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild
index 2308190321da0..b75a348d0c1cf 100644
--- a/include/asm-i386/Kbuild
+++ b/include/asm-i386/Kbuild
@@ -1,5 +1,10 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += boot.h debugreg.h ldt.h ucontext.h
+header-y += boot.h
+header-y += debugreg.h
+header-y += ldt.h
+header-y += ucontext.h
 
-unifdef-y += mtrr.h setup.h vm86.h
+unifdef-y += mtrr.h
+unifdef-y += setup.h
+unifdef-y += vm86.h
diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild
index f1cb00f39c22d..15818a18bc520 100644
--- a/include/asm-ia64/Kbuild
+++ b/include/asm-ia64/Kbuild
@@ -1,7 +1,17 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += break.h fpu.h fpswa.h gcc_intrin.h ia64regs.h		\
-	 intel_intrin.h intrinsics.h perfmon_default_smpl.h	\
-	 ptrace_offsets.h rse.h setup.h ucontext.h
+header-y += break.h
+header-y += fpu.h
+header-y += fpswa.h
+header-y += gcc_intrin.h
+header-y += ia64regs.h
+header-y += intel_intrin.h
+header-y += intrinsics.h
+header-y += perfmon_default_smpl.h
+header-y += ptrace_offsets.h
+header-y += rse.h
+header-y += setup.h
+header-y += ucontext.h
 
-unifdef-y += perfmon.h ustack.h
+unifdef-y += perfmon.h
+unifdef-y += ustack.h
diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild
index ac61d7eb60214..9827849953a36 100644
--- a/include/asm-powerpc/Kbuild
+++ b/include/asm-powerpc/Kbuild
@@ -1,10 +1,41 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += a.out.h asm-compat.h bootx.h byteorder.h cputable.h elf.h	\
-	nvram.h param.h posix_types.h ptrace.h seccomp.h signal.h	\
-	termios.h types.h unistd.h
+header-y += auxvec.h
+header-y += ioctls.h
+header-y += mman.h
+header-y += sembuf.h
+header-y += siginfo.h
+header-y += stat.h
+header-y += errno.h
+header-y += ipcbuf.h
+header-y += msgbuf.h
+header-y += shmbuf.h
+header-y += socket.h
+header-y += termbits.h
+header-y += fcntl.h
+header-y += ipc.h
+header-y += poll.h
+header-y += shmparam.h
+header-y += sockios.h
+header-y += ucontext.h
+header-y += ioctl.h
+header-y += linkage.h
+header-y += resource.h
+header-y += sigcontext.h
+header-y += statfs.h
 
-header-y += auxvec.h ioctls.h mman.h sembuf.h siginfo.h stat.h errno.h	\
-	ipcbuf.h msgbuf.h shmbuf.h socket.h termbits.h fcntl.h ipc.h	\
-	poll.h shmparam.h sockios.h ucontext.h ioctl.h linkage.h	\
-	resource.h sigcontext.h statfs.h
+unifdef-y += a.out.h
+unifdef-y += asm-compat.h
+unifdef-y += bootx.h
+unifdef-y += byteorder.h
+unifdef-y += cputable.h
+unifdef-y += elf.h
+unifdef-y += nvram.h
+unifdef-y += param.h
+unifdef-y += posix_types.h
+unifdef-y += ptrace.h
+unifdef-y += seccomp.h
+unifdef-y += signal.h
+unifdef-y += termios.h
+unifdef-y += types.h
+unifdef-y += unistd.h
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild
index ed8955f49e476..14158a4a9c87d 100644
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -1,4 +1,11 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += cmb.h debug.h
-header-y += dasd.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h
+header-y += dasd.h
+header-y += qeth.h
+header-y += tape390.h
+header-y += ucontext.h
+header-y += vtoc.h
+header-y += z90crypt.h
+
+unifdef-y += cmb.h
+unifdef-y += debug.h
diff --git a/include/asm-sparc/Kbuild b/include/asm-sparc/Kbuild
index e2a57fd7abfad..b22b67a64ecc7 100644
--- a/include/asm-sparc/Kbuild
+++ b/include/asm-sparc/Kbuild
@@ -1,6 +1,22 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += fbio.h perfctr.h psr.h
-header-y += apc.h asi.h auxio.h bpp.h head.h ipc.h jsflash.h	\
-	openpromio.h pbm.h pconf.h pgtsun4.h reg.h traps.h	\
-	turbosparc.h vfc_ioctls.h winmacro.h
+header-y += apc.h
+header-y += asi.h
+header-y += auxio.h
+header-y += bpp.h
+header-y += head.h
+header-y += ipc.h
+header-y += jsflash.h
+header-y += openpromio.h
+header-y += pbm.h
+header-y += pconf.h
+header-y += pgtsun4.h
+header-y += reg.h
+header-y += traps.h
+header-y += turbosparc.h
+header-y += vfc_ioctls.h
+header-y += winmacro.h
+
+unifdef-y += fbio.h
+unifdef-y += perfctr.h
+unifdef-y += psr.h
diff --git a/include/asm-sparc64/Kbuild b/include/asm-sparc64/Kbuild
index 9284c3cb27ec3..4b59ce46cc2d1 100644
--- a/include/asm-sparc64/Kbuild
+++ b/include/asm-sparc64/Kbuild
@@ -4,7 +4,26 @@ ALTARCH := sparc
 ARCHDEF := defined __sparc__ && defined __arch64__
 ALTARCHDEF := defined __sparc__ && !defined __arch64__
 
-unifdef-y += fbio.h perfctr.h
-header-y += apb.h asi.h bbc.h bpp.h display7seg.h envctrl.h floppy.h	\
-	ipc.h kdebug.h mostek.h openprom.h openpromio.h parport.h	\
-	pconf.h psrcompat.h pstate.h reg.h uctx.h utrap.h watchdog.h
+header-y += apb.h
+header-y += asi.h
+header-y += bbc.h
+header-y += bpp.h
+header-y += display7seg.h
+header-y += envctrl.h
+header-y += floppy.h
+header-y += ipc.h
+header-y += kdebug.h
+header-y += mostek.h
+header-y += openprom.h
+header-y += openpromio.h
+header-y += parport.h
+header-y += pconf.h
+header-y += psrcompat.h
+header-y += pstate.h
+header-y += reg.h
+header-y += uctx.h
+header-y += utrap.h
+header-y += watchdog.h
+
+unifdef-y += fbio.h
+unifdef-y += perfctr.h
diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild
index dc4d101e8a160..40f2f13fe1744 100644
--- a/include/asm-x86_64/Kbuild
+++ b/include/asm-x86_64/Kbuild
@@ -4,8 +4,18 @@ ALTARCH := i386
 ARCHDEF := defined __x86_64__
 ALTARCHDEF := defined __i386__
 
-header-y += boot.h bootsetup.h cpufeature.h debugreg.h ldt.h \
-	 msr.h prctl.h setup.h sigcontext32.h ucontext.h \
-	 vsyscall32.h
+header-y += boot.h
+header-y += bootsetup.h
+header-y += cpufeature.h
+header-y += debugreg.h
+header-y += ldt.h
+header-y += msr.h
+header-y += prctl.h
+header-y += setup.h
+header-y += sigcontext32.h
+header-y += ucontext.h
+header-y += vsyscall32.h
 
-unifdef-y += mce.h mtrr.h vsyscall.h
+unifdef-y += mce.h
+unifdef-y += mtrr.h
+unifdef-y += vsyscall.h
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 2b8a7d68fae3e..7d076d97b2f7f 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -1,63 +1,343 @@
-header-y := byteorder/ dvb/ hdlc/ isdn/ nfsd/ raid/ sunrpc/ tc_act/	\
-	netfilter/ netfilter_arp/ netfilter_bridge/ netfilter_ipv4/	\
-	netfilter_ipv6/
+header-y += byteorder/
+header-y += dvb/
+header-y += hdlc/
+header-y += isdn/
+header-y += nfsd/
+header-y += raid/
+header-y += sunrpc/
+header-y += tc_act/
+header-y += netfilter/
+header-y += netfilter_arp/
+header-y += netfilter_bridge/
+header-y += netfilter_ipv4/
+header-y += netfilter_ipv6/
 
-header-y += affs_fs.h affs_hardblocks.h aio_abi.h a.out.h arcfb.h	\
-	atmapi.h atmbr2684.h atmclip.h atm_eni.h atm_he.h		\
-	atm_idt77105.h atmioc.h atmlec.h atmmpc.h atm_nicstar.h		\
-	atmppp.h atmsap.h atmsvc.h atm_zatm.h auto_fs4.h auxvec.h	\
-	awe_voice.h ax25.h b1lli.h baycom.h bfs_fs.h blkpg.h		\
-	bpqether.h cdk.h chio.h coda_psdev.h coff.h comstats.h		\
-	consolemap.h cycx_cfm.h dm-ioctl.h dn.h dqblk_v1.h		\
-	dqblk_v2.h dqblk_xfs.h efs_fs_sb.h elf-fdpic.h elf.h elf-em.h	\
-	fadvise.h fd.h fdreg.h ftape-header-segment.h ftape-vendors.h	\
-	fuse.h futex.h genetlink.h gen_stats.h gigaset_dev.h hdsmart.h	\
-	hpfs_fs.h hysdn_if.h i2c-dev.h i8k.h icmp.h			\
-	if_arcnet.h if_arp.h if_bonding.h if_cablemodem.h if_fc.h	\
-	if_fddi.h if.h if_hippi.h if_infiniband.h if_packet.h		\
-	if_plip.h if_ppp.h if_slip.h if_strip.h if_tunnel.h in6.h	\
-	in_route.h ioctl.h ip.h ipmi_msgdefs.h ip_mp_alg.h ipsec.h	\
-	ipx.h irda.h isdn_divertif.h iso_fs.h ite_gpio.h ixjuser.h	\
-	jffs2.h keyctl.h limits.h major.h matroxfb.h meye.h minix_fs.h	\
-	mmtimer.h mqueue.h mtio.h ncp_no.h netfilter_arp.h netrom.h	\
-	nfs2.h nfs4_mount.h nfs_mount.h openprom_fs.h param.h		\
-	pci_ids.h pci_regs.h personality.h pfkeyv2.h pg.h pkt_cls.h	\
-	pkt_sched.h posix_types.h ppdev.h prctl.h ps2esdi.h qic117.h	\
-	qnxtypes.h quotaio_v1.h quotaio_v2.h radeonfb.h raw.h		\
-	resource.h rose.h sctp.h smbno.h snmp.h sockios.h som.h		\
-	sound.h stddef.h synclink.h telephony.h termios.h ticable.h	\
-	times.h tiocl.h tipc.h toshiba.h ultrasound.h un.h utime.h	\
-	utsname.h video_decoder.h video_encoder.h videotext.h vt.h	\
-	wavefront.h wireless.h xattr.h x25.h zorro_ids.h
+header-y += affs_fs.h
+header-y += affs_hardblocks.h
+header-y += aio_abi.h
+header-y += a.out.h
+header-y += arcfb.h
+header-y += atmapi.h
+header-y += atmbr2684.h
+header-y += atmclip.h
+header-y += atm_eni.h
+header-y += atm_he.h
+header-y += atm_idt77105.h
+header-y += atmioc.h
+header-y += atmlec.h
+header-y += atmmpc.h
+header-y += atm_nicstar.h
+header-y += atmppp.h
+header-y += atmsap.h
+header-y += atmsvc.h
+header-y += atm_zatm.h
+header-y += auto_fs4.h
+header-y += auxvec.h
+header-y += awe_voice.h
+header-y += ax25.h
+header-y += b1lli.h
+header-y += baycom.h
+header-y += bfs_fs.h
+header-y += blkpg.h
+header-y += bpqether.h
+header-y += cdk.h
+header-y += chio.h
+header-y += coda_psdev.h
+header-y += coff.h
+header-y += comstats.h
+header-y += consolemap.h
+header-y += cycx_cfm.h
+header-y += dm-ioctl.h
+header-y += dn.h
+header-y += dqblk_v1.h
+header-y += dqblk_v2.h
+header-y += dqblk_xfs.h
+header-y += efs_fs_sb.h
+header-y += elf-fdpic.h
+header-y += elf.h
+header-y += elf-em.h
+header-y += fadvise.h
+header-y += fd.h
+header-y += fdreg.h
+header-y += ftape-header-segment.h
+header-y += ftape-vendors.h
+header-y += fuse.h
+header-y += futex.h
+header-y += genetlink.h
+header-y += gen_stats.h
+header-y += gigaset_dev.h
+header-y += hdsmart.h
+header-y += hpfs_fs.h
+header-y += hysdn_if.h
+header-y += i2c-dev.h
+header-y += i8k.h
+header-y += icmp.h
+header-y += if_arcnet.h
+header-y += if_arp.h
+header-y += if_bonding.h
+header-y += if_cablemodem.h
+header-y += if_fc.h
+header-y += if_fddi.h
+header-y += if.h
+header-y += if_hippi.h
+header-y += if_infiniband.h
+header-y += if_packet.h
+header-y += if_plip.h
+header-y += if_ppp.h
+header-y += if_slip.h
+header-y += if_strip.h
+header-y += if_tunnel.h
+header-y += in6.h
+header-y += in_route.h
+header-y += ioctl.h
+header-y += ip.h
+header-y += ipmi_msgdefs.h
+header-y += ip_mp_alg.h
+header-y += ipsec.h
+header-y += ipx.h
+header-y += irda.h
+header-y += isdn_divertif.h
+header-y += iso_fs.h
+header-y += ite_gpio.h
+header-y += ixjuser.h
+header-y += jffs2.h
+header-y += keyctl.h
+header-y += limits.h
+header-y += major.h
+header-y += matroxfb.h
+header-y += meye.h
+header-y += minix_fs.h
+header-y += mmtimer.h
+header-y += mqueue.h
+header-y += mtio.h
+header-y += ncp_no.h
+header-y += netfilter_arp.h
+header-y += netrom.h
+header-y += nfs2.h
+header-y += nfs4_mount.h
+header-y += nfs_mount.h
+header-y += openprom_fs.h
+header-y += param.h
+header-y += pci_ids.h
+header-y += pci_regs.h
+header-y += personality.h
+header-y += pfkeyv2.h
+header-y += pg.h
+header-y += pkt_cls.h
+header-y += pkt_sched.h
+header-y += posix_types.h
+header-y += ppdev.h
+header-y += prctl.h
+header-y += ps2esdi.h
+header-y += qic117.h
+header-y += qnxtypes.h
+header-y += quotaio_v1.h
+header-y += quotaio_v2.h
+header-y += radeonfb.h
+header-y += raw.h
+header-y += resource.h
+header-y += rose.h
+header-y += sctp.h
+header-y += smbno.h
+header-y += snmp.h
+header-y += sockios.h
+header-y += som.h
+header-y += sound.h
+header-y += stddef.h
+header-y += synclink.h
+header-y += telephony.h
+header-y += termios.h
+header-y += ticable.h
+header-y += times.h
+header-y += tiocl.h
+header-y += tipc.h
+header-y += toshiba.h
+header-y += ultrasound.h
+header-y += un.h
+header-y += utime.h
+header-y += utsname.h
+header-y += video_decoder.h
+header-y += video_encoder.h
+header-y += videotext.h
+header-y += vt.h
+header-y += wavefront.h
+header-y += wireless.h
+header-y += xattr.h
+header-y += x25.h
+header-y += zorro_ids.h
 
-unifdef-y += acct.h adb.h adfs_fs.h agpgart.h apm_bios.h atalk.h	\
-	atmarp.h atmdev.h atm.h atm_tcp.h audit.h auto_fs.h binfmts.h	\
-	capability.h capi.h cciss_ioctl.h cdrom.h cm4000_cs.h		\
-	cn_proc.h coda.h connector.h cramfs_fs.h cuda.h cyclades.h	\
-	dccp.h dirent.h divert.h elfcore.h errno.h errqueue.h		\
-	ethtool.h eventpoll.h ext2_fs.h ext3_fs.h fb.h fcntl.h		\
-	filter.h flat.h fs.h ftape.h gameport.h generic_serial.h	\
-	genhd.h hayesesp.h hdlcdrv.h hdlc.h hdreg.h hiddev.h hpet.h	\
-	i2c.h i2o-dev.h icmpv6.h if_bridge.h if_ec.h			\
-	if_eql.h if_ether.h if_frad.h if_ltalk.h if_pppox.h		\
-	if_shaper.h if_tr.h if_tun.h if_vlan.h if_wanpipe.h igmp.h	\
-	inet_diag.h in.h inotify.h input.h ipc.h ipmi.h ipv6.h		\
-	ipv6_route.h isdn.h isdnif.h isdn_ppp.h isicom.h jbd.h		\
-	joystick.h kdev_t.h kd.h kernelcapi.h kernel.h keyboard.h	\
-	llc.h loop.h lp.h mempolicy.h mii.h mman.h mroute.h msdos_fs.h	\
-	msg.h nbd.h ncp_fs.h ncp.h ncp_mount.h netdevice.h		\
-	netfilter_bridge.h netfilter_decnet.h netfilter.h		\
-	netfilter_ipv4.h netfilter_ipv6.h netfilter_logging.h net.h	\
-	netlink.h nfs3.h nfs4.h nfsacl.h nfs_fs.h nfs.h nfs_idmap.h	\
-	n_r3964.h nubus.h nvram.h parport.h patchkey.h pci.h pktcdvd.h	\
-	pmu.h poll.h ppp_defs.h ppp-comp.h ptrace.h qnx4_fs.h quota.h	\
-	random.h reboot.h reiserfs_fs.h reiserfs_xattr.h romfs_fs.h	\
-	route.h rtc.h rtnetlink.h scc.h sched.h sdla.h			\
-	selinux_netlink.h sem.h serial_core.h serial.h serio.h shm.h	\
-	signal.h smb_fs.h smb.h smb_mount.h socket.h sonet.h sonypi.h	\
-	soundcard.h stat.h sysctl.h tcp.h time.h timex.h tty.h types.h	\
-	udf_fs_i.h udp.h uinput.h uio.h unistd.h usb_ch9.h		\
-	usbdevice_fs.h user.h videodev2.h videodev.h wait.h		\
-	wanrouter.h watchdog.h xfrm.h zftape.h
+unifdef-y += acct.h
+unifdef-y += adb.h
+unifdef-y += adfs_fs.h
+unifdef-y += agpgart.h
+unifdef-y += apm_bios.h
+unifdef-y += atalk.h
+unifdef-y += atmarp.h
+unifdef-y += atmdev.h
+unifdef-y += atm.h
+unifdef-y += atm_tcp.h
+unifdef-y += audit.h
+unifdef-y += auto_fs.h
+unifdef-y += binfmts.h
+unifdef-y += capability.h
+unifdef-y += capi.h
+unifdef-y += cciss_ioctl.h
+unifdef-y += cdrom.h
+unifdef-y += cm4000_cs.h
+unifdef-y += cn_proc.h
+unifdef-y += coda.h
+unifdef-y += connector.h
+unifdef-y += cramfs_fs.h
+unifdef-y += cuda.h
+unifdef-y += cyclades.h
+unifdef-y += dccp.h
+unifdef-y += dirent.h
+unifdef-y += divert.h
+unifdef-y += elfcore.h
+unifdef-y += errno.h
+unifdef-y += errqueue.h
+unifdef-y += ethtool.h
+unifdef-y += eventpoll.h
+unifdef-y += ext2_fs.h
+unifdef-y += ext3_fs.h
+unifdef-y += fb.h
+unifdef-y += fcntl.h
+unifdef-y += filter.h
+unifdef-y += flat.h
+unifdef-y += fs.h
+unifdef-y += ftape.h
+unifdef-y += gameport.h
+unifdef-y += generic_serial.h
+unifdef-y += genhd.h
+unifdef-y += hayesesp.h
+unifdef-y += hdlcdrv.h
+unifdef-y += hdlc.h
+unifdef-y += hdreg.h
+unifdef-y += hiddev.h
+unifdef-y += hpet.h
+unifdef-y += i2c.h
+unifdef-y += i2o-dev.h
+unifdef-y += icmpv6.h
+unifdef-y += if_bridge.h
+unifdef-y += if_ec.h
+unifdef-y += if_eql.h
+unifdef-y += if_ether.h
+unifdef-y += if_frad.h
+unifdef-y += if_ltalk.h
+unifdef-y += if_pppox.h
+unifdef-y += if_shaper.h
+unifdef-y += if_tr.h
+unifdef-y += if_tun.h
+unifdef-y += if_vlan.h
+unifdef-y += if_wanpipe.h
+unifdef-y += igmp.h
+unifdef-y += inet_diag.h
+unifdef-y += in.h
+unifdef-y += inotify.h
+unifdef-y += input.h
+unifdef-y += ipc.h
+unifdef-y += ipmi.h
+unifdef-y += ipv6.h
+unifdef-y += ipv6_route.h
+unifdef-y += isdn.h
+unifdef-y += isdnif.h
+unifdef-y += isdn_ppp.h
+unifdef-y += isicom.h
+unifdef-y += jbd.h
+unifdef-y += joystick.h
+unifdef-y += kdev_t.h
+unifdef-y += kd.h
+unifdef-y += kernelcapi.h
+unifdef-y += kernel.h
+unifdef-y += keyboard.h
+unifdef-y += llc.h
+unifdef-y += loop.h
+unifdef-y += lp.h
+unifdef-y += mempolicy.h
+unifdef-y += mii.h
+unifdef-y += mman.h
+unifdef-y += mroute.h
+unifdef-y += msdos_fs.h
+unifdef-y += msg.h
+unifdef-y += nbd.h
+unifdef-y += ncp_fs.h
+unifdef-y += ncp.h
+unifdef-y += ncp_mount.h
+unifdef-y += netdevice.h
+unifdef-y += netfilter_bridge.h
+unifdef-y += netfilter_decnet.h
+unifdef-y += netfilter.h
+unifdef-y += netfilter_ipv4.h
+unifdef-y += netfilter_ipv6.h
+unifdef-y += netfilter_logging.h
+unifdef-y += net.h
+unifdef-y += netlink.h
+unifdef-y += nfs3.h
+unifdef-y += nfs4.h
+unifdef-y += nfsacl.h
+unifdef-y += nfs_fs.h
+unifdef-y += nfs.h
+unifdef-y += nfs_idmap.h
+unifdef-y += n_r3964.h
+unifdef-y += nubus.h
+unifdef-y += nvram.h
+unifdef-y += parport.h
+unifdef-y += patchkey.h
+unifdef-y += pci.h
+unifdef-y += pktcdvd.h
+unifdef-y += pmu.h
+unifdef-y += poll.h
+unifdef-y += ppp_defs.h
+unifdef-y += ppp-comp.h
+unifdef-y += ptrace.h
+unifdef-y += qnx4_fs.h
+unifdef-y += quota.h
+unifdef-y += random.h
+unifdef-y += reboot.h
+unifdef-y += reiserfs_fs.h
+unifdef-y += reiserfs_xattr.h
+unifdef-y += romfs_fs.h
+unifdef-y += route.h
+unifdef-y += rtc.h
+unifdef-y += rtnetlink.h
+unifdef-y += scc.h
+unifdef-y += sched.h
+unifdef-y += sdla.h
+unifdef-y += selinux_netlink.h
+unifdef-y += sem.h
+unifdef-y += serial_core.h
+unifdef-y += serial.h
+unifdef-y += serio.h
+unifdef-y += shm.h
+unifdef-y += signal.h
+unifdef-y += smb_fs.h
+unifdef-y += smb.h
+unifdef-y += smb_mount.h
+unifdef-y += socket.h
+unifdef-y += sonet.h
+unifdef-y += sonypi.h
+unifdef-y += soundcard.h
+unifdef-y += stat.h
+unifdef-y += sysctl.h
+unifdef-y += tcp.h
+unifdef-y += time.h
+unifdef-y += timex.h
+unifdef-y += tty.h
+unifdef-y += types.h
+unifdef-y += udf_fs_i.h
+unifdef-y += udp.h
+unifdef-y += uinput.h
+unifdef-y += uio.h
+unifdef-y += unistd.h
+unifdef-y += usb_ch9.h
+unifdef-y += usbdevice_fs.h
+unifdef-y += user.h
+unifdef-y += videodev2.h
+unifdef-y += videodev.h
+unifdef-y += wait.h
+unifdef-y += wanrouter.h
+unifdef-y += watchdog.h
+unifdef-y += xfrm.h
+unifdef-y += zftape.h
 
-objhdr-y := version.h
+objhdr-y += version.h
diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
index 84a57d4fb212c..56499ab9e32ee 100644
--- a/include/linux/byteorder/Kbuild
+++ b/include/linux/byteorder/Kbuild
@@ -1,2 +1,7 @@
-unifdef-y += generic.h swabb.h swab.h
-header-y += big_endian.h little_endian.h pdp_endian.h
+header-y += big_endian.h
+header-y += little_endian.h
+header-y += pdp_endian.h
+
+unifdef-y += generic.h
+unifdef-y += swabb.h
+unifdef-y += swab.h
diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild
index 63973af72fd55..d97b3a51e2279 100644
--- a/include/linux/dvb/Kbuild
+++ b/include/linux/dvb/Kbuild
@@ -1,2 +1,9 @@
-header-y += ca.h frontend.h net.h osd.h version.h
-unifdef-y := audio.h dmx.h video.h
+header-y += ca.h
+header-y += frontend.h
+header-y += net.h
+header-y += osd.h
+header-y += version.h
+
+unifdef-y += audio.h
+unifdef-y += dmx.h
+unifdef-y += video.h
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 1d3a14e2da6eb..9a285cecf249e 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -1,11 +1,38 @@
-header-y := nf_conntrack_sctp.h nf_conntrack_tuple_common.h		\
-	    nfnetlink_conntrack.h nfnetlink_log.h nfnetlink_queue.h	\
-	    xt_CLASSIFY.h xt_comment.h xt_connbytes.h xt_connmark.h	\
-	    xt_CONNMARK.h xt_conntrack.h xt_dccp.h xt_esp.h		\
-	    xt_helper.h xt_length.h xt_limit.h xt_mac.h xt_mark.h	\
-	    xt_MARK.h xt_multiport.h xt_NFQUEUE.h xt_pkttype.h		\
-	    xt_policy.h xt_realm.h xt_sctp.h xt_state.h xt_string.h	\
-	    xt_tcpmss.h xt_tcpudp.h xt_SECMARK.h xt_CONNSECMARK.h
+header-y += nf_conntrack_sctp.h
+header-y += nf_conntrack_tuple_common.h
+header-y += nfnetlink_conntrack.h
+header-y += nfnetlink_log.h
+header-y += nfnetlink_queue.h
+header-y += xt_CLASSIFY.h
+header-y += xt_comment.h
+header-y += xt_connbytes.h
+header-y += xt_connmark.h
+header-y += xt_CONNMARK.h
+header-y += xt_conntrack.h
+header-y += xt_dccp.h
+header-y += xt_esp.h
+header-y += xt_helper.h
+header-y += xt_length.h
+header-y += xt_limit.h
+header-y += xt_mac.h
+header-y += xt_mark.h
+header-y += xt_MARK.h
+header-y += xt_multiport.h
+header-y += xt_NFQUEUE.h
+header-y += xt_pkttype.h
+header-y += xt_policy.h
+header-y += xt_realm.h
+header-y += xt_sctp.h
+header-y += xt_state.h
+header-y += xt_string.h
+header-y += xt_tcpmss.h
+header-y += xt_tcpudp.h
+header-y += xt_SECMARK.h
+header-y += xt_CONNSECMARK.h
 
-unifdef-y := nf_conntrack_common.h nf_conntrack_ftp.h		\
-	nf_conntrack_tcp.h nfnetlink.h x_tables.h xt_physdev.h
+unifdef-y += nf_conntrack_common.h
+unifdef-y += nf_conntrack_ftp.h
+unifdef-y += nf_conntrack_tcp.h
+unifdef-y += nfnetlink.h
+unifdef-y += x_tables.h
+unifdef-y += xt_physdev.h
diff --git a/include/linux/netfilter_arp/Kbuild b/include/linux/netfilter_arp/Kbuild
index 198ec5e7b17d0..4f13dfcb92eae 100644
--- a/include/linux/netfilter_arp/Kbuild
+++ b/include/linux/netfilter_arp/Kbuild
@@ -1,2 +1,3 @@
-header-y := arpt_mangle.h
-unifdef-y := arp_tables.h
+header-y += arpt_mangle.h
+
+unifdef-y += arp_tables.h
diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild
index 5b1aba6abbad4..76ff4c47d8c4b 100644
--- a/include/linux/netfilter_bridge/Kbuild
+++ b/include/linux/netfilter_bridge/Kbuild
@@ -1,4 +1,17 @@
-header-y += ebt_among.h ebt_arp.h ebt_arpreply.h ebt_ip.h ebt_limit.h	\
-	ebt_log.h ebt_mark_m.h ebt_mark_t.h ebt_nat.h ebt_pkttype.h	\
-	ebt_redirect.h ebt_stp.h ebt_ulog.h ebt_vlan.h
-unifdef-y := ebtables.h ebt_802_3.h
+header-y += ebt_among.h
+header-y += ebt_arp.h
+header-y += ebt_arpreply.h
+header-y += ebt_ip.h
+header-y += ebt_limit.h
+header-y += ebt_log.h
+header-y += ebt_mark_m.h
+header-y += ebt_mark_t.h
+header-y += ebt_nat.h
+header-y += ebt_pkttype.h
+header-y += ebt_redirect.h
+header-y += ebt_stp.h
+header-y += ebt_ulog.h
+header-y += ebt_vlan.h
+
+unifdef-y += ebtables.h
+unifdef-y += ebt_802_3.h
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index 04e4d2721689d..591c1a809c004 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -1,21 +1,63 @@
+header-y += ip_conntrack_helper.h
+header-y += ip_conntrack_helper_h323_asn1.h
+header-y += ip_conntrack_helper_h323_types.h
+header-y += ip_conntrack_protocol.h
+header-y += ip_conntrack_sctp.h
+header-y += ip_conntrack_tcp.h
+header-y += ip_conntrack_tftp.h
+header-y += ip_nat_pptp.h
+header-y += ipt_addrtype.h
+header-y += ipt_ah.h
+header-y += ipt_CLASSIFY.h
+header-y += ipt_CLUSTERIP.h
+header-y += ipt_comment.h
+header-y += ipt_connbytes.h
+header-y += ipt_connmark.h
+header-y += ipt_CONNMARK.h
+header-y += ipt_conntrack.h
+header-y += ipt_dccp.h
+header-y += ipt_dscp.h
+header-y += ipt_DSCP.h
+header-y += ipt_ecn.h
+header-y += ipt_ECN.h
+header-y += ipt_esp.h
+header-y += ipt_hashlimit.h
+header-y += ipt_helper.h
+header-y += ipt_iprange.h
+header-y += ipt_length.h
+header-y += ipt_limit.h
+header-y += ipt_LOG.h
+header-y += ipt_mac.h
+header-y += ipt_mark.h
+header-y += ipt_MARK.h
+header-y += ipt_multiport.h
+header-y += ipt_NFQUEUE.h
+header-y += ipt_owner.h
+header-y += ipt_physdev.h
+header-y += ipt_pkttype.h
+header-y += ipt_policy.h
+header-y += ipt_realm.h
+header-y += ipt_recent.h
+header-y += ipt_REJECT.h
+header-y += ipt_SAME.h
+header-y += ipt_sctp.h
+header-y += ipt_state.h
+header-y += ipt_string.h
+header-y += ipt_tcpmss.h
+header-y += ipt_TCPMSS.h
+header-y += ipt_tos.h
+header-y += ipt_TOS.h
+header-y += ipt_ttl.h
+header-y += ipt_TTL.h
+header-y += ipt_ULOG.h
 
-header-y := ip_conntrack_helper.h ip_conntrack_helper_h323_asn1.h	\
-	    ip_conntrack_helper_h323_types.h ip_conntrack_protocol.h	\
-	    ip_conntrack_sctp.h ip_conntrack_tcp.h ip_conntrack_tftp.h	\
-	    ip_nat_pptp.h ipt_addrtype.h ipt_ah.h	\
-	    ipt_CLASSIFY.h ipt_CLUSTERIP.h ipt_comment.h		\
-	    ipt_connbytes.h ipt_connmark.h ipt_CONNMARK.h		\
-	    ipt_conntrack.h ipt_dccp.h ipt_dscp.h ipt_DSCP.h ipt_ecn.h	\
-	    ipt_ECN.h ipt_esp.h ipt_hashlimit.h ipt_helper.h		\
-	    ipt_iprange.h ipt_length.h ipt_limit.h ipt_LOG.h ipt_mac.h	\
-	    ipt_mark.h ipt_MARK.h ipt_multiport.h ipt_NFQUEUE.h		\
-	    ipt_owner.h ipt_physdev.h ipt_pkttype.h ipt_policy.h	\
-	    ipt_realm.h ipt_recent.h ipt_REJECT.h ipt_SAME.h		\
-	    ipt_sctp.h ipt_state.h ipt_string.h ipt_tcpmss.h		\
-	    ipt_TCPMSS.h ipt_tos.h ipt_TOS.h ipt_ttl.h ipt_TTL.h	\
-	    ipt_ULOG.h
-
-unifdef-y := ip_conntrack.h ip_conntrack_h323.h ip_conntrack_irc.h	\
-	ip_conntrack_pptp.h ip_conntrack_proto_gre.h			\
-	ip_conntrack_tuple.h ip_nat.h ip_nat_rule.h ip_queue.h		\
-	ip_tables.h
+unifdef-y += ip_conntrack.h
+unifdef-y += ip_conntrack_h323.h
+unifdef-y += ip_conntrack_irc.h
+unifdef-y += ip_conntrack_pptp.h
+unifdef-y += ip_conntrack_proto_gre.h
+unifdef-y += ip_conntrack_tuple.h
+unifdef-y += ip_nat.h
+unifdef-y += ip_nat_rule.h
+unifdef-y += ip_queue.h
+unifdef-y += ip_tables.h
diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild
index 913ddbf55b4ba..9dd978d149ff8 100644
--- a/include/linux/netfilter_ipv6/Kbuild
+++ b/include/linux/netfilter_ipv6/Kbuild
@@ -1,6 +1,21 @@
-header-y += ip6t_HL.h ip6t_LOG.h ip6t_MARK.h ip6t_REJECT.h ip6t_ah.h	\
-	ip6t_esp.h ip6t_frag.h ip6t_hl.h ip6t_ipv6header.h		\
-	ip6t_length.h ip6t_limit.h ip6t_mac.h ip6t_mark.h		\
-	ip6t_multiport.h ip6t_opts.h ip6t_owner.h ip6t_policy.h		\
-	ip6t_physdev.h ip6t_rt.h
-unifdef-y := ip6_tables.h
+header-y += ip6t_HL.h
+header-y += ip6t_LOG.h
+header-y += ip6t_MARK.h
+header-y += ip6t_REJECT.h
+header-y += ip6t_ah.h
+header-y += ip6t_esp.h
+header-y += ip6t_frag.h
+header-y += ip6t_hl.h
+header-y += ip6t_ipv6header.h
+header-y += ip6t_length.h
+header-y += ip6t_limit.h
+header-y += ip6t_mac.h
+header-y += ip6t_mark.h
+header-y += ip6t_multiport.h
+header-y += ip6t_opts.h
+header-y += ip6t_owner.h
+header-y += ip6t_policy.h
+header-y += ip6t_physdev.h
+header-y += ip6t_rt.h
+
+unifdef-y += ip6_tables.h
diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild
index c8c545665885b..d9c5455808e59 100644
--- a/include/linux/nfsd/Kbuild
+++ b/include/linux/nfsd/Kbuild
@@ -1,2 +1,7 @@
-unifdef-y := const.h export.h stats.h syscall.h nfsfh.h debug.h auth.h
-
+unifdef-y += const.h
+unifdef-y += export.h
+unifdef-y += stats.h
+unifdef-y += syscall.h
+unifdef-y += nfsfh.h
+unifdef-y += debug.h
+unifdef-y += auth.h
diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild
index 73fa27a8d5521..2415a64c5e51d 100644
--- a/include/linux/raid/Kbuild
+++ b/include/linux/raid/Kbuild
@@ -1 +1,2 @@
-header-y += md_p.h md_u.h
+header-y += md_p.h
+header-y += md_u.h
diff --git a/include/linux/sunrpc/Kbuild b/include/linux/sunrpc/Kbuild
index 0d1d768a27bf0..fb438f158eee4 100644
--- a/include/linux/sunrpc/Kbuild
+++ b/include/linux/sunrpc/Kbuild
@@ -1 +1 @@
-unifdef-y := debug.h
+unifdef-y += debug.h
diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild
index 5251a505b2f1f..78dfbac363759 100644
--- a/include/linux/tc_act/Kbuild
+++ b/include/linux/tc_act/Kbuild
@@ -1 +1,4 @@
-header-y += tc_gact.h tc_ipt.h tc_mirred.h tc_pedit.h
+header-y += tc_gact.h
+header-y += tc_ipt.h
+header-y += tc_mirred.h
+header-y += tc_pedit.h
diff --git a/include/linux/tc_ematch/Kbuild b/include/linux/tc_ematch/Kbuild
index 381e93018df66..4a58a1c32a000 100644
--- a/include/linux/tc_ematch/Kbuild
+++ b/include/linux/tc_ematch/Kbuild
@@ -1 +1,4 @@
-headers-y := tc_em_cmp.h tc_em_meta.h tc_em_nbyte.h tc_em_text.h
+header-y += tc_em_cmp.h
+header-y += tc_em_meta.h
+header-y += tc_em_nbyte.h
+header-y += tc_em_text.h
diff --git a/include/mtd/Kbuild b/include/mtd/Kbuild
index e1da2a5b2a576..13e7a3c6d7947 100644
--- a/include/mtd/Kbuild
+++ b/include/mtd/Kbuild
@@ -1,2 +1,6 @@
-unifdef-y := mtd-abi.h
-header-y := inftl-user.h jffs2-user.h mtd-user.h nftl-user.h
+header-y += inftl-user.h
+header-y += jffs2-user.h
+header-y += mtd-user.h
+header-y += nftl-user.h
+
+unifdef-y += mtd-abi.h
diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild
index eb710ba9b1a05..e7c043216558e 100644
--- a/include/rdma/Kbuild
+++ b/include/rdma/Kbuild
@@ -1 +1 @@
-header-y := ib_user_mad.h
+header-y += ib_user_mad.h
diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild
index 14a033d73314d..744f85011f1ef 100644
--- a/include/scsi/Kbuild
+++ b/include/scsi/Kbuild
@@ -1,2 +1,4 @@
 header-y += scsi.h
-unifdef-y := scsi_ioctl.h sg.h
+
+unifdef-y += scsi_ioctl.h
+unifdef-y += sg.h
diff --git a/include/sound/Kbuild b/include/sound/Kbuild
index 3a5a3df61496a..fd054a344324e 100644
--- a/include/sound/Kbuild
+++ b/include/sound/Kbuild
@@ -1,2 +1,10 @@
-header-y := asound_fm.h hdsp.h hdspm.h sfnt_info.h sscape_ioctl.h
-unifdef-y := asequencer.h asound.h emu10k1.h sb16_csp.h 
+header-y += asound_fm.h
+header-y += hdsp.h
+header-y += hdspm.h
+header-y += sfnt_info.h
+header-y += sscape_ioctl.h
+
+unifdef-y += asequencer.h
+unifdef-y += asound.h
+unifdef-y += emu10k1.h
+unifdef-y += sb16_csp.h
diff --git a/include/video/Kbuild b/include/video/Kbuild
index 76a60737cc159..a14f9c045b8cd 100644
--- a/include/video/Kbuild
+++ b/include/video/Kbuild
@@ -1 +1 @@
-unifdef-y := sisfb.h
+unifdef-y += sisfb.h
-- 
GitLab


From 86998aa6534e839ec003ed2ef7067d6fe8696ccc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 Sep 2006 11:14:34 +0200
Subject: [PATCH 0198/1063] [PATCH] genirq core: fix handle_level_irq()

while porting the -rt tree to 2.6.18-rc7 i noticed the following
screaming-IRQ scenario on an SMP system:

 2274  0Dn.:1 0.001ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.010ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.020ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.029ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.039ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.048ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.058ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.068ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.077ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.087ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)
 2274  0Dn.:1 0.097ms: do_IRQ+0xc/0x103  <= (ret_from_intr+0x0/0xf)

as it turns out, the bug is caused by handle_level_irq(): if it races
with another CPU that is already handling this IRQ, it _unmasks_ the IRQ
line on the way out. This is not how 2.6.17 works, and we introduced
this bug in one of the early genirq cleanups right before it went into
-mm. (the bug was not in the genirq patchset for a long time, and we
didn't notice it due to the lack of an -rt rebase onto the new genirq
code. -rt, and hardirq-preemption in particular, opens up such races much
wider than anything else.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/irq/chip.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 9336f2e89e402..ac1f850d4937e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -252,7 +252,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
 	mask_ack_irq(desc, irq);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
-		goto out;
+		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
 	kstat_cpu(cpu).irqs[irq]++;
 
@@ -263,7 +263,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
 		desc->status |= IRQ_PENDING;
-		goto out;
+		goto out_unlock;
 	}
 
 	desc->status |= IRQ_INPROGRESS;
@@ -276,9 +276,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
 
 	spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
-out:
 	if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
 		desc->chip->unmask(irq);
+out_unlock:
 	spin_unlock(&desc->lock);
 }
 
-- 
GitLab


From 271fc18eead0aa330758a3b9d8a57ffe315a1978 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 18 Sep 2006 20:12:19 -0700
Subject: [PATCH 0199/1063] [PATCH] Add 'headers_check' target to output of
 'make help'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index a086e3212af30..47e21c04014ea 100644
--- a/Makefile
+++ b/Makefile
@@ -1082,6 +1082,7 @@ help:
 	@echo  'Static analysers'
 	@echo  '  checkstack      - Generate a list of stack hogs'
 	@echo  '  namespacecheck  - Name space analysis on compiled kernel'
+	@echo  '  headers_check   - Sanity check on exported headers'
 	@echo  ''
 	@echo  'Kernel packaging:'
 	@$(MAKE) $(build)=$(package-dir) help
-- 
GitLab


From 020d8c063b4d850b53dfbaee843c8335b7a90d26 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 18 Sep 2006 20:12:23 -0700
Subject: [PATCH 0200/1063] [PATCH] Fix 'make headers_check' on m68k

Fix 'make headers_check' on m68k

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-m68k/page.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h
index db017f838c291..fcc165ddd09e1 100644
--- a/include/asm-m68k/page.h
+++ b/include/asm-m68k/page.h
@@ -2,6 +2,8 @@
 #define _M68K_PAGE_H
 
 
+#ifdef __KERNEL__
+
 /* PAGE_SHIFT determines the page size */
 #ifndef CONFIG_SUN3
 #define PAGE_SHIFT	(12)
@@ -15,8 +17,6 @@
 #endif
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-#ifdef __KERNEL__
-
 #include <asm/setup.h>
 
 #if PAGE_SHIFT < 13
@@ -175,8 +175,8 @@ static inline void *__va(unsigned long x)
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#endif /* __KERNEL__ */
-
 #include <asm-generic/page.h>
 
+#endif /* __KERNEL__ */
+
 #endif /* _M68K_PAGE_H */
-- 
GitLab


From ac7fb273ca1d0b4fb354575bb6e101ffd09e0b54 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Mon, 18 Sep 2006 20:12:27 -0700
Subject: [PATCH 0201/1063] [PATCH] headers_check: Clean up asm-parisc/page.h
 for user headers

Remove definitions of PAGE_* from the user view
Delete unnecessary comments referring to the size of pages
Only include <asm-generic> if we're in __KERNEL__

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-parisc/page.h | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h
index 0695bc958d564..57d6d82756dd6 100644
--- a/include/asm-parisc/page.h
+++ b/include/asm-parisc/page.h
@@ -1,22 +1,14 @@
 #ifndef _PARISC_PAGE_H
 #define _PARISC_PAGE_H
 
-#if !defined(__KERNEL__)
-/* this is for userspace applications (4k page size) */
-# define PAGE_SHIFT	12	/* 4k */
-# define PAGE_SIZE	(1UL << PAGE_SHIFT)
-# define PAGE_MASK	(~(PAGE_SIZE-1))
-#endif
-
-
 #ifdef __KERNEL__
 
 #if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-# define PAGE_SHIFT	12	/* 4k */
+# define PAGE_SHIFT	12
 #elif defined(CONFIG_PARISC_PAGE_SIZE_16KB)
-# define PAGE_SHIFT	14	/* 16k */
+# define PAGE_SHIFT	14
 #elif defined(CONFIG_PARISC_PAGE_SIZE_64KB)
-# define PAGE_SHIFT	16	/* 64k */
+# define PAGE_SHIFT	16
 #else
 # error "unknown default kernel page size"
 #endif
@@ -188,9 +180,9 @@ extern int npmem_ranges;
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#endif /* __KERNEL__ */
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
+#endif /* __KERNEL__ */
+
 #endif /* _PARISC_PAGE_H */
-- 
GitLab


From 833f73299fdf4497af1552e219e95661f4d2cdca Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 18 Sep 2006 20:12:33 -0700
Subject: [PATCH 0202/1063] [PATCH] EXT2: Remove superblock lock contention in
 ext2_statfs

Fix a performance degradation introduced in 2.6.17.  (30% degradation
running dbench with 16 threads)

Commit 21730eed11de42f22afcbd43f450a1872a0b5ea1, which claims to make
EXT2_DEBUG work again, moves the taking of the kernel lock out of
debug-only code in ext2_count_free_inodes and ext2_count_free_blocks and
into ext2_statfs.

The same problem was fixed in ext3 by removing the lock completely (commit
5b11687924e40790deb0d5f959247ade82196665)

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/balloc.c | 1 -
 fs/ext2/ialloc.c | 1 -
 fs/ext2/super.c  | 2 --
 3 files changed, 4 deletions(-)

diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index d4870432ecfc9..b1981d0e95ad6 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -539,7 +539,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
 
 #endif  /*  EXT2FS_DEBUG  */
 
-/* Superblock must be locked */
 unsigned long ext2_count_free_blocks (struct super_block * sb)
 {
 	struct ext2_group_desc * desc;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index de85c61c58c57..695f69ccf9088 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -637,7 +637,6 @@ struct inode *ext2_new_inode(struct inode *dir, int mode)
 	return ERR_PTR(err);
 }
 
-/* Superblock must be locked */
 unsigned long ext2_count_free_inodes (struct super_block * sb)
 {
 	struct ext2_group_desc *desc;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ca5bfb6914d26..4286ff6330b6a 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1083,7 +1083,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	unsigned long overhead;
 	int i;
 
-	lock_super(sb);
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
 	else {
@@ -1124,7 +1123,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
 	buf->f_ffree = ext2_count_free_inodes (sb);
 	buf->f_namelen = EXT2_NAME_LEN;
-	unlock_super(sb);
 	return 0;
 }
 
-- 
GitLab


From 79e453d49bd49ba1b576f89310cc565c9e4ca379 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Tue, 19 Sep 2006 08:15:22 -0700
Subject: [PATCH 0203/1063] Revert mmiocfg heuristics and blacklist changes

This reverts commits 11012d419cfc0e0f78ca356aca03674217910124 and
40dd2d20f220eda1cd0da8ea3f0f9db8971ba237, which allowed us to use the
MMIO accesses for PCI config cycles even without the area being marked
reserved in the e820 memory tables.

Those changes were needed for EFI-environment Intel macs, but broke some
newer Intel 965 boards, so for now it's better to revert to our old
2.6.17 behaviour and at least avoid introducing any new breakage.

Andi Kleen has a set of patches that work with both EFI and the broken
Intel 965 boards, which will be applied once they get wider testing.

Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Edgar Hucek <hostmaster@ed-soft.at>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  2 --
 arch/i386/kernel/setup.c            | 32 +++++++++++++++++++++++++++
 arch/i386/pci/common.c              |  5 -----
 arch/i386/pci/mmconfig.c            | 34 +++++++++--------------------
 arch/i386/pci/pci.h                 |  3 +--
 arch/x86_64/kernel/e820.c           | 29 ++++++++++++++++++++++++
 arch/x86_64/pci/mmconfig.c          | 34 +++++++++--------------------
 7 files changed, 82 insertions(+), 57 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 87a17337c7f63..71d05f4817276 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1189,8 +1189,6 @@ running once the system is up.
 				Mechanism 2.
 		nommconf	[IA-32,X86_64] Disable use of MMCONFIG for PCI
 				Configuration
-		mmconf		[IA-32,X86_64] Force MMCONFIG. This is useful
-				to override the builtin blacklist.
 		nomsi		[MSI] If the PCI_MSI kernel config parameter is
 				enabled, this kernel boot option can be used to
 				disable the use of MSI interrupts system-wide.
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 345ffb7d904d3..f1682206d304d 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -956,6 +956,38 @@ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
 	return 0;
 }
 
+ /*
+  * This function checks if the entire range <start,end> is mapped with type.
+  *
+  * Note: this function only works correctly if the e820 table is sorted and
+  * non-overlapping, which is the case.
+  */
+int __init
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
+{
+	u64 start = s;
+	u64 end = e;
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (type && ei->type != type)
+			continue;
+		/* is the region (part) in overlap with the current region ?*/
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+		/* if the region is at the beginning of <start,end> we move
+		 * start to the end of the region since it's ok until there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+		/* if start is now at or beyond end, we're done, full
+		 * coverage */
+		if (start >= end)
+			return 1; /* we're done */
+	}
+	return 0;
+}
+
 /*
  * Find the highest page frame number we have available
  */
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 1220dd828ce3e..0a362e3aeac55 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -237,11 +237,6 @@ char * __devinit  pcibios_setup(char *str)
 		pci_probe &= ~PCI_PROBE_MMCONF;
 		return NULL;
 	}
-	/* override DMI blacklist */
-	else if (!strcmp(str, "mmconf")) {
-		pci_probe |= PCI_PROBE_MMCONF_FORCE;
-		return NULL;
-	}
 #endif
 	else if (!strcmp(str, "noacpi")) {
 		acpi_noirq_set();
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index ef5a2faa7d824..972180f738d9a 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -12,7 +12,6 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
-#include <linux/dmi.h>
 #include <asm/e820.h>
 #include "pci.h"
 
@@ -188,31 +187,9 @@ static __init void unreachable_devices(void)
 	}
 }
 
-static int disable_mcfg(struct dmi_system_id *d)
-{
-	printk("PCI: %s detected. Disabling MCFG.\n", d->ident);
-	pci_probe &= ~PCI_PROBE_MMCONF;
-	return 0;
-}
-
-static struct dmi_system_id __initdata dmi_bad_mcfg[] = {
-	/* Has broken MCFG table that makes the system hang when used */
-        {
-         .callback = disable_mcfg,
-         .ident = "Intel D3C5105 SDV",
-         .matches = {
-                     DMI_MATCH(DMI_BIOS_VENDOR, "Intel"),
-                     DMI_MATCH(DMI_BOARD_NAME, "D26928"),
-                     },
-         },
-         {}
-};
-
 void __init pci_mmcfg_init(void)
 {
-	dmi_check_system(dmi_bad_mcfg);
-
-	if ((pci_probe & (PCI_PROBE_MMCONF_FORCE|PCI_PROBE_MMCONF)) == 0)
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		return;
 
 	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
@@ -221,6 +198,15 @@ void __init pci_mmcfg_init(void)
 	    (pci_mmcfg_config[0].base_address == 0))
 		return;
 
+	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
+			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
+			E820_RESERVED)) {
+		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+				pci_mmcfg_config[0].base_address);
+		printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
+		return;
+	}
+
 	printk(KERN_INFO "PCI: Using MMCONFIG\n");
 	raw_pci_ops = &pci_mmcfg;
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index 49a849b3a2414..bf4e79335388b 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -16,8 +16,7 @@
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
 #define PCI_PROBE_MMCONF	0x0008
-#define PCI_PROBE_MMCONF_FORCE	0x0010
-#define PCI_PROBE_MASK		0x00ff
+#define PCI_PROBE_MASK		0x000f
 
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 764bf23c7103f..d6d7f731f6f0e 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -108,6 +108,35 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
 	return 0;
 }
 
+/*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correctly if the e820 table is sorted and
+ * non-overlapping, which is the case.
+ */
+int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+{
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (type && ei->type != type)
+			continue;
+		/* is the region (part) in overlap with the current region ?*/
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+
+		/* if the region is at the beginning of <start,end> we move
+		 * start to the end of the region since it's ok until there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+		/* if start is now at or beyond end, we're done, full coverage */
+		if (start >= end)
+			return 1; /* we're done */
+	}
+	return 0;
+}
+
 /* 
  * Find a free area in a specific range. 
  */ 
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 2d48a7941d489..3c55c76c6fd5d 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -9,7 +9,6 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
-#include <linux/dmi.h>
 #include <asm/e820.h>
 
 #include "pci.h"
@@ -165,33 +164,11 @@ static __init void unreachable_devices(void)
 	}
 }
 
-static int disable_mcfg(struct dmi_system_id *d)
-{
-	printk("PCI: %s detected. Disabling MCFG.\n", d->ident);
-	pci_probe &= ~PCI_PROBE_MMCONF;
-	return 0;
-}
-
-static struct dmi_system_id __initdata dmi_bad_mcfg[] = {
-	/* Has broken MCFG table that makes the system hang when used */
-        {
-         .callback = disable_mcfg,
-         .ident = "Intel D3C5105 SDV",
-         .matches = {
-                     DMI_MATCH(DMI_BIOS_VENDOR, "Intel"),
-                     DMI_MATCH(DMI_BOARD_NAME, "D26928"),
-                     },
-         },
-         {}
-};
-
 void __init pci_mmcfg_init(void)
 {
 	int i;
 
-	dmi_check_system(dmi_bad_mcfg);
-
-	if ((pci_probe & (PCI_PROBE_MMCONF|PCI_PROBE_MMCONF_FORCE)) == 0)
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		return;
 
 	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
@@ -200,6 +177,15 @@ void __init pci_mmcfg_init(void)
 	    (pci_mmcfg_config[0].base_address == 0))
 		return;
 
+	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
+			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
+			E820_RESERVED)) {
+		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+				pci_mmcfg_config[0].base_address);
+		printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
+		return;
+	}
+
 	/* RED-PEN i386 doesn't do _nocache right now */
 	pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
 	if (pci_mmcfg_virt == NULL) {
-- 
GitLab


From 7a52411107e1ac8f5be6967936ec237f40a1c7e4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 16:03:45 -0400
Subject: [PATCH 0204/1063] NFS: Fix Oopsable condition in nfs_readpage_sync()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7a9ee00e0c613..f0aff824a291b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -204,9 +204,11 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
 	spin_unlock(&inode->i_lock);
 
-	nfs_readpage_truncate_uninitialised_page(rdata);
-	if (rdata->res.eof || rdata->res.count == rdata->args.count)
+	if (rdata->res.eof || rdata->res.count == rdata->args.count) {
 		SetPageUptodate(page);
+		if (rdata->res.eof && count != 0)
+			memclear_highpage_flush(page, rdata->args.pgbase, count);
+	}
 	result = 0;
 
 io_error:
-- 
GitLab


From 76723de0cf5b186afe2f329eeef304c321d52bf8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 08:11:51 -0400
Subject: [PATCH 0205/1063] NFSv4: Fix incorrect semaphore release in
 _nfs4_do_open()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 153898e1331f7..b14145b7b87f3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -970,7 +970,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	status = -ENOMEM;
 	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
 	if (opendata == NULL)
-		goto err_put_state_owner;
+		goto err_release_rwsem;
 
 	status = _nfs4_proc_open(opendata);
 	if (status != 0)
@@ -989,11 +989,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	return 0;
 err_opendata_free:
 	nfs4_opendata_free(opendata);
+err_release_rwsem:
+	up_read(&clp->cl_sem);
 err_put_state_owner:
 	nfs4_put_state_owner(sp);
 out_err:
-	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
-	up_read(&clp->cl_sem);
 	*res = NULL;
 	return status;
 }
-- 
GitLab


From 5c2d97cb31fb77981797fec46230ca005b865799 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 18 Sep 2006 23:20:35 -0400
Subject: [PATCH 0206/1063] NFS: Fix nfs_page use after free issues in
 fs/nfs/write.c

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8ab3cf10d792b..7084ac9a64555 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -590,8 +590,8 @@ static void nfs_cancel_commit_list(struct list_head *head)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_inode_remove_request(req);
-		nfs_clear_page_writeback(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		nfs_clear_page_writeback(req);
 	}
 }
 
@@ -1386,8 +1386,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
-		nfs_clear_page_writeback(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
 }
-- 
GitLab


From 7f81dc0097095f19d25e14c043edfdebb9e01295 Mon Sep 17 00:00:00 2001
From: Chas Williams <chas@cmf.nrl.navy.mil>
Date: Tue, 19 Sep 2006 12:59:11 -0700
Subject: [PATCH 0207/1063] [ATM]: [he] don't hold the device lock when
 upcalling

Holding the device lock across the vcc->push() upcall can create a
deadlock/lock ordering problem with other layers that want to use the
transmit (or other) path of the card at that time.

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/he.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index dd96123a2b7f7..ffcb9fd31c38f 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1928,7 +1928,9 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 #ifdef notdef
 		ATM_SKB(skb)->vcc = vcc;
 #endif
+		spin_unlock(&he_dev->global_lock);
 		vcc->push(vcc, skb);
+		spin_lock(&he_dev->global_lock);
 
 		atomic_inc(&vcc->stats->rx);
 
-- 
GitLab


From b22b9004f22ba8ca33d15059e8b710a4b71ec3cc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 19 Sep 2006 13:00:57 -0700
Subject: [PATCH 0208/1063] [NETFILTER]: xt_quota: add missing module aliases

Add missing aliases for ipt_quota and ip6t_quota to make autoload
work.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_quota.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 4cdba7469dc4b..be8d3c26b5682 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,6 +11,8 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_ALIAS("ipt_quota");
+MODULE_ALIAS("ip6t_quota");
 
 static DEFINE_SPINLOCK(quota_lock);
 
-- 
GitLab


From 888454c57a45511808d3fa52597b3d765df034a6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Sep 2006 13:42:46 -0700
Subject: [PATCH 0209/1063] [IPV4] fib_trie: missing ntohl() when calling
 fib_semantic_match()

fib_trie.c::check_leaf() passes host-endian where fib_semantic_match()
expects (and stores into) net-endian.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 23fb9d9768e36..01801c0f885d1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1281,18 +1281,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l,
 			     struct fib_result *res)
 {
 	int err, i;
-	t_key mask;
+	__be32 mask;
 	struct leaf_info *li;
 	struct hlist_head *hhead = &l->list;
 	struct hlist_node *node;
 
 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
 		i = li->plen;
-		mask = ntohl(inet_make_mask(i));
-		if (l->key != (key & mask))
+		mask = inet_make_mask(i);
+		if (l->key != (key & ntohl(mask)))
 			continue;
 
-		if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
+		if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
 			*plen = i;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			t->stats.semantic_match_passed++;
-- 
GitLab


From e478bec0ba0a83a48a0f6982934b6de079e7e6b3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Tue, 19 Sep 2006 20:42:06 -0700
Subject: [PATCH 0210/1063] Linux v2.6.18. Arrr!

Ahoy, all land-lubbers, test me out right smartly!

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 47e21c04014ea..edfc2fdf76c95 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 18
-EXTRAVERSION = -rc7
-NAME=Crazed Snow-Weasel
+EXTRAVERSION =
+NAME=Avast! A bilge rat!
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
-- 
GitLab


From 4f896e53eea70013fa48d0d8662680cf8aae8a43 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 24 Aug 2006 13:29:33 +1000
Subject: [PATCH 0211/1063] [POWERPC] make spinlocks work in a combined kernel

If we build a pSeries/iSeries combined kernel, we will need this.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/locks.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 077bed7dc52b3..80b482ca30dfa 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
 #include <asm/hvcall.h>
 #include <asm/iseries/hv_call.h>
 #include <asm/smp.h>
+#include <asm/firmware.h>
 
 void __spin_yield(raw_spinlock_t *lock)
 {
@@ -39,13 +40,12 @@ void __spin_yield(raw_spinlock_t *lock)
 	rmb();
 	if (lock->slock != lock_value)
 		return;		/* something has changed */
-#ifdef CONFIG_PPC_ISERIES
-	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
-		((u64)holder_cpu << 32) | yield_count);
-#else
-	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
-			   yield_count);
-#endif
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+			((u64)holder_cpu << 32) | yield_count);
+	else
+		plpar_hcall_norets(H_CONFER,
+			get_hard_smp_processor_id(holder_cpu), yield_count);
 }
 
 /*
@@ -69,13 +69,12 @@ void __rw_yield(raw_rwlock_t *rw)
 	rmb();
 	if (rw->lock != lock_value)
 		return;		/* something has changed */
-#ifdef CONFIG_PPC_ISERIES
-	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
-		((u64)holder_cpu << 32) | yield_count);
-#else
-	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
-			   yield_count);
-#endif
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+			((u64)holder_cpu << 32) | yield_count);
+	else
+		plpar_hcall_norets(H_CONFER,
+			get_hard_smp_processor_id(holder_cpu), yield_count);
 }
 #endif
 
-- 
GitLab


From 9ca91e0fb5295e8317030feb889085e452cedab1 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 14 Sep 2006 16:59:31 +1000
Subject: [PATCH 0212/1063] [POWERPC] silence a warning

Left over from the constifying of get_property.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/platforms/powermac/cpufreq_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
index 167cd3ce8a138..d30466d741942 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -89,7 +89,7 @@ static DEFINE_MUTEX(g5_switch_mutex);
 
 #ifdef CONFIG_PMAC_SMU
 
-static u32 *g5_pmode_data;
+static const u32 *g5_pmode_data;
 static int g5_pmode_max;
 
 static struct smu_sdbp_fvt *g5_fvt_table;	/* table of op. points */
-- 
GitLab


From fa053d2f008cb73fa768b8e171486d8c0b33312b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 19 Sep 2006 14:51:40 +1000
Subject: [PATCH 0213/1063] [POWERPC] remove unused io accessors

The io accessors insw_ns, outsw_ns, insl_ns and outsl_ns are unused
(except for one unnecessary use in drivers/net/3c509.c that is addressed
in a previous patch) and are only defined in powerpc/ppc, so remove them.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/ppc/syslib/m8260_pci_erratum9.c | 16 ----------------
 include/asm-powerpc/io.h             | 13 +------------
 include/asm-ppc/io.h                 | 10 ----------
 include/asm-ppc/mpc8260_pci9.h       |  4 ----
 4 files changed, 1 insertion(+), 42 deletions(-)

diff --git a/arch/ppc/syslib/m8260_pci_erratum9.c b/arch/ppc/syslib/m8260_pci_erratum9.c
index 974581ea48493..5475709ce07bb 100644
--- a/arch/ppc/syslib/m8260_pci_erratum9.c
+++ b/arch/ppc/syslib/m8260_pci_erratum9.c
@@ -339,20 +339,6 @@ void insl(unsigned port, void *buf, int nl)
 	idma_pci9_read((u8 *)buf, (u8 *)addr, nl*sizeof(u32), sizeof(u32), 0);
 }
 
-void insw_ns(unsigned port, void *buf, int ns)
-{
-	u8 *addr = (u8 *)(port + _IO_BASE);
-
-	idma_pci9_read((u8 *)buf, (u8 *)addr, ns*sizeof(u16), sizeof(u16), 0);
-}
-
-void insl_ns(unsigned port, void *buf, int nl)
-{
-	u8 *addr = (u8 *)(port + _IO_BASE);
-
-	idma_pci9_read((u8 *)buf, (u8 *)addr, nl*sizeof(u32), sizeof(u32), 0);
-}
-
 void *memcpy_fromio(void *dest, unsigned long src, size_t count)
 {
 	unsigned long pa = iopa((unsigned long) src);
@@ -373,8 +359,6 @@ EXPORT_SYMBOL(inl);
 EXPORT_SYMBOL(insb);
 EXPORT_SYMBOL(insw);
 EXPORT_SYMBOL(insl);
-EXPORT_SYMBOL(insw_ns);
-EXPORT_SYMBOL(insl_ns);
 EXPORT_SYMBOL(memcpy_fromio);
 
 #endif	/* ifdef CONFIG_8260_PCI9 */
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 212428db0d8b6..9aaced5426242 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -76,8 +76,7 @@ extern unsigned long pci_io_base;
 #define insb(port, buf, ns)	_insb((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
 #define insw(port, buf, ns)	_insw_ns((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
 #define insl(port, buf, nl)	_insl_ns((u8 __iomem *)((port)+pci_io_base), (buf), (nl))
-#define insw_ns(port, buf, ns)	_insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insl_ns(port, buf, nl)	_insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
+
 #else
 
 static inline unsigned char __raw_readb(const volatile void __iomem *addr)
@@ -138,8 +137,6 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
 #define insb(port, buf, ns)	eeh_insb((port), (buf), (ns))
 #define insw(port, buf, ns)	eeh_insw_ns((port), (buf), (ns))
 #define insl(port, buf, nl)	eeh_insl_ns((port), (buf), (nl))
-#define insw_ns(port, buf, ns)	eeh_insw_ns((port), (buf), (ns))
-#define insl_ns(port, buf, nl)	eeh_insl_ns((port), (buf), (nl))
 
 #endif
 
@@ -180,14 +177,6 @@ static inline void mmiowb(void)
 #define inl_p(port)             inl(port)
 #define outl_p(val, port)       (udelay(1), outl((val), (port)))
 
-/*
- * The *_ns versions below don't do byte-swapping.
- * Neither do the standard versions now, these are just here
- * for older code.
- */
-#define outsw_ns(port, buf, ns)	_outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define outsl_ns(port, buf, nl)	_outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
-
 
 #define IO_SPACE_LIMIT ~(0UL)
 
diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h
index 680555be22eca..fb0a8fcc51cf0 100644
--- a/include/asm-ppc/io.h
+++ b/include/asm-ppc/io.h
@@ -338,16 +338,6 @@ extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
 extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
 extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl);
 
-/*
- * The *_ns versions below don't do byte-swapping.
- * Neither do the standard versions now, these are just here
- * for older code.
- */
-#define insw_ns(port, buf, ns)	_insw_ns((port)+___IO_BASE, (buf), (ns))
-#define outsw_ns(port, buf, ns)	_outsw_ns((port)+___IO_BASE, (buf), (ns))
-#define insl_ns(port, buf, nl)	_insl_ns((port)+___IO_BASE, (buf), (nl))
-#define outsl_ns(port, buf, nl)	_outsl_ns((port)+___IO_BASE, (buf), (nl))
-
 
 #define IO_SPACE_LIMIT ~0
 
diff --git a/include/asm-ppc/mpc8260_pci9.h b/include/asm-ppc/mpc8260_pci9.h
index 26b3f6e787bc0..9f7176881c567 100644
--- a/include/asm-ppc/mpc8260_pci9.h
+++ b/include/asm-ppc/mpc8260_pci9.h
@@ -30,8 +30,6 @@
 #undef inb
 #undef inw
 #undef inl
-#undef insw_ns
-#undef insl_ns
 #undef memcpy_fromio
 
 extern int readb(volatile unsigned char *addr);
@@ -43,8 +41,6 @@ extern void insl(unsigned port, void *buf, int nl);
 extern int inb(unsigned port);
 extern int inw(unsigned port);
 extern unsigned inl(unsigned port);
-extern void insw_ns(unsigned port, void *buf, int ns);
-extern void insl_ns(unsigned port, void *buf, int nl);
 extern void *memcpy_fromio(void *dest, unsigned long src, size_t count);
 
 #endif /* !__CONFIG_8260_PCI9_DEFS */
-- 
GitLab


From 661f1cdb8b3e3c2c44e97df122c1d5643c054ce8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 19 Sep 2006 16:52:55 +1000
Subject: [PATCH 0214/1063] [POWERPC] remove unused asm routines

_insw, _outsw, _insl and _outsl are all unused, so remove them.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/misc.S      | 52 --------------------
 arch/powerpc/kernel/ppc_ksyms.c |  4 --
 arch/ppc/kernel/misc.S          | 84 ---------------------------------
 arch/ppc/kernel/ppc_ksyms.c     |  4 --
 include/asm-powerpc/io.h        |  4 --
 include/asm-ppc/io.h            |  4 --
 6 files changed, 152 deletions(-)

diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index f770805f1215d..dd5f8e4291968 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -86,58 +86,6 @@ _GLOBAL(_outsb)
 	sync
 	blr
 
-_GLOBAL(_insw)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhbrx	r5,0,r3
-	eieio
-	sthu	r5,2(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-	sync
-00:	lhzu	r5,2(r4)
-	sthbrx	r5,0,r3
-	bdnz	00b
-	sync
-	blr
-
-_GLOBAL(_insl)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwbrx	r5,0,r3
-	eieio
-	stwu	r5,4(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-	sync
-00:	lwzu	r5,4(r4)
-	stwbrx	r5,0,r3
-	bdnz	00b
-	sync
-	blr
-
 #ifdef CONFIG_PPC32
 _GLOBAL(__ide_mm_insw)
 #endif
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index b2edac8ddf0a5..314d6114e6ec1 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -104,10 +104,6 @@ EXPORT_SYMBOL(__ide_mm_outsl);
 
 EXPORT_SYMBOL(_insb);
 EXPORT_SYMBOL(_outsb);
-EXPORT_SYMBOL(_insw);
-EXPORT_SYMBOL(_outsw);
-EXPORT_SYMBOL(_insl);
-EXPORT_SYMBOL(_outsl);
 EXPORT_SYMBOL(_insw_ns);
 EXPORT_SYMBOL(_outsw_ns);
 EXPORT_SYMBOL(_insl_ns);
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index 2fa0075f2b5f8..44700bbfe7bc2 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -768,90 +768,6 @@ _GLOBAL(_outsb)
 	bdnz	00b
 	blr
 
-_GLOBAL(_insw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhbrx	r5,0,r3
-01:	eieio
-02:	sthu	r5,2(r4)
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
-_GLOBAL(_outsw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhzu	r5,2(r4)
-01:	eieio
-02:	sthbrx	r5,0,r3
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
-_GLOBAL(_insl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwbrx	r5,0,r3
-01:	eieio
-02:	stwu	r5,4(r4)
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
-_GLOBAL(_outsl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwzu	r5,4(r4)
-01:	stwbrx	r5,0,r3
-02:	eieio
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
 _GLOBAL(__ide_mm_insw)
 _GLOBAL(_insw_ns)
 	cmpwi	0,r5,0
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c
index d1735401384cb..2bd1f7353f56a 100644
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -122,10 +122,6 @@ EXPORT_SYMBOL(__ide_mm_outsl);
 
 EXPORT_SYMBOL(_insb);
 EXPORT_SYMBOL(_outsb);
-EXPORT_SYMBOL(_insw);
-EXPORT_SYMBOL(_outsw);
-EXPORT_SYMBOL(_insl);
-EXPORT_SYMBOL(_outsl);
 EXPORT_SYMBOL(_insw_ns);
 EXPORT_SYMBOL(_outsw_ns);
 EXPORT_SYMBOL(_insl_ns);
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 9aaced5426242..0ee48436b1e30 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -151,10 +151,6 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
 
 extern void _insb(volatile u8 __iomem *port, void *buf, int ns);
 extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns);
-extern void _insw(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl);
 extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns);
 extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
 extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h
index fb0a8fcc51cf0..9fac420f16484 100644
--- a/include/asm-ppc/io.h
+++ b/include/asm-ppc/io.h
@@ -329,10 +329,6 @@ __do_out_asm(outl, "stwbrx")
 
 extern void _insb(volatile u8 __iomem *port, void *buf, int ns);
 extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns);
-extern void _insw(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl);
 extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns);
 extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
 extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
-- 
GitLab


From 73ea9e1bcb8eea4f3b2052fe7ccd7ee4b5a271a0 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 19 Sep 2006 17:30:20 +1000
Subject: [PATCH 0215/1063] [POWERPC] clean up ide io accessors

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/misc.S      | 12 ------------
 arch/powerpc/kernel/ppc_ksyms.c |  7 -------
 arch/ppc/kernel/misc.S          |  4 ----
 arch/ppc/kernel/ppc_ksyms.c     |  5 -----
 include/asm-powerpc/ide.h       | 12 ++++++------
 include/asm-powerpc/io.h        |  6 ------
 6 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index dd5f8e4291968..6feb391422ec3 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -86,9 +86,6 @@ _GLOBAL(_outsb)
 	sync
 	blr
 
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_insw)
-#endif
 _GLOBAL(_insw_ns)
 	sync
 	cmpwi	0,r5,0
@@ -103,9 +100,6 @@ _GLOBAL(_insw_ns)
 	isync
 	blr
 
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_outsw)
-#endif
 _GLOBAL(_outsw_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
@@ -118,9 +112,6 @@ _GLOBAL(_outsw_ns)
 	sync
 	blr
 
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_insl)
-#endif
 _GLOBAL(_insl_ns)
 	sync
 	cmpwi	0,r5,0
@@ -135,9 +126,6 @@ _GLOBAL(_insl_ns)
 	isync
 	blr
 
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_outsl)
-#endif
 _GLOBAL(_outsl_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 314d6114e6ec1..75429e580518b 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -95,13 +95,6 @@ EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(copy_4K_page);
 #endif
 
-#ifndef  __powerpc64__
-EXPORT_SYMBOL(__ide_mm_insl);
-EXPORT_SYMBOL(__ide_mm_outsw);
-EXPORT_SYMBOL(__ide_mm_insw);
-EXPORT_SYMBOL(__ide_mm_outsl);
-#endif
-
 EXPORT_SYMBOL(_insb);
 EXPORT_SYMBOL(_outsb);
 EXPORT_SYMBOL(_insw_ns);
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index 44700bbfe7bc2..50b4bbd06804b 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -768,7 +768,6 @@ _GLOBAL(_outsb)
 	bdnz	00b
 	blr
 
-_GLOBAL(__ide_mm_insw)
 _GLOBAL(_insw_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
@@ -790,7 +789,6 @@ _GLOBAL(_insw_ns)
 	bdnz	00b
 	blr
 
-_GLOBAL(__ide_mm_outsw)
 _GLOBAL(_outsw_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
@@ -812,7 +810,6 @@ _GLOBAL(_outsw_ns)
 	bdnz	00b
 	blr
 
-_GLOBAL(__ide_mm_insl)
 _GLOBAL(_insl_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
@@ -834,7 +831,6 @@ _GLOBAL(_insl_ns)
 	bdnz	00b
 	blr
 
-_GLOBAL(__ide_mm_outsl)
 _GLOBAL(_outsl_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c
index 2bd1f7353f56a..c8b65ca8a350e 100644
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -115,11 +115,6 @@ EXPORT_SYMBOL(outw);
 EXPORT_SYMBOL(outl);
 EXPORT_SYMBOL(outsl);*/
 
-EXPORT_SYMBOL(__ide_mm_insl);
-EXPORT_SYMBOL(__ide_mm_outsw);
-EXPORT_SYMBOL(__ide_mm_insw);
-EXPORT_SYMBOL(__ide_mm_outsl);
-
 EXPORT_SYMBOL(_insb);
 EXPORT_SYMBOL(_outsb);
 EXPORT_SYMBOL(_insw_ns);
diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h
index b09b42af6a1ef..c8390f9485de9 100644
--- a/include/asm-powerpc/ide.h
+++ b/include/asm-powerpc/ide.h
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <asm/mpc8xx.h>
 #endif
+#include <asm/io.h>
 
 #ifndef MAX_HWIFS
 #ifdef __powerpc64__
@@ -21,15 +22,14 @@
 #endif
 #endif
 
+#define __ide_mm_insw(p, a, c)	_insw_ns((volatile u16 __iomem *)(p), (a), (c))
+#define __ide_mm_insl(p, a, c)	_insl_ns((volatile u32 __iomem *)(p), (a), (c))
+#define __ide_mm_outsw(p, a, c)	_outsw_ns((volatile u16 __iomem *)(p), (a), (c))
+#define __ide_mm_outsl(p, a, c)	_outsl_ns((volatile u32 __iomem *)(p), (a), (c))
+
 #ifndef  __powerpc64__
 #include <linux/hdreg.h>
 #include <linux/ioport.h>
-#include <asm/io.h>
-
-extern void __ide_mm_insw(void __iomem *port, void *addr, u32 count);
-extern void __ide_mm_outsw(void __iomem *port, void *addr, u32 count);
-extern void __ide_mm_insl(void __iomem *port, void *addr, u32 count);
-extern void __ide_mm_outsl(void __iomem *port, void *addr, u32 count);
 
 struct ide_machdep_calls {
         int         (*default_irq)(unsigned long base);
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 0ee48436b1e30..51a598747367a 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -28,12 +28,6 @@ extern int check_legacy_ioport(unsigned long base_port);
 
 #include <asm-generic/iomap.h>
 
-#define __ide_mm_insw(p, a, c) _insw_ns((volatile u16 __iomem *)(p), (a), (c))
-#define __ide_mm_insl(p, a, c) _insl_ns((volatile u32 __iomem *)(p), (a), (c))
-#define __ide_mm_outsw(p, a, c) _outsw_ns((volatile u16 __iomem *)(p), (a), (c))
-#define __ide_mm_outsl(p, a, c) _outsl_ns((volatile u32 __iomem *)(p), (a), (c))
-
-
 #define SIO_CONFIG_RA	0x398
 #define SIO_CONFIG_RD	0x399
 
-- 
GitLab


From 5adcaf50cf697aa4d0c731107003c1383b59b214 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 19 Sep 2006 22:17:49 +1000
Subject: [PATCH 0216/1063] [POWERPC] convert string i/o operations to C

This produces essentially the same code and will make the iSeries i/o
consolidation easier.

The count parameter is changed to long since that will produce the same
(better) code on 32 and 64 bit builds.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/Makefile    |   2 +-
 arch/powerpc/kernel/io.c        | 117 ++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/misc.S      |  95 --------------------------
 arch/powerpc/kernel/ppc_ksyms.c |   7 --
 include/asm-powerpc/io.h        |  12 ++--
 include/asm-ppc/io.h            |  12 ++--
 6 files changed, 130 insertions(+), 115 deletions(-)
 create mode 100644 arch/powerpc/kernel/io.c

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 8b3f4faf57688..8b133afbdc205 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -51,7 +51,7 @@ extra-$(CONFIG_8xx)		:= head_8xx.o
 extra-y				+= vmlinux.lds
 
 obj-y				+= time.o prom.o traps.o setup-common.o \
-				   udbg.o misc.o
+				   udbg.o misc.o io.o
 obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o misc_32.o
 obj-$(CONFIG_PPC64)		+= misc_64.o dma_64.o iommu.o
 obj-$(CONFIG_PPC_MULTIPLATFORM)	+= prom_init.o
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
new file mode 100644
index 0000000000000..80a3209acef4b
--- /dev/null
+++ b/arch/powerpc/kernel/io.c
@@ -0,0 +1,117 @@
+/*
+ * I/O string operations
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *    Copyright (C) 2006 IBM Corporation
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * Rewritten in C by Stephen Rothwell.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+
+void _insb(volatile u8 __iomem *port, void *buf, long count)
+{
+	u8 *tbuf = buf;
+	u8 tmp;
+
+	if (unlikely(count <= 0))
+		return;
+	asm volatile("sync");
+	do {
+		tmp = *port;
+		asm volatile("eieio");
+		*tbuf++ = tmp;
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+}
+EXPORT_SYMBOL(_insb);
+
+void _outsb(volatile u8 __iomem *port, const void *buf, long count)
+{
+	const u8 *tbuf = buf;
+
+	if (unlikely(count <= 0))
+		return;
+	asm volatile("sync");
+	do {
+		*port = *tbuf++;
+	} while (--count != 0);
+	asm volatile("sync");
+}
+EXPORT_SYMBOL(_outsb);
+
+void _insw_ns(volatile u16 __iomem *port, void *buf, long count)
+{
+	u16 *tbuf = buf;
+	u16 tmp;
+
+	if (unlikely(count <= 0))
+		return;
+	asm volatile("sync");
+	do {
+		tmp = *port;
+		asm volatile("eieio");
+		*tbuf++ = tmp;
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+}
+EXPORT_SYMBOL(_insw_ns);
+
+void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
+{
+	const u16 *tbuf = buf;
+
+	if (unlikely(count <= 0))
+		return;
+	asm volatile("sync");
+	do {
+		*port = *tbuf++;
+	} while (--count != 0);
+	asm volatile("sync");
+}
+EXPORT_SYMBOL(_outsw_ns);
+
+void _insl_ns(volatile u32 __iomem *port, void *buf, long count)
+{
+	u32 *tbuf = buf;
+	u32 tmp;
+
+	if (unlikely(count <= 0))
+		return;
+	asm volatile("sync");
+	do {
+		tmp = *port;
+		asm volatile("eieio");
+		*tbuf++ = tmp;
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+}
+EXPORT_SYMBOL(_insl_ns);
+
+void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
+{
+	const u32 *tbuf = buf;
+
+	if (unlikely(count <= 0))
+		return;
+	asm volatile("sync");
+	do {
+		*port = *tbuf++;
+	} while (--count != 0);
+	asm volatile("sync");
+}
+EXPORT_SYMBOL(_outsl_ns);
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 6feb391422ec3..330c9dc7db861 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -43,98 +43,3 @@ _GLOBAL(add_reloc_offset)
 	add	r3,r3,r5
 	mtlr	r0
 	blr
-
-/*
- * I/O string operations
- *
- * insb(port, buf, len)
- * outsb(port, buf, len)
- * insw(port, buf, len)
- * outsw(port, buf, len)
- * insl(port, buf, len)
- * outsl(port, buf, len)
- * insw_ns(port, buf, len)
- * outsw_ns(port, buf, len)
- * insl_ns(port, buf, len)
- * outsl_ns(port, buf, len)
- *
- * The *_ns versions don't do byte-swapping.
- */
-_GLOBAL(_insb)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,1
-	blelr-
-00:	lbz	r5,0(r3)
-	eieio
-	stbu	r5,1(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsb)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,1
-	blelr-
-	sync
-00:	lbzu	r5,1(r4)
-	stb	r5,0(r3)
-	bdnz	00b
-	sync
-	blr
-
-_GLOBAL(_insw_ns)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhz	r5,0(r3)
-	eieio
-	sthu	r5,2(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsw_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-	sync
-00:	lhzu	r5,2(r4)
-	sth	r5,0(r3)
-	bdnz	00b
-	sync
-	blr
-
-_GLOBAL(_insl_ns)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwz	r5,0(r3)
-	eieio
-	stwu	r5,4(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsl_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-	sync
-00:	lwzu	r5,4(r4)
-	stw	r5,0(r3)
-	bdnz	00b
-	sync
-	blr
-
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 75429e580518b..807193a3c7849 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -95,13 +95,6 @@ EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(copy_4K_page);
 #endif
 
-EXPORT_SYMBOL(_insb);
-EXPORT_SYMBOL(_outsb);
-EXPORT_SYMBOL(_insw_ns);
-EXPORT_SYMBOL(_outsw_ns);
-EXPORT_SYMBOL(_insl_ns);
-EXPORT_SYMBOL(_outsl_ns);
-
 #if defined(CONFIG_PPC32) && (defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE))
 EXPORT_SYMBOL(ppc_ide_md);
 #endif
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 51a598747367a..57e7d14d65633 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -143,12 +143,12 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
 #define readl_relaxed(addr) readl(addr)
 #define readq_relaxed(addr) readq(addr)
 
-extern void _insb(volatile u8 __iomem *port, void *buf, int ns);
-extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns);
-extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl);
+extern void _insb(volatile u8 __iomem *port, void *buf, long count);
+extern void _outsb(volatile u8 __iomem *port, const void *buf, long count);
+extern void _insw_ns(volatile u16 __iomem *port, void *buf, long count);
+extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count);
+extern void _insl_ns(volatile u32 __iomem *port, void *buf, long count);
+extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count);
 
 static inline void mmiowb(void)
 {
diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h
index 9fac420f16484..3d9a9e6f33217 100644
--- a/include/asm-ppc/io.h
+++ b/include/asm-ppc/io.h
@@ -327,12 +327,12 @@ __do_out_asm(outl, "stwbrx")
 #define inl_p(port)		inl((port))
 #define outl_p(val, port)	outl((val), (port))
 
-extern void _insb(volatile u8 __iomem *port, void *buf, int ns);
-extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns);
-extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl);
+extern void _insb(volatile u8 __iomem *port, void *buf, long count);
+extern void _outsb(volatile u8 __iomem *port, const void *buf, long count);
+extern void _insw_ns(volatile u16 __iomem *port, void *buf, long count);
+extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count);
+extern void _insl_ns(volatile u32 __iomem *port, void *buf, long count);
+extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count);
 
 
 #define IO_SPACE_LIMIT ~0
-- 
GitLab


From 19e59df4dc2e6f7b46190ee77ce7093769f597a7 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 14 Sep 2006 14:55:36 +1000
Subject: [PATCH 0217/1063] [POWERPC] iseries: eliminate a couple of warnings

Copy and paste bug in io.h

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 include/asm-powerpc/io.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 57e7d14d65633..174fb89d5edaf 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -68,8 +68,8 @@ extern unsigned long pci_io_base;
  * for older code.
  */
 #define insb(port, buf, ns)	_insb((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insw(port, buf, ns)	_insw_ns((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insl(port, buf, nl)	_insl_ns((u8 __iomem *)((port)+pci_io_base), (buf), (nl))
+#define insw(port, buf, ns)	_insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
+#define insl(port, buf, nl)	_insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
 
 #else
 
-- 
GitLab


From a4dc7ff08915a2035aa6d6decc53fa1deaa410bb Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 19 Sep 2006 14:06:27 +1000
Subject: [PATCH 0218/1063] [POWERPC] Define of_read_ulong helper

There are various places where we want to extract an unsigned long
value from a device-tree property that can be 1 or 2 cells in length.
This replaces some open-coded calculations, and one place where we
assumed without checking that properties were the length we wanted,
with a little of_read_ulong() helper.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/prom.c         | 19 ++-----------------
 arch/powerpc/kernel/setup-common.c | 13 ++++++++-----
 arch/powerpc/kernel/time.c         |  4 +---
 include/asm-powerpc/prom.h         | 12 +++++++++++-
 4 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index bf2005b2feb61..eb913f80bfb1f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -757,24 +757,9 @@ static int __init early_init_dt_scan_root(unsigned long node,
 static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
 {
 	cell_t *p = *cellp;
-	unsigned long r;
 
-	/* Ignore more than 2 cells */
-	while (s > sizeof(unsigned long) / 4) {
-		p++;
-		s--;
-	}
-	r = *p++;
-#ifdef CONFIG_PPC64
-	if (s > 1) {
-		r <<= 32;
-		r |= *(p++);
-		s--;
-	}
-#endif
-
-	*cellp = p;
-	return r;
+	*cellp = p + s;
+	return of_read_ulong(p, s);
 }
 
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 465e7435efbcd..0af3fc1bdcc92 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -304,18 +304,21 @@ struct seq_operations cpuinfo_op = {
 void __init check_for_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
-	const unsigned long *prop;
+	const unsigned int *prop;
+	int len;
 
 	DBG(" -> check_for_initrd()\n");
 
 	if (of_chosen) {
-		prop = get_property(of_chosen, "linux,initrd-start", NULL);
+		prop = get_property(of_chosen, "linux,initrd-start", &len);
 		if (prop != NULL) {
-			initrd_start = (unsigned long)__va(*prop);
+			initrd_start = (unsigned long)
+				__va(of_read_ulong(prop, len / 4));
 			prop = get_property(of_chosen,
-					"linux,initrd-end", NULL);
+					"linux,initrd-end", &len);
 			if (prop != NULL) {
-				initrd_end = (unsigned long)__va(*prop);
+				initrd_end = (unsigned long)
+					__va(of_read_ulong(prop, len / 4));
 				initrd_below_start_ok = 1;
 			} else
 				initrd_start = 0;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b9a2061cfdb7e..7a3c3f791ade2 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -870,9 +870,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
 		fp = get_property(cpu, name, NULL);
 		if (fp) {
 			found = 1;
-			*val = 0;
-			while (cells--)
-				*val = (*val << 32) | *fp++;
+			*val = of_read_ulong(fp, cells);
 		}
 
 		of_node_put(cpu);
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index c15e66a2e6812..5246297693369 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -197,7 +197,7 @@ extern int release_OF_resource(struct device_node* node, int index);
  */
 
 
-/* Helper to read a big number */
+/* Helper to read a big number; size is in cells (not bytes) */
 static inline u64 of_read_number(const u32 *cell, int size)
 {
 	u64 r = 0;
@@ -206,6 +206,16 @@ static inline u64 of_read_number(const u32 *cell, int size)
 	return r;
 }
 
+/* Like of_read_number, but returns unsigned long; size is in cells */
+#ifdef CONFIG_PPC32
+static inline unsigned long of_read_ulong(const u32 *cell, int size)
+{
+	return (size > 0) ? cell[size-1] : 0;	/* low cell; no cell[-1] read */
+}
+#else
+#define of_read_ulong(cell, size)	of_read_number(cell, size)
+#endif
+
 /* Translate an OF address block into a CPU physical address
  */
 #define OF_BAD_ADDR	((u64)-1)
-- 
GitLab


From b2c5f61920eeee9c4e78698de4fde4586fe5ae79 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Tue, 19 Sep 2006 14:05:08 +1000
Subject: [PATCH 0219/1063] [POWERPC] Start arch/powerpc/boot code
 reorganization

This abstracts the operations used in the bootwrapper, and defines
the operations needed for the bootwrapper to run on an OF platform.

The operations have been divided up into platform ops (platform_ops),
firmware ops (fw_ops), device tree ops (dt_ops), and console ops
(console_ops).

The proper operations will be hooked up at runtime to provide the
functionality that you need.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/boot/Makefile         |   3 +-
 arch/powerpc/boot/flatdevtree.h    |  46 ++++++
 arch/powerpc/boot/main.c           | 250 +++++++++++++++--------------
 arch/powerpc/boot/{prom.c => of.c} | 144 +++++++++++++++--
 arch/powerpc/boot/ops.h            | 100 ++++++++++++
 arch/powerpc/boot/prom.h           |  41 -----
 arch/powerpc/boot/stdio.c          |   4 +-
 arch/powerpc/boot/stdio.h          |   8 +
 arch/powerpc/boot/types.h          |  23 +++
 9 files changed, 441 insertions(+), 178 deletions(-)
 create mode 100644 arch/powerpc/boot/flatdevtree.h
 rename arch/powerpc/boot/{prom.c => of.c} (54%)
 create mode 100644 arch/powerpc/boot/ops.h
 delete mode 100644 arch/powerpc/boot/prom.h
 create mode 100644 arch/powerpc/boot/types.h

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index afc776f821e5e..e73774136b558 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -36,7 +36,8 @@ zliblinuxheader := zlib.h zconf.h zutil.h
 $(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader))
 #$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h)
 
-src-boot := crt0.S string.S prom.c stdio.c main.c div64.S
+src-boot-$(CONFIG_PPC_MULTIPLATFORM) := of.c
+src-boot := crt0.S string.S stdio.c main.c div64.S $(src-boot-y)
 src-boot += $(zlib)
 src-boot := $(addprefix $(obj)/, $(src-boot))
 obj-boot := $(addsuffix .o, $(basename $(src-boot)))
diff --git a/arch/powerpc/boot/flatdevtree.h b/arch/powerpc/boot/flatdevtree.h
new file mode 100644
index 0000000000000..761c8dc840080
--- /dev/null
+++ b/arch/powerpc/boot/flatdevtree.h
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef FLATDEVTREE_H
+#define FLATDEVTREE_H
+
+#include "types.h"
+
+/* Definitions used by the flattened device tree */
+#define OF_DT_HEADER            0xd00dfeed      /* marker */
+#define OF_DT_BEGIN_NODE        0x1     /* Start of node, full name */
+#define OF_DT_END_NODE          0x2     /* End node */
+#define OF_DT_PROP              0x3     /* Property: name off, size, content */
+#define OF_DT_NOP               0x4     /* nop */
+#define OF_DT_END               0x9
+
+#define OF_DT_VERSION           0x10
+
+struct boot_param_header {
+	u32 magic;              /* magic word OF_DT_HEADER */
+	u32 totalsize;          /* total size of DT block */
+	u32 off_dt_struct;      /* offset to structure */
+	u32 off_dt_strings;     /* offset to strings */
+	u32 off_mem_rsvmap;     /* offset to memory reserve map */
+	u32 version;            /* format version */
+	u32 last_comp_version;  /* last compatible version */
+	/* version 2 fields below */
+	u32 boot_cpuid_phys;    /* Physical CPU id we're booting on */
+	/* version 3 fields below */
+	u32 dt_strings_size;    /* size of the DT strings block */
+};
+
+#endif /* FLATDEVTREE_H */
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index b66634c9ea34e..d719bb9333d18 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -14,17 +14,12 @@
 #include "page.h"
 #include "string.h"
 #include "stdio.h"
-#include "prom.h"
 #include "zlib.h"
+#include "ops.h"
+#include "flatdevtree.h"
 
 extern void flush_cache(void *, unsigned long);
 
-
-/* Value picked to match that used by yaboot */
-#define PROG_START	0x01400000	/* only used on 64-bit systems */
-#define RAM_END		(512<<20)	/* Fixme: use OF */
-#define	ONE_MB		0x100000
-
 extern char _start[];
 extern char __bss_start[];
 extern char _end[];
@@ -33,14 +28,6 @@ extern char _vmlinux_end[];
 extern char _initrd_start[];
 extern char _initrd_end[];
 
-/* A buffer that may be edited by tools operating on a zImage binary so as to
- * edit the command line passed to vmlinux (by setting /chosen/bootargs).
- * The buffer is put in it's own section so that tools may locate it easier.
- */
-static char builtin_cmdline[512]
-	__attribute__((section("__builtin_cmdline")));
-
-
 struct addr_range {
 	unsigned long addr;
 	unsigned long size;
@@ -51,21 +38,16 @@ static struct addr_range vmlinuz;
 static struct addr_range initrd;
 
 static unsigned long elfoffset;
+static int is_64bit;
 
-static char scratch[46912];	/* scratch space for gunzip, from zlib_inflate_workspacesize() */
+/* scratch space for gunzip; 46912 is from zlib_inflate_workspacesize() */
+static char scratch[46912];
 static char elfheader[256];
 
-
-typedef void (*kernel_entry_t)( unsigned long,
-                                unsigned long,
-                                void *,
-				void *);
-
+typedef void (*kernel_entry_t)(unsigned long, unsigned long, void *);
 
 #undef DEBUG
 
-static unsigned long claim_base;
-
 #define HEAD_CRC	2
 #define EXTRA_FIELD	4
 #define ORIG_NAME	8
@@ -123,24 +105,6 @@ static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
 	zlib_inflateEnd(&s);
 }
 
-static unsigned long try_claim(unsigned long size)
-{
-	unsigned long addr = 0;
-
-	for(; claim_base < RAM_END; claim_base += ONE_MB) {
-#ifdef DEBUG
-		printf("    trying: 0x%08lx\n\r", claim_base);
-#endif
-		addr = (unsigned long)claim(claim_base, size, 0);
-		if ((void *)addr != (void *)-1)
-			break;
-	}
-	if (addr == 0)
-		return 0;
-	claim_base = PAGE_ALIGN(claim_base + size);
-	return addr;
-}
-
 static int is_elf64(void *hdr)
 {
 	Elf64_Ehdr *elf64 = hdr;
@@ -169,16 +133,7 @@ static int is_elf64(void *hdr)
 	vmlinux.size = (unsigned long)elf64ph->p_filesz + elfoffset;
 	vmlinux.memsize = (unsigned long)elf64ph->p_memsz + elfoffset;
 
-#if defined(PROG_START)
-	/*
-	 * Maintain a "magic" minimum address. This keeps some older
-	 * firmware platforms running.
-	 */
-
-	if (claim_base < PROG_START)
-		claim_base = PROG_START;
-#endif
-
+	is_64bit = 1;
 	return 1;
 }
 
@@ -212,47 +167,9 @@ static int is_elf32(void *hdr)
 	return 1;
 }
 
-void export_cmdline(void* chosen_handle)
-{
-        int len;
-        char cmdline[2] = { 0, 0 };
-
-	if (builtin_cmdline[0] == 0)
-		return;
-
-        len = getprop(chosen_handle, "bootargs", cmdline, sizeof(cmdline));
-        if (len > 0 && cmdline[0] != 0)
-		return;
-
-	setprop(chosen_handle, "bootargs", builtin_cmdline,
-		strlen(builtin_cmdline) + 1);
-}
-
-
-void start(unsigned long a1, unsigned long a2, void *promptr, void *sp)
+static void prep_kernel(unsigned long *a1, unsigned long *a2)
 {
 	int len;
-	kernel_entry_t kernel_entry;
-
-	memset(__bss_start, 0, _end - __bss_start);
-
-	prom = (int (*)(void *)) promptr;
-	chosen_handle = finddevice("/chosen");
-	if (chosen_handle == (void *) -1)
-		exit();
-	if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4)
-		exit();
-
-	printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r", _start, sp);
-
-	/*
-	 * The first available claim_base must be above the end of the
-	 * the loaded kernel wrapper file (_start to _end includes the
-	 * initrd image if it is present) and rounded up to a nice
-	 * 1 MB boundary for good measure.
-	 */
-
-	claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB);
 
 	vmlinuz.addr = (unsigned long)_vmlinux_start;
 	vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start);
@@ -263,43 +180,51 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp)
 		gunzip(elfheader, sizeof(elfheader),
 				(unsigned char *)vmlinuz.addr, &len);
 	} else
-		memcpy(elfheader, (const void *)vmlinuz.addr, sizeof(elfheader));
+		memcpy(elfheader, (const void *)vmlinuz.addr,
+		       sizeof(elfheader));
 
 	if (!is_elf64(elfheader) && !is_elf32(elfheader)) {
 		printf("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
 		exit();
 	}
+	if (platform_ops.image_hdr)
+		platform_ops.image_hdr(elfheader);
 
-	/* We need to claim the memsize plus the file offset since gzip
+	/* We need to alloc the memsize plus the file offset since gzip
 	 * will expand the header (file offset), then the kernel, then
 	 * possible rubbish we don't care about. But the kernel bss must
 	 * be claimed (it will be zero'd by the kernel itself)
 	 */
 	printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize);
-	vmlinux.addr = try_claim(vmlinux.memsize);
+	vmlinux.addr = (unsigned long)malloc(vmlinux.memsize);
 	if (vmlinux.addr == 0) {
 		printf("Can't allocate memory for kernel image !\n\r");
 		exit();
 	}
 
 	/*
-	 * Now we try to claim memory for the initrd (and copy it there)
+	 * Now we try to alloc memory for the initrd (and copy it there)
 	 */
 	initrd.size = (unsigned long)(_initrd_end - _initrd_start);
 	initrd.memsize = initrd.size;
 	if ( initrd.size > 0 ) {
-		printf("Allocating 0x%lx bytes for initrd ...\n\r", initrd.size);
-		initrd.addr = try_claim(initrd.size);
+		printf("Allocating 0x%lx bytes for initrd ...\n\r",
+		       initrd.size);
+		initrd.addr = (unsigned long)malloc((u32)initrd.size);
 		if (initrd.addr == 0) {
-			printf("Can't allocate memory for initial ramdisk !\n\r");
+			printf("Can't allocate memory for initial "
+					"ramdisk !\n\r");
 			exit();
 		}
-		a1 = initrd.addr;
-		a2 = initrd.size;
-		printf("initial ramdisk moving 0x%lx <- 0x%lx (0x%lx bytes)\n\r",
-		       initrd.addr, (unsigned long)_initrd_start, initrd.size);
-		memmove((void *)initrd.addr, (void *)_initrd_start, initrd.size);
-		printf("initrd head: 0x%lx\n\r", *((unsigned long *)initrd.addr));
+		*a1 = initrd.addr;
+		*a2 = initrd.size;
+		printf("initial ramdisk moving 0x%lx <- 0x%lx "
+			"(0x%lx bytes)\n\r", initrd.addr,
+			(unsigned long)_initrd_start, initrd.size);
+		memmove((void *)initrd.addr, (void *)_initrd_start,
+			initrd.size);
+		printf("initrd head: 0x%lx\n\r",
+				*((unsigned long *)initrd.addr));
 	}
 
 	/* Eventually gunzip the kernel */
@@ -311,11 +236,10 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp)
 			(unsigned char *)vmlinuz.addr, &len);
 		printf("done 0x%lx bytes\n\r", len);
 	} else {
-		memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size);
+		memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,
+			vmlinuz.size);
 	}
 
-	export_cmdline(chosen_handle);
-
 	/* Skip over the ELF header */
 #ifdef DEBUG
 	printf("... skipping 0x%lx bytes of ELF header\n\r",
@@ -324,23 +248,107 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp)
 	vmlinux.addr += elfoffset;
 
 	flush_cache((void *)vmlinux.addr, vmlinux.size);
+}
 
-	kernel_entry = (kernel_entry_t)vmlinux.addr;
-#ifdef DEBUG
-	printf( "kernel:\n\r"
-		"        entry addr = 0x%lx\n\r"
-		"        a1         = 0x%lx,\n\r"
-		"        a2         = 0x%lx,\n\r"
-		"        prom       = 0x%lx,\n\r"
-		"        bi_recs    = 0x%lx,\n\r",
-		(unsigned long)kernel_entry, a1, a2,
-		(unsigned long)prom, NULL);
-#endif
+void __attribute__ ((weak)) ft_init(void *dt_blob)
+{
+}
 
-	kernel_entry(a1, a2, prom, NULL);
+/* A buffer that may be edited by tools operating on a zImage binary so as to
+ * edit the command line passed to vmlinux (by setting /chosen/bootargs).
+ * The buffer is put in its own section so that tools can locate it easily.
+ */
+static char builtin_cmdline[COMMAND_LINE_SIZE]
+	__attribute__((__section__("__builtin_cmdline")));
 
-	printf("Error: Linux kernel returned to zImage bootloader!\n\r");
+static void get_cmdline(char *buf, int size)
+{
+	void *devp;
+	int len = strlen(builtin_cmdline);
 
-	exit();
+	buf[0] = '\0';
+
+	if (len > 0) { /* builtin_cmdline overrides dt's /chosen/bootargs */
+		len = min(len, size-1);
+		strncpy(buf, builtin_cmdline, len);
+		buf[len] = '\0';
+	}
+	else if ((devp = finddevice("/chosen")))
+		getprop(devp, "bootargs", buf, size);
+}
+
+static void set_cmdline(char *buf)
+{
+	void *devp;
+
+	if ((devp = finddevice("/chosen")))
+		setprop(devp, "bootargs", buf, strlen(buf) + 1);
 }
 
+/* Section where ft can be tacked on after zImage is built */
+union blobspace {
+	struct boot_param_header hdr;
+	char space[8*1024];
+} dt_blob __attribute__((__section__("__builtin_ft")));
+
+struct platform_ops platform_ops;
+struct dt_ops dt_ops;
+struct console_ops console_ops;
+
+void start(unsigned long a1, unsigned long a2, void *promptr, void *sp)
+{
+	int have_dt = 0;
+	kernel_entry_t kentry;
+	char cmdline[COMMAND_LINE_SIZE];
+
+	memset(__bss_start, 0, _end - __bss_start);
+	memset(&platform_ops, 0, sizeof(platform_ops));
+	memset(&dt_ops, 0, sizeof(dt_ops));
+	memset(&console_ops, 0, sizeof(console_ops));
+
+	/* Override the dt_ops and device tree if there was a flat dev
+	 * tree attached to the zImage.
+	 */
+	if (dt_blob.hdr.magic == OF_DT_HEADER) {
+		have_dt = 1;
+		ft_init(&dt_blob);
+	}
+
+	if (platform_init(promptr))
+		exit();
+	if (console_ops.open && (console_ops.open() < 0))
+		exit();
+	if (platform_ops.fixups)
+		platform_ops.fixups();
+
+	printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r",
+	       _start, sp);
+
+	prep_kernel(&a1, &a2);
+
+	/* If cmdline came from zimage wrapper or if we can edit the one
+	 * in the dt, print it out and edit it, if possible.
+	 */
+	if ((strlen(builtin_cmdline) > 0) || console_ops.edit_cmdline) {
+		get_cmdline(cmdline, COMMAND_LINE_SIZE);
+		printf("\n\rLinux/PowerPC load: %s", cmdline);
+		if (console_ops.edit_cmdline)
+			console_ops.edit_cmdline(cmdline, COMMAND_LINE_SIZE);
+		printf("\n\r");
+		set_cmdline(cmdline);
+	}
+
+	if (console_ops.close)
+		console_ops.close();
+
+	kentry = (kernel_entry_t) vmlinux.addr;
+	if (have_dt)
+		kentry(dt_ops.ft_addr(), 0, NULL);
+	else
+		/* XXX initrd addr/size should be passed in properties */
+		kentry(a1, a2, promptr);
+
+	/* console closed so printf below may not work */
+	printf("Error: Linux kernel returned to zImage boot wrapper!\n\r");
+	exit();
+}
diff --git a/arch/powerpc/boot/prom.c b/arch/powerpc/boot/of.c
similarity index 54%
rename from arch/powerpc/boot/prom.c
rename to arch/powerpc/boot/of.c
index fa0057736f6b3..fd99f789a37bb 100644
--- a/arch/powerpc/boot/prom.c
+++ b/arch/powerpc/boot/of.c
@@ -8,15 +8,29 @@
  */
 #include <stdarg.h>
 #include <stddef.h>
+#include "types.h"
+#include "elf.h"
 #include "string.h"
 #include "stdio.h"
-#include "prom.h"
+#include "page.h"
+#include "ops.h"
 
-int (*prom)(void *);
-phandle chosen_handle;
-ihandle stdout;
+typedef void *ihandle;
+typedef void *phandle;
 
-int call_prom(const char *service, int nargs, int nret, ...)
+extern char _end[];
+
+/* Value picked to match that used by yaboot */
+#define PROG_START	0x01400000	/* only used on 64-bit systems */
+#define RAM_END		(512<<20)	/* Fixme: use OF */
+#define	ONE_MB		0x100000
+
+int (*prom) (void *);
+
+
+static unsigned long claim_base;
+
+static int call_prom(const char *service, int nargs, int nret, ...)
 {
 	int i;
 	struct prom_args {
@@ -45,7 +59,7 @@ int call_prom(const char *service, int nargs, int nret, ...)
 	return (nret > 0)? args.args[nargs]: 0;
 }
 
-int call_prom_ret(const char *service, int nargs, int nret,
+static int call_prom_ret(const char *service, int nargs, int nret,
 		  unsigned int *rets, ...)
 {
 	int i;
@@ -79,11 +93,6 @@ int call_prom_ret(const char *service, int nargs, int nret,
 	return (nret > 0)? args.args[nargs]: 0;
 }
 
-int write(void *handle, void *ptr, int nb)
-{
-	return call_prom("write", 3, 1, handle, ptr, nb);
-}
-
 /*
  * Older OF's require that when claiming a specific range of addresses,
  * we claim the physical space in the /memory node and the virtual
@@ -142,7 +151,7 @@ static int check_of_version(void)
 	return 1;
 }
 
-void *claim(unsigned long virt, unsigned long size, unsigned long align)
+static void *claim(unsigned long virt, unsigned long size, unsigned long align)
 {
 	int ret;
 	unsigned int result;
@@ -151,7 +160,7 @@ void *claim(unsigned long virt, unsigned long size, unsigned long align)
 		need_map = check_of_version();
 	if (align || !need_map)
 		return (void *) call_prom("claim", 3, 1, virt, size, align);
-	
+
 	ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory,
 			    align, size, virt);
 	if (ret != 0 || result == -1)
@@ -163,3 +172,112 @@ void *claim(unsigned long virt, unsigned long size, unsigned long align)
 			0x12, size, virt, virt);
 	return (void *) virt;
 }
+
+static void *of_try_claim(u32 size)
+{
+	unsigned long addr = 0;
+	unsigned long image_end = _ALIGN_UP((unsigned long)_end, ONE_MB);
+
+	/* Start above the wrapper image, but keep a higher floor that
+	 * of_image_hdr() may already have set (PROG_START). */
+	if (claim_base < image_end)
+		claim_base = image_end;
+
+	for(; claim_base < RAM_END; claim_base += ONE_MB) {
+#ifdef DEBUG
+		printf("    trying: 0x%08lx\n\r", claim_base);
+#endif
+		addr = (unsigned long)claim(claim_base, size, 0);
+		if ((void *)addr != (void *)-1)
+			break;
+	}
+	if (addr == 0 || (void *)addr == (void *)-1) /* nothing was claimed */
+		return NULL;
+	claim_base = PAGE_ALIGN(claim_base + size);
+	return (void *)addr;
+}
+
+static void of_image_hdr(const void *hdr)
+{
+	const Elf64_Ehdr *elf64 = hdr;
+
+	if (elf64->e_ident[EI_CLASS] == ELFCLASS64) {
+		/*
+		 * Maintain a "magic" minimum address. This keeps some older
+		 * firmware platforms running.
+		 */
+		if (claim_base < PROG_START)
+			claim_base = PROG_START;
+	}
+}
+
+static void of_exit(void)
+{
+	call_prom("exit", 0, 0);
+}
+
+/*
+ * OF device tree routines
+ */
+static void *of_finddevice(const char *name)
+{
+	return (phandle) call_prom("finddevice", 1, 1, name);
+}
+
+static int of_getprop(const void *phandle, const char *name, void *buf,
+		const int buflen)
+{
+	return call_prom("getprop", 4, 1, phandle, name, buf, buflen);
+}
+
+static int of_setprop(const void *phandle, const char *name, const void *buf,
+		const int buflen)
+{
+	return call_prom("setprop", 4, 1, phandle, name, buf, buflen);
+}
+
+/*
+ * OF console routines
+ */
+static void *of_stdout_handle;
+
+static int of_console_open(void)
+{
+	void *devp;
+
+	if (((devp = finddevice("/chosen")) != NULL)
+			&& (getprop(devp, "stdout", &of_stdout_handle,
+				sizeof(of_stdout_handle))
+				== sizeof(of_stdout_handle)))
+		return 0;
+
+	return -1;
+}
+
+static void of_console_write(char *buf, int len)
+{
+	call_prom("write", 3, 1, of_stdout_handle, buf, len);
+}
+
+int platform_init(void *promptr)
+{
+	platform_ops.fixups = NULL;
+	platform_ops.image_hdr = of_image_hdr;
+	platform_ops.malloc = of_try_claim;
+	platform_ops.free = NULL;
+	platform_ops.exit = of_exit;
+
+	dt_ops.finddevice = of_finddevice;
+	dt_ops.getprop = of_getprop;
+	dt_ops.setprop = of_setprop;
+	dt_ops.translate_addr = NULL;
+
+	console_ops.open = of_console_open;
+	console_ops.write = of_console_write;
+	console_ops.edit_cmdline = NULL;
+	console_ops.close = NULL;
+	console_ops.data = NULL;
+
+	prom = (int (*)(void *))promptr;
+	return 0;
+}
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
new file mode 100644
index 0000000000000..135eb4bb03b45
--- /dev/null
+++ b/arch/powerpc/boot/ops.h
@@ -0,0 +1,100 @@
+/*
+ * Global definition of all the bootwrapper operations.
+ *
+ * Author: Mark A. Greer <mgreer@mvista.com>
+ *
+ * 2006 (c) MontaVista Software, Inc.  This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef _PPC_BOOT_OPS_H_
+#define _PPC_BOOT_OPS_H_
+
+#include "types.h"
+
+#define	COMMAND_LINE_SIZE	512
+#define	MAX_PATH_LEN		256
+#define	MAX_PROP_LEN		256 /* What should this be? */
+
+/* Platform specific operations */
+struct platform_ops {
+	void	(*fixups)(void);
+	void	(*image_hdr)(const void *);
+	void *	(*malloc)(u32 size);
+	void	(*free)(void *ptr, u32 size);
+	void	(*exit)(void);
+};
+extern struct platform_ops platform_ops;
+
+/* Device Tree operations */
+struct dt_ops {
+	void *	(*finddevice)(const char *name);
+	int	(*getprop)(const void *node, const char *name, void *buf,
+			const int buflen);
+	int	(*setprop)(const void *node, const char *name,
+			const void *buf, const int buflen);
+	u64	(*translate_addr)(const char *path, const u32 *in_addr,
+			const u32 addr_len);
+	unsigned long (*ft_addr)(void);
+};
+extern struct dt_ops dt_ops;
+
+/* Console operations */
+struct console_ops {
+	int	(*open)(void);
+	void	(*write)(char *buf, int len);
+	void	(*edit_cmdline)(char *buf, int len);
+	void	(*close)(void);
+	void	*data;
+};
+extern struct console_ops console_ops;
+
+/* Serial console operations */
+struct serial_console_data {
+	int		(*open)(void);
+	void		(*putc)(unsigned char c);
+	unsigned char	(*getc)(void);
+	u8		(*tstc)(void);
+	void		(*close)(void);
+};
+
+extern int platform_init(void *promptr);
+extern void simple_alloc_init(void);
+extern void ft_init(void *dt_blob);
+extern int serial_console_init(void);
+
+static inline void *finddevice(const char *name)
+{
+	return (dt_ops.finddevice) ? dt_ops.finddevice(name) : NULL;
+}
+
+static inline int getprop(void *devp, const char *name, void *buf, int buflen)
+{
+	return (dt_ops.getprop) ? dt_ops.getprop(devp, name, buf, buflen) : -1;
+}
+
+static inline int setprop(void *devp, const char *name, void *buf, int buflen)
+{
+	return (dt_ops.setprop) ? dt_ops.setprop(devp, name, buf, buflen) : -1;
+}
+
+static inline void *malloc(u32 size)
+{
+	return (platform_ops.malloc) ? platform_ops.malloc(size) : NULL;
+}
+
+static inline void free(void *ptr, u32 size)
+{
+	if (platform_ops.free)
+		platform_ops.free(ptr, size);
+}
+
+static inline void exit(void)
+{
+	if (platform_ops.exit)
+		platform_ops.exit();
+	for(;;);
+}
+
+#endif /* _PPC_BOOT_OPS_H_ */
diff --git a/arch/powerpc/boot/prom.h b/arch/powerpc/boot/prom.h
deleted file mode 100644
index a57b184c564f1..0000000000000
--- a/arch/powerpc/boot/prom.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _PPC_BOOT_PROM_H_
-#define _PPC_BOOT_PROM_H_
-
-typedef void *phandle;
-typedef void *ihandle;
-
-extern int (*prom) (void *);
-extern phandle chosen_handle;
-extern ihandle stdout;
-
-int	call_prom(const char *service, int nargs, int nret, ...);
-int	call_prom_ret(const char *service, int nargs, int nret,
-		      unsigned int *rets, ...);
-
-extern int write(void *handle, void *ptr, int nb);
-extern void *claim(unsigned long virt, unsigned long size, unsigned long aln);
-
-static inline void exit(void)
-{
-	call_prom("exit", 0, 0);
-}
-
-static inline phandle finddevice(const char *name)
-{
-	return (phandle) call_prom("finddevice", 1, 1, name);
-}
-
-static inline int getprop(void *phandle, const char *name,
-			  void *buf, int buflen)
-{
-	return call_prom("getprop", 4, 1, phandle, name, buf, buflen);
-}
-
-
-static inline int setprop(void *phandle, const char *name,
-			  void *buf, int buflen)
-{
-	return call_prom("setprop", 4, 1, phandle, name, buf, buflen);
-}
-
-#endif				/* _PPC_BOOT_PROM_H_ */
diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c
index b5aa522f8b777..6d5f6382e1ce2 100644
--- a/arch/powerpc/boot/stdio.c
+++ b/arch/powerpc/boot/stdio.c
@@ -10,7 +10,7 @@
 #include <stddef.h>
 #include "string.h"
 #include "stdio.h"
-#include "prom.h"
+#include "ops.h"
 
 size_t strnlen(const char * s, size_t count)
 {
@@ -320,6 +320,6 @@ printf(const char *fmt, ...)
 	va_start(args, fmt);
 	n = vsprintf(sprint_buf, fmt, args);
 	va_end(args);
-	write(stdout, sprint_buf, n);
+	console_ops.write(sprint_buf, n);
 	return n;
 }
diff --git a/arch/powerpc/boot/stdio.h b/arch/powerpc/boot/stdio.h
index eb9e16c87aef7..73b8a91bfb348 100644
--- a/arch/powerpc/boot/stdio.h
+++ b/arch/powerpc/boot/stdio.h
@@ -1,8 +1,16 @@
 #ifndef _PPC_BOOT_STDIO_H_
 #define _PPC_BOOT_STDIO_H_
 
+#include <stdarg.h>
+
+#define	ENOMEM		12	/* Out of Memory */
+#define	EINVAL		22	/* Invalid argument */
+#define ENOSPC		28	/* No space left on device */
+
 extern int printf(const char *fmt, ...);
 
+#define fprintf(fmt, args...)	printf(args)
+
 extern int sprintf(char *buf, const char *fmt, ...);
 
 extern int vsprintf(char *buf, const char *fmt, va_list args);
diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h
new file mode 100644
index 0000000000000..79d26e7086770
--- /dev/null
+++ b/arch/powerpc/boot/types.h
@@ -0,0 +1,23 @@
+#ifndef _TYPES_H_
+#define _TYPES_H_
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef unsigned char		u8;
+typedef unsigned short		u16;
+typedef unsigned int		u32;
+typedef unsigned long long	u64;
+
+#define min(x,y) ({ \
+	typeof(x) _x = (x);	\
+	typeof(y) _y = (y);	\
+	(void) (&_x == &_y);	\
+	_x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+	typeof(x) _x = (x);	\
+	typeof(y) _y = (y);	\
+	(void) (&_x == &_y);	\
+	_x > _y ? _x : _y; })
+
+#endif /* _TYPES_H_ */
-- 
GitLab


From de1a3f1ce6c4c3b2b14cf9157a22d6b4c64f708e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:20 +0200
Subject: [PATCH 0220/1063] [S390] EX_TABLE macro.

Add EX_TABLE helper macro to simplify creation of inline assembly
exception table entries.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/asm-s390/processor.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 5b71d37317239..a3a4e5fd30d70 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -339,4 +339,21 @@ int unregister_idle_notifier(struct notifier_block *nb);
 
 #endif
 
+/*
+ * Helper macro for exception table entries
+ */
+#ifndef __s390x__
+#define EX_TABLE(_fault,_target)			\
+	".section __ex_table,\"a\"\n"			\
+	"	.align 4\n"				\
+	"	.long  " #_fault "," #_target "\n"	\
+	".previous\n"
+#else
+#define EX_TABLE(_fault,_target)			\
+	".section __ex_table,\"a\"\n"			\
+	"	.align 8\n"				\
+	"	.quad  " #_fault "," #_target "\n"	\
+	".previous\n"
+#endif
+
 #endif                                 /* __ASM_S390_PROCESSOR_H           */
-- 
GitLab


From 7561b974e0cbbdca1bb880b55200afd9a1a20737 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:22 +0200
Subject: [PATCH 0221/1063] [S390] remove old z90crypt driver.

The z90crypt driver has served its term. It is replaced by the shiny
new zcrypt device driver.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/Kconfig              |    9 -
 drivers/s390/crypto/Makefile      |    2 -
 drivers/s390/crypto/z90common.h   |  166 --
 drivers/s390/crypto/z90crypt.h    |   71 -
 drivers/s390/crypto/z90hardware.c | 2531 ---------------------
 drivers/s390/crypto/z90main.c     | 3379 -----------------------------
 include/asm-s390/z90crypt.h       |  212 --
 7 files changed, 6370 deletions(-)
 delete mode 100644 drivers/s390/crypto/z90common.h
 delete mode 100644 drivers/s390/crypto/z90crypt.h
 delete mode 100644 drivers/s390/crypto/z90hardware.c
 delete mode 100644 drivers/s390/crypto/z90main.c
 delete mode 100644 include/asm-s390/z90crypt.h

diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig
index 4d36208ff8de3..f0ea550d39bcd 100644
--- a/drivers/s390/Kconfig
+++ b/drivers/s390/Kconfig
@@ -217,13 +217,4 @@ endmenu
 
 menu "Cryptographic devices"
 
-config Z90CRYPT
-	tristate "Support for PCI-attached cryptographic adapters"
-        default "m"
-        help
-	  Select this option if you want to use a PCI-attached cryptographic
-	  adapter like the PCI Cryptographic Accelerator (PCICA) or the PCI
-	  Cryptographic Coprocessor (PCICC).  This option is also available
-	  as a module called z90crypt.ko.
-
 endmenu
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index 15edebbead7f9..67e75be8e4e4f 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -2,5 +2,3 @@
 # S/390 crypto devices
 #
 
-z90crypt-objs := z90main.o z90hardware.o
-obj-$(CONFIG_Z90CRYPT) += z90crypt.o
diff --git a/drivers/s390/crypto/z90common.h b/drivers/s390/crypto/z90common.h
deleted file mode 100644
index dbbcda3c846a3..0000000000000
--- a/drivers/s390/crypto/z90common.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90common.h
- *
- *  z90crypt 1.3.3
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _Z90COMMON_H_
-#define _Z90COMMON_H_
-
-
-#define RESPBUFFSIZE 256
-#define PCI_FUNC_KEY_DECRYPT 0x5044
-#define PCI_FUNC_KEY_ENCRYPT 0x504B
-extern int ext_bitlens;
-
-enum devstat {
-	DEV_GONE,
-	DEV_ONLINE,
-	DEV_QUEUE_FULL,
-	DEV_EMPTY,
-	DEV_NO_WORK,
-	DEV_BAD_MESSAGE,
-	DEV_TSQ_EXCEPTION,
-	DEV_RSQ_EXCEPTION,
-	DEV_SEN_EXCEPTION,
-	DEV_REC_EXCEPTION
-};
-
-enum hdstat {
-	HD_NOT_THERE,
-	HD_BUSY,
-	HD_DECONFIGURED,
-	HD_CHECKSTOPPED,
-	HD_ONLINE,
-	HD_TSQ_EXCEPTION
-};
-
-#define Z90C_NO_DEVICES		1
-#define Z90C_AMBIGUOUS_DOMAIN	2
-#define Z90C_INCORRECT_DOMAIN	3
-#define ENOTINIT		4
-
-#define SEN_BUSY	 7
-#define SEN_USER_ERROR	 8
-#define SEN_QUEUE_FULL	11
-#define SEN_NOT_AVAIL	16
-#define SEN_PAD_ERROR	17
-#define SEN_RETRY	18
-#define SEN_RELEASED	24
-
-#define REC_EMPTY	 4
-#define REC_BUSY	 6
-#define REC_OPERAND_INV	 8
-#define REC_OPERAND_SIZE 9
-#define REC_EVEN_MOD	10
-#define REC_NO_WORK	11
-#define REC_HARDWAR_ERR	12
-#define REC_NO_RESPONSE	13
-#define REC_RETRY_DEV	14
-#define REC_USER_GONE	15
-#define REC_BAD_MESSAGE	16
-#define REC_INVALID_PAD	17
-#define REC_USE_PCICA	18
-
-#define WRONG_DEVICE_TYPE 20
-
-#define REC_FATAL_ERROR 32
-#define SEN_FATAL_ERROR 33
-#define TSQ_FATAL_ERROR 34
-#define RSQ_FATAL_ERROR 35
-
-#define Z90CRYPT_NUM_TYPES	6
-#define PCICA		0
-#define PCICC		1
-#define PCIXCC_MCL2	2
-#define PCIXCC_MCL3	3
-#define CEX2C		4
-#define CEX2A		5
-#define NILDEV		-1
-#define ANYDEV		-1
-#define PCIXCC_UNK	-2
-
-enum hdevice_type {
-	PCICC_HW  = 3,
-	PCICA_HW  = 4,
-	PCIXCC_HW = 5,
-	CEX2A_HW  = 6,
-	CEX2C_HW  = 7
-};
-
-struct CPRBX {
-	unsigned short cprb_len;
-	unsigned char  cprb_ver_id;
-	unsigned char  pad_000[3];
-	unsigned char  func_id[2];
-	unsigned char  cprb_flags[4];
-	unsigned int   req_parml;
-	unsigned int   req_datal;
-	unsigned int   rpl_msgbl;
-	unsigned int   rpld_parml;
-	unsigned int   rpl_datal;
-	unsigned int   rpld_datal;
-	unsigned int   req_extbl;
-	unsigned char  pad_001[4];
-	unsigned int   rpld_extbl;
-	unsigned char  req_parmb[16];
-	unsigned char  req_datab[16];
-	unsigned char  rpl_parmb[16];
-	unsigned char  rpl_datab[16];
-	unsigned char  req_extb[16];
-	unsigned char  rpl_extb[16];
-	unsigned short ccp_rtcode;
-	unsigned short ccp_rscode;
-	unsigned int   mac_data_len;
-	unsigned char  logon_id[8];
-	unsigned char  mac_value[8];
-	unsigned char  mac_content_flgs;
-	unsigned char  pad_002;
-	unsigned short domain;
-	unsigned char  pad_003[12];
-	unsigned char  pad_004[36];
-};
-
-#ifndef DEV_NAME
-#define DEV_NAME	"z90crypt"
-#endif
-#define PRINTK(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-#define PRINTKN(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": " fmt, ## args)
-#define PRINTKW(fmt, args...) \
-	printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-#define PRINTKC(fmt, args...) \
-	printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-
-#ifdef Z90CRYPT_DEBUG
-#define PDEBUG(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-#else
-#define PDEBUG(fmt, args...) do {} while (0)
-#endif
-
-#define UMIN(a,b) ((a) < (b) ? (a) : (b))
-#define IS_EVEN(x) ((x) == (2 * ((x) / 2)))
-
-#endif
diff --git a/drivers/s390/crypto/z90crypt.h b/drivers/s390/crypto/z90crypt.h
deleted file mode 100644
index 0ca1d126ccb66..0000000000000
--- a/drivers/s390/crypto/z90crypt.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90crypt.h
- *
- *  z90crypt 1.3.3 (kernel-private header)
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _Z90CRYPT_H_
-#define _Z90CRYPT_H_
-
-#include <asm/z90crypt.h>
-
-/**
- * local errno definitions
- */
-#define ENOBUFF	  129	// filp->private_data->...>work_elem_p->buffer is NULL
-#define EWORKPEND 130	// user issues ioctl while another pending
-#define ERELEASED 131	// user released while ioctl pending
-#define EQUIESCE  132	// z90crypt quiescing (no more work allowed)
-#define ETIMEOUT  133	// request timed out
-#define EUNKNOWN  134	// some unrecognized error occured (retry may succeed)
-#define EGETBUFF  135	// Error getting buffer or hardware lacks capability
-			// (retry in software)
-
-/**
- * DEPRECATED STRUCTURES
- */
-
-/**
- * This structure is DEPRECATED and the corresponding ioctl() has been
- * replaced with individual ioctl()s for each piece of data!
- * This structure will NOT survive past version 1.3.1, so switch to the
- * new ioctl()s.
- */
-#define MASK_LENGTH 64 // mask length
-struct ica_z90_status {
-	int totalcount;
-	int leedslitecount; // PCICA
-	int leeds2count;    // PCICC
-	// int PCIXCCCount; is not in struct for backward compatibility
-	int requestqWaitCount;
-	int pendingqWaitCount;
-	int totalOpenCount;
-	int cryptoDomain;
-	// status: 0=not there, 1=PCICA, 2=PCICC, 3=PCIXCC_MCL2, 4=PCIXCC_MCL3,
-	//         5=CEX2C
-	unsigned char status[MASK_LENGTH];
-	// qdepth: # work elements waiting for each device
-	unsigned char qdepth[MASK_LENGTH];
-};
-
-#endif /* _Z90CRYPT_H_ */
diff --git a/drivers/s390/crypto/z90hardware.c b/drivers/s390/crypto/z90hardware.c
deleted file mode 100644
index be60795f4a743..0000000000000
--- a/drivers/s390/crypto/z90hardware.c
+++ /dev/null
@@ -1,2531 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90hardware.c
- *
- *  z90crypt 1.3.3
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <asm/uaccess.h>
-#include <linux/compiler.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include "z90crypt.h"
-#include "z90common.h"
-
-struct cca_token_hdr {
-	unsigned char  token_identifier;
-	unsigned char  version;
-	unsigned short token_length;
-	unsigned char  reserved[4];
-};
-
-#define CCA_TKN_HDR_ID_EXT 0x1E
-
-struct cca_private_ext_ME_sec {
-	unsigned char  section_identifier;
-	unsigned char  version;
-	unsigned short section_length;
-	unsigned char  private_key_hash[20];
-	unsigned char  reserved1[4];
-	unsigned char  key_format;
-	unsigned char  reserved2;
-	unsigned char  key_name_hash[20];
-	unsigned char  key_use_flags[4];
-	unsigned char  reserved3[6];
-	unsigned char  reserved4[24];
-	unsigned char  confounder[24];
-	unsigned char  exponent[128];
-	unsigned char  modulus[128];
-};
-
-#define CCA_PVT_USAGE_ALL 0x80
-
-struct cca_public_sec {
-	unsigned char  section_identifier;
-	unsigned char  version;
-	unsigned short section_length;
-	unsigned char  reserved[2];
-	unsigned short exponent_len;
-	unsigned short modulus_bit_len;
-	unsigned short modulus_byte_len;
-	unsigned char  exponent[3];
-};
-
-struct cca_private_ext_ME {
-	struct cca_token_hdr	      pvtMEHdr;
-	struct cca_private_ext_ME_sec pvtMESec;
-	struct cca_public_sec	      pubMESec;
-};
-
-struct cca_public_key {
-	struct cca_token_hdr  pubHdr;
-	struct cca_public_sec pubSec;
-};
-
-struct cca_pvt_ext_CRT_sec {
-	unsigned char  section_identifier;
-	unsigned char  version;
-	unsigned short section_length;
-	unsigned char  private_key_hash[20];
-	unsigned char  reserved1[4];
-	unsigned char  key_format;
-	unsigned char  reserved2;
-	unsigned char  key_name_hash[20];
-	unsigned char  key_use_flags[4];
-	unsigned short p_len;
-	unsigned short q_len;
-	unsigned short dp_len;
-	unsigned short dq_len;
-	unsigned short u_len;
-	unsigned short mod_len;
-	unsigned char  reserved3[4];
-	unsigned short pad_len;
-	unsigned char  reserved4[52];
-	unsigned char  confounder[8];
-};
-
-#define CCA_PVT_EXT_CRT_SEC_ID_PVT 0x08
-#define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40
-
-struct cca_private_ext_CRT {
-	struct cca_token_hdr	   pvtCrtHdr;
-	struct cca_pvt_ext_CRT_sec pvtCrtSec;
-	struct cca_public_sec	   pubCrtSec;
-};
-
-struct ap_status_word {
-	unsigned char q_stat_flags;
-	unsigned char response_code;
-	unsigned char reserved[2];
-};
-
-#define AP_Q_STATUS_EMPTY		0x80
-#define AP_Q_STATUS_REPLIES_WAITING	0x40
-#define AP_Q_STATUS_ARRAY_FULL		0x20
-
-#define AP_RESPONSE_NORMAL		0x00
-#define AP_RESPONSE_Q_NOT_AVAIL		0x01
-#define AP_RESPONSE_RESET_IN_PROGRESS	0x02
-#define AP_RESPONSE_DECONFIGURED	0x03
-#define AP_RESPONSE_CHECKSTOPPED	0x04
-#define AP_RESPONSE_BUSY		0x05
-#define AP_RESPONSE_Q_FULL		0x10
-#define AP_RESPONSE_NO_PENDING_REPLY	0x10
-#define AP_RESPONSE_INDEX_TOO_BIG	0x11
-#define AP_RESPONSE_NO_FIRST_PART	0x13
-#define AP_RESPONSE_MESSAGE_TOO_BIG	0x15
-
-#define AP_MAX_CDX_BITL		4
-#define AP_RQID_RESERVED_BITL	4
-#define SKIP_BITL		(AP_MAX_CDX_BITL + AP_RQID_RESERVED_BITL)
-
-struct type4_hdr {
-	unsigned char  reserved1;
-	unsigned char  msg_type_code;
-	unsigned short msg_len;
-	unsigned char  request_code;
-	unsigned char  msg_fmt;
-	unsigned short reserved2;
-};
-
-#define TYPE4_TYPE_CODE 0x04
-#define TYPE4_REQU_CODE 0x40
-
-#define TYPE4_SME_LEN 0x0188
-#define TYPE4_LME_LEN 0x0308
-#define TYPE4_SCR_LEN 0x01E0
-#define TYPE4_LCR_LEN 0x03A0
-
-#define TYPE4_SME_FMT 0x00
-#define TYPE4_LME_FMT 0x10
-#define TYPE4_SCR_FMT 0x40
-#define TYPE4_LCR_FMT 0x50
-
-struct type4_sme {
-	struct type4_hdr header;
-	unsigned char	 message[128];
-	unsigned char	 exponent[128];
-	unsigned char	 modulus[128];
-};
-
-struct type4_lme {
-	struct type4_hdr header;
-	unsigned char	 message[256];
-	unsigned char	 exponent[256];
-	unsigned char	 modulus[256];
-};
-
-struct type4_scr {
-	struct type4_hdr header;
-	unsigned char	 message[128];
-	unsigned char	 dp[72];
-	unsigned char	 dq[64];
-	unsigned char	 p[72];
-	unsigned char	 q[64];
-	unsigned char	 u[72];
-};
-
-struct type4_lcr {
-	struct type4_hdr header;
-	unsigned char	 message[256];
-	unsigned char	 dp[136];
-	unsigned char	 dq[128];
-	unsigned char	 p[136];
-	unsigned char	 q[128];
-	unsigned char	 u[136];
-};
-
-union type4_msg {
-	struct type4_sme sme;
-	struct type4_lme lme;
-	struct type4_scr scr;
-	struct type4_lcr lcr;
-};
-
-struct type84_hdr {
-	unsigned char  reserved1;
-	unsigned char  code;
-	unsigned short len;
-	unsigned char  reserved2[4];
-};
-
-#define TYPE84_RSP_CODE 0x84
-
-struct type6_hdr {
-	unsigned char reserved1;
-	unsigned char type;
-	unsigned char reserved2[2];
-	unsigned char right[4];
-	unsigned char reserved3[2];
-	unsigned char reserved4[2];
-	unsigned char apfs[4];
-	unsigned int  offset1;
-	unsigned int  offset2;
-	unsigned int  offset3;
-	unsigned int  offset4;
-	unsigned char agent_id[16];
-	unsigned char rqid[2];
-	unsigned char reserved5[2];
-	unsigned char function_code[2];
-	unsigned char reserved6[2];
-	unsigned int  ToCardLen1;
-	unsigned int  ToCardLen2;
-	unsigned int  ToCardLen3;
-	unsigned int  ToCardLen4;
-	unsigned int  FromCardLen1;
-	unsigned int  FromCardLen2;
-	unsigned int  FromCardLen3;
-	unsigned int  FromCardLen4;
-};
-
-struct CPRB {
-	unsigned char cprb_len[2];
-	unsigned char cprb_ver_id;
-	unsigned char pad_000;
-	unsigned char srpi_rtcode[4];
-	unsigned char srpi_verb;
-	unsigned char flags;
-	unsigned char func_id[2];
-	unsigned char checkpoint_flag;
-	unsigned char resv2;
-	unsigned char req_parml[2];
-	unsigned char req_parmp[4];
-	unsigned char req_datal[4];
-	unsigned char req_datap[4];
-	unsigned char rpl_parml[2];
-	unsigned char pad_001[2];
-	unsigned char rpl_parmp[4];
-	unsigned char rpl_datal[4];
-	unsigned char rpl_datap[4];
-	unsigned char ccp_rscode[2];
-	unsigned char ccp_rtcode[2];
-	unsigned char repd_parml[2];
-	unsigned char mac_data_len[2];
-	unsigned char repd_datal[4];
-	unsigned char req_pc[2];
-	unsigned char res_origin[8];
-	unsigned char mac_value[8];
-	unsigned char logon_id[8];
-	unsigned char usage_domain[2];
-	unsigned char resv3[18];
-	unsigned char svr_namel[2];
-	unsigned char svr_name[8];
-};
-
-struct type6_msg {
-	struct type6_hdr header;
-	struct CPRB	 CPRB;
-};
-
-struct type86_hdr {
-	unsigned char reserved1;
-	unsigned char type;
-	unsigned char format;
-	unsigned char reserved2;
-	unsigned char reply_code;
-	unsigned char reserved3[3];
-};
-
-#define TYPE86_RSP_CODE 0x86
-#define TYPE86_FMT2	0x02
-
-struct type86_fmt2_msg {
-	struct type86_hdr header;
-	unsigned char	  reserved[4];
-	unsigned char	  apfs[4];
-	unsigned int	  count1;
-	unsigned int	  offset1;
-	unsigned int	  count2;
-	unsigned int	  offset2;
-	unsigned int	  count3;
-	unsigned int	  offset3;
-	unsigned int	  count4;
-	unsigned int	  offset4;
-};
-
-static struct type6_hdr static_type6_hdr = {
-	0x00,
-	0x06,
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	0x00000058,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	{0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
-	 0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x50,0x44},
-	{0x00,0x00},
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000
-};
-
-static struct type6_hdr static_type6_hdrX = {
-	0x00,
-	0x06,
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	0x00000058,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	{0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x50,0x44},
-	{0x00,0x00},
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000
-};
-
-static struct CPRB static_cprb = {
-	{0x70,0x00},
-	0x41,
-	0x00,
-	{0x00,0x00,0x00,0x00},
-	0x00,
-	0x00,
-	{0x54,0x32},
-	0x01,
-	0x00,
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00},
-	{0x08,0x00},
-	{0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20}
-};
-
-struct function_and_rules_block {
-	unsigned char function_code[2];
-	unsigned char ulen[2];
-	unsigned char only_rule[8];
-};
-
-static struct function_and_rules_block static_pkd_function_and_rules = {
-	{0x50,0x44},
-	{0x0A,0x00},
-	{'P','K','C','S','-','1','.','2'}
-};
-
-static struct function_and_rules_block static_pke_function_and_rules = {
-	{0x50,0x4B},
-	{0x0A,0x00},
-	{'P','K','C','S','-','1','.','2'}
-};
-
-struct T6_keyBlock_hdr {
-	unsigned char blen[2];
-	unsigned char ulen[2];
-	unsigned char flags[2];
-};
-
-static struct T6_keyBlock_hdr static_T6_keyBlock_hdr = {
-	{0x89,0x01},
-	{0x87,0x01},
-	{0x00}
-};
-
-static struct CPRBX static_cprbx = {
-	0x00DC,
-	0x02,
-	{0x00,0x00,0x00},
-	{0x54,0x32},
-	{0x00,0x00,0x00,0x00},
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	{0x00,0x00,0x00,0x00},
-	0x00000000,
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	0x0000,
-	0x0000,
-	0x00000000,
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	0x00,
-	0x00,
-	0x0000,
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}
-};
-
-static struct function_and_rules_block static_pkd_function_and_rulesX_MCL2 = {
-	{0x50,0x44},
-	{0x00,0x0A},
-	{'P','K','C','S','-','1','.','2'}
-};
-
-static struct function_and_rules_block static_pke_function_and_rulesX_MCL2 = {
-	{0x50,0x4B},
-	{0x00,0x0A},
-	{'Z','E','R','O','-','P','A','D'}
-};
-
-static struct function_and_rules_block static_pkd_function_and_rulesX = {
-	{0x50,0x44},
-	{0x00,0x0A},
-	{'Z','E','R','O','-','P','A','D'}
-};
-
-static struct function_and_rules_block static_pke_function_and_rulesX = {
-	{0x50,0x4B},
-	{0x00,0x0A},
-	{'M','R','P',' ',' ',' ',' ',' '}
-};
-
-static unsigned char static_PKE_function_code[2] = {0x50, 0x4B};
-
-struct T6_keyBlock_hdrX {
-	unsigned short blen;
-	unsigned short ulen;
-	unsigned char flags[2];
-};
-
-static unsigned char static_pad[256] = {
-0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57,
-0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39,
-0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D,
-0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F,
-0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45,
-0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F,
-0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D,
-0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9,
-0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B,
-0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD,
-0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1,
-0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23,
-0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43,
-0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F,
-0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD,
-0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09
-};
-
-static struct cca_private_ext_ME static_pvt_me_key = {
-	{
-		0x1E,
-		0x00,
-		0x0183,
-		{0x00,0x00,0x00,0x00}
-	},
-
-	{
-		0x02,
-		0x00,
-		0x016C,
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00},
-		0x00,
-		0x00,
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00},
-		{0x80,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}
-	},
-
-	{
-		0x04,
-		0x00,
-		0x000F,
-		{0x00,0x00},
-		0x0003,
-		0x0000,
-		0x0000,
-		{0x01,0x00,0x01}
-	}
-};
-
-static struct cca_public_key static_public_key = {
-	{
-		0x1E,
-		0x00,
-		0x0000,
-		{0x00,0x00,0x00,0x00}
-	},
-
-	{
-		0x04,
-		0x00,
-		0x0000,
-		{0x00,0x00},
-		0x0000,
-		0x0000,
-		0x0000,
-		{0x01,0x00,0x01}
-	}
-};
-
-#define FIXED_TYPE6_ME_LEN 0x0000025F
-
-#define FIXED_TYPE6_ME_EN_LEN 0x000000F0
-
-#define FIXED_TYPE6_ME_LENX 0x000002CB
-
-#define FIXED_TYPE6_ME_EN_LENX 0x0000015C
-
-static struct cca_public_sec static_cca_pub_sec = {
-	0x04,
-	0x00,
-	0x000f,
-	{0x00,0x00},
-	0x0003,
-	0x0000,
-	0x0000,
-	{0x01,0x00,0x01}
-};
-
-#define FIXED_TYPE6_CR_LEN 0x00000177
-
-#define FIXED_TYPE6_CR_LENX 0x000001E3
-
-#define MAX_RESPONSE_SIZE 0x00000710
-
-#define MAX_RESPONSEX_SIZE 0x0000077C
-
-#define RESPONSE_CPRB_SIZE  0x000006B8
-#define RESPONSE_CPRBX_SIZE 0x00000724
-
-struct type50_hdr {
-	u8    reserved1;
-	u8    msg_type_code;
-	u16   msg_len;
-	u8    reserved2;
-	u8    ignored;
-	u16   reserved3;
-};
-
-#define TYPE50_TYPE_CODE 0x50
-
-#define TYPE50_MEB1_LEN (sizeof(struct type50_meb1_msg))
-#define TYPE50_MEB2_LEN (sizeof(struct type50_meb2_msg))
-#define TYPE50_CRB1_LEN (sizeof(struct type50_crb1_msg))
-#define TYPE50_CRB2_LEN (sizeof(struct type50_crb2_msg))
-
-#define TYPE50_MEB1_FMT 0x0001
-#define TYPE50_MEB2_FMT 0x0002
-#define TYPE50_CRB1_FMT 0x0011
-#define TYPE50_CRB2_FMT 0x0012
-
-struct type50_meb1_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			exponent[128];
-	u8			modulus[128];
-	u8			message[128];
-};
-
-struct type50_meb2_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			exponent[256];
-	u8			modulus[256];
-	u8			message[256];
-};
-
-struct type50_crb1_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			p[64];
-	u8			q[64];
-	u8			dp[64];
-	u8			dq[64];
-	u8			u[64];
-	u8			message[128];
-};
-
-struct type50_crb2_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			p[128];
-	u8			q[128];
-	u8			dp[128];
-	u8			dq[128];
-	u8			u[128];
-	u8			message[256];
-};
-
-union type50_msg {
-	struct type50_meb1_msg meb1;
-	struct type50_meb2_msg meb2;
-	struct type50_crb1_msg crb1;
-	struct type50_crb2_msg crb2;
-};
-
-struct type80_hdr {
-	u8	reserved1;
-	u8	type;
-	u16	len;
-	u8	code;
-	u8	reserved2[3];
-	u8	reserved3[8];
-};
-
-#define TYPE80_RSP_CODE 0x80
-
-struct error_hdr {
-	unsigned char reserved1;
-	unsigned char type;
-	unsigned char reserved2[2];
-	unsigned char reply_code;
-	unsigned char reserved3[3];
-};
-
-#define TYPE82_RSP_CODE 0x82
-#define TYPE88_RSP_CODE 0x88
-
-#define REP82_ERROR_MACHINE_FAILURE  0x10
-#define REP82_ERROR_PREEMPT_FAILURE  0x12
-#define REP82_ERROR_CHECKPT_FAILURE  0x14
-#define REP82_ERROR_MESSAGE_TYPE     0x20
-#define REP82_ERROR_INVALID_COMM_CD  0x21
-#define REP82_ERROR_INVALID_MSG_LEN  0x23
-#define REP82_ERROR_RESERVD_FIELD    0x24
-#define REP82_ERROR_FORMAT_FIELD     0x29
-#define REP82_ERROR_INVALID_COMMAND  0x30
-#define REP82_ERROR_MALFORMED_MSG    0x40
-#define REP82_ERROR_RESERVED_FIELDO  0x50
-#define REP82_ERROR_WORD_ALIGNMENT   0x60
-#define REP82_ERROR_MESSAGE_LENGTH   0x80
-#define REP82_ERROR_OPERAND_INVALID  0x82
-#define REP82_ERROR_OPERAND_SIZE     0x84
-#define REP82_ERROR_EVEN_MOD_IN_OPND 0x85
-#define REP82_ERROR_RESERVED_FIELD   0x88
-#define REP82_ERROR_TRANSPORT_FAIL   0x90
-#define REP82_ERROR_PACKET_TRUNCATED 0xA0
-#define REP82_ERROR_ZERO_BUFFER_LEN  0xB0
-
-#define REP88_ERROR_MODULE_FAILURE   0x10
-#define REP88_ERROR_MODULE_TIMEOUT   0x11
-#define REP88_ERROR_MODULE_NOTINIT   0x13
-#define REP88_ERROR_MODULE_NOTAVAIL  0x14
-#define REP88_ERROR_MODULE_DISABLED  0x15
-#define REP88_ERROR_MODULE_IN_DIAGN  0x17
-#define REP88_ERROR_FASTPATH_DISABLD 0x19
-#define REP88_ERROR_MESSAGE_TYPE     0x20
-#define REP88_ERROR_MESSAGE_MALFORMD 0x22
-#define REP88_ERROR_MESSAGE_LENGTH   0x23
-#define REP88_ERROR_RESERVED_FIELD   0x24
-#define REP88_ERROR_KEY_TYPE         0x34
-#define REP88_ERROR_INVALID_KEY      0x82
-#define REP88_ERROR_OPERAND          0x84
-#define REP88_ERROR_OPERAND_EVEN_MOD 0x85
-
-#define CALLER_HEADER 12
-
-static inline int
-testq(int q_nr, int *q_depth, int *dev_type, struct ap_status_word *stat)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	llgfr	0,%4		\n"
-	 "	slgr	1,1		\n"
-	 "	lgr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "	lgr	%3,2		\n"
-	 "	srl	%3,24		\n"
-	 "	sll	2,24		\n"
-	 "	srl	2,24		\n"
-	 "	lgr	%2,2		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h5		\n"
-	 "	jg	2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	8		\n"
-	 "	.quad	0b,3b		\n"
-	 "	.quad	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat),"=d" (*q_depth), "=d" (*dev_type)
-	 :"d" (q_nr), "K" (DEV_TSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#else
-	("	lr	0,%4		\n"
-	 "	slr	1,1		\n"
-	 "	lr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "	lr	%3,2		\n"
-	 "	srl	%3,24		\n"
-	 "	sll	2,24		\n"
-	 "	srl	2,24		\n"
-	 "	lr	%2,2		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h5		\n"
-	 "	bras	1,4f		\n"
-	 "	.long	2b		\n"
-	 "4:				\n"
-	 "	l	1,0(1)		\n"
-	 "	br	1		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	4		\n"
-	 "	.long	0b,3b		\n"
-	 "	.long	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat),"=d" (*q_depth), "=d" (*dev_type)
-	 :"d" (q_nr), "K" (DEV_TSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#endif
-	return ccode;
-}
-
-static inline int
-resetq(int q_nr, struct ap_status_word *stat_p)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	llgfr	0,%2		\n"
-	 "	lghi	1,1		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slgr	1,1		\n"
-	 "	lgr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h3		\n"
-	 "	jg	2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	8		\n"
-	 "	.quad	0b,3b		\n"
-	 "	.quad	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat_p)
-	 :"d" (q_nr), "K" (DEV_RSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#else
-	("	lr	0,%2		\n"
-	 "	lhi	1,1		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slr	1,1		\n"
-	 "	lr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h3		\n"
-	 "	bras	1,4f		\n"
-	 "	.long	2b		\n"
-	 "4:				\n"
-	 "	l	1,0(1)		\n"
-	 "	br	1		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	4		\n"
-	 "	.long	0b,3b		\n"
-	 "	.long	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat_p)
-	 :"d" (q_nr), "K" (DEV_RSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#endif
-	return ccode;
-}
-
-static inline int
-sen(int msg_len, unsigned char *msg_ext, struct ap_status_word *stat)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	lgr	6,%3		\n"
-	 "	llgfr	7,%2		\n"
-	 "	llgt	0,0(6)		\n"
-	 "	lghi	1,64		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	la	6,4(6)		\n"
-	 "	llgt	2,0(6)		\n"
-	 "	llgt	3,4(6)		\n"
-	 "	la	6,8(6)		\n"
-	 "	slr	1,1		\n"
-	 "0:	.long	0xb2ad0026	\n"
-	 "1:	brc	2,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h4		\n"
-	 "	jg	2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	8		\n"
-	 "	.quad	0b,3b		\n"
-	 "	.quad	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat)
-	 :"d" (msg_len),"a" (msg_ext), "K" (DEV_SEN_EXCEPTION)
-	 :"cc","0","1","2","3","6","7","memory");
-#else
-	("	lr	6,%3		\n"
-	 "	lr	7,%2		\n"
-	 "	l	0,0(6)		\n"
-	 "	lhi	1,64		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	la	6,4(6)		\n"
-	 "	l	2,0(6)		\n"
-	 "	l	3,4(6)		\n"
-	 "	la	6,8(6)		\n"
-	 "	slr	1,1		\n"
-	 "0:	.long	0xb2ad0026	\n"
-	 "1:	brc	2,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h4		\n"
-	 "	bras	1,4f		\n"
-	 "	.long	2b		\n"
-	 "4:				\n"
-	 "	l	1,0(1)		\n"
-	 "	br	1		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	4		\n"
-	 "	.long	0b,3b		\n"
-	 "	.long	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat)
-	 :"d" (msg_len),"a" (msg_ext), "K" (DEV_SEN_EXCEPTION)
-	 :"cc","0","1","2","3","6","7","memory");
-#endif
-	return ccode;
-}
-
-static inline int
-rec(int q_nr, int buff_l, unsigned char *rsp, unsigned char *id,
-    struct ap_status_word *st)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	llgfr	0,%2		\n"
-	 "	lgr	3,%4		\n"
-	 "	lgr	6,%3		\n"
-	 "	llgfr	7,%5		\n"
-	 "	lghi	1,128		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slgr	1,1		\n"
-	 "	lgr	2,1		\n"
-	 "	lgr	4,1		\n"
-	 "	lgr	5,1		\n"
-	 "0:	.long	0xb2ae0046	\n"
-	 "1:	brc	2,0b		\n"
-	 "	brc	4,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "	st	4,0(3)		\n"
-	 "	st	5,4(3)		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi   %0,%h6		\n"
-	 "	jg    2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "   .align	8		\n"
-	 "   .quad	0b,3b		\n"
-	 "   .quad	1b,3b		\n"
-	 ".previous"
-	 :"=d"(ccode),"=d"(*st)
-	 :"d" (q_nr), "d" (rsp), "d" (id), "d" (buff_l), "K" (DEV_REC_EXCEPTION)
-	 :"cc","0","1","2","3","4","5","6","7","memory");
-#else
-	("	lr	0,%2		\n"
-	 "	lr	3,%4		\n"
-	 "	lr	6,%3		\n"
-	 "	lr	7,%5		\n"
-	 "	lhi	1,128		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slr	1,1		\n"
-	 "	lr	2,1		\n"
-	 "	lr	4,1		\n"
-	 "	lr	5,1		\n"
-	 "0:	.long	0xb2ae0046	\n"
-	 "1:	brc	2,0b		\n"
-	 "	brc	4,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "	st	4,0(3)		\n"
-	 "	st	5,4(3)		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi   %0,%h6		\n"
-	 "	bras  1,4f		\n"
-	 "	.long 2b		\n"
-	 "4:				\n"
-	 "	l     1,0(1)		\n"
-	 "	br    1			\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "   .align	4		\n"
-	 "   .long	0b,3b		\n"
-	 "   .long	1b,3b		\n"
-	 ".previous"
-	 :"=d"(ccode),"=d"(*st)
-	 :"d" (q_nr), "d" (rsp), "d" (id), "d" (buff_l), "K" (DEV_REC_EXCEPTION)
-	 :"cc","0","1","2","3","4","5","6","7","memory");
-#endif
-	return ccode;
-}
-
-static inline void
-itoLe2(int *i_p, unsigned char *lechars)
-{
-	*lechars       = *((unsigned char *) i_p + sizeof(int) - 1);
-	*(lechars + 1) = *((unsigned char *) i_p + sizeof(int) - 2);
-}
-
-static inline void
-le2toI(unsigned char *lechars, int *i_p)
-{
-	unsigned char *ic_p;
-	*i_p = 0;
-	ic_p = (unsigned char *) i_p;
-	*(ic_p + 2) = *(lechars + 1);
-	*(ic_p + 3) = *(lechars);
-}
-
-static inline int
-is_empty(unsigned char *ptr, int len)
-{
-	return !memcmp(ptr, (unsigned char *) &static_pvt_me_key+60, len);
-}
-
-enum hdstat
-query_online(int deviceNr, int cdx, int resetNr, int *q_depth, int *dev_type)
-{
-	int q_nr, i, t_depth, t_dev_type;
-	enum devstat ccode;
-	struct ap_status_word stat_word;
-	enum hdstat stat;
-	int break_out;
-
-	q_nr = (deviceNr << SKIP_BITL) + cdx;
-	stat = HD_BUSY;
-	ccode = testq(q_nr, &t_depth, &t_dev_type, &stat_word);
-	PDEBUG("ccode %d response_code %02X\n", ccode, stat_word.response_code);
-	break_out = 0;
-	for (i = 0; i < resetNr; i++) {
-		if (ccode > 3) {
-			PRINTKC("Exception testing device %d\n", i);
-			return HD_TSQ_EXCEPTION;
-		}
-		switch (ccode) {
-		case 0:
-			PDEBUG("t_dev_type %d\n", t_dev_type);
-			break_out = 1;
-			stat = HD_ONLINE;
-			*q_depth = t_depth + 1;
-			switch (t_dev_type) {
-			case PCICA_HW:
-				*dev_type = PCICA;
-				break;
-			case PCICC_HW:
-				*dev_type = PCICC;
-				break;
-			case PCIXCC_HW:
-				*dev_type = PCIXCC_UNK;
-				break;
-			case CEX2C_HW:
-				*dev_type = CEX2C;
-				break;
-			case CEX2A_HW:
-				*dev_type = CEX2A;
-				break;
-			default:
-				*dev_type = NILDEV;
-				break;
-			}
-			PDEBUG("available device %d: Q depth = %d, dev "
-			       "type = %d, stat = %02X%02X%02X%02X\n",
-			       deviceNr, *q_depth, *dev_type,
-			       stat_word.q_stat_flags,
-			       stat_word.response_code,
-			       stat_word.reserved[0],
-			       stat_word.reserved[1]);
-			break;
-		case 3:
-			switch (stat_word.response_code) {
-			case AP_RESPONSE_NORMAL:
-				stat = HD_ONLINE;
-				break_out = 1;
-				*q_depth = t_depth + 1;
-				*dev_type = t_dev_type;
-				PDEBUG("cc3, available device "
-				       "%d: Q depth = %d, dev "
-				       "type = %d, stat = "
-				       "%02X%02X%02X%02X\n",
-				       deviceNr, *q_depth,
-				       *dev_type,
-				       stat_word.q_stat_flags,
-				       stat_word.response_code,
-				       stat_word.reserved[0],
-				       stat_word.reserved[1]);
-				break;
-			case AP_RESPONSE_Q_NOT_AVAIL:
-				stat = HD_NOT_THERE;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_RESET_IN_PROGRESS:
-				PDEBUG("device %d in reset\n",
-				       deviceNr);
-				break;
-			case AP_RESPONSE_DECONFIGURED:
-				stat = HD_DECONFIGURED;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_CHECKSTOPPED:
-				stat = HD_CHECKSTOPPED;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_BUSY:
-				PDEBUG("device %d busy\n",
-				       deviceNr);
-				break;
-			default:
-				break;
-			}
-			break;
-		default:
-			stat = HD_NOT_THERE;
-			break_out = 1;
-			break;
-		}
-		if (break_out)
-			break;
-
-		udelay(5);
-
-		ccode = testq(q_nr, &t_depth, &t_dev_type, &stat_word);
-	}
-	return stat;
-}
-
-enum devstat
-reset_device(int deviceNr, int cdx, int resetNr)
-{
-	int q_nr, ccode = 0, dummy_qdepth, dummy_devType, i;
-	struct ap_status_word stat_word;
-	enum devstat stat;
-	int break_out;
-
-	q_nr = (deviceNr << SKIP_BITL) + cdx;
-	stat = DEV_GONE;
-	ccode = resetq(q_nr, &stat_word);
-	if (ccode > 3)
-		return DEV_RSQ_EXCEPTION;
-
-	break_out = 0;
-	for (i = 0; i < resetNr; i++) {
-		switch (ccode) {
-		case 0:
-			stat = DEV_ONLINE;
-			if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY)
-				break_out = 1;
-			break;
-		case 3:
-			switch (stat_word.response_code) {
-			case AP_RESPONSE_NORMAL:
-				stat = DEV_ONLINE;
-				if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY)
-					break_out = 1;
-				break;
-			case AP_RESPONSE_Q_NOT_AVAIL:
-			case AP_RESPONSE_DECONFIGURED:
-			case AP_RESPONSE_CHECKSTOPPED:
-				stat = DEV_GONE;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_RESET_IN_PROGRESS:
-			case AP_RESPONSE_BUSY:
-			default:
-				break;
-			}
-			break;
-		default:
-			stat = DEV_GONE;
-			break_out = 1;
-			break;
-		}
-		if (break_out == 1)
-			break;
-		udelay(5);
-
-		ccode = testq(q_nr, &dummy_qdepth, &dummy_devType, &stat_word);
-		if (ccode > 3) {
-			stat = DEV_TSQ_EXCEPTION;
-			break;
-		}
-	}
-	PDEBUG("Number of testq's needed for reset: %d\n", i);
-
-	if (i >= resetNr) {
-	  stat = DEV_GONE;
-	}
-
-	return stat;
-}
-
-#ifdef DEBUG_HYDRA_MSGS
-static inline void
-print_buffer(unsigned char *buffer, int bufflen)
-{
-	int i;
-	for (i = 0; i < bufflen; i += 16) {
-		PRINTK("%04X: %02X%02X%02X%02X %02X%02X%02X%02X "
-		       "%02X%02X%02X%02X %02X%02X%02X%02X\n", i,
-		       buffer[i+0], buffer[i+1], buffer[i+2], buffer[i+3],
-		       buffer[i+4], buffer[i+5], buffer[i+6], buffer[i+7],
-		       buffer[i+8], buffer[i+9], buffer[i+10], buffer[i+11],
-		       buffer[i+12], buffer[i+13], buffer[i+14], buffer[i+15]);
-	}
-}
-#endif
-
-enum devstat
-send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext)
-{
-	struct ap_status_word stat_word;
-	enum devstat stat;
-	int ccode;
-	u32 *q_nr_p = (u32 *)msg_ext;
-
-	*q_nr_p = (dev_nr << SKIP_BITL) + cdx;
-	PDEBUG("msg_len passed to sen: %d\n", msg_len);
-	PDEBUG("q number passed to sen: %02x%02x%02x%02x\n",
-	       msg_ext[0], msg_ext[1], msg_ext[2], msg_ext[3]);
-	stat = DEV_GONE;
-
-#ifdef DEBUG_HYDRA_MSGS
-	PRINTK("Request header: %02X%02X%02X%02X %02X%02X%02X%02X "
-	       "%02X%02X%02X%02X\n",
-	       msg_ext[0], msg_ext[1], msg_ext[2], msg_ext[3],
-	       msg_ext[4], msg_ext[5], msg_ext[6], msg_ext[7],
-	       msg_ext[8], msg_ext[9], msg_ext[10], msg_ext[11]);
-	print_buffer(msg_ext+CALLER_HEADER, msg_len);
-#endif
-
-	ccode = sen(msg_len, msg_ext, &stat_word);
-	if (ccode > 3)
-		return DEV_SEN_EXCEPTION;
-
-	PDEBUG("nq cc: %u, st: %02x%02x%02x%02x\n",
-	       ccode, stat_word.q_stat_flags, stat_word.response_code,
-	       stat_word.reserved[0], stat_word.reserved[1]);
-	switch (ccode) {
-	case 0:
-		stat = DEV_ONLINE;
-		break;
-	case 1:
-		stat = DEV_GONE;
-		break;
-	case 3:
-		switch (stat_word.response_code) {
-		case AP_RESPONSE_NORMAL:
-			stat = DEV_ONLINE;
-			break;
-		case AP_RESPONSE_Q_FULL:
-			stat = DEV_QUEUE_FULL;
-			break;
-		default:
-			stat = DEV_GONE;
-			break;
-		}
-		break;
-	default:
-		stat = DEV_GONE;
-		break;
-	}
-
-	return stat;
-}
-
-enum devstat
-receive_from_AP(int dev_nr, int cdx, int resplen, unsigned char *resp,
-		unsigned char *psmid)
-{
-	int ccode;
-	struct ap_status_word stat_word;
-	enum devstat stat;
-
-	memset(resp, 0x00, 8);
-
-	ccode = rec((dev_nr << SKIP_BITL) + cdx, resplen, resp, psmid,
-		    &stat_word);
-	if (ccode > 3)
-		return DEV_REC_EXCEPTION;
-
-	PDEBUG("dq cc: %u, st: %02x%02x%02x%02x\n",
-	       ccode, stat_word.q_stat_flags, stat_word.response_code,
-	       stat_word.reserved[0], stat_word.reserved[1]);
-
-	stat = DEV_GONE;
-	switch (ccode) {
-	case 0:
-		stat = DEV_ONLINE;
-#ifdef DEBUG_HYDRA_MSGS
-		print_buffer(resp, resplen);
-#endif
-		break;
-	case 3:
-		switch (stat_word.response_code) {
-		case AP_RESPONSE_NORMAL:
-			stat = DEV_ONLINE;
-			break;
-		case AP_RESPONSE_NO_PENDING_REPLY:
-			if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY)
-				stat = DEV_EMPTY;
-			else
-				stat = DEV_NO_WORK;
-			break;
-		case AP_RESPONSE_INDEX_TOO_BIG:
-		case AP_RESPONSE_NO_FIRST_PART:
-		case AP_RESPONSE_MESSAGE_TOO_BIG:
-			stat = DEV_BAD_MESSAGE;
-			break;
-		default:
-			break;
-		}
-		break;
-	default:
-		break;
-	}
-
-	return stat;
-}
-
-static inline int
-pad_msg(unsigned char *buffer, int  totalLength, int msgLength)
-{
-	int pad_len;
-
-	for (pad_len = 0; pad_len < (totalLength - msgLength); pad_len++)
-		if (buffer[pad_len] != 0x00)
-			break;
-	pad_len -= 3;
-	if (pad_len < 8)
-		return SEN_PAD_ERROR;
-
-	buffer[0] = 0x00;
-	buffer[1] = 0x02;
-
-	memcpy(buffer+2, static_pad, pad_len);
-
-	buffer[pad_len + 2] = 0x00;
-
-	return 0;
-}
-
-static inline int
-is_common_public_key(unsigned char *key, int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++)
-		if (key[i])
-			break;
-	key += i;
-	len -= i;
-	if (((len == 1) && (key[0] == 3)) ||
-	    ((len == 3) && (key[0] == 1) && (key[1] == 0) && (key[2] == 1)))
-		return 1;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type4MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p,
-			   union type4_msg *z90cMsg_p)
-{
-	int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len;
-	unsigned char *mod_tgt, *exp_tgt, *inp_tgt;
-	union type4_msg *tmp_type4_msg;
-
-	mod_len = icaMex_p->inputdatalength;
-
-	msg_size = ((mod_len <= 128) ? TYPE4_SME_LEN : TYPE4_LME_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, msg_size);
-
-	tmp_type4_msg = (union type4_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type4_msg->sme.header.msg_type_code = TYPE4_TYPE_CODE;
-	tmp_type4_msg->sme.header.request_code = TYPE4_REQU_CODE;
-
-	if (mod_len <= 128) {
-		tmp_type4_msg->sme.header.msg_fmt = TYPE4_SME_FMT;
-		tmp_type4_msg->sme.header.msg_len = TYPE4_SME_LEN;
-		mod_tgt = tmp_type4_msg->sme.modulus;
-		mod_tgt_len = sizeof(tmp_type4_msg->sme.modulus);
-		exp_tgt = tmp_type4_msg->sme.exponent;
-		exp_tgt_len = sizeof(tmp_type4_msg->sme.exponent);
-		inp_tgt = tmp_type4_msg->sme.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->sme.message);
-	} else {
-		tmp_type4_msg->lme.header.msg_fmt = TYPE4_LME_FMT;
-		tmp_type4_msg->lme.header.msg_len = TYPE4_LME_LEN;
-		mod_tgt = tmp_type4_msg->lme.modulus;
-		mod_tgt_len = sizeof(tmp_type4_msg->lme.modulus);
-		exp_tgt = tmp_type4_msg->lme.exponent;
-		exp_tgt_len = sizeof(tmp_type4_msg->lme.exponent);
-		inp_tgt = tmp_type4_msg->lme.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->lme.message);
-	}
-
-	mod_tgt += (mod_tgt_len - mod_len);
-	if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(mod_tgt, mod_len))
-		return SEN_USER_ERROR;
-	exp_tgt += (exp_tgt_len - mod_len);
-	if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(exp_tgt, mod_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = msg_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type4CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p,
-			   int *z90cMsg_l_p, union type4_msg *z90cMsg_p)
-{
-	int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len,
-	    dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len;
-	unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt;
-	union type4_msg *tmp_type4_msg;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = mod_len / 2 + 8;
-
-	tmp_size = ((mod_len <= 128) ? TYPE4_SCR_LEN : TYPE4_LCR_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-
-	tmp_type4_msg = (union type4_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type4_msg->scr.header.msg_type_code = TYPE4_TYPE_CODE;
-	tmp_type4_msg->scr.header.request_code = TYPE4_REQU_CODE;
-	if (mod_len <= 128) {
-		tmp_type4_msg->scr.header.msg_fmt = TYPE4_SCR_FMT;
-		tmp_type4_msg->scr.header.msg_len = TYPE4_SCR_LEN;
-		p_tgt = tmp_type4_msg->scr.p;
-		p_tgt_len = sizeof(tmp_type4_msg->scr.p);
-		q_tgt = tmp_type4_msg->scr.q;
-		q_tgt_len = sizeof(tmp_type4_msg->scr.q);
-		dp_tgt = tmp_type4_msg->scr.dp;
-		dp_tgt_len = sizeof(tmp_type4_msg->scr.dp);
-		dq_tgt = tmp_type4_msg->scr.dq;
-		dq_tgt_len = sizeof(tmp_type4_msg->scr.dq);
-		u_tgt = tmp_type4_msg->scr.u;
-		u_tgt_len = sizeof(tmp_type4_msg->scr.u);
-		inp_tgt = tmp_type4_msg->scr.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->scr.message);
-	} else {
-		tmp_type4_msg->lcr.header.msg_fmt = TYPE4_LCR_FMT;
-		tmp_type4_msg->lcr.header.msg_len = TYPE4_LCR_LEN;
-		p_tgt = tmp_type4_msg->lcr.p;
-		p_tgt_len = sizeof(tmp_type4_msg->lcr.p);
-		q_tgt = tmp_type4_msg->lcr.q;
-		q_tgt_len = sizeof(tmp_type4_msg->lcr.q);
-		dp_tgt = tmp_type4_msg->lcr.dp;
-		dp_tgt_len = sizeof(tmp_type4_msg->lcr.dp);
-		dq_tgt = tmp_type4_msg->lcr.dq;
-		dq_tgt_len = sizeof(tmp_type4_msg->lcr.dq);
-		u_tgt = tmp_type4_msg->lcr.u;
-		u_tgt_len = sizeof(tmp_type4_msg->lcr.u);
-		inp_tgt = tmp_type4_msg->lcr.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->lcr.message);
-	}
-
-	p_tgt += (p_tgt_len - long_len);
-	if (copy_from_user(p_tgt, icaMsg_p->np_prime, long_len))
-		return SEN_RELEASED;
-	if (is_empty(p_tgt, long_len))
-		return SEN_USER_ERROR;
-	q_tgt += (q_tgt_len - short_len);
-	if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(q_tgt, short_len))
-		return SEN_USER_ERROR;
-	dp_tgt += (dp_tgt_len - long_len);
-	if (copy_from_user(dp_tgt, icaMsg_p->bp_key, long_len))
-		return SEN_RELEASED;
-	if (is_empty(dp_tgt, long_len))
-		return SEN_USER_ERROR;
-	dq_tgt += (dq_tgt_len - short_len);
-	if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(dq_tgt, short_len))
-		return SEN_USER_ERROR;
-	u_tgt += (u_tgt_len - long_len);
-	if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv, long_len))
-		return SEN_RELEASED;
-	if (is_empty(u_tgt, long_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type6MEX_de_msg(struct ica_rsa_modexpo *icaMsg_p, int cdx,
-			      int *z90cMsg_l_p, struct type6_msg *z90cMsg_p)
-{
-	int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l;
-	unsigned char *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRB *cprb_p;
-	struct cca_private_ext_ME *key_p;
-	static int deprecated_msg_count = 0;
-
-	mod_len = icaMsg_p->inputdatalength;
-	tmp_size = FIXED_TYPE6_ME_LEN + mod_len;
-	total_CPRB_len = tmp_size - sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRB);
-	tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-
-	temp = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(temp, &static_type6_hdr, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)temp;
-	tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4);
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE;
-
-	temp += sizeof(struct type6_hdr);
-	memcpy(temp, &static_cprb, sizeof(struct CPRB));
-	cprb_p = (struct CPRB *) temp;
-	cprb_p->usage_domain[0]= (unsigned char)cdx;
-	itoLe2(&parmBlock_l, cprb_p->req_parml);
-	itoLe2((int *)&(tp6Hdr_p->FromCardLen1), cprb_p->rpl_parml);
-
-	temp += sizeof(struct CPRB);
-	memcpy(temp, &static_pkd_function_and_rules,
-	       sizeof(struct function_and_rules_block));
-
-	temp += sizeof(struct function_and_rules_block);
-	vud_len = 2 + icaMsg_p->inputdatalength;
-	itoLe2(&vud_len, temp);
-
-	temp += 2;
-	if (copy_from_user(temp, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-
-	temp += mod_len;
-	memcpy(temp, &static_T6_keyBlock_hdr, sizeof(struct T6_keyBlock_hdr));
-
-	temp += sizeof(struct T6_keyBlock_hdr);
-	memcpy(temp, &static_pvt_me_key, sizeof(struct cca_private_ext_ME));
-	key_p = (struct cca_private_ext_ME *)temp;
-	temp = key_p->pvtMESec.exponent + sizeof(key_p->pvtMESec.exponent)
-	       - mod_len;
-	if (copy_from_user(temp, icaMsg_p->b_key, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-
-	if (is_common_public_key(temp, mod_len)) {
-		if (deprecated_msg_count < 20) {
-			PRINTK("Common public key used for modex decrypt\n");
-			deprecated_msg_count++;
-			if (deprecated_msg_count == 20)
-				PRINTK("No longer issuing messages about common"
-				       " public key for modex decrypt.\n");
-		}
-		return SEN_NOT_AVAIL;
-	}
-
-	temp = key_p->pvtMESec.modulus + sizeof(key_p->pvtMESec.modulus)
-	       - mod_len;
-	if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-
-	key_p->pubMESec.modulus_bit_len = 8 * mod_len;
-
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type6MEX_en_msg(struct ica_rsa_modexpo *icaMsg_p, int cdx,
-			      int *z90cMsg_l_p, struct type6_msg *z90cMsg_p)
-{
-	int mod_len, vud_len, exp_len, key_len;
-	int pad_len, tmp_size, total_CPRB_len, parmBlock_l, i;
-	unsigned char *temp_exp, *exp_p, *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRB *cprb_p;
-	struct cca_public_key *key_p;
-	struct T6_keyBlock_hdr *keyb_p;
-
-	temp_exp = kmalloc(256, GFP_KERNEL);
-	if (!temp_exp)
-		return EGETBUFF;
-	mod_len = icaMsg_p->inputdatalength;
-	if (copy_from_user(temp_exp, icaMsg_p->b_key, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(temp_exp, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-
-	exp_p = temp_exp;
-	for (i = 0; i < mod_len; i++)
-		if (exp_p[i])
-			break;
-	if (i >= mod_len) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-
-	exp_len = mod_len - i;
-	exp_p += i;
-
-	PDEBUG("exp_len after computation: %08x\n", exp_len);
-	tmp_size = FIXED_TYPE6_ME_EN_LEN + 2 * mod_len + exp_len;
-	total_CPRB_len = tmp_size - sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRB);
-	tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER;
-
-	vud_len = 2 + mod_len;
-	memset(z90cMsg_p, 0, tmp_size);
-
-	temp = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(temp, &static_type6_hdr, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)temp;
-	tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4);
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE;
-	memcpy(tp6Hdr_p->function_code, static_PKE_function_code,
-	       sizeof(static_PKE_function_code));
-	temp += sizeof(struct type6_hdr);
-	memcpy(temp, &static_cprb, sizeof(struct CPRB));
-	cprb_p = (struct CPRB *) temp;
-	cprb_p->usage_domain[0]= (unsigned char)cdx;
-	itoLe2((int *)&(tp6Hdr_p->FromCardLen1), cprb_p->rpl_parml);
-	temp += sizeof(struct CPRB);
-	memcpy(temp, &static_pke_function_and_rules,
-		 sizeof(struct function_and_rules_block));
-	temp += sizeof(struct function_and_rules_block);
-	temp += 2;
-	if (copy_from_user(temp, icaMsg_p->inputdata, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(temp, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	if ((temp[0] != 0x00) || (temp[1] != 0x02)) {
-		kfree(temp_exp);
-		return SEN_NOT_AVAIL;
-	}
-	for (i = 2; i < mod_len; i++)
-		if (temp[i] == 0x00)
-			break;
-	if ((i < 9) || (i > (mod_len - 2))) {
-		kfree(temp_exp);
-		return SEN_NOT_AVAIL;
-	}
-	pad_len = i + 1;
-	vud_len = mod_len - pad_len;
-	memmove(temp, temp+pad_len, vud_len);
-	temp -= 2;
-	vud_len += 2;
-	itoLe2(&vud_len, temp);
-	temp += (vud_len);
-	keyb_p = (struct T6_keyBlock_hdr *)temp;
-	temp += sizeof(struct T6_keyBlock_hdr);
-	memcpy(temp, &static_public_key, sizeof(static_public_key));
-	key_p = (struct cca_public_key *)temp;
-	temp = key_p->pubSec.exponent;
-	memcpy(temp, exp_p, exp_len);
-	kfree(temp_exp);
-	temp += exp_len;
-	if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-	key_p->pubSec.modulus_bit_len = 8 * mod_len;
-	key_p->pubSec.modulus_byte_len = mod_len;
-	key_p->pubSec.exponent_len = exp_len;
-	key_p->pubSec.section_length = CALLER_HEADER + mod_len + exp_len;
-	key_len = key_p->pubSec.section_length + sizeof(struct cca_token_hdr);
-	key_p->pubHdr.token_length = key_len;
-	key_len += 4;
-	itoLe2(&key_len, keyb_p->ulen);
-	key_len += 2;
-	itoLe2(&key_len, keyb_p->blen);
-	parmBlock_l -= pad_len;
-	itoLe2(&parmBlock_l, cprb_p->req_parml);
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type6CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx,
-			   int *z90cMsg_l_p, struct type6_msg *z90cMsg_p)
-{
-	int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l, short_len;
-	int long_len, pad_len, keyPartsLen, tmp_l;
-	unsigned char *tgt_p, *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRB *cprb_p;
-	struct cca_token_hdr *keyHdr_p;
-	struct cca_pvt_ext_CRT_sec *pvtSec_p;
-	struct cca_public_sec *pubSec_p;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = 8 + short_len;
-	keyPartsLen = 3 * long_len + 2 * short_len;
-	pad_len = (8 - (keyPartsLen % 8)) % 8;
-	keyPartsLen += pad_len + mod_len;
-	tmp_size = FIXED_TYPE6_CR_LEN + keyPartsLen + mod_len;
-	total_CPRB_len = tmp_size -  sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRB);
-	vud_len = 2 + mod_len;
-	tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-	tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(tgt_p, &static_type6_hdr, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)tgt_p;
-	tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4);
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE;
-	tgt_p += sizeof(struct type6_hdr);
-	cprb_p = (struct CPRB *) tgt_p;
-	memcpy(tgt_p, &static_cprb, sizeof(struct CPRB));
-	cprb_p->usage_domain[0]= *((unsigned char *)(&(cdx))+3);
-	itoLe2(&parmBlock_l, cprb_p->req_parml);
-	memcpy(cprb_p->rpl_parml, cprb_p->req_parml,
-	       sizeof(cprb_p->req_parml));
-	tgt_p += sizeof(struct CPRB);
-	memcpy(tgt_p, &static_pkd_function_and_rules,
-	       sizeof(struct function_and_rules_block));
-	tgt_p += sizeof(struct function_and_rules_block);
-	itoLe2(&vud_len, tgt_p);
-	tgt_p += 2;
-	if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, mod_len))
-		return SEN_USER_ERROR;
-	tgt_p += mod_len;
-	tmp_l = sizeof(struct T6_keyBlock_hdr) + sizeof(struct cca_token_hdr) +
-		sizeof(struct cca_pvt_ext_CRT_sec) + 0x0F + keyPartsLen;
-	itoLe2(&tmp_l, tgt_p);
-	temp = tgt_p + 2;
-	tmp_l -= 2;
-	itoLe2(&tmp_l, temp);
-	tgt_p += sizeof(struct T6_keyBlock_hdr);
-	keyHdr_p = (struct cca_token_hdr *)tgt_p;
-	keyHdr_p->token_identifier = CCA_TKN_HDR_ID_EXT;
-	tmp_l -= 4;
-	keyHdr_p->token_length = tmp_l;
-	tgt_p += sizeof(struct cca_token_hdr);
-	pvtSec_p = (struct cca_pvt_ext_CRT_sec *)tgt_p;
-	pvtSec_p->section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT;
-	pvtSec_p->section_length =
-		sizeof(struct cca_pvt_ext_CRT_sec) + keyPartsLen;
-	pvtSec_p->key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL;
-	pvtSec_p->key_use_flags[0] = CCA_PVT_USAGE_ALL;
-	pvtSec_p->p_len = long_len;
-	pvtSec_p->q_len = short_len;
-	pvtSec_p->dp_len = long_len;
-	pvtSec_p->dq_len = short_len;
-	pvtSec_p->u_len = long_len;
-	pvtSec_p->mod_len = mod_len;
-	pvtSec_p->pad_len = pad_len;
-	tgt_p += sizeof(struct cca_pvt_ext_CRT_sec);
-	if (copy_from_user(tgt_p, icaMsg_p->np_prime, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bp_key, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->u_mult_inv, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	tgt_p += pad_len;
-	memset(tgt_p, 0xFF, mod_len);
-	tgt_p += mod_len;
-	memcpy(tgt_p, &static_cca_pub_sec, sizeof(struct cca_public_sec));
-	pubSec_p = (struct cca_public_sec *) tgt_p;
-	pubSec_p->modulus_bit_len = 8 * mod_len;
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type6MEX_msgX(struct ica_rsa_modexpo *icaMsg_p, int cdx,
-			    int *z90cMsg_l_p, struct type6_msg *z90cMsg_p,
-			    int dev_type)
-{
-	int mod_len, exp_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l;
-	int key_len, i;
-	unsigned char *temp_exp, *tgt_p, *temp, *exp_p;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRBX *cprbx_p;
-	struct cca_public_key *key_p;
-	struct T6_keyBlock_hdrX *keyb_p;
-
-	temp_exp = kmalloc(256, GFP_KERNEL);
-	if (!temp_exp)
-		return EGETBUFF;
-	mod_len = icaMsg_p->inputdatalength;
-	if (copy_from_user(temp_exp, icaMsg_p->b_key, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(temp_exp, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	exp_p = temp_exp;
-	for (i = 0; i < mod_len; i++)
-		if (exp_p[i])
-			break;
-	if (i >= mod_len) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	exp_len = mod_len - i;
-	exp_p += i;
-	PDEBUG("exp_len after computation: %08x\n", exp_len);
-	tmp_size = FIXED_TYPE6_ME_EN_LENX + 2 * mod_len + exp_len;
-	total_CPRB_len = tmp_size - sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRBX);
-	tmp_size = tmp_size + CALLER_HEADER;
-	vud_len = 2 + mod_len;
-	memset(z90cMsg_p, 0, tmp_size);
-	tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(tgt_p, &static_type6_hdrX, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)tgt_p;
-	tp6Hdr_p->ToCardLen1 = total_CPRB_len;
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRBX_SIZE;
-	memcpy(tp6Hdr_p->function_code, static_PKE_function_code,
-	       sizeof(static_PKE_function_code));
-	tgt_p += sizeof(struct type6_hdr);
-	memcpy(tgt_p, &static_cprbx, sizeof(struct CPRBX));
-	cprbx_p = (struct CPRBX *) tgt_p;
-	cprbx_p->domain = (unsigned short)cdx;
-	cprbx_p->rpl_msgbl = RESPONSE_CPRBX_SIZE;
-	tgt_p += sizeof(struct CPRBX);
-	if (dev_type == PCIXCC_MCL2)
-		memcpy(tgt_p, &static_pke_function_and_rulesX_MCL2,
-		       sizeof(struct function_and_rules_block));
-	else
-		memcpy(tgt_p, &static_pke_function_and_rulesX,
-		       sizeof(struct function_and_rules_block));
-	tgt_p += sizeof(struct function_and_rules_block);
-
-	tgt_p += 2;
-	if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(tgt_p, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	tgt_p -= 2;
-	*((short *)tgt_p) = (short) vud_len;
-	tgt_p += vud_len;
-	keyb_p = (struct T6_keyBlock_hdrX *)tgt_p;
-	tgt_p += sizeof(struct T6_keyBlock_hdrX);
-	memcpy(tgt_p, &static_public_key, sizeof(static_public_key));
-	key_p = (struct cca_public_key *)tgt_p;
-	temp = key_p->pubSec.exponent;
-	memcpy(temp, exp_p, exp_len);
-	kfree(temp_exp);
-	temp += exp_len;
-	if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-	key_p->pubSec.modulus_bit_len = 8 * mod_len;
-	key_p->pubSec.modulus_byte_len = mod_len;
-	key_p->pubSec.exponent_len = exp_len;
-	key_p->pubSec.section_length = CALLER_HEADER + mod_len + exp_len;
-	key_len = key_p->pubSec.section_length + sizeof(struct cca_token_hdr);
-	key_p->pubHdr.token_length = key_len;
-	key_len += 4;
-	keyb_p->ulen = (unsigned short)key_len;
-	key_len += 2;
-	keyb_p->blen = (unsigned short)key_len;
-	cprbx_p->req_parml = parmBlock_l;
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type6CRT_msgX(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx,
-			    int *z90cMsg_l_p, struct type6_msg *z90cMsg_p,
-			    int dev_type)
-{
-	int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l, short_len;
-	int long_len, pad_len, keyPartsLen, tmp_l;
-	unsigned char *tgt_p, *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRBX *cprbx_p;
-	struct cca_token_hdr *keyHdr_p;
-	struct cca_pvt_ext_CRT_sec *pvtSec_p;
-	struct cca_public_sec *pubSec_p;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = 8 + short_len;
-	keyPartsLen = 3 * long_len + 2 * short_len;
-	pad_len = (8 - (keyPartsLen % 8)) % 8;
-	keyPartsLen += pad_len + mod_len;
-	tmp_size = FIXED_TYPE6_CR_LENX + keyPartsLen + mod_len;
-	total_CPRB_len = tmp_size -  sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRBX);
-	vud_len = 2 + mod_len;
-	tmp_size = tmp_size + CALLER_HEADER;
-	memset(z90cMsg_p, 0, tmp_size);
-	tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(tgt_p, &static_type6_hdrX, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)tgt_p;
-	tp6Hdr_p->ToCardLen1 = total_CPRB_len;
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRBX_SIZE;
-	tgt_p += sizeof(struct type6_hdr);
-	cprbx_p = (struct CPRBX *) tgt_p;
-	memcpy(tgt_p, &static_cprbx, sizeof(struct CPRBX));
-	cprbx_p->domain = (unsigned short)cdx;
-	cprbx_p->req_parml = parmBlock_l;
-	cprbx_p->rpl_msgbl = parmBlock_l;
-	tgt_p += sizeof(struct CPRBX);
-	if (dev_type == PCIXCC_MCL2)
-		memcpy(tgt_p, &static_pkd_function_and_rulesX_MCL2,
-		       sizeof(struct function_and_rules_block));
-	else
-		memcpy(tgt_p, &static_pkd_function_and_rulesX,
-		       sizeof(struct function_and_rules_block));
-	tgt_p += sizeof(struct function_and_rules_block);
-	*((short *)tgt_p) = (short) vud_len;
-	tgt_p += 2;
-	if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, mod_len))
-		return SEN_USER_ERROR;
-	tgt_p += mod_len;
-	tmp_l = sizeof(struct T6_keyBlock_hdr) + sizeof(struct cca_token_hdr) +
-		sizeof(struct cca_pvt_ext_CRT_sec) + 0x0F + keyPartsLen;
-	*((short *)tgt_p) = (short) tmp_l;
-	temp = tgt_p + 2;
-	tmp_l -= 2;
-	*((short *)temp) = (short) tmp_l;
-	tgt_p += sizeof(struct T6_keyBlock_hdr);
-	keyHdr_p = (struct cca_token_hdr *)tgt_p;
-	keyHdr_p->token_identifier = CCA_TKN_HDR_ID_EXT;
-	tmp_l -= 4;
-	keyHdr_p->token_length = tmp_l;
-	tgt_p += sizeof(struct cca_token_hdr);
-	pvtSec_p = (struct cca_pvt_ext_CRT_sec *)tgt_p;
-	pvtSec_p->section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT;
-	pvtSec_p->section_length =
-		sizeof(struct cca_pvt_ext_CRT_sec) + keyPartsLen;
-	pvtSec_p->key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL;
-	pvtSec_p->key_use_flags[0] = CCA_PVT_USAGE_ALL;
-	pvtSec_p->p_len = long_len;
-	pvtSec_p->q_len = short_len;
-	pvtSec_p->dp_len = long_len;
-	pvtSec_p->dq_len = short_len;
-	pvtSec_p->u_len = long_len;
-	pvtSec_p->mod_len = mod_len;
-	pvtSec_p->pad_len = pad_len;
-	tgt_p += sizeof(struct cca_pvt_ext_CRT_sec);
-	if (copy_from_user(tgt_p, icaMsg_p->np_prime, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bp_key, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->u_mult_inv, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	tgt_p += pad_len;
-	memset(tgt_p, 0xFF, mod_len);
-	tgt_p += mod_len;
-	memcpy(tgt_p, &static_cca_pub_sec, sizeof(struct cca_public_sec));
-	pubSec_p = (struct cca_public_sec *) tgt_p;
-	pubSec_p->modulus_bit_len = 8 * mod_len;
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type50MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p,
-			    union type50_msg *z90cMsg_p)
-{
-	int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len;
-	unsigned char *mod_tgt, *exp_tgt, *inp_tgt;
-	union type50_msg *tmp_type50_msg;
-
-	mod_len = icaMex_p->inputdatalength;
-
-	msg_size = ((mod_len <= 128) ? TYPE50_MEB1_LEN : TYPE50_MEB2_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, msg_size);
-
-	tmp_type50_msg = (union type50_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type50_msg->meb1.header.msg_type_code = TYPE50_TYPE_CODE;
-
-	if (mod_len <= 128) {
-		tmp_type50_msg->meb1.header.msg_len = TYPE50_MEB1_LEN;
-		tmp_type50_msg->meb1.keyblock_type = TYPE50_MEB1_FMT;
-		mod_tgt = tmp_type50_msg->meb1.modulus;
-		mod_tgt_len = sizeof(tmp_type50_msg->meb1.modulus);
-		exp_tgt = tmp_type50_msg->meb1.exponent;
-		exp_tgt_len = sizeof(tmp_type50_msg->meb1.exponent);
-		inp_tgt = tmp_type50_msg->meb1.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->meb1.message);
-	} else {
-		tmp_type50_msg->meb2.header.msg_len = TYPE50_MEB2_LEN;
-		tmp_type50_msg->meb2.keyblock_type = TYPE50_MEB2_FMT;
-		mod_tgt = tmp_type50_msg->meb2.modulus;
-		mod_tgt_len = sizeof(tmp_type50_msg->meb2.modulus);
-		exp_tgt = tmp_type50_msg->meb2.exponent;
-		exp_tgt_len = sizeof(tmp_type50_msg->meb2.exponent);
-		inp_tgt = tmp_type50_msg->meb2.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->meb2.message);
-	}
-
-	mod_tgt += (mod_tgt_len - mod_len);
-	if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(mod_tgt, mod_len))
-		return SEN_USER_ERROR;
-	exp_tgt += (exp_tgt_len - mod_len);
-	if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(exp_tgt, mod_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = msg_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type50CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p,
-			    int *z90cMsg_l_p, union type50_msg *z90cMsg_p)
-{
-	int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len,
-	    dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len, long_offset;
-	unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt,
-		      temp[8];
-	union type50_msg *tmp_type50_msg;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = mod_len / 2 + 8;
-	long_offset = 0;
-
-	if (long_len > 128) {
-		memset(temp, 0x00, sizeof(temp));
-		if (copy_from_user(temp, icaMsg_p->np_prime, long_len-128))
-			return SEN_RELEASED;
-		if (!is_empty(temp, 8))
-			return SEN_NOT_AVAIL;
-		if (copy_from_user(temp, icaMsg_p->bp_key, long_len-128))
-			return SEN_RELEASED;
-		if (!is_empty(temp, 8))
-			return SEN_NOT_AVAIL;
-		if (copy_from_user(temp, icaMsg_p->u_mult_inv, long_len-128))
-			return SEN_RELEASED;
-		if (!is_empty(temp, 8))
-			return SEN_NOT_AVAIL;
-		long_offset = long_len - 128;
-		long_len = 128;
-	}
-
-	tmp_size = ((long_len <= 64) ? TYPE50_CRB1_LEN : TYPE50_CRB2_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-
-	tmp_type50_msg = (union type50_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type50_msg->crb1.header.msg_type_code = TYPE50_TYPE_CODE;
-	if (long_len <= 64) {
-		tmp_type50_msg->crb1.header.msg_len = TYPE50_CRB1_LEN;
-		tmp_type50_msg->crb1.keyblock_type = TYPE50_CRB1_FMT;
-		p_tgt = tmp_type50_msg->crb1.p;
-		p_tgt_len = sizeof(tmp_type50_msg->crb1.p);
-		q_tgt = tmp_type50_msg->crb1.q;
-		q_tgt_len = sizeof(tmp_type50_msg->crb1.q);
-		dp_tgt = tmp_type50_msg->crb1.dp;
-		dp_tgt_len = sizeof(tmp_type50_msg->crb1.dp);
-		dq_tgt = tmp_type50_msg->crb1.dq;
-		dq_tgt_len = sizeof(tmp_type50_msg->crb1.dq);
-		u_tgt = tmp_type50_msg->crb1.u;
-		u_tgt_len = sizeof(tmp_type50_msg->crb1.u);
-		inp_tgt = tmp_type50_msg->crb1.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->crb1.message);
-	} else {
-		tmp_type50_msg->crb2.header.msg_len = TYPE50_CRB2_LEN;
-		tmp_type50_msg->crb2.keyblock_type = TYPE50_CRB2_FMT;
-		p_tgt = tmp_type50_msg->crb2.p;
-		p_tgt_len = sizeof(tmp_type50_msg->crb2.p);
-		q_tgt = tmp_type50_msg->crb2.q;
-		q_tgt_len = sizeof(tmp_type50_msg->crb2.q);
-		dp_tgt = tmp_type50_msg->crb2.dp;
-		dp_tgt_len = sizeof(tmp_type50_msg->crb2.dp);
-		dq_tgt = tmp_type50_msg->crb2.dq;
-		dq_tgt_len = sizeof(tmp_type50_msg->crb2.dq);
-		u_tgt = tmp_type50_msg->crb2.u;
-		u_tgt_len = sizeof(tmp_type50_msg->crb2.u);
-		inp_tgt = tmp_type50_msg->crb2.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->crb2.message);
-	}
-
-	p_tgt += (p_tgt_len - long_len);
-	if (copy_from_user(p_tgt, icaMsg_p->np_prime + long_offset, long_len))
-		return SEN_RELEASED;
-	if (is_empty(p_tgt, long_len))
-		return SEN_USER_ERROR;
-	q_tgt += (q_tgt_len - short_len);
-	if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(q_tgt, short_len))
-		return SEN_USER_ERROR;
-	dp_tgt += (dp_tgt_len - long_len);
-	if (copy_from_user(dp_tgt, icaMsg_p->bp_key + long_offset, long_len))
-		return SEN_RELEASED;
-	if (is_empty(dp_tgt, long_len))
-		return SEN_USER_ERROR;
-	dq_tgt += (dq_tgt_len - short_len);
-	if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(dq_tgt, short_len))
-		return SEN_USER_ERROR;
-	u_tgt += (u_tgt_len - long_len);
-	if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv + long_offset, long_len))
-		return SEN_RELEASED;
-	if (is_empty(u_tgt, long_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-int
-convert_request(unsigned char *buffer, int func, unsigned short function,
-		int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p)
-{
-	if (dev_type == PCICA) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type4CRT_msg(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				msg_l_p, (union type4_msg *) msg_p);
-		else
-			return ICAMEX_msg_to_type4MEX_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				msg_l_p, (union type4_msg *) msg_p);
-	}
-	if (dev_type == PCICC) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type6CRT_msg(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				cdx, msg_l_p, (struct type6_msg *)msg_p);
-		if (function == PCI_FUNC_KEY_ENCRYPT)
-			return ICAMEX_msg_to_type6MEX_en_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p);
-		else
-			return ICAMEX_msg_to_type6MEX_de_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p);
-	}
-	if ((dev_type == PCIXCC_MCL2) ||
-	    (dev_type == PCIXCC_MCL3) ||
-	    (dev_type == CEX2C)) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type6CRT_msgX(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p,
-				dev_type);
-		else
-			return ICAMEX_msg_to_type6MEX_msgX(
-				(struct ica_rsa_modexpo *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p,
-				dev_type);
-	}
-	if (dev_type == CEX2A) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type50CRT_msg(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				msg_l_p, (union type50_msg *) msg_p);
-		else
-			return ICAMEX_msg_to_type50MEX_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				msg_l_p, (union type50_msg *) msg_p);
-	}
-
-	return 0;
-}
-
-int ext_bitlens_msg_count = 0;
-static inline void
-unset_ext_bitlens(void)
-{
-	if (!ext_bitlens_msg_count) {
-		PRINTK("Unable to use coprocessors for extended bitlengths. "
-		       "Using PCICAs/CEX2As (if present) for extended "
-		       "bitlengths. This is not an error.\n");
-		ext_bitlens_msg_count++;
-	}
-	ext_bitlens = 0;
-}
-
-int
-convert_response(unsigned char *response, unsigned char *buffer,
-		 int *respbufflen_p, unsigned char *resp_buff)
-{
-	struct ica_rsa_modexpo *icaMsg_p = (struct ica_rsa_modexpo *) buffer;
-	struct error_hdr *errh_p = (struct error_hdr *) response;
-	struct type80_hdr *t80h_p = (struct type80_hdr *) response;
-	struct type84_hdr *t84h_p = (struct type84_hdr *) response;
-	struct type86_fmt2_msg *t86m_p =  (struct type86_fmt2_msg *) response;
-	int reply_code, service_rc, service_rs, src_l;
-	unsigned char *src_p, *tgt_p;
-	struct CPRB *cprb_p;
-	struct CPRBX *cprbx_p;
-
-	src_p = 0;
-	reply_code = 0;
-	service_rc = 0;
-	service_rs = 0;
-	src_l = 0;
-	switch (errh_p->type) {
-	case TYPE82_RSP_CODE:
-	case TYPE88_RSP_CODE:
-		reply_code = errh_p->reply_code;
-		src_p = (unsigned char *)errh_p;
-		PRINTK("Hardware error: Type %02X Message Header: "
-		       "%02x%02x%02x%02x%02x%02x%02x%02x\n",
-		       errh_p->type,
-		       src_p[0], src_p[1], src_p[2], src_p[3],
-		       src_p[4], src_p[5], src_p[6], src_p[7]);
-		break;
-	case TYPE80_RSP_CODE:
-		src_l = icaMsg_p->outputdatalength;
-		src_p = response + (int)t80h_p->len - src_l;
-		break;
-	case TYPE84_RSP_CODE:
-		src_l = icaMsg_p->outputdatalength;
-		src_p = response + (int)t84h_p->len - src_l;
-		break;
-	case TYPE86_RSP_CODE:
-		reply_code = t86m_p->header.reply_code;
-		if (reply_code != 0)
-			break;
-		cprb_p = (struct CPRB *)
-			(response + sizeof(struct type86_fmt2_msg));
-		cprbx_p = (struct CPRBX *) cprb_p;
-		if (cprb_p->cprb_ver_id != 0x02) {
-			le2toI(cprb_p->ccp_rtcode, &service_rc);
-			if (service_rc != 0) {
-				le2toI(cprb_p->ccp_rscode, &service_rs);
-				if ((service_rc == 8) && (service_rs == 66))
-					PDEBUG("Bad block format on PCICC\n");
-				else if ((service_rc == 8) && (service_rs == 65))
-					PDEBUG("Probably an even modulus on "
-					       "PCICC\n");
-				else if ((service_rc == 8) && (service_rs == 770)) {
-					PDEBUG("Invalid key length on PCICC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else if ((service_rc == 8) && (service_rs == 783)) {
-					PDEBUG("Extended bitlengths not enabled"
-					       "on PCICC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else
-					PRINTK("service rc/rs (PCICC): %d/%d\n",
-					       service_rc, service_rs);
-				return REC_OPERAND_INV;
-			}
-			src_p = (unsigned char *)cprb_p + sizeof(struct CPRB);
-			src_p += 4;
-			le2toI(src_p, &src_l);
-			src_l -= 2;
-			src_p += 2;
-		} else {
-			service_rc = (int)cprbx_p->ccp_rtcode;
-			if (service_rc != 0) {
-				service_rs = (int) cprbx_p->ccp_rscode;
-				if ((service_rc == 8) && (service_rs == 66))
-					PDEBUG("Bad block format on PCIXCC\n");
-				else if ((service_rc == 8) && (service_rs == 65))
-					PDEBUG("Probably an even modulus on "
-					       "PCIXCC\n");
-				else if ((service_rc == 8) && (service_rs == 770)) {
-					PDEBUG("Invalid key length on PCIXCC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else if ((service_rc == 8) && (service_rs == 783)) {
-					PDEBUG("Extended bitlengths not enabled"
-					       "on PCIXCC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else
-					PRINTK("service rc/rs (PCIXCC): %d/%d\n",
-					       service_rc, service_rs);
-				return REC_OPERAND_INV;
-			}
-			src_p = (unsigned char *)
-				cprbx_p + sizeof(struct CPRBX);
-			src_p += 4;
-			src_l = (int)(*((short *) src_p));
-			src_l -= 2;
-			src_p += 2;
-		}
-		break;
-	default:
-		src_p = (unsigned char *)errh_p;
-		PRINTK("Unrecognized Message Header: "
-		       "%02x%02x%02x%02x%02x%02x%02x%02x\n",
-		       src_p[0], src_p[1], src_p[2], src_p[3],
-		       src_p[4], src_p[5], src_p[6], src_p[7]);
-		return REC_BAD_MESSAGE;
-	}
-
-	if (reply_code)
-		switch (reply_code) {
-		case REP82_ERROR_MACHINE_FAILURE:
-			if (errh_p->type == TYPE82_RSP_CODE)
-				PRINTKW("Machine check failure\n");
-			else
-				PRINTKW("Module failure\n");
-			return REC_HARDWAR_ERR;
-		case REP82_ERROR_OPERAND_INVALID:
-			return REC_OPERAND_INV;
-		case REP88_ERROR_MESSAGE_MALFORMD:
-			PRINTKW("Message malformed\n");
-			return REC_OPERAND_INV;
-		case REP82_ERROR_OPERAND_SIZE:
-			return REC_OPERAND_SIZE;
-		case REP82_ERROR_EVEN_MOD_IN_OPND:
-			return REC_EVEN_MOD;
-		case REP82_ERROR_MESSAGE_TYPE:
-			return WRONG_DEVICE_TYPE;
-		case REP82_ERROR_TRANSPORT_FAIL:
-			PRINTKW("Transport failed (APFS = %02X%02X%02X%02X)\n",
-				t86m_p->apfs[0], t86m_p->apfs[1],
-				t86m_p->apfs[2], t86m_p->apfs[3]);
-			return REC_HARDWAR_ERR;
-		default:
-			PRINTKW("reply code = %d\n", reply_code);
-			return REC_HARDWAR_ERR;
-		}
-
-	if (service_rc != 0)
-		return REC_OPERAND_INV;
-
-	if ((src_l > icaMsg_p->outputdatalength) ||
-	    (src_l > RESPBUFFSIZE) ||
-	    (src_l <= 0))
-		return REC_OPERAND_SIZE;
-
-	PDEBUG("Length returned = %d\n", src_l);
-	tgt_p = resp_buff + icaMsg_p->outputdatalength - src_l;
-	memcpy(tgt_p, src_p, src_l);
-	if ((errh_p->type == TYPE86_RSP_CODE) && (resp_buff < tgt_p)) {
-		memset(resp_buff, 0, icaMsg_p->outputdatalength - src_l);
-		if (pad_msg(resp_buff, icaMsg_p->outputdatalength, src_l))
-			return REC_INVALID_PAD;
-	}
-	*respbufflen_p = icaMsg_p->outputdatalength;
-	if (*respbufflen_p == 0)
-		PRINTK("Zero *respbufflen_p\n");
-
-	return 0;
-}
-
diff --git a/drivers/s390/crypto/z90main.c b/drivers/s390/crypto/z90main.c
deleted file mode 100644
index b2f20ab8431a0..0000000000000
--- a/drivers/s390/crypto/z90main.c
+++ /dev/null
@@ -1,3379 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90main.c
- *
- *  z90crypt 1.3.3
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <asm/uaccess.h>       // copy_(from|to)_user
-#include <linux/compat.h>
-#include <linux/compiler.h>
-#include <linux/delay.h>       // mdelay
-#include <linux/init.h>
-#include <linux/interrupt.h>   // for tasklets
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/proc_fs.h>
-#include <linux/syscalls.h>
-#include "z90crypt.h"
-#include "z90common.h"
-
-/**
- * Defaults that may be modified.
- */
-
-/**
- * You can specify a different minor at compile time.
- */
-#ifndef Z90CRYPT_MINOR
-#define Z90CRYPT_MINOR	MISC_DYNAMIC_MINOR
-#endif
-
-/**
- * You can specify a different domain at compile time or on the insmod
- * command line.
- */
-#ifndef DOMAIN_INDEX
-#define DOMAIN_INDEX	-1
-#endif
-
-/**
- * This is the name under which the device is registered in /proc/modules.
- */
-#define REG_NAME	"z90crypt"
-
-/**
- * Cleanup should run every CLEANUPTIME seconds and should clean up requests
- * older than CLEANUPTIME seconds in the past.
- */
-#ifndef CLEANUPTIME
-#define CLEANUPTIME 15
-#endif
-
-/**
- * Config should run every CONFIGTIME seconds
- */
-#ifndef CONFIGTIME
-#define CONFIGTIME 30
-#endif
-
-/**
- * The first execution of the config task should take place
- * immediately after initialization
- */
-#ifndef INITIAL_CONFIGTIME
-#define INITIAL_CONFIGTIME 1
-#endif
-
-/**
- * Reader should run every READERTIME milliseconds
- * With the 100Hz patch for s390, z90crypt can lock the system solid while
- * under heavy load. We'll try to avoid that.
- */
-#ifndef READERTIME
-#if HZ > 1000
-#define READERTIME 2
-#else
-#define READERTIME 10
-#endif
-#endif
-
-/**
- * turn long device array index into device pointer
- */
-#define LONG2DEVPTR(ndx) (z90crypt.device_p[(ndx)])
-
-/**
- * turn short device array index into long device array index
- */
-#define SHRT2LONG(ndx) (z90crypt.overall_device_x.device_index[(ndx)])
-
-/**
- * turn short device array index into device pointer
- */
-#define SHRT2DEVPTR(ndx) LONG2DEVPTR(SHRT2LONG(ndx))
-
-/**
- * Status for a work-element
- */
-#define STAT_DEFAULT	0x00 // request has not been processed
-
-#define STAT_ROUTED	0x80 // bit 7: requests get routed to specific device
-			     //	       else, device is determined each write
-#define STAT_FAILED	0x40 // bit 6: this bit is set if the request failed
-			     //	       before being sent to the hardware.
-#define STAT_WRITTEN	0x30 // bits 5-4: work to be done, not sent to device
-//			0x20 // UNUSED state
-#define STAT_READPEND	0x10 // bits 5-4: work done, we're returning data now
-#define STAT_NOWORK	0x00 // bits off: no work on any queue
-#define STAT_RDWRMASK	0x30 // mask for bits 5-4
-
-/**
- * Macros to check the status RDWRMASK
- */
-#define CHK_RDWRMASK(statbyte) ((statbyte) & STAT_RDWRMASK)
-#define SET_RDWRMASK(statbyte, newval) \
-	{(statbyte) &= ~STAT_RDWRMASK; (statbyte) |= newval;}
-
-/**
- * Audit Trail.	 Progress of a Work element
- * audit[0]: Unless noted otherwise, these bits are all set by the process
- */
-#define FP_COPYFROM	0x80 // Caller's buffer has been copied to work element
-#define FP_BUFFREQ	0x40 // Low Level buffer requested
-#define FP_BUFFGOT	0x20 // Low Level buffer obtained
-#define FP_SENT		0x10 // Work element sent to a crypto device
-			     // (may be set by process or by reader task)
-#define FP_PENDING	0x08 // Work element placed on pending queue
-			     // (may be set by process or by reader task)
-#define FP_REQUEST	0x04 // Work element placed on request queue
-#define FP_ASLEEP	0x02 // Work element about to sleep
-#define FP_AWAKE	0x01 // Work element has been awakened
-
-/**
- * audit[1]: These bits are set by the reader task and/or the cleanup task
- */
-#define FP_NOTPENDING	  0x80 // Work element removed from pending queue
-#define FP_AWAKENING	  0x40 // Caller about to be awakened
-#define FP_TIMEDOUT	  0x20 // Caller timed out
-#define FP_RESPSIZESET	  0x10 // Response size copied to work element
-#define FP_RESPADDRCOPIED 0x08 // Response address copied to work element
-#define FP_RESPBUFFCOPIED 0x04 // Response buffer copied to work element
-#define FP_REMREQUEST	  0x02 // Work element removed from request queue
-#define FP_SIGNALED	  0x01 // Work element was awakened by a signal
-
-/**
- * audit[2]: unused
- */
-
-/**
- * state of the file handle in private_data.status
- */
-#define STAT_OPEN 0
-#define STAT_CLOSED 1
-
-/**
- * PID() expands to the process ID of the current process
- */
-#define PID() (current->pid)
-
-/**
- * Selected Constants.	The number of APs and the number of devices
- */
-#ifndef Z90CRYPT_NUM_APS
-#define Z90CRYPT_NUM_APS 64
-#endif
-#ifndef Z90CRYPT_NUM_DEVS
-#define Z90CRYPT_NUM_DEVS Z90CRYPT_NUM_APS
-#endif
-
-/**
- * Buffer size for receiving responses. The maximum Response Size
- * is actually the maximum request size, since in an error condition
- * the request itself may be returned unchanged.
- */
-#define MAX_RESPONSE_SIZE 0x0000077C
-
-/**
- * A count and status-byte mask
- */
-struct status {
-	int	      st_count;		    // # of enabled devices
-	int	      disabled_count;	    // # of disabled devices
-	int	      user_disabled_count;  // # of devices disabled via proc fs
-	unsigned char st_mask[Z90CRYPT_NUM_APS]; // current status mask
-};
-
-/**
- * The array of device indexes is a mechanism for fast indexing into
- * a long (and sparse) array.  For instance, if APs 3, 9 and 47 are
- * installed, z90CDeviceIndex[0] is 3, z90CDeviceIndex[1] is 9, and
- * z90CDeviceIndex[2] is 47.
- */
-struct device_x {
-	int device_index[Z90CRYPT_NUM_DEVS];
-};
-
-/**
- * All devices are arranged in a single array: 64 APs
- */
-struct device {
-	int		 dev_type;	    // PCICA, PCICC, PCIXCC_MCL2,
-					    // PCIXCC_MCL3, CEX2C, CEX2A
-	enum devstat	 dev_stat;	    // current device status
-	int		 dev_self_x;	    // Index in array
-	int		 disabled;	    // Set when device is in error
-	int		 user_disabled;	    // Set when device is disabled by user
-	int		 dev_q_depth;	    // q depth
-	unsigned char *	 dev_resp_p;	    // Response buffer address
-	int		 dev_resp_l;	    // Response Buffer length
-	int		 dev_caller_count;  // Number of callers
-	int		 dev_total_req_cnt; // # requests for device since load
-	struct list_head dev_caller_list;   // List of callers
-};
-
-/**
- * There's a struct status and a struct device_x for each device type.
- */
-struct hdware_block {
-	struct status	hdware_mask;
-	struct status	type_mask[Z90CRYPT_NUM_TYPES];
-	struct device_x type_x_addr[Z90CRYPT_NUM_TYPES];
-	unsigned char	device_type_array[Z90CRYPT_NUM_APS];
-};
-
-/**
- * z90crypt is the topmost data structure in the hierarchy.
- */
-struct z90crypt {
-	int		     max_count;		// Nr of possible crypto devices
-	struct status	     mask;
-	int		     q_depth_array[Z90CRYPT_NUM_DEVS];
-	int		     dev_type_array[Z90CRYPT_NUM_DEVS];
-	struct device_x	     overall_device_x;	// array device indexes
-	struct device *	     device_p[Z90CRYPT_NUM_DEVS];
-	int		     terminating;
-	int		     domain_established;// TRUE:  domain has been found
-	int		     cdx;		// Crypto Domain Index
-	int		     len;		// Length of this data structure
-	struct hdware_block *hdware_info;
-};
-
-/**
- * An array of these structures is pointed to from dev_caller
- * The length of the array depends on the device type. For APs,
- * there are 8.
- *
- * The caller buffer is allocated to the user at OPEN. At WRITE,
- * it contains the request; at READ, the response. The function
- * send_to_crypto_device converts the request to device-dependent
- * form and use the caller's OPEN-allocated buffer for the response.
- *
- * For the contents of caller_dev_dep_req and caller_dev_dep_req_p
- * because that points to it, see the discussion in z90hardware.c.
- * Search for "extended request message block".
- */
-struct caller {
-	int		 caller_buf_l;		 // length of original request
-	unsigned char *	 caller_buf_p;		 // Original request on WRITE
-	int		 caller_dev_dep_req_l;	 // len device dependent request
-	unsigned char *	 caller_dev_dep_req_p;	 // Device dependent form
-	unsigned char	 caller_id[8];		 // caller-supplied message id
-	struct list_head caller_liste;
-	unsigned char	 caller_dev_dep_req[MAX_RESPONSE_SIZE];
-};
-
-/**
- * Function prototypes from z90hardware.c
- */
-enum hdstat query_online(int deviceNr, int cdx, int resetNr, int *q_depth,
-			 int *dev_type);
-enum devstat reset_device(int deviceNr, int cdx, int resetNr);
-enum devstat send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext);
-enum devstat receive_from_AP(int dev_nr, int cdx, int resplen,
-			     unsigned char *resp, unsigned char *psmid);
-int convert_request(unsigned char *buffer, int func, unsigned short function,
-		    int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p);
-int convert_response(unsigned char *response, unsigned char *buffer,
-		     int *respbufflen_p, unsigned char *resp_buff);
-
-/**
- * Low level function prototypes
- */
-static int create_z90crypt(int *cdx_p);
-static int refresh_z90crypt(int *cdx_p);
-static int find_crypto_devices(struct status *deviceMask);
-static int create_crypto_device(int index);
-static int destroy_crypto_device(int index);
-static void destroy_z90crypt(void);
-static int refresh_index_array(struct status *status_str,
-			       struct device_x *index_array);
-static int probe_device_type(struct device *devPtr);
-static int probe_PCIXCC_type(struct device *devPtr);
-
-/**
- * proc fs definitions
- */
-static struct proc_dir_entry *z90crypt_entry;
-
-/**
- * data structures
- */
-
-/**
- * work_element.opener points back to this structure
- */
-struct priv_data {
-	pid_t	opener_pid;
-	unsigned char	status;		// 0: open  1: closed
-};
-
-/**
- * A work element is allocated for each request
- */
-struct work_element {
-	struct priv_data *priv_data;
-	pid_t		  pid;
-	int		  devindex;	  // index of device processing this w_e
-					  // (If request did not specify device,
-					  // -1 until placed onto a queue)
-	int		  devtype;
-	struct list_head  liste;	  // used for requestq and pendingq
-	char		  buffer[128];	  // local copy of user request
-	int		  buff_size;	  // size of the buffer for the request
-	char		  resp_buff[RESPBUFFSIZE];
-	int		  resp_buff_size;
-	char __user *	  resp_addr;	  // address of response in user space
-	unsigned int	  funccode;	  // function code of request
-	wait_queue_head_t waitq;
-	unsigned long	  requestsent;	  // time at which the request was sent
-	atomic_t	  alarmrung;	  // wake-up signal
-	unsigned char	  caller_id[8];	  // pid + counter, for this w_e
-	unsigned char	  status[1];	  // bits to mark status of the request
-	unsigned char	  audit[3];	  // record of work element's progress
-	unsigned char *	  requestptr;	  // address of request buffer
-	int		  retcode;	  // return code of request
-};
-
-/**
- * High level function prototypes
- */
-static int z90crypt_open(struct inode *, struct file *);
-static int z90crypt_release(struct inode *, struct file *);
-static ssize_t z90crypt_read(struct file *, char __user *, size_t, loff_t *);
-static ssize_t z90crypt_write(struct file *, const char __user *,
-							size_t, loff_t *);
-static long z90crypt_unlocked_ioctl(struct file *, unsigned int, unsigned long);
-static long z90crypt_compat_ioctl(struct file *, unsigned int, unsigned long);
-
-static void z90crypt_reader_task(unsigned long);
-static void z90crypt_schedule_reader_task(unsigned long);
-static void z90crypt_config_task(unsigned long);
-static void z90crypt_cleanup_task(unsigned long);
-
-static int z90crypt_status(char *, char **, off_t, int, int *, void *);
-static int z90crypt_status_write(struct file *, const char __user *,
-				 unsigned long, void *);
-
-/**
- * Storage allocated at initialization and used throughout the life of
- * this insmod
- */
-static int domain = DOMAIN_INDEX;
-static struct z90crypt z90crypt;
-static int quiesce_z90crypt;
-static spinlock_t queuespinlock;
-static struct list_head request_list;
-static int requestq_count;
-static struct list_head pending_list;
-static int pendingq_count;
-
-static struct tasklet_struct reader_tasklet;
-static struct timer_list reader_timer;
-static struct timer_list config_timer;
-static struct timer_list cleanup_timer;
-static atomic_t total_open;
-static atomic_t z90crypt_step;
-
-static struct file_operations z90crypt_fops = {
-	.owner		= THIS_MODULE,
-	.read		= z90crypt_read,
-	.write		= z90crypt_write,
-	.unlocked_ioctl	= z90crypt_unlocked_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= z90crypt_compat_ioctl,
-#endif
-	.open		= z90crypt_open,
-	.release	= z90crypt_release
-};
-
-static struct miscdevice z90crypt_misc_device = {
-	.minor	    = Z90CRYPT_MINOR,
-	.name	    = DEV_NAME,
-	.fops	    = &z90crypt_fops,
-};
-
-/**
- * Documentation values.
- */
-MODULE_AUTHOR("zSeries Linux Crypto Team: Robert H. Burroughs, Eric D. Rossman"
-	      "and Jochen Roehrig");
-MODULE_DESCRIPTION("zSeries Linux Cryptographic Coprocessor device driver, "
-		   "Copyright 2001, 2005 IBM Corporation");
-MODULE_LICENSE("GPL");
-module_param(domain, int, 0);
-MODULE_PARM_DESC(domain, "domain index for device");
-
-#ifdef CONFIG_COMPAT
-/**
- * ioctl32 conversion routines
- */
-struct ica_rsa_modexpo_32 { // For 32-bit callers
-	compat_uptr_t	inputdata;
-	unsigned int	inputdatalength;
-	compat_uptr_t	outputdata;
-	unsigned int	outputdatalength;
-	compat_uptr_t	b_key;
-	compat_uptr_t	n_modulus;
-};
-
-static long
-trans_modexpo32(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct ica_rsa_modexpo_32 __user *mex32u = compat_ptr(arg);
-	struct ica_rsa_modexpo_32  mex32k;
-	struct ica_rsa_modexpo __user *mex64;
-	long ret = 0;
-	unsigned int i;
-
-	if (!access_ok(VERIFY_WRITE, mex32u, sizeof(struct ica_rsa_modexpo_32)))
-		return -EFAULT;
-	mex64 = compat_alloc_user_space(sizeof(struct ica_rsa_modexpo));
-	if (!access_ok(VERIFY_WRITE, mex64, sizeof(struct ica_rsa_modexpo)))
-		return -EFAULT;
-	if (copy_from_user(&mex32k, mex32u, sizeof(struct ica_rsa_modexpo_32)))
-		return -EFAULT;
-	if (__put_user(compat_ptr(mex32k.inputdata), &mex64->inputdata)   ||
-	    __put_user(mex32k.inputdatalength, &mex64->inputdatalength)   ||
-	    __put_user(compat_ptr(mex32k.outputdata), &mex64->outputdata) ||
-	    __put_user(mex32k.outputdatalength, &mex64->outputdatalength) ||
-	    __put_user(compat_ptr(mex32k.b_key), &mex64->b_key)           ||
-	    __put_user(compat_ptr(mex32k.n_modulus), &mex64->n_modulus))
-		return -EFAULT;
-	ret = z90crypt_unlocked_ioctl(filp, cmd, (unsigned long)mex64);
-	if (!ret)
-		if (__get_user(i, &mex64->outputdatalength) ||
-		    __put_user(i, &mex32u->outputdatalength))
-			ret = -EFAULT;
-	return ret;
-}
-
-struct ica_rsa_modexpo_crt_32 { // For 32-bit callers
-	compat_uptr_t	inputdata;
-	unsigned int	inputdatalength;
-	compat_uptr_t	outputdata;
-	unsigned int	outputdatalength;
-	compat_uptr_t	bp_key;
-	compat_uptr_t	bq_key;
-	compat_uptr_t	np_prime;
-	compat_uptr_t	nq_prime;
-	compat_uptr_t	u_mult_inv;
-};
-
-static long
-trans_modexpo_crt32(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct ica_rsa_modexpo_crt_32 __user *crt32u = compat_ptr(arg);
-	struct ica_rsa_modexpo_crt_32  crt32k;
-	struct ica_rsa_modexpo_crt __user *crt64;
-	long ret = 0;
-	unsigned int i;
-
-	if (!access_ok(VERIFY_WRITE, crt32u,
-		       sizeof(struct ica_rsa_modexpo_crt_32)))
-		return -EFAULT;
-	crt64 = compat_alloc_user_space(sizeof(struct ica_rsa_modexpo_crt));
-	if (!access_ok(VERIFY_WRITE, crt64, sizeof(struct ica_rsa_modexpo_crt)))
-		return -EFAULT;
-	if (copy_from_user(&crt32k, crt32u,
-			   sizeof(struct ica_rsa_modexpo_crt_32)))
-		return -EFAULT;
-	if (__put_user(compat_ptr(crt32k.inputdata), &crt64->inputdata)   ||
-	    __put_user(crt32k.inputdatalength, &crt64->inputdatalength)   ||
-	    __put_user(compat_ptr(crt32k.outputdata), &crt64->outputdata) ||
-	    __put_user(crt32k.outputdatalength, &crt64->outputdatalength) ||
-	    __put_user(compat_ptr(crt32k.bp_key), &crt64->bp_key)         ||
-	    __put_user(compat_ptr(crt32k.bq_key), &crt64->bq_key)         ||
-	    __put_user(compat_ptr(crt32k.np_prime), &crt64->np_prime)     ||
-	    __put_user(compat_ptr(crt32k.nq_prime), &crt64->nq_prime)     ||
-	    __put_user(compat_ptr(crt32k.u_mult_inv), &crt64->u_mult_inv))
-		return -EFAULT;
-	ret = z90crypt_unlocked_ioctl(filp, cmd, (unsigned long)crt64);
-	if (!ret)
-		if (__get_user(i, &crt64->outputdatalength) ||
-		    __put_user(i, &crt32u->outputdatalength))
-			ret = -EFAULT;
-	return ret;
-}
-
-static long
-z90crypt_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case ICAZ90STATUS:
-	case Z90QUIESCE:
-	case Z90STAT_TOTALCOUNT:
-	case Z90STAT_PCICACOUNT:
-	case Z90STAT_PCICCCOUNT:
-	case Z90STAT_PCIXCCCOUNT:
-	case Z90STAT_PCIXCCMCL2COUNT:
-	case Z90STAT_PCIXCCMCL3COUNT:
-	case Z90STAT_CEX2CCOUNT:
-	case Z90STAT_REQUESTQ_COUNT:
-	case Z90STAT_PENDINGQ_COUNT:
-	case Z90STAT_TOTALOPEN_COUNT:
-	case Z90STAT_DOMAIN_INDEX:
-	case Z90STAT_STATUS_MASK:
-	case Z90STAT_QDEPTH_MASK:
-	case Z90STAT_PERDEV_REQCNT:
-		return z90crypt_unlocked_ioctl(filp, cmd, arg);
-	case ICARSAMODEXPO:
-		return trans_modexpo32(filp, cmd, arg);
-	case ICARSACRT:
-		return trans_modexpo_crt32(filp, cmd, arg);
-	default:
-		return -ENOIOCTLCMD;
-  	}
-}
-#endif
-
-/**
- * The module initialization code.
- */
-static int __init
-z90crypt_init_module(void)
-{
-	int result, nresult;
-	struct proc_dir_entry *entry;
-
-	PDEBUG("PID %d\n", PID());
-
-	if ((domain < -1) || (domain > 15)) {
-		PRINTKW("Invalid param: domain = %d.  Not loading.\n", domain);
-		return -EINVAL;
-	}
-
-	/* Register as misc device with given minor (or get a dynamic one). */
-	result = misc_register(&z90crypt_misc_device);
-	if (result < 0) {
-		PRINTKW(KERN_ERR "misc_register (minor %d) failed with %d\n",
-			z90crypt_misc_device.minor, result);
-		return result;
-	}
-
-	PDEBUG("Registered " DEV_NAME " with result %d\n", result);
-
-	result = create_z90crypt(&domain);
-	if (result != 0) {
-		PRINTKW("create_z90crypt (domain index %d) failed with %d.\n",
-			domain, result);
-		result = -ENOMEM;
-		goto init_module_cleanup;
-	}
-
-	if (result == 0) {
-		PRINTKN("Version %d.%d.%d loaded, built on %s %s\n",
-			z90crypt_VERSION, z90crypt_RELEASE, z90crypt_VARIANT,
-			__DATE__, __TIME__);
-		PDEBUG("create_z90crypt (domain index %d) successful.\n",
-		       domain);
-	} else
-		PRINTK("No devices at startup\n");
-
-	/* Initialize globals. */
-	spin_lock_init(&queuespinlock);
-
-	INIT_LIST_HEAD(&pending_list);
-	pendingq_count = 0;
-
-	INIT_LIST_HEAD(&request_list);
-	requestq_count = 0;
-
-	quiesce_z90crypt = 0;
-
-	atomic_set(&total_open, 0);
-	atomic_set(&z90crypt_step, 0);
-
-	/* Set up the cleanup task. */
-	init_timer(&cleanup_timer);
-	cleanup_timer.function = z90crypt_cleanup_task;
-	cleanup_timer.data = 0;
-	cleanup_timer.expires = jiffies + (CLEANUPTIME * HZ);
-	add_timer(&cleanup_timer);
-
-	/* Set up the proc file system */
-	entry = create_proc_entry("driver/z90crypt", 0644, 0);
-	if (entry) {
-		entry->nlink = 1;
-		entry->data = 0;
-		entry->read_proc = z90crypt_status;
-		entry->write_proc = z90crypt_status_write;
-	}
-	else
-		PRINTK("Couldn't create z90crypt proc entry\n");
-	z90crypt_entry = entry;
-
-	/* Set up the configuration task. */
-	init_timer(&config_timer);
-	config_timer.function = z90crypt_config_task;
-	config_timer.data = 0;
-	config_timer.expires = jiffies + (INITIAL_CONFIGTIME * HZ);
-	add_timer(&config_timer);
-
-	/* Set up the reader task */
-	tasklet_init(&reader_tasklet, z90crypt_reader_task, 0);
-	init_timer(&reader_timer);
-	reader_timer.function = z90crypt_schedule_reader_task;
-	reader_timer.data = 0;
-	reader_timer.expires = jiffies + (READERTIME * HZ / 1000);
-	add_timer(&reader_timer);
-
-	return 0; // success
-
-init_module_cleanup:
-	if ((nresult = misc_deregister(&z90crypt_misc_device)))
-		PRINTK("misc_deregister failed with %d.\n", nresult);
-	else
-		PDEBUG("misc_deregister successful.\n");
-
-	return result; // failure
-}
-
-/**
- * The module termination code
- */
-static void __exit
-z90crypt_cleanup_module(void)
-{
-	int nresult;
-
-	PDEBUG("PID %d\n", PID());
-
-	remove_proc_entry("driver/z90crypt", 0);
-
-	if ((nresult = misc_deregister(&z90crypt_misc_device)))
-		PRINTK("misc_deregister failed with %d.\n", nresult);
-	else
-		PDEBUG("misc_deregister successful.\n");
-
-	/* Remove the tasks */
-	tasklet_kill(&reader_tasklet);
-	del_timer(&reader_timer);
-	del_timer(&config_timer);
-	del_timer(&cleanup_timer);
-
-	destroy_z90crypt();
-
-	PRINTKN("Unloaded.\n");
-}
-
-/**
- * Functions running under a process id
- *
- * The I/O functions:
- *     z90crypt_open
- *     z90crypt_release
- *     z90crypt_read
- *     z90crypt_write
- *     z90crypt_unlocked_ioctl
- *     z90crypt_status
- *     z90crypt_status_write
- *	 disable_card
- *	 enable_card
- *
- * Helper functions:
- *     z90crypt_rsa
- *	 z90crypt_prepare
- *	 z90crypt_send
- *	 z90crypt_process_results
- *
- */
-static int
-z90crypt_open(struct inode *inode, struct file *filp)
-{
-	struct priv_data *private_data_p;
-
-	if (quiesce_z90crypt)
-		return -EQUIESCE;
-
-	private_data_p = kzalloc(sizeof(struct priv_data), GFP_KERNEL);
-	if (!private_data_p) {
-		PRINTK("Memory allocate failed\n");
-		return -ENOMEM;
-	}
-
-	private_data_p->status = STAT_OPEN;
-	private_data_p->opener_pid = PID();
-	filp->private_data = private_data_p;
-	atomic_inc(&total_open);
-
-	return 0;
-}
-
-static int
-z90crypt_release(struct inode *inode, struct file *filp)
-{
-	struct priv_data *private_data_p = filp->private_data;
-
-	PDEBUG("PID %d (filp %p)\n", PID(), filp);
-
-	private_data_p->status = STAT_CLOSED;
-	memset(private_data_p, 0, sizeof(struct priv_data));
-	kfree(private_data_p);
-	atomic_dec(&total_open);
-
-	return 0;
-}
-
-/*
- * there are two read functions, of which compile options will choose one
- * without USE_GET_RANDOM_BYTES
- *   => read() always returns -EPERM;
- * otherwise
- *   => read() uses get_random_bytes() kernel function
- */
-#ifndef USE_GET_RANDOM_BYTES
-/**
- * z90crypt_read will not be supported beyond z90crypt 1.3.1
- */
-static ssize_t
-z90crypt_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
-{
-	PDEBUG("filp %p (PID %d)\n", filp, PID());
-	return -EPERM;
-}
-#else // we want to use get_random_bytes
-/**
- * read() just returns a string of random bytes.  Since we have no way
- * to generate these cryptographically, we just execute get_random_bytes
- * for the length specified.
- */
-#include <linux/random.h>
-static ssize_t
-z90crypt_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
-{
-	unsigned char *temp_buff;
-
-	PDEBUG("filp %p (PID %d)\n", filp, PID());
-
-	if (quiesce_z90crypt)
-		return -EQUIESCE;
-	if (count < 0) {
-		PRINTK("Requested random byte count negative: %ld\n", count);
-		return -EINVAL;
-	}
-	if (count > RESPBUFFSIZE) {
-		PDEBUG("count[%d] > RESPBUFFSIZE", count);
-		return -EINVAL;
-	}
-	if (count == 0)
-		return 0;
-	temp_buff = kmalloc(RESPBUFFSIZE, GFP_KERNEL);
-	if (!temp_buff) {
-		PRINTK("Memory allocate failed\n");
-		return -ENOMEM;
-	}
-	get_random_bytes(temp_buff, count);
-
-	if (copy_to_user(buf, temp_buff, count) != 0) {
-		kfree(temp_buff);
-		return -EFAULT;
-	}
-	kfree(temp_buff);
-	return count;
-}
-#endif
-
-/**
- * Write is is not allowed
- */
-static ssize_t
-z90crypt_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos)
-{
-	PDEBUG("filp %p (PID %d)\n", filp, PID());
-	return -EPERM;
-}
-
-/**
- * New status functions
- */
-static inline int
-get_status_totalcount(void)
-{
-	return z90crypt.hdware_info->hdware_mask.st_count;
-}
-
-static inline int
-get_status_PCICAcount(void)
-{
-	return z90crypt.hdware_info->type_mask[PCICA].st_count;
-}
-
-static inline int
-get_status_PCICCcount(void)
-{
-	return z90crypt.hdware_info->type_mask[PCICC].st_count;
-}
-
-static inline int
-get_status_PCIXCCcount(void)
-{
-	return z90crypt.hdware_info->type_mask[PCIXCC_MCL2].st_count +
-	       z90crypt.hdware_info->type_mask[PCIXCC_MCL3].st_count;
-}
-
-static inline int
-get_status_PCIXCCMCL2count(void)
-{
-	return z90crypt.hdware_info->type_mask[PCIXCC_MCL2].st_count;
-}
-
-static inline int
-get_status_PCIXCCMCL3count(void)
-{
-	return z90crypt.hdware_info->type_mask[PCIXCC_MCL3].st_count;
-}
-
-static inline int
-get_status_CEX2Ccount(void)
-{
-	return z90crypt.hdware_info->type_mask[CEX2C].st_count;
-}
-
-static inline int
-get_status_CEX2Acount(void)
-{
-	return z90crypt.hdware_info->type_mask[CEX2A].st_count;
-}
-
-static inline int
-get_status_requestq_count(void)
-{
-	return requestq_count;
-}
-
-static inline int
-get_status_pendingq_count(void)
-{
-	return pendingq_count;
-}
-
-static inline int
-get_status_totalopen_count(void)
-{
-	return atomic_read(&total_open);
-}
-
-static inline int
-get_status_domain_index(void)
-{
-	return z90crypt.cdx;
-}
-
-static inline unsigned char *
-get_status_status_mask(unsigned char status[Z90CRYPT_NUM_APS])
-{
-	int i, ix;
-
-	memcpy(status, z90crypt.hdware_info->device_type_array,
-	       Z90CRYPT_NUM_APS);
-
-	for (i = 0; i < get_status_totalcount(); i++) {
-		ix = SHRT2LONG(i);
-		if (LONG2DEVPTR(ix)->user_disabled)
-			status[ix] = 0x0d;
-	}
-
-	return status;
-}
-
-static inline unsigned char *
-get_status_qdepth_mask(unsigned char qdepth[Z90CRYPT_NUM_APS])
-{
-	int i, ix;
-
-	memset(qdepth, 0, Z90CRYPT_NUM_APS);
-
-	for (i = 0; i < get_status_totalcount(); i++) {
-		ix = SHRT2LONG(i);
-		qdepth[ix] = LONG2DEVPTR(ix)->dev_caller_count;
-	}
-
-	return qdepth;
-}
-
-static inline unsigned int *
-get_status_perdevice_reqcnt(unsigned int reqcnt[Z90CRYPT_NUM_APS])
-{
-	int i, ix;
-
-	memset(reqcnt, 0, Z90CRYPT_NUM_APS * sizeof(int));
-
-	for (i = 0; i < get_status_totalcount(); i++) {
-		ix = SHRT2LONG(i);
-		reqcnt[ix] = LONG2DEVPTR(ix)->dev_total_req_cnt;
-	}
-
-	return reqcnt;
-}
-
-static inline void
-init_work_element(struct work_element *we_p,
-		  struct priv_data *priv_data, pid_t pid)
-{
-	int step;
-
-	we_p->requestptr = (unsigned char *)we_p + sizeof(struct work_element);
-	/* Come up with a unique id for this caller. */
-	step = atomic_inc_return(&z90crypt_step);
-	memcpy(we_p->caller_id+0, (void *) &pid, sizeof(pid));
-	memcpy(we_p->caller_id+4, (void *) &step, sizeof(step));
-	we_p->pid = pid;
-	we_p->priv_data = priv_data;
-	we_p->status[0] = STAT_DEFAULT;
-	we_p->audit[0] = 0x00;
-	we_p->audit[1] = 0x00;
-	we_p->audit[2] = 0x00;
-	we_p->resp_buff_size = 0;
-	we_p->retcode = 0;
-	we_p->devindex = -1;
-	we_p->devtype = -1;
-	atomic_set(&we_p->alarmrung, 0);
-	init_waitqueue_head(&we_p->waitq);
-	INIT_LIST_HEAD(&(we_p->liste));
-}
-
-static inline int
-allocate_work_element(struct work_element **we_pp,
-		      struct priv_data *priv_data_p, pid_t pid)
-{
-	struct work_element *we_p;
-
-	we_p = (struct work_element *) get_zeroed_page(GFP_KERNEL);
-	if (!we_p)
-		return -ENOMEM;
-	init_work_element(we_p, priv_data_p, pid);
-	*we_pp = we_p;
-	return 0;
-}
-
-static inline void
-remove_device(struct device *device_p)
-{
-	if (!device_p || (device_p->disabled != 0))
-		return;
-	device_p->disabled = 1;
-	z90crypt.hdware_info->type_mask[device_p->dev_type].disabled_count++;
-	z90crypt.hdware_info->hdware_mask.disabled_count++;
-}
-
-/**
- * Bitlength limits for each card
- *
- * There are new MCLs which allow more bitlengths. See the table for details.
- * The MCL must be applied and the newer bitlengths enabled for these to work.
- *
- * Card Type    Old limit    New limit
- * PCICA          ??-2048     same (the lower limit is less than 128 bit...)
- * PCICC         512-1024     512-2048
- * PCIXCC_MCL2   512-2048     ----- (applying any GA LIC will make an MCL3 card)
- * PCIXCC_MCL3   -----        128-2048
- * CEX2C         512-2048     128-2048
- * CEX2A          ??-2048     same (the lower limit is less than 128 bit...)
- *
- * ext_bitlens (extended bitlengths) is a global, since you should not apply an
- * MCL to just one card in a machine. We assume, at first, that all cards have
- * these capabilities.
- */
-int ext_bitlens = 1; // This is global
-#define PCIXCC_MIN_MOD_SIZE	 16	//  128 bits
-#define OLD_PCIXCC_MIN_MOD_SIZE	 64	//  512 bits
-#define PCICC_MIN_MOD_SIZE	 64	//  512 bits
-#define OLD_PCICC_MAX_MOD_SIZE	128	// 1024 bits
-#define MAX_MOD_SIZE		256	// 2048 bits
-
-static inline int
-select_device_type(int *dev_type_p, int bytelength)
-{
-	static int count = 0;
-	int PCICA_avail, PCIXCC_MCL3_avail, CEX2C_avail, CEX2A_avail,
-	    index_to_use;
-	struct status *stat;
-	if ((*dev_type_p != PCICC) && (*dev_type_p != PCICA) &&
-	    (*dev_type_p != PCIXCC_MCL2) && (*dev_type_p != PCIXCC_MCL3) &&
-	    (*dev_type_p != CEX2C) && (*dev_type_p != CEX2A) &&
-	    (*dev_type_p != ANYDEV))
-		return -1;
-	if (*dev_type_p != ANYDEV) {
-		stat = &z90crypt.hdware_info->type_mask[*dev_type_p];
-		if (stat->st_count >
-		    (stat->disabled_count + stat->user_disabled_count))
-			return 0;
-		return -1;
-	}
-
-	/**
-	 * Assumption: PCICA, PCIXCC_MCL3, CEX2C, and CEX2A are all similar in
-	 * speed.
-	 *
-	 * PCICA and CEX2A do NOT co-exist, so it would be either one or the
-	 * other present.
-	 */
-	stat = &z90crypt.hdware_info->type_mask[PCICA];
-	PCICA_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	stat = &z90crypt.hdware_info->type_mask[PCIXCC_MCL3];
-	PCIXCC_MCL3_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	stat = &z90crypt.hdware_info->type_mask[CEX2C];
-	CEX2C_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	stat = &z90crypt.hdware_info->type_mask[CEX2A];
-	CEX2A_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	if (PCICA_avail || PCIXCC_MCL3_avail || CEX2C_avail || CEX2A_avail) {
-		/**
-		 * bitlength is a factor, PCICA or CEX2A are the most capable,
-		 * even with the new MCL for PCIXCC.
-		 */
-		if ((bytelength < PCIXCC_MIN_MOD_SIZE) ||
-		    (!ext_bitlens && (bytelength < OLD_PCIXCC_MIN_MOD_SIZE))) {
-			if (PCICA_avail) {
-				*dev_type_p = PCICA;
-				return 0;
-			}
-			if (CEX2A_avail) {
-				*dev_type_p = CEX2A;
-				return 0;
-			}
-			return -1;
-		}
-
-		index_to_use = count % (PCICA_avail + PCIXCC_MCL3_avail +
-					CEX2C_avail + CEX2A_avail);
-		if (index_to_use < PCICA_avail)
-			*dev_type_p = PCICA;
-		else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail))
-			*dev_type_p = PCIXCC_MCL3;
-		else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail +
-					 CEX2C_avail))
-			*dev_type_p = CEX2C;
-		else
-			*dev_type_p = CEX2A;
-		count++;
-		return 0;
-	}
-
-	/* Less than OLD_PCIXCC_MIN_MOD_SIZE cannot go to a PCIXCC_MCL2 */
-	if (bytelength < OLD_PCIXCC_MIN_MOD_SIZE)
-		return -1;
-	stat = &z90crypt.hdware_info->type_mask[PCIXCC_MCL2];
-	if (stat->st_count >
-	    (stat->disabled_count + stat->user_disabled_count)) {
-		*dev_type_p = PCIXCC_MCL2;
-		return 0;
-	}
-
-	/**
-	 * Less than PCICC_MIN_MOD_SIZE or more than OLD_PCICC_MAX_MOD_SIZE
-	 * (if we don't have the MCL applied and the newer bitlengths enabled)
-	 * cannot go to a PCICC
-	 */
-	if ((bytelength < PCICC_MIN_MOD_SIZE) ||
-	    (!ext_bitlens && (bytelength > OLD_PCICC_MAX_MOD_SIZE))) {
-		return -1;
-	}
-	stat = &z90crypt.hdware_info->type_mask[PCICC];
-	if (stat->st_count >
-	    (stat->disabled_count + stat->user_disabled_count)) {
-		*dev_type_p = PCICC;
-		return 0;
-	}
-
-	return -1;
-}
-
-/**
- * Try the selected number, then the selected type (can be ANYDEV)
- */
-static inline int
-select_device(int *dev_type_p, int *device_nr_p, int bytelength)
-{
-	int i, indx, devTp, low_count, low_indx;
-	struct device_x *index_p;
-	struct device *dev_ptr;
-
-	PDEBUG("device type = %d, index = %d\n", *dev_type_p, *device_nr_p);
-	if ((*device_nr_p >= 0) && (*device_nr_p < Z90CRYPT_NUM_DEVS)) {
-		PDEBUG("trying index = %d\n", *device_nr_p);
-		dev_ptr = z90crypt.device_p[*device_nr_p];
-
-		if (dev_ptr &&
-		    (dev_ptr->dev_stat != DEV_GONE) &&
-		    (dev_ptr->disabled == 0) &&
-		    (dev_ptr->user_disabled == 0)) {
-			PDEBUG("selected by number, index = %d\n",
-			       *device_nr_p);
-			*dev_type_p = dev_ptr->dev_type;
-			return *device_nr_p;
-		}
-	}
-	*device_nr_p = -1;
-	PDEBUG("trying type = %d\n", *dev_type_p);
-	devTp = *dev_type_p;
-	if (select_device_type(&devTp, bytelength) == -1) {
-		PDEBUG("failed to select by type\n");
-		return -1;
-	}
-	PDEBUG("selected type = %d\n", devTp);
-	index_p = &z90crypt.hdware_info->type_x_addr[devTp];
-	low_count = 0x0000FFFF;
-	low_indx = -1;
-	for (i = 0; i < z90crypt.hdware_info->type_mask[devTp].st_count; i++) {
-		indx = index_p->device_index[i];
-		dev_ptr = z90crypt.device_p[indx];
-		if (dev_ptr &&
-		    (dev_ptr->dev_stat != DEV_GONE) &&
-		    (dev_ptr->disabled == 0) &&
-		    (dev_ptr->user_disabled == 0) &&
-		    (devTp == dev_ptr->dev_type) &&
-		    (low_count > dev_ptr->dev_caller_count)) {
-			low_count = dev_ptr->dev_caller_count;
-			low_indx = indx;
-		}
-	}
-	*device_nr_p = low_indx;
-	return low_indx;
-}
-
-static inline int
-send_to_crypto_device(struct work_element *we_p)
-{
-	struct caller *caller_p;
-	struct device *device_p;
-	int dev_nr;
-	int bytelen = ((struct ica_rsa_modexpo *)we_p->buffer)->inputdatalength;
-
-	if (!we_p->requestptr)
-		return SEN_FATAL_ERROR;
-	caller_p = (struct caller *)we_p->requestptr;
-	dev_nr = we_p->devindex;
-	if (select_device(&we_p->devtype, &dev_nr, bytelen) == -1) {
-		if (z90crypt.hdware_info->hdware_mask.st_count != 0)
-			return SEN_RETRY;
-		else
-			return SEN_NOT_AVAIL;
-	}
-	we_p->devindex = dev_nr;
-	device_p = z90crypt.device_p[dev_nr];
-	if (!device_p)
-		return SEN_NOT_AVAIL;
-	if (device_p->dev_type != we_p->devtype)
-		return SEN_RETRY;
-	if (device_p->dev_caller_count >= device_p->dev_q_depth)
-		return SEN_QUEUE_FULL;
-	PDEBUG("device number prior to send: %d\n", dev_nr);
-	switch (send_to_AP(dev_nr, z90crypt.cdx,
-			   caller_p->caller_dev_dep_req_l,
-			   caller_p->caller_dev_dep_req_p)) {
-	case DEV_SEN_EXCEPTION:
-		PRINTKC("Exception during send to device %d\n", dev_nr);
-		z90crypt.terminating = 1;
-		return SEN_FATAL_ERROR;
-	case DEV_GONE:
-		PRINTK("Device %d not available\n", dev_nr);
-		remove_device(device_p);
-		return SEN_NOT_AVAIL;
-	case DEV_EMPTY:
-		return SEN_NOT_AVAIL;
-	case DEV_NO_WORK:
-		return SEN_FATAL_ERROR;
-	case DEV_BAD_MESSAGE:
-		return SEN_USER_ERROR;
-	case DEV_QUEUE_FULL:
-		return SEN_QUEUE_FULL;
-	default:
-	case DEV_ONLINE:
-		break;
-	}
-	list_add_tail(&(caller_p->caller_liste), &(device_p->dev_caller_list));
-	device_p->dev_caller_count++;
-	return 0;
-}
-
-/**
- * Send puts the user's work on one of two queues:
- *   the pending queue if the send was successful
- *   the request queue if the send failed because device full or busy
- */
-static inline int
-z90crypt_send(struct work_element *we_p, const char *buf)
-{
-	int rv;
-
-	PDEBUG("PID %d\n", PID());
-
-	if (CHK_RDWRMASK(we_p->status[0]) != STAT_NOWORK) {
-		PDEBUG("PID %d tried to send more work but has outstanding "
-		       "work.\n", PID());
-		return -EWORKPEND;
-	}
-	we_p->devindex = -1; // Reset device number
-	spin_lock_irq(&queuespinlock);
-	rv = send_to_crypto_device(we_p);
-	switch (rv) {
-	case 0:
-		we_p->requestsent = jiffies;
-		we_p->audit[0] |= FP_SENT;
-		list_add_tail(&we_p->liste, &pending_list);
-		++pendingq_count;
-		we_p->audit[0] |= FP_PENDING;
-		break;
-	case SEN_BUSY:
-	case SEN_QUEUE_FULL:
-		rv = 0;
-		we_p->devindex = -1; // any device will do
-		we_p->requestsent = jiffies;
-		list_add_tail(&we_p->liste, &request_list);
-		++requestq_count;
-		we_p->audit[0] |= FP_REQUEST;
-		break;
-	case SEN_RETRY:
-		rv = -ERESTARTSYS;
-		break;
-	case SEN_NOT_AVAIL:
-		PRINTK("*** No devices available.\n");
-		rv = we_p->retcode = -ENODEV;
-		we_p->status[0] |= STAT_FAILED;
-		break;
-	case REC_OPERAND_INV:
-	case REC_OPERAND_SIZE:
-	case REC_EVEN_MOD:
-	case REC_INVALID_PAD:
-		rv = we_p->retcode = -EINVAL;
-		we_p->status[0] |= STAT_FAILED;
-		break;
-	default:
-		we_p->retcode = rv;
-		we_p->status[0] |= STAT_FAILED;
-		break;
-	}
-	if (rv != -ERESTARTSYS)
-		SET_RDWRMASK(we_p->status[0], STAT_WRITTEN);
-	spin_unlock_irq(&queuespinlock);
-	if (rv == 0)
-		tasklet_schedule(&reader_tasklet);
-	return rv;
-}
-
-/**
- * process_results copies the user's work from kernel space.
- */
-static inline int
-z90crypt_process_results(struct work_element *we_p, char __user *buf)
-{
-	int rv;
-
-	PDEBUG("we_p %p (PID %d)\n", we_p, PID());
-
-	LONG2DEVPTR(we_p->devindex)->dev_total_req_cnt++;
-	SET_RDWRMASK(we_p->status[0], STAT_READPEND);
-
-	rv = 0;
-	if (!we_p->buffer) {
-		PRINTK("we_p %p PID %d in STAT_READPEND: buffer NULL.\n",
-			we_p, PID());
-		rv = -ENOBUFF;
-	}
-
-	if (!rv)
-		if ((rv = copy_to_user(buf, we_p->buffer, we_p->buff_size))) {
-			PDEBUG("copy_to_user failed: rv = %d\n", rv);
-			rv = -EFAULT;
-		}
-
-	if (!rv)
-		rv = we_p->retcode;
-	if (!rv)
-		if (we_p->resp_buff_size
-		    &&	copy_to_user(we_p->resp_addr, we_p->resp_buff,
-				     we_p->resp_buff_size))
-			rv = -EFAULT;
-
-	SET_RDWRMASK(we_p->status[0], STAT_NOWORK);
-	return rv;
-}
-
-static unsigned char NULL_psmid[8] =
-{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-
-/**
- * Used in device configuration functions
- */
-#define MAX_RESET 90
-
-/**
- * This is used only for PCICC support
- */
-static inline int
-is_PKCS11_padded(unsigned char *buffer, int length)
-{
-	int i;
-	if ((buffer[0] != 0x00) || (buffer[1] != 0x01))
-		return 0;
-	for (i = 2; i < length; i++)
-		if (buffer[i] != 0xFF)
-			break;
-	if ((i < 10) || (i == length))
-		return 0;
-	if (buffer[i] != 0x00)
-		return 0;
-	return 1;
-}
-
-/**
- * This is used only for PCICC support
- */
-static inline int
-is_PKCS12_padded(unsigned char *buffer, int length)
-{
-	int i;
-	if ((buffer[0] != 0x00) || (buffer[1] != 0x02))
-		return 0;
-	for (i = 2; i < length; i++)
-		if (buffer[i] == 0x00)
-			break;
-	if ((i < 10) || (i == length))
-		return 0;
-	if (buffer[i] != 0x00)
-		return 0;
-	return 1;
-}
-
-/**
- * builds struct caller and converts message from generic format to
- * device-dependent format
- * func is ICARSAMODEXPO or ICARSACRT
- * function is PCI_FUNC_KEY_ENCRYPT or PCI_FUNC_KEY_DECRYPT
- */
-static inline int
-build_caller(struct work_element *we_p, short function)
-{
-	int rv;
-	struct caller *caller_p = (struct caller *)we_p->requestptr;
-
-	if ((we_p->devtype != PCICC) && (we_p->devtype != PCICA) &&
-	    (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) &&
-	    (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A))
-		return SEN_NOT_AVAIL;
-
-	memcpy(caller_p->caller_id, we_p->caller_id,
-	       sizeof(caller_p->caller_id));
-	caller_p->caller_dev_dep_req_p = caller_p->caller_dev_dep_req;
-	caller_p->caller_dev_dep_req_l = MAX_RESPONSE_SIZE;
-	caller_p->caller_buf_p = we_p->buffer;
-	INIT_LIST_HEAD(&(caller_p->caller_liste));
-
-	rv = convert_request(we_p->buffer, we_p->funccode, function,
-			     z90crypt.cdx, we_p->devtype,
-			     &caller_p->caller_dev_dep_req_l,
-			     caller_p->caller_dev_dep_req_p);
-	if (rv) {
-		if (rv == SEN_NOT_AVAIL)
-			PDEBUG("request can't be processed on hdwr avail\n");
-		else
-			PRINTK("Error from convert_request: %d\n", rv);
-	}
-	else
-		memcpy(&(caller_p->caller_dev_dep_req_p[4]), we_p->caller_id,8);
-	return rv;
-}
-
-static inline void
-unbuild_caller(struct device *device_p, struct caller *caller_p)
-{
-	if (!caller_p)
-		return;
-	if (caller_p->caller_liste.next && caller_p->caller_liste.prev)
-		if (!list_empty(&caller_p->caller_liste)) {
-			list_del_init(&caller_p->caller_liste);
-			device_p->dev_caller_count--;
-		}
-	memset(caller_p->caller_id, 0, sizeof(caller_p->caller_id));
-}
-
-static inline int
-get_crypto_request_buffer(struct work_element *we_p)
-{
-	struct ica_rsa_modexpo *mex_p;
-	struct ica_rsa_modexpo_crt *crt_p;
-	unsigned char *temp_buffer;
-	short function;
-	int rv;
-
-	mex_p =	(struct ica_rsa_modexpo *) we_p->buffer;
-	crt_p = (struct ica_rsa_modexpo_crt *) we_p->buffer;
-
-	PDEBUG("device type input = %d\n", we_p->devtype);
-
-	if (z90crypt.terminating)
-		return REC_NO_RESPONSE;
-	if (memcmp(we_p->caller_id, NULL_psmid, 8) == 0) {
-		PRINTK("psmid zeroes\n");
-		return SEN_FATAL_ERROR;
-	}
-	if (!we_p->buffer) {
-		PRINTK("buffer pointer NULL\n");
-		return SEN_USER_ERROR;
-	}
-	if (!we_p->requestptr) {
-		PRINTK("caller pointer NULL\n");
-		return SEN_USER_ERROR;
-	}
-
-	if ((we_p->devtype != PCICA) && (we_p->devtype != PCICC) &&
-	    (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) &&
-	    (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A) &&
-	    (we_p->devtype != ANYDEV)) {
-		PRINTK("invalid device type\n");
-		return SEN_USER_ERROR;
-	}
-
-	if ((mex_p->inputdatalength < 1) ||
-	    (mex_p->inputdatalength > MAX_MOD_SIZE)) {
-		PRINTK("inputdatalength[%d] is not valid\n",
-		       mex_p->inputdatalength);
-		return SEN_USER_ERROR;
-	}
-
-	if (mex_p->outputdatalength < mex_p->inputdatalength) {
-		PRINTK("outputdatalength[%d] < inputdatalength[%d]\n",
-		       mex_p->outputdatalength, mex_p->inputdatalength);
-		return SEN_USER_ERROR;
-	}
-
-	if (!mex_p->inputdata || !mex_p->outputdata) {
-		PRINTK("inputdata[%p] or outputdata[%p] is NULL\n",
-		       mex_p->outputdata, mex_p->inputdata);
-		return SEN_USER_ERROR;
-	}
-
-	/**
-	 * As long as outputdatalength is big enough, we can set the
-	 * outputdatalength equal to the inputdatalength, since that is the
-	 * number of bytes we will copy in any case
-	 */
-	mex_p->outputdatalength = mex_p->inputdatalength;
-
-	rv = 0;
-	switch (we_p->funccode) {
-	case ICARSAMODEXPO:
-		if (!mex_p->b_key || !mex_p->n_modulus)
-			rv = SEN_USER_ERROR;
-		break;
-	case ICARSACRT:
-		if (!IS_EVEN(crt_p->inputdatalength)) {
-			PRINTK("inputdatalength[%d] is odd, CRT form\n",
-			       crt_p->inputdatalength);
-			rv = SEN_USER_ERROR;
-			break;
-		}
-		if (!crt_p->bp_key ||
-		    !crt_p->bq_key ||
-		    !crt_p->np_prime ||
-		    !crt_p->nq_prime ||
-		    !crt_p->u_mult_inv) {
-			PRINTK("CRT form, bad data: %p/%p/%p/%p/%p\n",
-			       crt_p->bp_key, crt_p->bq_key,
-			       crt_p->np_prime, crt_p->nq_prime,
-			       crt_p->u_mult_inv);
-			rv = SEN_USER_ERROR;
-		}
-		break;
-	default:
-		PRINTK("bad func = %d\n", we_p->funccode);
-		rv = SEN_USER_ERROR;
-		break;
-	}
-	if (rv != 0)
-		return rv;
-
-	if (select_device_type(&we_p->devtype, mex_p->inputdatalength) < 0)
-		return SEN_NOT_AVAIL;
-
-	temp_buffer = (unsigned char *)we_p + sizeof(struct work_element) +
-		      sizeof(struct caller);
-	if (copy_from_user(temp_buffer, mex_p->inputdata,
-			   mex_p->inputdatalength) != 0)
-		return SEN_RELEASED;
-
-	function = PCI_FUNC_KEY_ENCRYPT;
-	switch (we_p->devtype) {
-	/* PCICA and CEX2A do everything with a simple RSA mod-expo operation */
-	case PCICA:
-	case CEX2A:
-		function = PCI_FUNC_KEY_ENCRYPT;
-		break;
-	/**
-	 * PCIXCC_MCL2 does all Mod-Expo form with a simple RSA mod-expo
-	 * operation, and all CRT forms with a PKCS-1.2 format decrypt.
-	 * PCIXCC_MCL3 and CEX2C do all Mod-Expo and CRT forms with a simple RSA
-	 * mod-expo operation
-	 */
-	case PCIXCC_MCL2:
-		if (we_p->funccode == ICARSAMODEXPO)
-			function = PCI_FUNC_KEY_ENCRYPT;
-		else
-			function = PCI_FUNC_KEY_DECRYPT;
-		break;
-	case PCIXCC_MCL3:
-	case CEX2C:
-		if (we_p->funccode == ICARSAMODEXPO)
-			function = PCI_FUNC_KEY_ENCRYPT;
-		else
-			function = PCI_FUNC_KEY_DECRYPT;
-		break;
-	/**
-	 * PCICC does everything as a PKCS-1.2 format request
-	 */
-	case PCICC:
-		/* PCICC cannot handle input that is is PKCS#1.1 padded */
-		if (is_PKCS11_padded(temp_buffer, mex_p->inputdatalength)) {
-			return SEN_NOT_AVAIL;
-		}
-		if (we_p->funccode == ICARSAMODEXPO) {
-			if (is_PKCS12_padded(temp_buffer,
-					     mex_p->inputdatalength))
-				function = PCI_FUNC_KEY_ENCRYPT;
-			else
-				function = PCI_FUNC_KEY_DECRYPT;
-		} else
-			/* all CRT forms are decrypts */
-			function = PCI_FUNC_KEY_DECRYPT;
-		break;
-	}
-	PDEBUG("function: %04x\n", function);
-	rv = build_caller(we_p, function);
-	PDEBUG("rv from build_caller = %d\n", rv);
-	return rv;
-}
-
-static inline int
-z90crypt_prepare(struct work_element *we_p, unsigned int funccode,
-		 const char __user *buffer)
-{
-	int rv;
-
-	we_p->devindex = -1;
-	if (funccode == ICARSAMODEXPO)
-		we_p->buff_size = sizeof(struct ica_rsa_modexpo);
-	else
-		we_p->buff_size = sizeof(struct ica_rsa_modexpo_crt);
-
-	if (copy_from_user(we_p->buffer, buffer, we_p->buff_size))
-		return -EFAULT;
-
-	we_p->audit[0] |= FP_COPYFROM;
-	SET_RDWRMASK(we_p->status[0], STAT_WRITTEN);
-	we_p->funccode = funccode;
-	we_p->devtype = -1;
-	we_p->audit[0] |= FP_BUFFREQ;
-	rv = get_crypto_request_buffer(we_p);
-	switch (rv) {
-	case 0:
-		we_p->audit[0] |= FP_BUFFGOT;
-		break;
-	case SEN_USER_ERROR:
-		rv = -EINVAL;
-		break;
-	case SEN_QUEUE_FULL:
-		rv = 0;
-		break;
-	case SEN_RELEASED:
-		rv = -EFAULT;
-		break;
-	case REC_NO_RESPONSE:
-		rv = -ENODEV;
-		break;
-	case SEN_NOT_AVAIL:
-	case EGETBUFF:
-		rv = -EGETBUFF;
-		break;
-	default:
-		PRINTK("rv = %d\n", rv);
-		rv = -EGETBUFF;
-		break;
-	}
-	if (CHK_RDWRMASK(we_p->status[0]) == STAT_WRITTEN)
-		SET_RDWRMASK(we_p->status[0], STAT_DEFAULT);
-	return rv;
-}
-
-static inline void
-purge_work_element(struct work_element *we_p)
-{
-	struct list_head *lptr;
-
-	spin_lock_irq(&queuespinlock);
-	list_for_each(lptr, &request_list) {
-		if (lptr == &we_p->liste) {
-			list_del_init(lptr);
-			requestq_count--;
-			break;
-		}
-	}
-	list_for_each(lptr, &pending_list) {
-		if (lptr == &we_p->liste) {
-			list_del_init(lptr);
-			pendingq_count--;
-			break;
-		}
-	}
-	spin_unlock_irq(&queuespinlock);
-}
-
-/**
- * Build the request and send it.
- */
-static inline int
-z90crypt_rsa(struct priv_data *private_data_p, pid_t pid,
-	     unsigned int cmd, unsigned long arg)
-{
-	struct work_element *we_p;
-	int rv;
-
-	if ((rv = allocate_work_element(&we_p, private_data_p, pid))) {
-		PDEBUG("PID %d: allocate_work_element returned ENOMEM\n", pid);
-		return rv;
-	}
-	if ((rv = z90crypt_prepare(we_p, cmd, (const char __user *)arg)))
-		PDEBUG("PID %d: rv = %d from z90crypt_prepare\n", pid, rv);
-	if (!rv)
-		if ((rv = z90crypt_send(we_p, (const char *)arg)))
-			PDEBUG("PID %d: rv %d from z90crypt_send.\n", pid, rv);
-	if (!rv) {
-		we_p->audit[0] |= FP_ASLEEP;
-		wait_event(we_p->waitq, atomic_read(&we_p->alarmrung));
-		we_p->audit[0] |= FP_AWAKE;
-		rv = we_p->retcode;
-	}
-	if (!rv)
-		rv = z90crypt_process_results(we_p, (char __user *)arg);
-
-	if ((we_p->status[0] & STAT_FAILED)) {
-		switch (rv) {
-		/**
-		 * EINVAL *after* receive is almost always a padding error or
-		 * length error issued by a coprocessor (not an accelerator).
-		 * We convert this return value to -EGETBUFF which should
-		 * trigger a fallback to software.
-		 */
-		case -EINVAL:
-			if ((we_p->devtype != PCICA) &&
-			    (we_p->devtype != CEX2A))
-				rv = -EGETBUFF;
-			break;
-		case -ETIMEOUT:
-			if (z90crypt.mask.st_count > 0)
-				rv = -ERESTARTSYS; // retry with another
-			else
-				rv = -ENODEV; // no cards left
-		/* fall through to clean up request queue */
-		case -ERESTARTSYS:
-		case -ERELEASED:
-			switch (CHK_RDWRMASK(we_p->status[0])) {
-			case STAT_WRITTEN:
-				purge_work_element(we_p);
-				break;
-			case STAT_READPEND:
-			case STAT_NOWORK:
-			default:
-				break;
-			}
-			break;
-		default:
-			we_p->status[0] ^= STAT_FAILED;
-			break;
-		}
-	}
-	free_page((long)we_p);
-	return rv;
-}
-
-/**
- * This function is a little long, but it's really just one large switch
- * statement.
- */
-static long
-z90crypt_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct priv_data *private_data_p = filp->private_data;
-	unsigned char *status;
-	unsigned char *qdepth;
-	unsigned int *reqcnt;
-	struct ica_z90_status *pstat;
-	int ret, i, loopLim, tempstat;
-	static int deprecated_msg_count1 = 0;
-	static int deprecated_msg_count2 = 0;
-
-	PDEBUG("filp %p (PID %d), cmd 0x%08X\n", filp, PID(), cmd);
-	PDEBUG("cmd 0x%08X: dir %s, size 0x%04X, type 0x%02X, nr 0x%02X\n",
-		cmd,
-		!_IOC_DIR(cmd) ? "NO"
-		: ((_IOC_DIR(cmd) == (_IOC_READ|_IOC_WRITE)) ? "RW"
-		: ((_IOC_DIR(cmd) == _IOC_READ) ? "RD"
-		: "WR")),
-		_IOC_SIZE(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd));
-
-	if (_IOC_TYPE(cmd) != Z90_IOCTL_MAGIC) {
-		PRINTK("cmd 0x%08X contains bad magic\n", cmd);
-		return -ENOTTY;
-	}
-
-	ret = 0;
-	switch (cmd) {
-	case ICARSAMODEXPO:
-	case ICARSACRT:
-		if (quiesce_z90crypt) {
-			ret = -EQUIESCE;
-			break;
-		}
-		ret = -ENODEV; // Default if no devices
-		loopLim = z90crypt.hdware_info->hdware_mask.st_count -
-			(z90crypt.hdware_info->hdware_mask.disabled_count +
-			 z90crypt.hdware_info->hdware_mask.user_disabled_count);
-		for (i = 0; i < loopLim; i++) {
-			ret = z90crypt_rsa(private_data_p, PID(), cmd, arg);
-			if (ret != -ERESTARTSYS)
-				break;
-		}
-		if (ret == -ERESTARTSYS)
-			ret = -ENODEV;
-		break;
-
-	case Z90STAT_TOTALCOUNT:
-		tempstat = get_status_totalcount();
-		if (copy_to_user((int __user *)arg, &tempstat,sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCICACOUNT:
-		tempstat = get_status_PCICAcount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCICCCOUNT:
-		tempstat = get_status_PCICCcount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCIXCCMCL2COUNT:
-		tempstat = get_status_PCIXCCMCL2count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCIXCCMCL3COUNT:
-		tempstat = get_status_PCIXCCMCL3count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_CEX2CCOUNT:
-		tempstat = get_status_CEX2Ccount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_CEX2ACOUNT:
-		tempstat = get_status_CEX2Acount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_REQUESTQ_COUNT:
-		tempstat = get_status_requestq_count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PENDINGQ_COUNT:
-		tempstat = get_status_pendingq_count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_TOTALOPEN_COUNT:
-		tempstat = get_status_totalopen_count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_DOMAIN_INDEX:
-		tempstat = get_status_domain_index();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_STATUS_MASK:
-		status = kmalloc(Z90CRYPT_NUM_APS, GFP_KERNEL);
-		if (!status) {
-			PRINTK("kmalloc for status failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-		get_status_status_mask(status);
-		if (copy_to_user((char __user *) arg, status, Z90CRYPT_NUM_APS)
-									!= 0)
-			ret = -EFAULT;
-		kfree(status);
-		break;
-
-	case Z90STAT_QDEPTH_MASK:
-		qdepth = kmalloc(Z90CRYPT_NUM_APS, GFP_KERNEL);
-		if (!qdepth) {
-			PRINTK("kmalloc for qdepth failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-		get_status_qdepth_mask(qdepth);
-		if (copy_to_user((char __user *) arg, qdepth, Z90CRYPT_NUM_APS) != 0)
-			ret = -EFAULT;
-		kfree(qdepth);
-		break;
-
-	case Z90STAT_PERDEV_REQCNT:
-		reqcnt = kmalloc(sizeof(int) * Z90CRYPT_NUM_APS, GFP_KERNEL);
-		if (!reqcnt) {
-			PRINTK("kmalloc for reqcnt failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-		get_status_perdevice_reqcnt(reqcnt);
-		if (copy_to_user((char __user *) arg, reqcnt,
-				 Z90CRYPT_NUM_APS * sizeof(int)) != 0)
-			ret = -EFAULT;
-		kfree(reqcnt);
-		break;
-
-		/* THIS IS DEPRECATED.	USE THE NEW STATUS CALLS */
-	case ICAZ90STATUS:
-		if (deprecated_msg_count1 < 20) {
-			PRINTK("deprecated call to ioctl (ICAZ90STATUS)!\n");
-			deprecated_msg_count1++;
-			if (deprecated_msg_count1 == 20)
-				PRINTK("No longer issuing messages related to "
-				       "deprecated call to ICAZ90STATUS.\n");
-		}
-
-		pstat = kmalloc(sizeof(struct ica_z90_status), GFP_KERNEL);
-		if (!pstat) {
-			PRINTK("kmalloc for pstat failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-
-		pstat->totalcount	 = get_status_totalcount();
-		pstat->leedslitecount	 = get_status_PCICAcount();
-		pstat->leeds2count	 = get_status_PCICCcount();
-		pstat->requestqWaitCount = get_status_requestq_count();
-		pstat->pendingqWaitCount = get_status_pendingq_count();
-		pstat->totalOpenCount	 = get_status_totalopen_count();
-		pstat->cryptoDomain	 = get_status_domain_index();
-		get_status_status_mask(pstat->status);
-		get_status_qdepth_mask(pstat->qdepth);
-
-		if (copy_to_user((struct ica_z90_status __user *) arg, pstat,
-				 sizeof(struct ica_z90_status)) != 0)
-			ret = -EFAULT;
-		kfree(pstat);
-		break;
-
-		/* THIS IS DEPRECATED.	USE THE NEW STATUS CALLS */
-	case Z90STAT_PCIXCCCOUNT:
-		if (deprecated_msg_count2 < 20) {
-			PRINTK("deprecated ioctl (Z90STAT_PCIXCCCOUNT)!\n");
-			deprecated_msg_count2++;
-			if (deprecated_msg_count2 == 20)
-				PRINTK("No longer issuing messages about depre"
-				       "cated ioctl Z90STAT_PCIXCCCOUNT.\n");
-		}
-
-		tempstat = get_status_PCIXCCcount();
-		if (copy_to_user((int *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90QUIESCE:
-		if (current->euid != 0) {
-			PRINTK("QUIESCE fails: euid %d\n",
-			       current->euid);
-			ret = -EACCES;
-		} else {
-			PRINTK("QUIESCE device from PID %d\n", PID());
-			quiesce_z90crypt = 1;
-		}
-		break;
-
-	default:
-		/* user passed an invalid IOCTL number */
-		PDEBUG("cmd 0x%08X contains invalid ioctl code\n", cmd);
-		ret = -ENOTTY;
-		break;
-	}
-
-	return ret;
-}
-
-static inline int
-sprintcl(unsigned char *outaddr, unsigned char *addr, unsigned int len)
-{
-	int hl, i;
-
-	hl = 0;
-	for (i = 0; i < len; i++)
-		hl += sprintf(outaddr+hl, "%01x", (unsigned int) addr[i]);
-	hl += sprintf(outaddr+hl, " ");
-
-	return hl;
-}
-
-static inline int
-sprintrw(unsigned char *outaddr, unsigned char *addr, unsigned int len)
-{
-	int hl, inl, c, cx;
-
-	hl = sprintf(outaddr, "	   ");
-	inl = 0;
-	for (c = 0; c < (len / 16); c++) {
-		hl += sprintcl(outaddr+hl, addr+inl, 16);
-		inl += 16;
-	}
-
-	cx = len%16;
-	if (cx) {
-		hl += sprintcl(outaddr+hl, addr+inl, cx);
-		inl += cx;
-	}
-
-	hl += sprintf(outaddr+hl, "\n");
-
-	return hl;
-}
-
-static inline int
-sprinthx(unsigned char *title, unsigned char *outaddr,
-	 unsigned char *addr, unsigned int len)
-{
-	int hl, inl, r, rx;
-
-	hl = sprintf(outaddr, "\n%s\n", title);
-	inl = 0;
-	for (r = 0; r < (len / 64); r++) {
-		hl += sprintrw(outaddr+hl, addr+inl, 64);
-		inl += 64;
-	}
-	rx = len % 64;
-	if (rx) {
-		hl += sprintrw(outaddr+hl, addr+inl, rx);
-		inl += rx;
-	}
-
-	hl += sprintf(outaddr+hl, "\n");
-
-	return hl;
-}
-
-static inline int
-sprinthx4(unsigned char *title, unsigned char *outaddr,
-	  unsigned int *array, unsigned int len)
-{
-	int hl, r;
-
-	hl = sprintf(outaddr, "\n%s\n", title);
-
-	for (r = 0; r < len; r++) {
-		if ((r % 8) == 0)
-			hl += sprintf(outaddr+hl, "    ");
-		hl += sprintf(outaddr+hl, "%08X ", array[r]);
-		if ((r % 8) == 7)
-			hl += sprintf(outaddr+hl, "\n");
-	}
-
-	hl += sprintf(outaddr+hl, "\n");
-
-	return hl;
-}
-
-static int
-z90crypt_status(char *resp_buff, char **start, off_t offset,
-		int count, int *eof, void *data)
-{
-	unsigned char *workarea;
-	int len;
-
-	/* resp_buff is a page. Use the right half for a work area */
-	workarea = resp_buff+2000;
-	len = 0;
-	len += sprintf(resp_buff+len, "\nz90crypt version: %d.%d.%d\n",
-		z90crypt_VERSION, z90crypt_RELEASE, z90crypt_VARIANT);
-	len += sprintf(resp_buff+len, "Cryptographic domain: %d\n",
-		get_status_domain_index());
-	len += sprintf(resp_buff+len, "Total device count: %d\n",
-		get_status_totalcount());
-	len += sprintf(resp_buff+len, "PCICA count: %d\n",
-		get_status_PCICAcount());
-	len += sprintf(resp_buff+len, "PCICC count: %d\n",
-		get_status_PCICCcount());
-	len += sprintf(resp_buff+len, "PCIXCC MCL2 count: %d\n",
-		get_status_PCIXCCMCL2count());
-	len += sprintf(resp_buff+len, "PCIXCC MCL3 count: %d\n",
-		get_status_PCIXCCMCL3count());
-	len += sprintf(resp_buff+len, "CEX2C count: %d\n",
-		get_status_CEX2Ccount());
-	len += sprintf(resp_buff+len, "CEX2A count: %d\n",
-		get_status_CEX2Acount());
-	len += sprintf(resp_buff+len, "requestq count: %d\n",
-		get_status_requestq_count());
-	len += sprintf(resp_buff+len, "pendingq count: %d\n",
-		get_status_pendingq_count());
-	len += sprintf(resp_buff+len, "Total open handles: %d\n\n",
-		get_status_totalopen_count());
-	len += sprinthx(
-		"Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) "
-		"4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A",
-		resp_buff+len,
-		get_status_status_mask(workarea),
-		Z90CRYPT_NUM_APS);
-	len += sprinthx("Waiting work element counts",
-		resp_buff+len,
-		get_status_qdepth_mask(workarea),
-		Z90CRYPT_NUM_APS);
-	len += sprinthx4(
-		"Per-device successfully completed request counts",
-		resp_buff+len,
-		get_status_perdevice_reqcnt((unsigned int *)workarea),
-		Z90CRYPT_NUM_APS);
-	*eof = 1;
-	memset(workarea, 0, Z90CRYPT_NUM_APS * sizeof(unsigned int));
-	return len;
-}
-
-static inline void
-disable_card(int card_index)
-{
-	struct device *devp;
-
-	devp = LONG2DEVPTR(card_index);
-	if (!devp || devp->user_disabled)
-		return;
-	devp->user_disabled = 1;
-	z90crypt.hdware_info->hdware_mask.user_disabled_count++;
-	if (devp->dev_type == -1)
-		return;
-	z90crypt.hdware_info->type_mask[devp->dev_type].user_disabled_count++;
-}
-
-static inline void
-enable_card(int card_index)
-{
-	struct device *devp;
-
-	devp = LONG2DEVPTR(card_index);
-	if (!devp || !devp->user_disabled)
-		return;
-	devp->user_disabled = 0;
-	z90crypt.hdware_info->hdware_mask.user_disabled_count--;
-	if (devp->dev_type == -1)
-		return;
-	z90crypt.hdware_info->type_mask[devp->dev_type].user_disabled_count--;
-}
-
-static int
-z90crypt_status_write(struct file *file, const char __user *buffer,
-		      unsigned long count, void *data)
-{
-	int j, eol;
-	unsigned char *lbuf, *ptr;
-	unsigned int local_count;
-
-#define LBUFSIZE 1200
-	lbuf = kmalloc(LBUFSIZE, GFP_KERNEL);
-	if (!lbuf) {
-		PRINTK("kmalloc failed!\n");
-		return 0;
-	}
-
-	if (count <= 0)
-		return 0;
-
-	local_count = UMIN((unsigned int)count, LBUFSIZE-1);
-
-	if (copy_from_user(lbuf, buffer, local_count) != 0) {
-		kfree(lbuf);
-		return -EFAULT;
-	}
-
-	lbuf[local_count] = '\0';
-
-	ptr = strstr(lbuf, "Online devices");
-	if (ptr == 0) {
-		PRINTK("Unable to parse data (missing \"Online devices\")\n");
-		kfree(lbuf);
-		return count;
-	}
-
-	ptr = strstr(ptr, "\n");
-	if (ptr == 0) {
-		PRINTK("Unable to parse data (missing newline after \"Online devices\")\n");
-		kfree(lbuf);
-		return count;
-	}
-	ptr++;
-
-	if (strstr(ptr, "Waiting work element counts") == NULL) {
-		PRINTK("Unable to parse data (missing \"Waiting work element counts\")\n");
-		kfree(lbuf);
-		return count;
-	}
-
-	j = 0;
-	eol = 0;
-	while ((j < 64) && (*ptr != '\0')) {
-		switch (*ptr) {
-		case '\t':
-		case ' ':
-			break;
-		case '\n':
-		default:
-			eol = 1;
-			break;
-		case '0':	// no device
-		case '1':	// PCICA
-		case '2':	// PCICC
-		case '3':	// PCIXCC_MCL2
-		case '4':	// PCIXCC_MCL3
-		case '5':	// CEX2C
-		case '6':       // CEX2A
-			j++;
-			break;
-		case 'd':
-		case 'D':
-			disable_card(j);
-			j++;
-			break;
-		case 'e':
-		case 'E':
-			enable_card(j);
-			j++;
-			break;
-		}
-		if (eol)
-			break;
-		ptr++;
-	}
-
-	kfree(lbuf);
-	return count;
-}
-
-/**
- * Functions that run under a timer, with no process id
- *
- * The task functions:
- *     z90crypt_reader_task
- *	 helper_send_work
- *	 helper_handle_work_element
- *	 helper_receive_rc
- *     z90crypt_config_task
- *     z90crypt_cleanup_task
- *
- * Helper functions:
- *     z90crypt_schedule_reader_timer
- *     z90crypt_schedule_reader_task
- *     z90crypt_schedule_config_task
- *     z90crypt_schedule_cleanup_task
- */
-static inline int
-receive_from_crypto_device(int index, unsigned char *psmid, int *buff_len_p,
-			   unsigned char *buff, unsigned char __user **dest_p_p)
-{
-	int dv, rv;
-	struct device *dev_ptr;
-	struct caller *caller_p;
-	struct ica_rsa_modexpo *icaMsg_p;
-	struct list_head *ptr, *tptr;
-
-	memcpy(psmid, NULL_psmid, sizeof(NULL_psmid));
-
-	if (z90crypt.terminating)
-		return REC_FATAL_ERROR;
-
-	caller_p = 0;
-	dev_ptr = z90crypt.device_p[index];
-	rv = 0;
-	do {
-		if (!dev_ptr || dev_ptr->disabled) {
-			rv = REC_NO_WORK; // a disabled device can't return work
-			break;
-		}
-		if (dev_ptr->dev_self_x != index) {
-			PRINTKC("Corrupt dev ptr\n");
-			z90crypt.terminating = 1;
-			rv = REC_FATAL_ERROR;
-			break;
-		}
-		if (!dev_ptr->dev_resp_l || !dev_ptr->dev_resp_p) {
-			dv = DEV_REC_EXCEPTION;
-			PRINTK("dev_resp_l = %d, dev_resp_p = %p\n",
-			       dev_ptr->dev_resp_l, dev_ptr->dev_resp_p);
-		} else {
-			PDEBUG("Dequeue called for device %d\n", index);
-			dv = receive_from_AP(index, z90crypt.cdx,
-					     dev_ptr->dev_resp_l,
-					     dev_ptr->dev_resp_p, psmid);
-		}
-		switch (dv) {
-		case DEV_REC_EXCEPTION:
-			rv = REC_FATAL_ERROR;
-			z90crypt.terminating = 1;
-			PRINTKC("Exception in receive from device %d\n",
-				index);
-			break;
-		case DEV_ONLINE:
-			rv = 0;
-			break;
-		case DEV_EMPTY:
-			rv = REC_EMPTY;
-			break;
-		case DEV_NO_WORK:
-			rv = REC_NO_WORK;
-			break;
-		case DEV_BAD_MESSAGE:
-		case DEV_GONE:
-		case REC_HARDWAR_ERR:
-		default:
-			rv = REC_NO_RESPONSE;
-			break;
-		}
-		if (rv)
-			break;
-		if (dev_ptr->dev_caller_count <= 0) {
-			rv = REC_USER_GONE;
-			break;
-	        }
-
-		list_for_each_safe(ptr, tptr, &dev_ptr->dev_caller_list) {
-			caller_p = list_entry(ptr, struct caller, caller_liste);
-			if (!memcmp(caller_p->caller_id, psmid,
-				    sizeof(caller_p->caller_id))) {
-				if (!list_empty(&caller_p->caller_liste)) {
-					list_del_init(ptr);
-					dev_ptr->dev_caller_count--;
-					break;
-				}
-			}
-			caller_p = 0;
-		}
-		if (!caller_p) {
-			PRINTKW("Unable to locate PSMID %02X%02X%02X%02X%02X"
-				"%02X%02X%02X in device list\n",
-				psmid[0], psmid[1], psmid[2], psmid[3],
-				psmid[4], psmid[5], psmid[6], psmid[7]);
-			rv = REC_USER_GONE;
-			break;
-		}
-
-		PDEBUG("caller_p after successful receive: %p\n", caller_p);
-		rv = convert_response(dev_ptr->dev_resp_p,
-				      caller_p->caller_buf_p, buff_len_p, buff);
-		switch (rv) {
-		case REC_USE_PCICA:
-			break;
-		case REC_OPERAND_INV:
-		case REC_OPERAND_SIZE:
-		case REC_EVEN_MOD:
-		case REC_INVALID_PAD:
-			PDEBUG("device %d: 'user error' %d\n", index, rv);
-			break;
-		case WRONG_DEVICE_TYPE:
-		case REC_HARDWAR_ERR:
-		case REC_BAD_MESSAGE:
-			PRINTKW("device %d: hardware error %d\n", index, rv);
-			rv = REC_NO_RESPONSE;
-			break;
-		default:
-			PDEBUG("device %d: rv = %d\n", index, rv);
-			break;
-		}
-	} while (0);
-
-	switch (rv) {
-	case 0:
-		PDEBUG("Successful receive from device %d\n", index);
-		icaMsg_p = (struct ica_rsa_modexpo *)caller_p->caller_buf_p;
-		*dest_p_p = icaMsg_p->outputdata;
-		if (*buff_len_p == 0)
-			PRINTK("Zero *buff_len_p\n");
-		break;
-	case REC_NO_RESPONSE:
-		PRINTKW("Removing device %d from availability\n", index);
-		remove_device(dev_ptr);
-		break;
-	}
-
-	if (caller_p)
-		unbuild_caller(dev_ptr, caller_p);
-
-	return rv;
-}
-
-static inline void
-helper_send_work(int index)
-{
-	struct work_element *rq_p;
-	int rv;
-
-	if (list_empty(&request_list))
-		return;
-	requestq_count--;
-	rq_p = list_entry(request_list.next, struct work_element, liste);
-	list_del_init(&rq_p->liste);
-	rq_p->audit[1] |= FP_REMREQUEST;
-	if (rq_p->devtype == SHRT2DEVPTR(index)->dev_type) {
-		rq_p->devindex = SHRT2LONG(index);
-		rv = send_to_crypto_device(rq_p);
-		if (rv == 0) {
-			rq_p->requestsent = jiffies;
-			rq_p->audit[0] |= FP_SENT;
-			list_add_tail(&rq_p->liste, &pending_list);
-			++pendingq_count;
-			rq_p->audit[0] |= FP_PENDING;
-		} else {
-			switch (rv) {
-			case REC_OPERAND_INV:
-			case REC_OPERAND_SIZE:
-			case REC_EVEN_MOD:
-			case REC_INVALID_PAD:
-				rq_p->retcode = -EINVAL;
-				break;
-			case SEN_NOT_AVAIL:
-			case SEN_RETRY:
-			case REC_NO_RESPONSE:
-			default:
-				if (z90crypt.mask.st_count > 1)
-					rq_p->retcode =
-						-ERESTARTSYS;
-				else
-					rq_p->retcode = -ENODEV;
-				break;
-			}
-			rq_p->status[0] |= STAT_FAILED;
-			rq_p->audit[1] |= FP_AWAKENING;
-			atomic_set(&rq_p->alarmrung, 1);
-			wake_up(&rq_p->waitq);
-		}
-	} else {
-		if (z90crypt.mask.st_count > 1)
-			rq_p->retcode = -ERESTARTSYS;
-		else
-			rq_p->retcode = -ENODEV;
-		rq_p->status[0] |= STAT_FAILED;
-		rq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&rq_p->alarmrung, 1);
-		wake_up(&rq_p->waitq);
-	}
-}
-
-static inline void
-helper_handle_work_element(int index, unsigned char psmid[8], int rc,
-			   int buff_len, unsigned char *buff,
-			   unsigned char __user *resp_addr)
-{
-	struct work_element *pq_p;
-	struct list_head *lptr, *tptr;
-
-	pq_p = 0;
-	list_for_each_safe(lptr, tptr, &pending_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		if (!memcmp(pq_p->caller_id, psmid, sizeof(pq_p->caller_id))) {
-			list_del_init(lptr);
-			pendingq_count--;
-			pq_p->audit[1] |= FP_NOTPENDING;
-			break;
-		}
-		pq_p = 0;
-	}
-
-	if (!pq_p) {
-		PRINTK("device %d has work but no caller exists on pending Q\n",
-		       SHRT2LONG(index));
-		return;
-	}
-
-	switch (rc) {
-		case 0:
-			pq_p->resp_buff_size = buff_len;
-			pq_p->audit[1] |= FP_RESPSIZESET;
-			if (buff_len) {
-				pq_p->resp_addr = resp_addr;
-				pq_p->audit[1] |= FP_RESPADDRCOPIED;
-				memcpy(pq_p->resp_buff, buff, buff_len);
-				pq_p->audit[1] |= FP_RESPBUFFCOPIED;
-			}
-			break;
-		case REC_OPERAND_INV:
-		case REC_OPERAND_SIZE:
-		case REC_EVEN_MOD:
-		case REC_INVALID_PAD:
-			PDEBUG("-EINVAL after application error %d\n", rc);
-			pq_p->retcode = -EINVAL;
-			pq_p->status[0] |= STAT_FAILED;
-			break;
-		case REC_USE_PCICA:
-			pq_p->retcode = -ERESTARTSYS;
-			pq_p->status[0] |= STAT_FAILED;
-			break;
-		case REC_NO_RESPONSE:
-		default:
-			if (z90crypt.mask.st_count > 1)
-				pq_p->retcode = -ERESTARTSYS;
-			else
-				pq_p->retcode = -ENODEV;
-			pq_p->status[0] |= STAT_FAILED;
-			break;
-	}
-	if ((pq_p->status[0] != STAT_FAILED) || (pq_p->retcode != -ERELEASED)) {
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-}
-
-/**
- * return TRUE if the work element should be removed from the queue
- */
-static inline int
-helper_receive_rc(int index, int *rc_p)
-{
-	switch (*rc_p) {
-	case 0:
-	case REC_OPERAND_INV:
-	case REC_OPERAND_SIZE:
-	case REC_EVEN_MOD:
-	case REC_INVALID_PAD:
-	case REC_USE_PCICA:
-		break;
-
-	case REC_BUSY:
-	case REC_NO_WORK:
-	case REC_EMPTY:
-	case REC_RETRY_DEV:
-	case REC_FATAL_ERROR:
-		return 0;
-
-	case REC_NO_RESPONSE:
-		break;
-
-	default:
-		PRINTK("rc %d, device %d converted to REC_NO_RESPONSE\n",
-		       *rc_p, SHRT2LONG(index));
-		*rc_p = REC_NO_RESPONSE;
-		break;
-	}
-	return 1;
-}
-
-static inline void
-z90crypt_schedule_reader_timer(void)
-{
-	if (timer_pending(&reader_timer))
-		return;
-	if (mod_timer(&reader_timer, jiffies+(READERTIME*HZ/1000)) != 0)
-		PRINTK("Timer pending while modifying reader timer\n");
-}
-
-static void
-z90crypt_reader_task(unsigned long ptr)
-{
-	int workavail, index, rc, buff_len;
-	unsigned char	psmid[8];
-	unsigned char __user *resp_addr;
-	static unsigned char buff[1024];
-
-	/**
-	 * we use workavail = 2 to ensure 2 passes with nothing dequeued before
-	 * exiting the loop. If (pendingq_count+requestq_count) == 0 after the
-	 * loop, there is no work remaining on the queues.
-	 */
-	resp_addr = 0;
-	workavail = 2;
-	buff_len = 0;
-	while (workavail) {
-		workavail--;
-		rc = 0;
-		spin_lock_irq(&queuespinlock);
-		memset(buff, 0x00, sizeof(buff));
-
-		/* Dequeue once from each device in round robin. */
-		for (index = 0; index < z90crypt.mask.st_count; index++) {
-			PDEBUG("About to receive.\n");
-			rc = receive_from_crypto_device(SHRT2LONG(index),
-							psmid,
-							&buff_len,
-							buff,
-							&resp_addr);
-			PDEBUG("Dequeued: rc = %d.\n", rc);
-
-			if (helper_receive_rc(index, &rc)) {
-				if (rc != REC_NO_RESPONSE) {
-					helper_send_work(index);
-					workavail = 2;
-				}
-
-				helper_handle_work_element(index, psmid, rc,
-							   buff_len, buff,
-							   resp_addr);
-			}
-
-			if (rc == REC_FATAL_ERROR)
-				PRINTKW("REC_FATAL_ERROR from device %d!\n",
-					SHRT2LONG(index));
-		}
-		spin_unlock_irq(&queuespinlock);
-	}
-
-	if (pendingq_count + requestq_count)
-		z90crypt_schedule_reader_timer();
-}
-
-static inline void
-z90crypt_schedule_config_task(unsigned int expiration)
-{
-	if (timer_pending(&config_timer))
-		return;
-	if (mod_timer(&config_timer, jiffies+(expiration*HZ)) != 0)
-		PRINTK("Timer pending while modifying config timer\n");
-}
-
-static void
-z90crypt_config_task(unsigned long ptr)
-{
-	int rc;
-
-	PDEBUG("jiffies %ld\n", jiffies);
-
-	if ((rc = refresh_z90crypt(&z90crypt.cdx)))
-		PRINTK("Error %d detected in refresh_z90crypt.\n", rc);
-	/* If return was fatal, don't bother reconfiguring */
-	if ((rc != TSQ_FATAL_ERROR) && (rc != RSQ_FATAL_ERROR))
-		z90crypt_schedule_config_task(CONFIGTIME);
-}
-
-static inline void
-z90crypt_schedule_cleanup_task(void)
-{
-	if (timer_pending(&cleanup_timer))
-		return;
-	if (mod_timer(&cleanup_timer, jiffies+(CLEANUPTIME*HZ)) != 0)
-		PRINTK("Timer pending while modifying cleanup timer\n");
-}
-
-static inline void
-helper_drain_queues(void)
-{
-	struct work_element *pq_p;
-	struct list_head *lptr, *tptr;
-
-	list_for_each_safe(lptr, tptr, &pending_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		pq_p->retcode = -ENODEV;
-		pq_p->status[0] |= STAT_FAILED;
-		unbuild_caller(LONG2DEVPTR(pq_p->devindex),
-			       (struct caller *)pq_p->requestptr);
-		list_del_init(lptr);
-		pendingq_count--;
-		pq_p->audit[1] |= FP_NOTPENDING;
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-
-	list_for_each_safe(lptr, tptr, &request_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		pq_p->retcode = -ENODEV;
-		pq_p->status[0] |= STAT_FAILED;
-		list_del_init(lptr);
-		requestq_count--;
-		pq_p->audit[1] |= FP_REMREQUEST;
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-}
-
-static inline void
-helper_timeout_requests(void)
-{
-	struct work_element *pq_p;
-	struct list_head *lptr, *tptr;
-	long timelimit;
-
-	timelimit = jiffies - (CLEANUPTIME * HZ);
-	/* The list is in strict chronological order */
-	list_for_each_safe(lptr, tptr, &pending_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		if (pq_p->requestsent >= timelimit)
-			break;
-		PRINTKW("Purging(PQ) PSMID %02X%02X%02X%02X%02X%02X%02X%02X\n",
-		       ((struct caller *)pq_p->requestptr)->caller_id[0],
-		       ((struct caller *)pq_p->requestptr)->caller_id[1],
-		       ((struct caller *)pq_p->requestptr)->caller_id[2],
-		       ((struct caller *)pq_p->requestptr)->caller_id[3],
-		       ((struct caller *)pq_p->requestptr)->caller_id[4],
-		       ((struct caller *)pq_p->requestptr)->caller_id[5],
-		       ((struct caller *)pq_p->requestptr)->caller_id[6],
-		       ((struct caller *)pq_p->requestptr)->caller_id[7]);
-		pq_p->retcode = -ETIMEOUT;
-		pq_p->status[0] |= STAT_FAILED;
-		/* get this off any caller queue it may be on */
-		unbuild_caller(LONG2DEVPTR(pq_p->devindex),
-			       (struct caller *) pq_p->requestptr);
-		list_del_init(lptr);
-		pendingq_count--;
-		pq_p->audit[1] |= FP_TIMEDOUT;
-		pq_p->audit[1] |= FP_NOTPENDING;
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-
-	/**
-	 * If pending count is zero, items left on the request queue may
-	 * never be processed.
-	 */
-	if (pendingq_count <= 0) {
-		list_for_each_safe(lptr, tptr, &request_list) {
-			pq_p = list_entry(lptr, struct work_element, liste);
-			if (pq_p->requestsent >= timelimit)
-				break;
-		PRINTKW("Purging(RQ) PSMID %02X%02X%02X%02X%02X%02X%02X%02X\n",
-		       ((struct caller *)pq_p->requestptr)->caller_id[0],
-		       ((struct caller *)pq_p->requestptr)->caller_id[1],
-		       ((struct caller *)pq_p->requestptr)->caller_id[2],
-		       ((struct caller *)pq_p->requestptr)->caller_id[3],
-		       ((struct caller *)pq_p->requestptr)->caller_id[4],
-		       ((struct caller *)pq_p->requestptr)->caller_id[5],
-		       ((struct caller *)pq_p->requestptr)->caller_id[6],
-		       ((struct caller *)pq_p->requestptr)->caller_id[7]);
-			pq_p->retcode = -ETIMEOUT;
-			pq_p->status[0] |= STAT_FAILED;
-			list_del_init(lptr);
-			requestq_count--;
-			pq_p->audit[1] |= FP_TIMEDOUT;
-			pq_p->audit[1] |= FP_REMREQUEST;
-			pq_p->audit[1] |= FP_AWAKENING;
-			atomic_set(&pq_p->alarmrung, 1);
-			wake_up(&pq_p->waitq);
-		}
-	}
-}
-
-static void
-z90crypt_cleanup_task(unsigned long ptr)
-{
-	PDEBUG("jiffies %ld\n", jiffies);
-	spin_lock_irq(&queuespinlock);
-	if (z90crypt.mask.st_count <= 0) // no devices!
-		helper_drain_queues();
-	else
-		helper_timeout_requests();
-	spin_unlock_irq(&queuespinlock);
-	z90crypt_schedule_cleanup_task();
-}
-
-static void
-z90crypt_schedule_reader_task(unsigned long ptr)
-{
-	tasklet_schedule(&reader_tasklet);
-}
-
-/**
- * Lowlevel Functions:
- *
- *   create_z90crypt:  creates and initializes basic data structures
- *   refresh_z90crypt:	re-initializes basic data structures
- *   find_crypto_devices: returns a count and mask of hardware status
- *   create_crypto_device:  builds the descriptor for a device
- *   destroy_crypto_device:  unallocates the descriptor for a device
- *   destroy_z90crypt:	drains all work, unallocates structs
- */
-
-/**
- * build the z90crypt root structure using the given domain index
- */
-static int
-create_z90crypt(int *cdx_p)
-{
-	struct hdware_block *hdware_blk_p;
-
-	memset(&z90crypt, 0x00, sizeof(struct z90crypt));
-	z90crypt.domain_established = 0;
-	z90crypt.len = sizeof(struct z90crypt);
-	z90crypt.max_count = Z90CRYPT_NUM_DEVS;
-	z90crypt.cdx = *cdx_p;
-
-	hdware_blk_p = kzalloc(sizeof(struct hdware_block), GFP_ATOMIC);
-	if (!hdware_blk_p) {
-		PDEBUG("kmalloc for hardware block failed\n");
-		return ENOMEM;
-	}
-	z90crypt.hdware_info = hdware_blk_p;
-
-	return 0;
-}
-
-static inline int
-helper_scan_devices(int cdx_array[16], int *cdx_p, int *correct_cdx_found)
-{
-	enum hdstat hd_stat;
-	int q_depth, dev_type;
-	int indx, chkdom, numdomains;
-
-	q_depth = dev_type = numdomains = 0;
-	for (chkdom = 0; chkdom <= 15; cdx_array[chkdom++] = -1);
-	for (indx = 0; indx < z90crypt.max_count; indx++) {
-		hd_stat = HD_NOT_THERE;
-		numdomains = 0;
-		for (chkdom = 0; chkdom <= 15; chkdom++) {
-			hd_stat = query_online(indx, chkdom, MAX_RESET,
-					       &q_depth, &dev_type);
-			if (hd_stat == HD_TSQ_EXCEPTION) {
-				z90crypt.terminating = 1;
-				PRINTKC("exception taken!\n");
-				break;
-			}
-			if (hd_stat == HD_ONLINE) {
-				cdx_array[numdomains++] = chkdom;
-				if (*cdx_p == chkdom) {
-					*correct_cdx_found  = 1;
-					break;
-				}
-			}
-		}
-		if ((*correct_cdx_found == 1) || (numdomains != 0))
-			break;
-		if (z90crypt.terminating)
-			break;
-	}
-	return numdomains;
-}
-
-static inline int
-probe_crypto_domain(int *cdx_p)
-{
-	int cdx_array[16];
-	char cdx_array_text[53], temp[5];
-	int correct_cdx_found, numdomains;
-
-	correct_cdx_found = 0;
-	numdomains = helper_scan_devices(cdx_array, cdx_p, &correct_cdx_found);
-
-	if (z90crypt.terminating)
-		return TSQ_FATAL_ERROR;
-
-	if (correct_cdx_found)
-		return 0;
-
-	if (numdomains == 0) {
-		PRINTKW("Unable to find crypto domain: No devices found\n");
-		return Z90C_NO_DEVICES;
-	}
-
-	if (numdomains == 1) {
-		if (*cdx_p == -1) {
-			*cdx_p = cdx_array[0];
-			return 0;
-		}
-		PRINTKW("incorrect domain: specified = %d, found = %d\n",
-		       *cdx_p, cdx_array[0]);
-		return Z90C_INCORRECT_DOMAIN;
-	}
-
-	numdomains--;
-	sprintf(cdx_array_text, "%d", cdx_array[numdomains]);
-	while (numdomains) {
-		numdomains--;
-		sprintf(temp, ", %d", cdx_array[numdomains]);
-		strcat(cdx_array_text, temp);
-	}
-
-	PRINTKW("ambiguous domain detected: specified = %d, found array = %s\n",
-		*cdx_p, cdx_array_text);
-	return Z90C_AMBIGUOUS_DOMAIN;
-}
-
-static int
-refresh_z90crypt(int *cdx_p)
-{
-	int i, j, indx, rv;
-	static struct status local_mask;
-	struct device *devPtr;
-	unsigned char oldStat, newStat;
-	int return_unchanged;
-
-	if (z90crypt.len != sizeof(z90crypt))
-		return ENOTINIT;
-	if (z90crypt.terminating)
-		return TSQ_FATAL_ERROR;
-	rv = 0;
-	if (!z90crypt.hdware_info->hdware_mask.st_count &&
-	    !z90crypt.domain_established) {
-		rv = probe_crypto_domain(cdx_p);
-		if (z90crypt.terminating)
-			return TSQ_FATAL_ERROR;
-		if (rv == Z90C_NO_DEVICES)
-			return 0; // try later
-		if (rv)
-			return rv;
-		z90crypt.cdx = *cdx_p;
-		z90crypt.domain_established = 1;
-	}
-	rv = find_crypto_devices(&local_mask);
-	if (rv) {
-		PRINTK("find crypto devices returned %d\n", rv);
-		return rv;
-	}
-	if (!memcmp(&local_mask, &z90crypt.hdware_info->hdware_mask,
-		    sizeof(struct status))) {
-		return_unchanged = 1;
-		for (i = 0; i < Z90CRYPT_NUM_TYPES; i++) {
-			/**
-			 * Check for disabled cards.  If any device is marked
-			 * disabled, destroy it.
-			 */
-			for (j = 0;
-			     j < z90crypt.hdware_info->type_mask[i].st_count;
-			     j++) {
-				indx = z90crypt.hdware_info->type_x_addr[i].
-								device_index[j];
-				devPtr = z90crypt.device_p[indx];
-				if (devPtr && devPtr->disabled) {
-					local_mask.st_mask[indx] = HD_NOT_THERE;
-					return_unchanged = 0;
-				}
-			}
-		}
-		if (return_unchanged == 1)
-			return 0;
-	}
-
-	spin_lock_irq(&queuespinlock);
-	for (i = 0; i < z90crypt.max_count; i++) {
-		oldStat = z90crypt.hdware_info->hdware_mask.st_mask[i];
-		newStat = local_mask.st_mask[i];
-		if ((oldStat == HD_ONLINE) && (newStat != HD_ONLINE))
-			destroy_crypto_device(i);
-		else if ((oldStat != HD_ONLINE) && (newStat == HD_ONLINE)) {
-			rv = create_crypto_device(i);
-			if (rv >= REC_FATAL_ERROR)
-				return rv;
-			if (rv != 0) {
-				local_mask.st_mask[i] = HD_NOT_THERE;
-				local_mask.st_count--;
-			}
-		}
-	}
-	memcpy(z90crypt.hdware_info->hdware_mask.st_mask, local_mask.st_mask,
-	       sizeof(local_mask.st_mask));
-	z90crypt.hdware_info->hdware_mask.st_count = local_mask.st_count;
-	z90crypt.hdware_info->hdware_mask.disabled_count =
-						      local_mask.disabled_count;
-	refresh_index_array(&z90crypt.mask, &z90crypt.overall_device_x);
-	for (i = 0; i < Z90CRYPT_NUM_TYPES; i++)
-		refresh_index_array(&(z90crypt.hdware_info->type_mask[i]),
-				    &(z90crypt.hdware_info->type_x_addr[i]));
-	spin_unlock_irq(&queuespinlock);
-
-	return rv;
-}
-
-static int
-find_crypto_devices(struct status *deviceMask)
-{
-	int i, q_depth, dev_type;
-	enum hdstat hd_stat;
-
-	deviceMask->st_count = 0;
-	deviceMask->disabled_count = 0;
-	deviceMask->user_disabled_count = 0;
-
-	for (i = 0; i < z90crypt.max_count; i++) {
-		hd_stat = query_online(i, z90crypt.cdx, MAX_RESET, &q_depth,
-				       &dev_type);
-		if (hd_stat == HD_TSQ_EXCEPTION) {
-			z90crypt.terminating = 1;
-			PRINTKC("Exception during probe for crypto devices\n");
-			return TSQ_FATAL_ERROR;
-		}
-		deviceMask->st_mask[i] = hd_stat;
-		if (hd_stat == HD_ONLINE) {
-			PDEBUG("Got an online crypto!: %d\n", i);
-			PDEBUG("Got a queue depth of %d\n", q_depth);
-			PDEBUG("Got a device type of %d\n", dev_type);
-			if (q_depth <= 0)
-				return TSQ_FATAL_ERROR;
-			deviceMask->st_count++;
-			z90crypt.q_depth_array[i] = q_depth;
-			z90crypt.dev_type_array[i] = dev_type;
-		}
-	}
-
-	return 0;
-}
-
-static int
-refresh_index_array(struct status *status_str, struct device_x *index_array)
-{
-	int i, count;
-	enum devstat stat;
-
-	i = -1;
-	count = 0;
-	do {
-		stat = status_str->st_mask[++i];
-		if (stat == DEV_ONLINE)
-			index_array->device_index[count++] = i;
-	} while ((i < Z90CRYPT_NUM_DEVS) && (count < status_str->st_count));
-
-	return count;
-}
-
-static int
-create_crypto_device(int index)
-{
-	int rv, devstat, total_size;
-	struct device *dev_ptr;
-	struct status *type_str_p;
-	int deviceType;
-
-	dev_ptr = z90crypt.device_p[index];
-	if (!dev_ptr) {
-		total_size = sizeof(struct device) +
-			     z90crypt.q_depth_array[index] * sizeof(int);
-
-		dev_ptr = kzalloc(total_size, GFP_ATOMIC);
-		if (!dev_ptr) {
-			PRINTK("kmalloc device %d failed\n", index);
-			return ENOMEM;
-		}
-		dev_ptr->dev_resp_p = kmalloc(MAX_RESPONSE_SIZE, GFP_ATOMIC);
-		if (!dev_ptr->dev_resp_p) {
-			kfree(dev_ptr);
-			PRINTK("kmalloc device %d rec buffer failed\n", index);
-			return ENOMEM;
-		}
-		dev_ptr->dev_resp_l = MAX_RESPONSE_SIZE;
-		INIT_LIST_HEAD(&(dev_ptr->dev_caller_list));
-	}
-
-	devstat = reset_device(index, z90crypt.cdx, MAX_RESET);
-	if (devstat == DEV_RSQ_EXCEPTION) {
-		PRINTK("exception during reset device %d\n", index);
-		kfree(dev_ptr->dev_resp_p);
-		kfree(dev_ptr);
-		return RSQ_FATAL_ERROR;
-	}
-	if (devstat == DEV_ONLINE) {
-		dev_ptr->dev_self_x = index;
-		dev_ptr->dev_type = z90crypt.dev_type_array[index];
-		if (dev_ptr->dev_type == NILDEV) {
-			rv = probe_device_type(dev_ptr);
-			if (rv) {
-				PRINTK("rv = %d from probe_device_type %d\n",
-				       rv, index);
-				kfree(dev_ptr->dev_resp_p);
-				kfree(dev_ptr);
-				return rv;
-			}
-		}
-		if (dev_ptr->dev_type == PCIXCC_UNK) {
-			rv = probe_PCIXCC_type(dev_ptr);
-			if (rv) {
-				PRINTK("rv = %d from probe_PCIXCC_type %d\n",
-				       rv, index);
-				kfree(dev_ptr->dev_resp_p);
-				kfree(dev_ptr);
-				return rv;
-			}
-		}
-		deviceType = dev_ptr->dev_type;
-		z90crypt.dev_type_array[index] = deviceType;
-		if (deviceType == PCICA)
-			z90crypt.hdware_info->device_type_array[index] = 1;
-		else if (deviceType == PCICC)
-			z90crypt.hdware_info->device_type_array[index] = 2;
-		else if (deviceType == PCIXCC_MCL2)
-			z90crypt.hdware_info->device_type_array[index] = 3;
-		else if (deviceType == PCIXCC_MCL3)
-			z90crypt.hdware_info->device_type_array[index] = 4;
-		else if (deviceType == CEX2C)
-			z90crypt.hdware_info->device_type_array[index] = 5;
-		else if (deviceType == CEX2A)
-			z90crypt.hdware_info->device_type_array[index] = 6;
-		else // No idea how this would happen.
-			z90crypt.hdware_info->device_type_array[index] = -1;
-	}
-
-	/**
-	 * 'q_depth' returned by the hardware is one less than
-	 * the actual depth
-	 */
-	dev_ptr->dev_q_depth = z90crypt.q_depth_array[index];
-	dev_ptr->dev_type = z90crypt.dev_type_array[index];
-	dev_ptr->dev_stat = devstat;
-	dev_ptr->disabled = 0;
-	z90crypt.device_p[index] = dev_ptr;
-
-	if (devstat == DEV_ONLINE) {
-		if (z90crypt.mask.st_mask[index] != DEV_ONLINE) {
-			z90crypt.mask.st_mask[index] = DEV_ONLINE;
-			z90crypt.mask.st_count++;
-		}
-		deviceType = dev_ptr->dev_type;
-		type_str_p = &z90crypt.hdware_info->type_mask[deviceType];
-		if (type_str_p->st_mask[index] != DEV_ONLINE) {
-			type_str_p->st_mask[index] = DEV_ONLINE;
-			type_str_p->st_count++;
-		}
-	}
-
-	return 0;
-}
-
-static int
-destroy_crypto_device(int index)
-{
-	struct device *dev_ptr;
-	int t, disabledFlag;
-
-	dev_ptr = z90crypt.device_p[index];
-
-	/* remember device type; get rid of device struct */
-	if (dev_ptr) {
-		disabledFlag = dev_ptr->disabled;
-		t = dev_ptr->dev_type;
-		kfree(dev_ptr->dev_resp_p);
-		kfree(dev_ptr);
-	} else {
-		disabledFlag = 0;
-		t = -1;
-	}
-	z90crypt.device_p[index] = 0;
-
-	/* if the type is valid, remove the device from the type_mask */
-	if ((t != -1) && z90crypt.hdware_info->type_mask[t].st_mask[index]) {
-		  z90crypt.hdware_info->type_mask[t].st_mask[index] = 0x00;
-		  z90crypt.hdware_info->type_mask[t].st_count--;
-		  if (disabledFlag == 1)
-			z90crypt.hdware_info->type_mask[t].disabled_count--;
-	}
-	if (z90crypt.mask.st_mask[index] != DEV_GONE) {
-		z90crypt.mask.st_mask[index] = DEV_GONE;
-		z90crypt.mask.st_count--;
-	}
-	z90crypt.hdware_info->device_type_array[index] = 0;
-
-	return 0;
-}
-
-static void
-destroy_z90crypt(void)
-{
-	int i;
-
-	for (i = 0; i < z90crypt.max_count; i++)
-		if (z90crypt.device_p[i])
-			destroy_crypto_device(i);
-	kfree(z90crypt.hdware_info);
-	memset((void *)&z90crypt, 0, sizeof(z90crypt));
-}
-
-static unsigned char static_testmsg[384] = {
-0x00,0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x00,0x06,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x43,0x43,
-0x41,0x2d,0x41,0x50,0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01,0x00,0x00,0x00,0x00,
-0x50,0x4b,0x00,0x00,0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x54,0x32,
-0x01,0x00,0xa0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0xb8,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x49,0x43,0x53,0x46,
-0x20,0x20,0x20,0x20,0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53,0x2d,0x31,0x2e,0x32,
-0x37,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,
-0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
-0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,
-0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77,0x88,0x1e,0x00,0x00,
-0x57,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00,0x03,0x02,0x00,0x00,
-0x40,0x01,0x00,0x01,0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c,0xf6,0xd2,0x7b,0x58,
-0x4b,0xf9,0x28,0x68,0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66,0x63,0x42,0xef,0xf8,
-0xfd,0xa4,0xf8,0xb0,0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8,0x53,0x8c,0x6f,0x4e,
-0x72,0x8f,0x6c,0x04,0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57,0xf7,0xdd,0xfd,0x4f,
-0x11,0x36,0x95,0x5d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
-};
-
-static int
-probe_device_type(struct device *devPtr)
-{
-	int rv, dv, i, index, length;
-	unsigned char psmid[8];
-	static unsigned char loc_testmsg[sizeof(static_testmsg)];
-
-	index = devPtr->dev_self_x;
-	rv = 0;
-	do {
-		memcpy(loc_testmsg, static_testmsg, sizeof(static_testmsg));
-		length = sizeof(static_testmsg) - 24;
-		/* the -24 allows for the header */
-		dv = send_to_AP(index, z90crypt.cdx, length, loc_testmsg);
-		if (dv) {
-			PDEBUG("dv returned by send during probe: %d\n", dv);
-			if (dv == DEV_SEN_EXCEPTION) {
-				rv = SEN_FATAL_ERROR;
-				PRINTKC("exception in send to AP %d\n", index);
-				break;
-			}
-			PDEBUG("return value from send_to_AP: %d\n", rv);
-			switch (dv) {
-			case DEV_GONE:
-				PDEBUG("dev %d not available\n", index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_NO_WORK:
-				rv = SEN_FATAL_ERROR;
-				break;
-			case DEV_BAD_MESSAGE:
-				rv = SEN_USER_ERROR;
-				break;
-			case DEV_QUEUE_FULL:
-				rv = SEN_QUEUE_FULL;
-				break;
-			default:
-				PRINTK("unknown dv=%d for dev %d\n", dv, index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			}
-		}
-
-		if (rv)
-			break;
-
-		for (i = 0; i < 6; i++) {
-			mdelay(300);
-			dv = receive_from_AP(index, z90crypt.cdx,
-					     devPtr->dev_resp_l,
-					     devPtr->dev_resp_p, psmid);
-			PDEBUG("dv returned by DQ = %d\n", dv);
-			if (dv == DEV_REC_EXCEPTION) {
-				rv = REC_FATAL_ERROR;
-				PRINTKC("exception in dequeue %d\n",
-					index);
-				break;
-			}
-			switch (dv) {
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = REC_EMPTY;
-				break;
-			case DEV_NO_WORK:
-				rv = REC_NO_WORK;
-				break;
-			case DEV_BAD_MESSAGE:
-			case DEV_GONE:
-			default:
-				rv = REC_NO_RESPONSE;
-				break;
-			}
-			if ((rv != 0) && (rv != REC_NO_WORK))
-				break;
-			if (rv == 0)
-				break;
-		}
-		if (rv)
-			break;
-		rv = (devPtr->dev_resp_p[0] == 0x00) &&
-		     (devPtr->dev_resp_p[1] == 0x86);
-		if (rv)
-			devPtr->dev_type = PCICC;
-		else
-			devPtr->dev_type = PCICA;
-		rv = 0;
-	} while (0);
-	/* In a general error case, the card is not marked online */
-	return rv;
-}
-
-static unsigned char MCL3_testmsg[] = {
-0x00,0x00,0x00,0x00,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,
-0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x00,0x00,0x00,0x01,0xC4,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xDC,0x02,0x00,0x00,0x00,0x54,0x32,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xE8,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x0A,0x4D,0x52,0x50,0x20,0x20,0x20,0x20,0x20,
-0x00,0x42,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,
-0x0E,0x0F,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xAA,0xBB,0xCC,0xDD,
-0xEE,0xFF,0xFF,0xEE,0xDD,0xCC,0xBB,0xAA,0x99,0x88,0x77,0x66,0x55,0x44,0x33,0x22,
-0x11,0x00,0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,0xFE,0xDC,0xBA,0x98,0x76,0x54,
-0x32,0x10,0x00,0x9A,0x00,0x98,0x00,0x00,0x1E,0x00,0x00,0x94,0x00,0x00,0x00,0x00,
-0x04,0x00,0x00,0x8C,0x00,0x00,0x00,0x40,0x02,0x00,0x00,0x40,0xBA,0xE8,0x23,0x3C,
-0x75,0xF3,0x91,0x61,0xD6,0x73,0x39,0xCF,0x7B,0x6D,0x8E,0x61,0x97,0x63,0x9E,0xD9,
-0x60,0x55,0xD6,0xC7,0xEF,0xF8,0x1E,0x63,0x95,0x17,0xCC,0x28,0x45,0x60,0x11,0xC5,
-0xC4,0x4E,0x66,0xC6,0xE6,0xC3,0xDE,0x8A,0x19,0x30,0xCF,0x0E,0xD7,0xAA,0xDB,0x01,
-0xD8,0x00,0xBB,0x8F,0x39,0x9F,0x64,0x28,0xF5,0x7A,0x77,0x49,0xCC,0x6B,0xA3,0x91,
-0x97,0x70,0xE7,0x60,0x1E,0x39,0xE1,0xE5,0x33,0xE1,0x15,0x63,0x69,0x08,0x80,0x4C,
-0x67,0xC4,0x41,0x8F,0x48,0xDF,0x26,0x98,0xF1,0xD5,0x8D,0x88,0xD9,0x6A,0xA4,0x96,
-0xC5,0x84,0xD9,0x30,0x49,0x67,0x7D,0x19,0xB1,0xB3,0x45,0x4D,0xB2,0x53,0x9A,0x47,
-0x3C,0x7C,0x55,0xBF,0xCC,0x85,0x00,0x36,0xF1,0x3D,0x93,0x53
-};
-
-static int
-probe_PCIXCC_type(struct device *devPtr)
-{
-	int rv, dv, i, index, length;
-	unsigned char psmid[8];
-	static unsigned char loc_testmsg[548];
-	struct CPRBX *cprbx_p;
-
-	index = devPtr->dev_self_x;
-	rv = 0;
-	do {
-		memcpy(loc_testmsg, MCL3_testmsg, sizeof(MCL3_testmsg));
-		length = sizeof(MCL3_testmsg) - 0x0C;
-		dv = send_to_AP(index, z90crypt.cdx, length, loc_testmsg);
-		if (dv) {
-			PDEBUG("dv returned = %d\n", dv);
-			if (dv == DEV_SEN_EXCEPTION) {
-				rv = SEN_FATAL_ERROR;
-				PRINTKC("exception in send to AP %d\n", index);
-				break;
-			}
-			PDEBUG("return value from send_to_AP: %d\n", rv);
-			switch (dv) {
-			case DEV_GONE:
-				PDEBUG("dev %d not available\n", index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_NO_WORK:
-				rv = SEN_FATAL_ERROR;
-				break;
-			case DEV_BAD_MESSAGE:
-				rv = SEN_USER_ERROR;
-				break;
-			case DEV_QUEUE_FULL:
-				rv = SEN_QUEUE_FULL;
-				break;
-			default:
-				PRINTK("unknown dv=%d for dev %d\n", dv, index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			}
-		}
-
-		if (rv)
-			break;
-
-		for (i = 0; i < 6; i++) {
-			mdelay(300);
-			dv = receive_from_AP(index, z90crypt.cdx,
-					     devPtr->dev_resp_l,
-					     devPtr->dev_resp_p, psmid);
-			PDEBUG("dv returned by DQ = %d\n", dv);
-			if (dv == DEV_REC_EXCEPTION) {
-				rv = REC_FATAL_ERROR;
-				PRINTKC("exception in dequeue %d\n",
-					index);
-				break;
-			}
-			switch (dv) {
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = REC_EMPTY;
-				break;
-			case DEV_NO_WORK:
-				rv = REC_NO_WORK;
-				break;
-			case DEV_BAD_MESSAGE:
-			case DEV_GONE:
-			default:
-				rv = REC_NO_RESPONSE;
-				break;
-			}
-			if ((rv != 0) && (rv != REC_NO_WORK))
-				break;
-			if (rv == 0)
-				break;
-		}
-		if (rv)
-			break;
-		cprbx_p = (struct CPRBX *) (devPtr->dev_resp_p + 48);
-		if ((cprbx_p->ccp_rtcode == 8) && (cprbx_p->ccp_rscode == 33)) {
-			devPtr->dev_type = PCIXCC_MCL2;
-			PDEBUG("device %d is MCL2\n", index);
-		} else {
-			devPtr->dev_type = PCIXCC_MCL3;
-			PDEBUG("device %d is MCL3\n", index);
-		}
-	} while (0);
-	/* In a general error case, the card is not marked online */
-	return rv;
-}
-
-module_init(z90crypt_init_module);
-module_exit(z90crypt_cleanup_module);
diff --git a/include/asm-s390/z90crypt.h b/include/asm-s390/z90crypt.h
deleted file mode 100644
index 31a2439b07bd3..0000000000000
--- a/include/asm-s390/z90crypt.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- *  include/asm-s390/z90crypt.h
- *
- *  z90crypt 1.3.3 (user-visible header)
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __ASM_S390_Z90CRYPT_H
-#define __ASM_S390_Z90CRYPT_H
-#include <linux/ioctl.h>
-
-#define z90crypt_VERSION 1
-#define z90crypt_RELEASE 3	// 2 = PCIXCC, 3 = rewrite for coding standards
-#define z90crypt_VARIANT 3	// 3 = CEX2A support
-
-/**
- * struct ica_rsa_modexpo
- *
- * Requirements:
- * - outputdatalength is at least as large as inputdatalength.
- * - All key parts are right justified in their fields, padded on
- *   the left with zeroes.
- * - length(b_key) = inputdatalength
- * - length(n_modulus) = inputdatalength
- */
-struct ica_rsa_modexpo {
-	char __user *	inputdata;
-	unsigned int	inputdatalength;
-	char __user *	outputdata;
-	unsigned int	outputdatalength;
-	char __user *	b_key;
-	char __user *	n_modulus;
-};
-
-/**
- * struct ica_rsa_modexpo_crt
- *
- * Requirements:
- * - inputdatalength is even.
- * - outputdatalength is at least as large as inputdatalength.
- * - All key parts are right justified in their fields, padded on
- *   the left with zeroes.
- * - length(bp_key)	= inputdatalength/2 + 8
- * - length(bq_key)	= inputdatalength/2
- * - length(np_key)	= inputdatalength/2 + 8
- * - length(nq_key)	= inputdatalength/2
- * - length(u_mult_inv) = inputdatalength/2 + 8
- */
-struct ica_rsa_modexpo_crt {
-	char __user *	inputdata;
-	unsigned int	inputdatalength;
-	char __user *	outputdata;
-	unsigned int	outputdatalength;
-	char __user *	bp_key;
-	char __user *	bq_key;
-	char __user *	np_prime;
-	char __user *	nq_prime;
-	char __user *	u_mult_inv;
-};
-
-#define Z90_IOCTL_MAGIC 'z'  // NOTE:  Need to allocate from linux folks
-
-/**
- * Interface notes:
- *
- * The ioctl()s which are implemented (along with relevant details)
- * are:
- *
- *   ICARSAMODEXPO
- *     Perform an RSA operation using a Modulus-Exponent pair
- *     This takes an ica_rsa_modexpo struct as its arg.
- *
- *     NOTE: please refer to the comments preceding this structure
- *           for the implementation details for the contents of the
- *           block
- *
- *   ICARSACRT
- *     Perform an RSA operation using a Chinese-Remainder Theorem key
- *     This takes an ica_rsa_modexpo_crt struct as its arg.
- *
- *     NOTE: please refer to the comments preceding this structure
- *           for the implementation details for the contents of the
- *           block
- *
- *   Z90STAT_TOTALCOUNT
- *     Return an integer count of all device types together.
- *
- *   Z90STAT_PCICACOUNT
- *     Return an integer count of all PCICAs.
- *
- *   Z90STAT_PCICCCOUNT
- *     Return an integer count of all PCICCs.
- *
- *   Z90STAT_PCIXCCMCL2COUNT
- *     Return an integer count of all MCL2 PCIXCCs.
- *
- *   Z90STAT_PCIXCCMCL3COUNT
- *     Return an integer count of all MCL3 PCIXCCs.
- *
- *   Z90STAT_CEX2CCOUNT
- *     Return an integer count of all CEX2Cs.
- *
- *   Z90STAT_CEX2ACOUNT
- *     Return an integer count of all CEX2As.
- *
- *   Z90STAT_REQUESTQ_COUNT
- *     Return an integer count of the number of entries waiting to be
- *     sent to a device.
- *
- *   Z90STAT_PENDINGQ_COUNT
- *     Return an integer count of the number of entries sent to a
- *     device awaiting the reply.
- *
- *   Z90STAT_TOTALOPEN_COUNT
- *     Return an integer count of the number of open file handles.
- *
- *   Z90STAT_DOMAIN_INDEX
- *     Return the integer value of the Cryptographic Domain.
- *
- *   Z90STAT_STATUS_MASK
- *     Return an 64 element array of unsigned chars for the status of
- *     all devices.
- *       0x01: PCICA
- *       0x02: PCICC
- *       0x03: PCIXCC_MCL2
- *       0x04: PCIXCC_MCL3
- *       0x05: CEX2C
- *       0x06: CEX2A
- *       0x0d: device is disabled via the proc filesystem
- *
- *   Z90STAT_QDEPTH_MASK
- *     Return an 64 element array of unsigned chars for the queue
- *     depth of all devices.
- *
- *   Z90STAT_PERDEV_REQCNT
- *     Return an 64 element array of unsigned integers for the number
- *     of successfully completed requests per device since the device
- *     was detected and made available.
- *
- *   ICAZ90STATUS (deprecated)
- *     Return some device driver status in a ica_z90_status struct
- *     This takes an ica_z90_status struct as its arg.
- *
- *     NOTE: this ioctl() is deprecated, and has been replaced with
- *           single ioctl()s for each type of status being requested
- *
- *   Z90STAT_PCIXCCCOUNT (deprecated)
- *     Return an integer count of all PCIXCCs (MCL2 + MCL3).
- *     This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
- *     MCL2 PCIXCCs.
- *
- *   Z90QUIESCE (not recommended)
- *     Quiesce the driver.  This is intended to stop all new
- *     requests from being processed.  Its use is NOT recommended,
- *     except in circumstances where there is no other way to stop
- *     callers from accessing the driver.  Its original use was to
- *     allow the driver to be "drained" of work in preparation for
- *     a system shutdown.
- *
- *     NOTE: once issued, this ban on new work cannot be undone
- *           except by unloading and reloading the driver.
- */
-
-/**
- * Supported ioctl calls
- */
-#define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, Z90_IOCTL_MAGIC, 0x05, 0)
-#define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, Z90_IOCTL_MAGIC, 0x06, 0)
-
-/* DEPRECATED status calls (bound for removal at some point) */
-#define ICAZ90STATUS	_IOR(Z90_IOCTL_MAGIC, 0x10, struct ica_z90_status)
-#define Z90STAT_PCIXCCCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x43, int)
-
-/* unrelated to ICA callers */
-#define Z90QUIESCE	_IO(Z90_IOCTL_MAGIC, 0x11)
-
-/* New status calls */
-#define Z90STAT_TOTALCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x40, int)
-#define Z90STAT_PCICACOUNT	_IOR(Z90_IOCTL_MAGIC, 0x41, int)
-#define Z90STAT_PCICCCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x42, int)
-#define Z90STAT_PCIXCCMCL2COUNT	_IOR(Z90_IOCTL_MAGIC, 0x4b, int)
-#define Z90STAT_PCIXCCMCL3COUNT	_IOR(Z90_IOCTL_MAGIC, 0x4c, int)
-#define Z90STAT_CEX2CCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x4d, int)
-#define Z90STAT_CEX2ACOUNT	_IOR(Z90_IOCTL_MAGIC, 0x4e, int)
-#define Z90STAT_REQUESTQ_COUNT	_IOR(Z90_IOCTL_MAGIC, 0x44, int)
-#define Z90STAT_PENDINGQ_COUNT	_IOR(Z90_IOCTL_MAGIC, 0x45, int)
-#define Z90STAT_TOTALOPEN_COUNT _IOR(Z90_IOCTL_MAGIC, 0x46, int)
-#define Z90STAT_DOMAIN_INDEX	_IOR(Z90_IOCTL_MAGIC, 0x47, int)
-#define Z90STAT_STATUS_MASK	_IOR(Z90_IOCTL_MAGIC, 0x48, char[64])
-#define Z90STAT_QDEPTH_MASK	_IOR(Z90_IOCTL_MAGIC, 0x49, char[64])
-#define Z90STAT_PERDEV_REQCNT	_IOR(Z90_IOCTL_MAGIC, 0x4a, int[64])
-
-#endif /* __ASM_S390_Z90CRYPT_H */
-- 
GitLab


From 1534c3820c26aca4e2567f97b8add8bea40e7e2b Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:25 +0200
Subject: [PATCH 0222/1063] [S390] zcrypt adjunct processor bus.

Add a bus for the adjunct processor interface. Up to 64 devices can
be connected to the ap bus interface, each device with 16 domains. That
makes 1024 message queues. The interface is asynchronous, the answer
to a message sent to a queue needs to be received at some later point
in time. Unfortunately the interface does not provide interrupts when
a message reply is pending. So the ap bus needs to implement some
fancy polling, each active queue is polled once per 1/HZ second or
continuously if an idle cpu exists and the poll thread is active
(see poll_thread parameter).

The ap bus uses the sysfs path /sys/bus/ap and has two bus attributes,
ap_domain and config_time. The ap_domain selects one of the 16 domains
to be used for this system. This limits the maximum number of ap devices
to 64. The config_time attribute contains the number of seconds between
two ap bus scans to find new devices.

The ap bus uses the modalias entries of the form "ap:tN" to autoload
the ap driver for hardware type N. Currently known types are:
3 - PCICC, 4 - PCICA, 5 - PCIXCC, 6 - CEX2A and 7 - CEX2C.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Ralph Wuerthner <rwuerthn@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/ap_bus.c    | 1221 +++++++++++++++++++++++++++++++
 drivers/s390/crypto/ap_bus.h    |  158 ++++
 include/linux/mod_devicetable.h |   11 +
 scripts/mod/file2alias.c        |   12 +
 4 files changed, 1402 insertions(+)
 create mode 100644 drivers/s390/crypto/ap_bus.c
 create mode 100644 drivers/s390/crypto/ap_bus.h

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
new file mode 100644
index 0000000000000..6ed0985c0c91c
--- /dev/null
+++ b/drivers/s390/crypto/ap_bus.c
@@ -0,0 +1,1221 @@
+/*
+ * linux/drivers/s390/crypto/ap_bus.c
+ *
+ * Copyright (C) 2006 IBM Corporation
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * Adjunct processor bus.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <asm/s390_rdev.h>
+
+#include "ap_bus.h"
+
+/* Some prototypes. */
+static void ap_scan_bus(void *);
+static void ap_poll_all(unsigned long);
+static void ap_poll_timeout(unsigned long);
+static int ap_poll_thread_start(void);
+static void ap_poll_thread_stop(void);
+
+/**
+ * Module description.
+ */
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("Adjunct Processor Bus driver, "
+		   "Copyright 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+
+/**
+ * Module parameter
+ */
+int ap_domain_index = -1;	/* Adjunct Processor Domain Index */
+module_param_named(domain, ap_domain_index, int, 0000);
+MODULE_PARM_DESC(domain, "domain index for ap devices");
+EXPORT_SYMBOL(ap_domain_index);
+
+static int ap_thread_flag = 1;
+module_param_named(poll_thread, ap_thread_flag, int, 0000);
+MODULE_PARM_DESC(poll_thread, "Turn on/off poll thread, default is 1 (on).");
+
+static struct device *ap_root_device = NULL;
+
+/**
+ * Workqueue & timer for bus rescan.
+ */
+static struct workqueue_struct *ap_work_queue;
+static struct timer_list ap_config_timer;
+static int ap_config_time = AP_CONFIG_TIME;
+static DECLARE_WORK(ap_config_work, ap_scan_bus, NULL);
+
+/**
+ * Tasklet & timer for AP request polling.
+ */
+static struct timer_list ap_poll_timer = TIMER_INITIALIZER(ap_poll_timeout,0,0);
+static DECLARE_TASKLET(ap_tasklet, ap_poll_all, 0);
+static atomic_t ap_poll_requests = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(ap_poll_wait);
+static struct task_struct *ap_poll_kthread = NULL;
+static DEFINE_MUTEX(ap_poll_thread_mutex);
+
+/**
+ * Test if ap instructions are available.
+ *
+ * Returns 0 if the ap instructions are installed.
+ */
+static inline int ap_instructions_available(void)
+{
+	register unsigned long reg0 asm ("0") = AP_MKQID(0,0);
+	register unsigned long reg1 asm ("1") = -ENODEV;
+	register unsigned long reg2 asm ("2") = 0UL;
+
+	asm volatile(
+		"   .long 0xb2af0000\n"		/* PQAP(TAPQ) */
+		"0: la    %1,0\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: "+d" (reg0), "+d" (reg1), "+d" (reg2) : : "cc" );
+	return reg1;
+}
+
+/**
+ * Test adjunct processor queue.
+ * @qid: the ap queue number
+ * @queue_depth: pointer to queue depth value
+ * @device_type: pointer to device type value
+ *
+ * Returns ap queue status structure.
+ */
+static inline struct ap_queue_status
+ap_test_queue(ap_qid_t qid, int *queue_depth, int *device_type)
+{
+	register unsigned long reg0 asm ("0") = qid;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm ("2") = 0UL;
+
+	asm volatile(".long 0xb2af0000"		/* PQAP(TAPQ) */
+		     : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc");
+	*device_type = (int) (reg2 >> 24);
+	*queue_depth = (int) (reg2 & 0xff);
+	return reg1;
+}
+
+/**
+ * Reset adjunct processor queue.
+ * @qid: the ap queue number
+ *
+ * Returns ap queue status structure.
+ */
+static inline struct ap_queue_status ap_reset_queue(ap_qid_t qid)
+{
+	register unsigned long reg0 asm ("0") = qid | 0x01000000UL;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm ("2") = 0UL;
+
+	asm volatile(
+		".long 0xb2af0000"		/* PQAP(RAPQ) */
+		: "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc");
+	return reg1;
+}
+
+/**
+ * Send message to adjunct processor queue.
+ * @qid: the ap queue number
+ * @psmid: the program supplied message identifier
+ * @msg: the message text
+ * @length: the message length
+ *
+ * Returns ap queue status structure.
+ *
+ * Condition code 1 on NQAP can't happen because the L bit is 1.
+ *
+ * Condition code 2 on NQAP also means the send is incomplete,
+ * because a segment boundary was reached. The NQAP is repeated.
+ */
+static inline struct ap_queue_status
+__ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length)
+{
+	typedef struct { char _[length]; } msgblock;
+	register unsigned long reg0 asm ("0") = qid | 0x40000000UL;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm ("2") = (unsigned long) msg;
+	register unsigned long reg3 asm ("3") = (unsigned long) length;
+	register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32);
+	register unsigned long reg5 asm ("5") = (unsigned int) psmid;
+
+	asm volatile (
+		"0: .long 0xb2ad0042\n"		/* NQAP */
+		"   brc   2,0b"
+		: "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3)
+		: "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg)
+		: "cc" );
+	return reg1;
+}
+
+int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length)
+{
+	struct ap_queue_status status;
+
+	status = __ap_send(qid, psmid, msg, length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		return 0;
+	case AP_RESPONSE_Q_FULL:
+		return -EBUSY;
+	default:	/* Device is gone. */
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL(ap_send);
+
+/*
+ * Receive message from adjunct processor queue.
+ * @qid: the ap queue number
+ * @psmid: pointer to program supplied message identifier
+ * @msg: the message text
+ * @length: the message length
+ *
+ * Returns ap queue status structure.
+ *
+ * Condition code 1 on DQAP means the receive has taken place
+ * but only partially.	The response is incomplete, hence the
+ * DQAP is repeated.
+ *
+ * Condition code 2 on DQAP also means the receive is incomplete,
+ * this time because a segment boundary was reached. Again, the
+ * DQAP is repeated.
+ *
+ * Note that gpr2 is used by the DQAP instruction to keep track of
+ * any 'residual' length, in case the instruction gets interrupted.
+ * Hence it gets zeroed before the instruction.
+ */
+static inline struct ap_queue_status
+__ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
+{
+	typedef struct { char _[length]; } msgblock;
+	register unsigned long reg0 asm("0") = qid | 0x80000000UL;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm("2") = 0UL;
+	register unsigned long reg4 asm("4") = (unsigned long) msg;
+	register unsigned long reg5 asm("5") = (unsigned long) length;
+	register unsigned long reg6 asm("6") = 0UL;
+	register unsigned long reg7 asm("7") = 0UL;
+
+
+	asm volatile(
+		"0: .long 0xb2ae0064\n"
+		"   brc   6,0b\n"
+		: "+d" (reg0), "=d" (reg1), "+d" (reg2),
+		"+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7),
+		"=m" (*(msgblock *) msg) : : "cc" );
+	*psmid = (((unsigned long long) reg6) << 32) + reg7;
+	return reg1;
+}
+
+int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
+{
+	struct ap_queue_status status;
+
+	status = __ap_recv(qid, psmid, msg, length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		return 0;
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (status.queue_empty)
+			return -ENOENT;
+		return -EBUSY;
+	default:
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL(ap_recv);
+
+/**
+ * Check if an AP queue is available. The test is repeated up to
+ * AP_MAX_RESET times while the queue reports busy or reset in progress.
+ * @qid: the ap queue number
+ * @queue_depth: set to the reported depth + 1 on success
+ * @device_type: set to the reported device type on success
+ */
+static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type)
+{
+	struct ap_queue_status status;
+	int t_depth, t_device_type, rc, i;
+
+	rc = -EBUSY;	/* stays -EBUSY as long as the queue should be retried */
+	for (i = 0; i < AP_MAX_RESET; i++) {
+		status = ap_test_queue(qid, &t_depth, &t_device_type);
+		switch (status.response_code) {
+		case AP_RESPONSE_NORMAL:
+			*queue_depth = t_depth + 1;
+			*device_type = t_device_type;
+			rc = 0;
+			break;
+		case AP_RESPONSE_Q_NOT_AVAIL:
+			rc = -ENODEV;
+			break;
+		case AP_RESPONSE_RESET_IN_PROGRESS:
+			break;	/* retry */
+		case AP_RESPONSE_DECONFIGURED:
+			rc = -ENODEV;
+			break;
+		case AP_RESPONSE_CHECKSTOPPED:
+			rc = -ENODEV;
+			break;
+		case AP_RESPONSE_BUSY:
+			break;	/* retry */
+		default:
+			BUG();	/* any other response code is treated as fatal */
+		}
+		if (rc != -EBUSY)
+			break;
+		if (i < AP_MAX_RESET - 1)
+			udelay(5);	/* no delay after the last attempt */
+	}
+	return rc;	/* 0, -ENODEV, or -EBUSY if still busy after all attempts */
+}
+
+/**
+ * Reset an AP queue and test it (up to AP_MAX_RESET times) until it is empty.
+ * @qid: the ap queue number. Returns 0 on success, -ENODEV otherwise.
+ */
+static int ap_init_queue(ap_qid_t qid)
+{
+	struct ap_queue_status status;
+	int rc, dummy, i;
+
+	rc = -ENODEV;
+	status = ap_reset_queue(qid);
+	for (i = 0; i < AP_MAX_RESET; i++) {
+		switch (status.response_code) {
+		case AP_RESPONSE_NORMAL:
+			if (status.queue_empty)
+				rc = 0;	/* done; otherwise keep testing */
+			break;
+		case AP_RESPONSE_Q_NOT_AVAIL:
+		case AP_RESPONSE_DECONFIGURED:
+		case AP_RESPONSE_CHECKSTOPPED:
+			i = AP_MAX_RESET;	/* return with -ENODEV */
+			break;
+		case AP_RESPONSE_RESET_IN_PROGRESS:
+		case AP_RESPONSE_BUSY:
+		default:
+			break;	/* test the queue again */
+		}
+		if (rc != -ENODEV)
+			break;
+		if (i < AP_MAX_RESET - 1) {
+			udelay(5);
+			status = ap_test_queue(qid, &dummy, &dummy); /* depth and type are not used here */
+		}
+	}
+	return rc;
+}
+
+/**
+ * AP device related attributes (all declared read-only, mode 0444).
+ */
+static ssize_t ap_hwtype_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->device_type); /* decimal device type */
+}
+static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL);
+
+static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->queue_depth); /* decimal queue depth */
+}
+static DEVICE_ATTR(depth, 0444, ap_depth_show, NULL);
+
+static ssize_t ap_request_count_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	int rc;
+
+	spin_lock_bh(&ap_dev->lock);	/* the counter is read under the device lock */
+	rc = snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->total_request_count);
+	spin_unlock_bh(&ap_dev->lock);
+	return rc;
+}
+
+static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL);
+
+static ssize_t ap_modalias_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type); /* NOTE(review): no trailing newline, unlike the other attributes - confirm intended */
+}
+
+static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL);
+
+static struct attribute *ap_dev_attrs[] = {
+	&dev_attr_hwtype.attr,
+	&dev_attr_depth.attr,
+	&dev_attr_request_count.attr,
+	&dev_attr_modalias.attr,
+	NULL	/* terminates the list */
+};
+static struct attribute_group ap_dev_attr_group = {
+	.attrs = ap_dev_attrs	/* the four per-device attributes defined above */
+};
+
+/**
+ * AP bus driver registration/unregistration.
+ */
+static int ap_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	struct ap_driver *ap_drv = to_ap_drv(drv);
+	struct ap_device_id *id;
+
+	/**
+	 * Compare device type of the device with the list of
+	 * supported types of the device_driver.
+	 */
+	for (id = ap_drv->ids; id->match_flags; id++) { /* list ends at match_flags == 0 */
+		if ((id->match_flags & AP_DEVICE_ID_MATCH_DEVICE_TYPE) &&
+		    (id->dev_type != ap_dev->device_type))
+			continue;	/* type match requested but type differs */
+		return 1;
+	}
+	return 0;	/* no entry matched */
+}
+
+/**
+ * uevent function for AP devices. It sets up the single variable DEV_TYPE.
+ * Returns 0, -ENODEV without a device, -ENOMEM if the buffer is too small.
+ */
+static int ap_uevent (struct device *dev, char **envp, int num_envp,
+		       char *buffer, int buffer_size)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	int length;
+
+	if (!ap_dev)
+		return -ENODEV;
+
+	/* Set up DEV_TYPE; snprintf reports the untruncated length. */
+	envp[0] = buffer;
+	length = snprintf(buffer, buffer_size, "DEV_TYPE=%04X",
+			  ap_dev->device_type);
+	if (buffer_size - length <= 0)	/* output (plus NUL) did not fit */
+		return -ENOMEM;
+	envp[1] = NULL;
+	return 0;
+}
+
+static struct bus_type ap_bus_type = {
+	.name = "ap",
+	.match = &ap_bus_match,	/* matches drivers by device type */
+	.uevent = &ap_uevent,	/* exports DEV_TYPE */
+};
+
+static int ap_device_probe(struct device *dev)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	struct ap_driver *ap_drv = to_ap_drv(dev->driver);
+	int rc;
+
+	ap_dev->drv = ap_drv;
+	rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV; /* a missing probe hook counts as failure */
+	if (rc)
+		ap_dev->unregistered = 1;	/* failed probe: mark the device as unregistered */
+	return rc;
+}
+
+/**
+ * Flush all requests from the request/pending queue of an AP device.
+ * @ap_dev: pointer to the AP device.
+ */
+static inline void __ap_flush_queue(struct ap_device *ap_dev)
+{
+	struct ap_message *ap_msg, *next;
+
+	list_for_each_entry_safe(ap_msg, next, &ap_dev->pendingq, list) {
+		list_del_init(&ap_msg->list);
+		ap_dev->pendingq_count--;
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); /* hand back with an error reply */
+	}
+	list_for_each_entry_safe(ap_msg, next, &ap_dev->requestq, list) {
+		list_del_init(&ap_msg->list);
+		ap_dev->requestq_count--;
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); /* hand back with an error reply */
+	}
+}
+
+void ap_flush_queue(struct ap_device *ap_dev)
+{
+	spin_lock_bh(&ap_dev->lock);	/* locked wrapper around __ap_flush_queue() */
+	__ap_flush_queue(ap_dev);
+	spin_unlock_bh(&ap_dev->lock);
+}
+EXPORT_SYMBOL(ap_flush_queue);
+
+static int ap_device_remove(struct device *dev)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	struct ap_driver *ap_drv = ap_dev->drv;
+
+	spin_lock_bh(&ap_dev->lock);
+	__ap_flush_queue(ap_dev);
+	/**
+	 * set ->unregistered to 1 while holding the lock. This prevents
+	 * new messages from being put on the queue from now on.
+	 */
+	ap_dev->unregistered = 1;
+	spin_unlock_bh(&ap_dev->lock);
+	if (ap_drv->remove)	/* the remove hook is optional and runs without the lock */
+		ap_drv->remove(ap_dev);
+	return 0;
+}
+
+int ap_driver_register(struct ap_driver *ap_drv, struct module *owner,
+		       char *name)
+{
+	struct device_driver *drv = &ap_drv->driver;
+
+	drv->bus = &ap_bus_type;
+	drv->probe = ap_device_probe;	/* bus-level wrappers around the ap_driver hooks */
+	drv->remove = ap_device_remove;
+	drv->owner = owner;
+	drv->name = name;
+	return driver_register(drv);	/* result of driver_register() is passed through */
+}
+EXPORT_SYMBOL(ap_driver_register);
+
+void ap_driver_unregister(struct ap_driver *ap_drv)
+{
+	driver_unregister(&ap_drv->driver);	/* counterpart of ap_driver_register() */
+}
+EXPORT_SYMBOL(ap_driver_unregister);
+
+/**
+ * AP bus attributes.
+ */
+static ssize_t ap_domain_show(struct bus_type *bus, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_domain_index); /* decimal domain index */
+}
+
+static BUS_ATTR(ap_domain, 0444, ap_domain_show, NULL);
+
+static ssize_t ap_config_time_show(struct bus_type *bus, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time); /* current rescan period */
+}
+
+static ssize_t ap_config_time_store(struct bus_type *bus,
+				    const char *buf, size_t count)
+{
+	int time;
+
+	if (sscanf(buf, "%d\n", &time) != 1 || time < 5 || time > 120)
+		return -EINVAL;	/* only 5..120 is accepted */
+	ap_config_time = time;
+	/*
+	 * Re-arm the rescan timer with the new period. mod_timer() arms the
+	 * timer whether or not it was pending, so no add_timer() is needed.
+	 */
+	mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
+	return count;
+}
+
+static BUS_ATTR(config_time, 0644, ap_config_time_show, ap_config_time_store);
+
+static ssize_t ap_poll_thread_show(struct bus_type *bus, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_poll_kthread ? 1 : 0); /* 1 if ap_poll_kthread is set */
+}
+
+static ssize_t ap_poll_thread_store(struct bus_type *bus,
+				    const char *buf, size_t count)
+{
+	int flag, rc;
+
+	if (sscanf(buf, "%d\n", &flag) != 1)
+		return -EINVAL;
+	if (flag) {	/* non-zero starts the poll thread, zero stops it */
+		rc = ap_poll_thread_start();
+		if (rc)
+			return rc;
+	}
+	else
+		ap_poll_thread_stop();
+	return count;
+}
+
+static BUS_ATTR(poll_thread, 0644, ap_poll_thread_show, ap_poll_thread_store);
+
+static struct bus_attribute *const ap_bus_attrs[] = {
+	&bus_attr_ap_domain,
+	&bus_attr_config_time,
+	&bus_attr_poll_thread,
+	NULL	/* terminates the list */
+};
+
+/**
+ * Pick one of the AP_DOMAINS ap domains. Returns 0 or -ENODEV.
+ */
+static inline int ap_select_domain(void)
+{
+	int queue_depth, device_type, count, max_count, best_domain;
+	int rc, i, j;
+
+	/**
+	 * We want to use a single domain. Either the one specified with
+	 * the "domain=" parameter or the domain with the maximum number
+	 * of devices.
+	 */
+	if (ap_domain_index >= 0 && ap_domain_index < AP_DOMAINS)
+		/* Domain has already been selected. */
+		return 0;
+	best_domain = -1;
+	max_count = 0;
+	for (i = 0; i < AP_DOMAINS; i++) {
+		count = 0;	/* number of queues in domain i that answer the query */
+		for (j = 0; j < AP_DEVICES; j++) {
+			ap_qid_t qid = AP_MKQID(j, i);
+			rc = ap_query_queue(qid, &queue_depth, &device_type);
+			if (rc)
+				continue;
+			count++;
+		}
+		if (count > max_count) { /* strict '>': first domain wins ties, empty domains never win */
+			max_count = count;
+			best_domain = i;
+		}
+	}
+	if (best_domain >= 0){
+		ap_domain_index = best_domain;
+		return 0;
+	}
+	return -ENODEV;	/* no domain has any available queue */
+}
+
+/**
+ * Find the device type if query queue returned a device type of 0.
+ * @ap_dev: pointer to the AP device. Returns 0, -ENOMEM or -ENODEV.
+ */
+static int ap_probe_device_type(struct ap_device *ap_dev)
+{
+	static unsigned char msg[] = {	/* fixed test request sent to the queue */
+		0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x01,0x00,0x43,0x43,0x41,0x2d,0x41,0x50,
+		0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01,
+		0x00,0x00,0x00,0x00,0x50,0x4b,0x00,0x00,
+		0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x54,0x32,0x01,0x00,0xa0,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0xb8,0x05,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,
+		0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20,
+		0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53,
+		0x2d,0x31,0x2e,0x32,0x37,0x00,0x11,0x22,
+		0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
+		0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,
+		0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,
+		0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,
+		0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,
+		0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
+		0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77,
+		0x88,0x1e,0x00,0x00,0x57,0x00,0x00,0x00,
+		0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00,
+		0x03,0x02,0x00,0x00,0x40,0x01,0x00,0x01,
+		0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c,
+		0xf6,0xd2,0x7b,0x58,0x4b,0xf9,0x28,0x68,
+		0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66,
+		0x63,0x42,0xef,0xf8,0xfd,0xa4,0xf8,0xb0,
+		0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8,
+		0x53,0x8c,0x6f,0x4e,0x72,0x8f,0x6c,0x04,
+		0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57,
+		0xf7,0xdd,0xfd,0x4f,0x11,0x36,0x95,0x5d,
+	};
+	struct ap_queue_status status;
+	unsigned long long psmid;
+	char *reply;
+	int rc, i;
+
+	reply = (void *) get_zeroed_page(GFP_KERNEL);	/* one page for the reply */
+	if (!reply) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	status = __ap_send(ap_dev->qid, 0x0102030405060708ULL,
+			   msg, sizeof(msg));
+	if (status.response_code != AP_RESPONSE_NORMAL) {
+		rc = -ENODEV;
+		goto out_free;
+	}
+
+	/* Wait for the test message to complete. */
+	for (i = 0; i < 6; i++) {
+		mdelay(300);	/* busy-wait; at most 6 * 300 ms in total */
+		status = __ap_recv(ap_dev->qid, &psmid, reply, 4096);
+		if (status.response_code == AP_RESPONSE_NORMAL &&
+		    psmid == 0x0102030405060708ULL)	/* same id as used for the send */
+			break;
+	}
+	if (i < 6) {
+		/* Got an answer. */
+		if (reply[0] == 0x00 && reply[1] == 0x86) /* NOTE(review): reply is plain char; 0x86 only matches if char is unsigned - confirm */
+			ap_dev->device_type = AP_DEVICE_TYPE_PCICC;
+		else
+			ap_dev->device_type = AP_DEVICE_TYPE_PCICA;
+		rc = 0;
+	} else
+		rc = -ENODEV;	/* no matching reply within the wait loop */
+
+out_free:
+	free_page((unsigned long) reply);
+out:
+	return rc;
+}
+
+/**
+ * Scan the ap bus for new devices.
+ */
+static int __ap_scan_bus(struct device *dev, void *data)
+{
+	return to_ap_dev(dev)->qid == (ap_qid_t)(unsigned long) data; /* non-zero if dev has the qid passed in data */
+}
+
+static void ap_device_release(struct device *dev)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+
+	kfree(ap_dev);	/* frees the ap_device that embeds dev */
+}
+
+static void ap_scan_bus(void *data)	/* data is not used */
+{
+	struct ap_device *ap_dev;
+	struct device *dev;
+	ap_qid_t qid;
+	int queue_depth, device_type;
+	int rc, i;
+
+	if (ap_select_domain() != 0)
+		return;
+	for (i = 0; i < AP_DEVICES; i++) {
+		qid = AP_MKQID(i, ap_domain_index);
+		dev = bus_find_device(&ap_bus_type, NULL,
+				      (void *)(unsigned long)qid,
+				      __ap_scan_bus);
+		if (dev) {	/* qid is already registered on the bus */
+			put_device(dev);
+			continue;
+		}
+		rc = ap_query_queue(qid, &queue_depth, &device_type);
+		if (rc)
+			continue;
+		rc = ap_init_queue(qid);
+		if (rc)
+			continue;
+		ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL);
+		if (!ap_dev)
+			break;
+		ap_dev->qid = qid;
+		ap_dev->queue_depth = queue_depth;
+		spin_lock_init(&ap_dev->lock);
+		INIT_LIST_HEAD(&ap_dev->pendingq);
+		INIT_LIST_HEAD(&ap_dev->requestq);
+		ap_dev->device_type = device_type;
+		if (device_type == 0 && ap_probe_device_type(ap_dev)) {
+			kfree(ap_dev);	/* type unknown; device was never registered */
+			continue;
+		}
+		ap_dev->device.bus = &ap_bus_type;
+		ap_dev->device.parent = ap_root_device;
+		snprintf(ap_dev->device.bus_id, BUS_ID_SIZE, "card%02x",
+			 AP_QID_DEVICE(ap_dev->qid));
+		ap_dev->device.release = ap_device_release;
+		rc = device_register(&ap_dev->device);
+		if (rc) {
+			put_device(&ap_dev->device); /* drops the reference; release() frees ap_dev */
+			continue;
+		}
+		/* Add device attributes. */
+		rc = sysfs_create_group(&ap_dev->device.kobj,
+					&ap_dev_attr_group);
+		if (rc)
+			device_unregister(&ap_dev->device);
+	}
+}
+
+static void
+ap_config_timeout(unsigned long ptr)
+{
+	queue_work(ap_work_queue, &ap_config_work);	/* defer the work to the workqueue */
+	ap_config_timer.expires = jiffies + ap_config_time * HZ;	/* re-arm for the next period */
+	add_timer(&ap_config_timer);
+}
+
+/**
+ * Set up the timer to run the poll tasklet
+ */
+static inline void ap_schedule_poll_timer(void)
+{
+	if (timer_pending(&ap_poll_timer))
+		return;	/* already armed; keep the earlier expiry */
+	mod_timer(&ap_poll_timer, jiffies + AP_POLL_TIME);
+}
+
+/**
+ * Receive pending reply messages from an AP device.
+ * @ap_dev: pointer to the AP device
+ * @flags: pointer to control flags, bit 2^0 is set if another poll is
+ *	   required, bit 2^1 is set if the poll timer needs to get armed
+ * Returns 0 if the device is still present, -ENODEV if not.
+ */
+static inline int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	if (ap_dev->queue_count <= 0)
+		return 0;	/* nothing outstanding on the AP queue */
+	status = __ap_recv(ap_dev->qid, &ap_dev->reply->psmid,
+			   ap_dev->reply->message, ap_dev->reply->length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		atomic_dec(&ap_poll_requests);
+		ap_dev->queue_count--;
+		list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
+			if (ap_msg->psmid != ap_dev->reply->psmid)
+				continue;
+			list_del_init(&ap_msg->list);
+			ap_dev->pendingq_count--;
+			ap_dev->drv->receive(ap_dev, ap_msg, ap_dev->reply);
+			break;	/* a reply with no matching psmid is simply dropped */
+		}
+		if (ap_dev->queue_count > 0)
+			*flags |= 1;
+		break;
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (status.queue_empty) {
+			/* The card shouldn't forget requests but who knows. */
+			ap_dev->queue_count = 0;
+			list_splice_init(&ap_dev->pendingq, &ap_dev->requestq); /* move pending messages back to requestq */
+			ap_dev->requestq_count += ap_dev->pendingq_count;
+			ap_dev->pendingq_count = 0;
+		} else
+			*flags |= 2;
+		break;
+	default:
+		return -ENODEV;
+	}
+	return 0;
+}
+
+/**
+ * Send messages from the request queue to an AP device.
+ * @ap_dev: pointer to the AP device
+ * @flags: pointer to control flags, bit 2^0 is set if another poll is
+ *	   required, bit 2^1 is set if the poll timer needs to get armed
+ * Returns 0 if ok, -EINVAL if the message is too big, -ENODEV otherwise.
+ */
+static inline int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	if (ap_dev->requestq_count <= 0 ||
+	    ap_dev->queue_count >= ap_dev->queue_depth)
+		return 0;	/* nothing to send, or the AP queue is full */
+	/* Start the next request on the queue. */
+	ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
+	status = __ap_send(ap_dev->qid, ap_msg->psmid,
+			   ap_msg->message, ap_msg->length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		atomic_inc(&ap_poll_requests);
+		ap_dev->queue_count++;
+		list_move_tail(&ap_msg->list, &ap_dev->pendingq);
+		ap_dev->requestq_count--;
+		ap_dev->pendingq_count++;
+		if (ap_dev->queue_count < ap_dev->queue_depth &&
+		    ap_dev->requestq_count > 0)
+			*flags |= 1;	/* room left and more requests waiting */
+		*flags |= 2;
+		break;
+	case AP_RESPONSE_Q_FULL:
+		*flags |= 2;
+		break;
+	case AP_RESPONSE_MESSAGE_TOO_BIG:
+		return -EINVAL;	/* NOTE(review): the message stays at the head of requestq here - confirm intended */
+	default:
+		return -ENODEV;
+	}
+	return 0;
+}
+
+/**
+ * Poll AP device for pending replies and send new messages. The device
+ * is not unregistered here; a non-zero result is left to the caller.
+ * @ap_dev: pointer to the bus device
+ * @flags: pointer to control flags, bit 2^0 is set if another poll is
+ *	   required, bit 2^1 is set if the poll timer needs to get armed
+ * Returns 0 or the error code of ap_poll_read / ap_poll_write.
+ */
+static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags)
+{
+	int rc;
+
+	rc = ap_poll_read(ap_dev, flags);
+	if (rc)
+		return rc;	/* skip the write step if reading failed */
+	return ap_poll_write(ap_dev, flags);
+}
+
+/**
+ * Queue a message to a device.
+ * @ap_dev: pointer to the AP device
+ * @ap_msg: the message to be queued
+ */
+static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	struct ap_queue_status status;
+
+	if (list_empty(&ap_dev->requestq) &&
+	    ap_dev->queue_count < ap_dev->queue_depth) {	/* nothing waiting and room on the AP queue: send now */
+		status = __ap_send(ap_dev->qid, ap_msg->psmid,
+				   ap_msg->message, ap_msg->length);
+		switch (status.response_code) {
+		case AP_RESPONSE_NORMAL:
+			list_add_tail(&ap_msg->list, &ap_dev->pendingq);
+			atomic_inc(&ap_poll_requests);
+			ap_dev->pendingq_count++;
+			ap_dev->queue_count++;
+			ap_dev->total_request_count++;
+			break;
+		case AP_RESPONSE_Q_FULL:
+			list_add_tail(&ap_msg->list, &ap_dev->requestq);
+			ap_dev->requestq_count++;
+			ap_dev->total_request_count++;
+			return -EBUSY;	/* kept on requestq for a later send */
+		case AP_RESPONSE_MESSAGE_TOO_BIG:
+			ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-EINVAL));
+			return -EINVAL;	/* not queued; driver was given an error reply */
+		default:	/* Device is gone. */
+			ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
+			return -ENODEV;	/* not queued; driver was given an error reply */
+		}
+	} else {
+		list_add_tail(&ap_msg->list, &ap_dev->requestq);
+		ap_dev->requestq_count++;
+		ap_dev->total_request_count++;
+		return -EBUSY;	/* kept on requestq for a later send */
+	}
+	ap_schedule_poll_timer();	/* only reached after a successful send */
+	return 0;
+}
+
+void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	unsigned long flags = 0; /* ap_poll_queue() ORs bits into it; the result is not used here */
+	int rc;
+
+	spin_lock_bh(&ap_dev->lock);
+	if (!ap_dev->unregistered) {
+		/* Make room on the queue by polling for finished requests. */
+		rc = ap_poll_queue(ap_dev, &flags);
+		if (!rc)
+			rc = __ap_queue_message(ap_dev, ap_msg);
+		if (!rc)
+			wake_up(&ap_poll_wait);	/* message was sent: wake the poll thread */
+	} else {
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); /* device gone: fail the message at once */
+		rc = 0;
+	}
+	spin_unlock_bh(&ap_dev->lock);
+	if (rc == -ENODEV)	/* unregister outside the device lock */
+		device_unregister(&ap_dev->device);
+}
+EXPORT_SYMBOL(ap_queue_message);
+
+/**
+ * Cancel a crypto request. This is done by removing the request
+ * from the device pendingq or requestq queue. Note that the
+ * request stays on the AP queue. When it finishes the message
+ * reply will be discarded because the psmid can't be found.
+ * @ap_dev: AP device that has the message queued
+ * @ap_msg: the message that is to be removed
+ */
+void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	struct ap_message *tmp;
+
+	spin_lock_bh(&ap_dev->lock);
+	if (!list_empty(&ap_msg->list)) {	/* message is still on one of the lists */
+		list_for_each_entry(tmp, &ap_dev->pendingq, list)
+			if (tmp->psmid == ap_msg->psmid) {
+				ap_dev->pendingq_count--;
+				goto found;
+			}
+		ap_dev->requestq_count--;	/* not on pendingq, so it is counted on requestq */
+	found:
+		list_del_init(&ap_msg->list);
+	}
+	spin_unlock_bh(&ap_dev->lock);
+}
+EXPORT_SYMBOL(ap_cancel_message);
+
+/**
+ * AP receive polling for finished AP requests
+ */
+static void ap_poll_timeout(unsigned long unused)
+{
+	tasklet_schedule(&ap_tasklet);	/* the timer only schedules the tasklet */
+}
+
+/**
+ * Poll all AP devices on the bus in a round robin fashion. Continue
+ * polling until bit 2^0 of the control flags is not set. If bit 2^1
+ * of the control flags has been set arm the poll timer.
+ */
+static int __ap_poll_all(struct device *dev, void *data)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	int rc;
+
+	spin_lock(&ap_dev->lock);
+	if (!ap_dev->unregistered) {
+		rc = ap_poll_queue(to_ap_dev(dev), (unsigned long *) data); /* data points to the shared control flags */
+	} else
+		rc = 0;
+	spin_unlock(&ap_dev->lock);
+	if (rc)	/* any poll error (not only -ENODEV) unregisters the device */
+		device_unregister(&ap_dev->device);
+	return 0;	/* always 0, whatever the poll result */
+}
+
+static void ap_poll_all(unsigned long dummy)
+{
+	unsigned long flags;
+
+	do {
+		flags = 0;	/* flags are rebuilt on every pass over the bus */
+		bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all);
+	} while (flags & 1);	/* bit 2^0: another poll is required */
+	if (flags & 2)	/* bit 2^1: arm the poll timer */
+		ap_schedule_poll_timer();
+}
+
+/**
+ * AP bus poll thread. The purpose of this thread is to poll for
+ * finished requests in a loop if there is a "free" cpu - that is
+ * a cpu that doesn't have anything better to do. The polling stops
+ * as soon as there is another task or if all messages have been
+ * delivered.
+ */
+static int ap_poll_thread(void *data)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	unsigned long flags;
+	int requests;
+
+	set_user_nice(current, 19);	/* lowest priority: poll only when the cpu is otherwise free */
+	while (1) {
+		if (need_resched()) {
+			schedule();	/* give way to other tasks first */
+			continue;
+		}
+		add_wait_queue(&ap_poll_wait, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+		requests = atomic_read(&ap_poll_requests);
+		if (requests <= 0)
+			schedule();	/* nothing outstanding: sleep until woken */
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&ap_poll_wait, &wait);
+
+		local_bh_disable();	/* bottom halves are off while polling */
+		flags = 0;
+		bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all);
+		local_bh_enable();
+	}
+	set_current_state(TASK_RUNNING);	/* undo the wait setup done before the break */
+	remove_wait_queue(&ap_poll_wait, &wait);
+	return 0;
+}
+
+static int ap_poll_thread_start(void)
+{
+	int rc;
+
+	mutex_lock(&ap_poll_thread_mutex);	/* serializes start and stop */
+	if (!ap_poll_kthread) {
+		ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll");
+		rc = IS_ERR(ap_poll_kthread) ? PTR_ERR(ap_poll_kthread) : 0;
+		if (rc)
+			ap_poll_kthread = NULL;	/* do not keep the error pointer around */
+	}
+	else
+		rc = 0;	/* already running */
+	mutex_unlock(&ap_poll_thread_mutex);
+	return rc;
+}
+
+static void ap_poll_thread_stop(void)
+{
+	mutex_lock(&ap_poll_thread_mutex);	/* serializes start and stop */
+	if (ap_poll_kthread) {
+		kthread_stop(ap_poll_kthread);
+		ap_poll_kthread = NULL;	/* NULL means "not running" */
+	}
+	mutex_unlock(&ap_poll_thread_mutex);
+}
+
+/**
+ * The module initialization code. Returns 0 or a negative error code.
+ */
+int __init ap_module_init(void)
+{
+	int rc, i;
+
+	if (ap_domain_index < -1 || ap_domain_index >= AP_DOMAINS) { /* -1 is accepted as "not set" */
+		printk(KERN_WARNING "Invalid param: domain = %d. "
+		       " Not loading.\n", ap_domain_index);
+		return -EINVAL;
+	}
+	if (ap_instructions_available() != 0) {
+		printk(KERN_WARNING "AP instructions not installed.\n");
+		return -ENODEV;
+	}
+
+	/* Create /sys/bus/ap. */
+	rc = bus_register(&ap_bus_type);
+	if (rc)
+		goto out;
+	for (i = 0; ap_bus_attrs[i]; i++) {
+		rc = bus_create_file(&ap_bus_type, ap_bus_attrs[i]);
+		if (rc)
+			goto out_bus;	/* i = number of files created so far */
+	}
+
+	/* Create /sys/devices/ap. */
+	ap_root_device = s390_root_dev_register("ap");
+	rc = IS_ERR(ap_root_device) ? PTR_ERR(ap_root_device) : 0;
+	if (rc)
+		goto out_bus;
+
+	ap_work_queue = create_singlethread_workqueue("kapwork");
+	if (!ap_work_queue) {
+		rc = -ENOMEM;
+		goto out_root;
+	}
+
+	if (ap_select_domain() == 0)
+		ap_scan_bus(NULL);	/* initial scan; later scans are driven by the timer below */
+
+	/* Setup the ap bus rescan timer. */
+	init_timer(&ap_config_timer);
+	ap_config_timer.function = ap_config_timeout;
+	ap_config_timer.data = 0;
+	ap_config_timer.expires = jiffies + ap_config_time * HZ;
+	add_timer(&ap_config_timer);
+
+	/* Start the low priority AP bus poll thread. */
+	if (ap_thread_flag) {
+		rc = ap_poll_thread_start();
+		if (rc)
+			goto out_work;
+	}
+
+	return 0;
+
+out_work:	/* NOTE(review): devices registered by ap_scan_bus() above are not unregistered on this path - confirm */
+	del_timer_sync(&ap_config_timer);
+	del_timer_sync(&ap_poll_timer);
+	destroy_workqueue(ap_work_queue);
+out_root:
+	s390_root_dev_unregister(ap_root_device);
+out_bus:
+	while (i--)	/* remove only the bus files that were created */
+		bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
+	bus_unregister(&ap_bus_type);
+out:
+	return rc;
+}
+
+static int __ap_match_all(struct device *dev, void *data)
+{
+	return 1;	/* matches every device */
+}
+
+/**
+ * The module termination code
+ */
+void ap_module_exit(void)
+{
+	int i;
+	struct device *dev;
+
+	ap_poll_thread_stop();
+	del_timer_sync(&ap_config_timer);
+	del_timer_sync(&ap_poll_timer);
+	destroy_workqueue(ap_work_queue);
+	s390_root_dev_unregister(ap_root_device);
+	while ((dev = bus_find_device(&ap_bus_type, NULL, NULL,
+		    __ap_match_all)))	/* repeat until no device is left on the bus */
+	{
+		device_unregister(dev);
+		put_device(dev);	/* drop the reference taken by bus_find_device() */
+	}
+	for (i = 0; ap_bus_attrs[i]; i++)
+		bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
+	bus_unregister(&ap_bus_type);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(ap_module_init);
+module_exit(ap_module_exit);
+#endif
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
new file mode 100644
index 0000000000000..83b69c01cd6e4
--- /dev/null
+++ b/drivers/s390/crypto/ap_bus.h
@@ -0,0 +1,158 @@
+/*
+ * linux/drivers/s390/crypto/ap_bus.h
+ *
+ * Copyright (C) 2006 IBM Corporation
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * Adjunct processor bus header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _AP_BUS_H_
+#define _AP_BUS_H_
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/types.h>
+
+#define AP_DEVICES 64		/* Number of AP devices. */
+#define AP_DOMAINS 16		/* Number of AP domains. */
+#define AP_MAX_RESET 90		/* Maximum number of resets. */
+#define AP_CONFIG_TIME 30	/* Time in seconds between AP bus rescans. */
+#define AP_POLL_TIME 1		/* Time in ticks between receive polls. */
+
+extern int ap_domain_index;
+
+/**
+ * The ap_qid_t identifier of an ap queue. It contains a
+ * 6 bit device index and a 4 bit queue index (domain).
+ */
+typedef unsigned int ap_qid_t;
+
+#define AP_MKQID(_device,_queue) (((_device) & 63) << 8 | ((_queue) & 15))
+#define AP_QID_DEVICE(_qid) (((_qid) >> 8) & 63)
+#define AP_QID_QUEUE(_qid) ((_qid) & 15)
+
+/**
+ * The ap queue status word is returned by all three AP functions
+ * (PQAP, NQAP and DQAP).  There's a set of flags in the first
+ * byte, followed by a 1 byte response code.
+ */
+struct ap_queue_status {
+	unsigned int queue_empty	: 1;
+	unsigned int replies_waiting	: 1;
+	unsigned int queue_full		: 1;
+	unsigned int pad1		: 5;
+	unsigned int response_code	: 8;
+	unsigned int pad2		: 16;
+};
+
+#define AP_RESPONSE_NORMAL		0x00
+#define AP_RESPONSE_Q_NOT_AVAIL		0x01
+#define AP_RESPONSE_RESET_IN_PROGRESS	0x02
+#define AP_RESPONSE_DECONFIGURED	0x03
+#define AP_RESPONSE_CHECKSTOPPED	0x04
+#define AP_RESPONSE_BUSY		0x05
+#define AP_RESPONSE_Q_FULL		0x10
+#define AP_RESPONSE_NO_PENDING_REPLY	0x10
+#define AP_RESPONSE_INDEX_TOO_BIG	0x11
+#define AP_RESPONSE_NO_FIRST_PART	0x13
+#define AP_RESPONSE_MESSAGE_TOO_BIG	0x15
+
+/**
+ * Known device types
+ */
+#define AP_DEVICE_TYPE_PCICC	3
+#define AP_DEVICE_TYPE_PCICA	4
+#define AP_DEVICE_TYPE_PCIXCC	5
+#define AP_DEVICE_TYPE_CEX2A	6
+#define AP_DEVICE_TYPE_CEX2C	7
+
+struct ap_device;
+struct ap_message;
+
+struct ap_driver {
+	struct device_driver driver;
+	struct ap_device_id *ids;
+
+	int (*probe)(struct ap_device *);
+	void (*remove)(struct ap_device *);
+	/* receive is called from tasklet context */
+	void (*receive)(struct ap_device *, struct ap_message *,
+			struct ap_message *);
+};
+
+#define to_ap_drv(x) container_of((x), struct ap_driver, driver)
+
+int ap_driver_register(struct ap_driver *, struct module *, char *);
+void ap_driver_unregister(struct ap_driver *);
+
+struct ap_device {
+	struct device device;
+	struct ap_driver *drv;		/* Pointer to AP device driver. */
+	spinlock_t lock;		/* Per device lock. */
+
+	ap_qid_t qid;			/* AP queue id. */
+	int queue_depth;		/* AP queue depth.*/
+	int device_type;		/* AP device type. */
+	int unregistered;		/* marks AP device as unregistered */
+
+	int queue_count;		/* # messages currently on AP queue. */
+
+	struct list_head pendingq;	/* List of message sent to AP queue. */
+	int pendingq_count;		/* # requests on pendingq list. */
+	struct list_head requestq;	/* List of message yet to be sent. */
+	int requestq_count;		/* # requests on requestq list. */
+	int total_request_count;	/* # requests ever for this AP device. */
+
+	struct ap_message *reply;	/* Per device reply message. */
+
+	void *private;			/* ap driver private pointer. */
+};
+
+#define to_ap_dev(x) container_of((x), struct ap_device, device)
+
+struct ap_message {
+	struct list_head list;		/* Request queueing. */
+	unsigned long long psmid;	/* Message id. */
+	void *message;			/* Pointer to message buffer. */
+	size_t length;			/* Message length. */
+
+	void *private;			/* ap driver private pointer. */
+};
+
+#define AP_DEVICE(dt)					\
+	.dev_type=(dt),					\
+	.match_flags=AP_DEVICE_ID_MATCH_DEVICE_TYPE,
+
+/**
+ * Note: don't use ap_send/ap_recv after using ap_queue_message
+ * for the first time. Otherwise the ap message queue will get
+ * confused.
+ */
+int ap_send(ap_qid_t, unsigned long long, void *, size_t);
+int ap_recv(ap_qid_t, unsigned long long *, void *, size_t);
+
+void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg);
+void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg);
+void ap_flush_queue(struct ap_device *ap_dev);
+
+int ap_module_init(void);
+void ap_module_exit(void);
+
+#endif /* _AP_BUS_H_ */
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f6977708585ca..f7ca0b09075d9 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -148,6 +148,17 @@ struct ccw_device_id {
 #define CCW_DEVICE_ID_MATCH_DEVICE_TYPE		0x04
 #define CCW_DEVICE_ID_MATCH_DEVICE_MODEL	0x08
 
+/* s390 AP bus devices */
+struct ap_device_id {
+	__u16 match_flags;	/* which fields to match against */
+	__u8 dev_type;		/* device type */
+	__u8 pad1;
+	__u32 pad2;
+	kernel_ulong_t driver_info;
+};
+
+#define AP_DEVICE_ID_MATCH_DEVICE_TYPE		0x01
+
 
 #define PNP_ID_LEN	8
 #define PNP_MAX_DEVICES	8
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index e2de650d3dbff..de76da80443f7 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -265,6 +265,14 @@ static int do_ccw_entry(const char *filename,
 	return 1;
 }
 
+/* looks like: "ap:tN" */
+static int do_ap_entry(const char *filename,
+		       struct ap_device_id *id, char *alias)
+{
+	sprintf(alias, "ap:t%02X", id->dev_type);
+	return 1;
+}
+
 /* Looks like: "serio:tyNprNidNexN" */
 static int do_serio_entry(const char *filename,
 			  struct serio_device_id *id, char *alias)
@@ -503,6 +511,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 		do_table(symval, sym->st_size,
 			 sizeof(struct ccw_device_id), "ccw",
 			 do_ccw_entry, mod);
+	else if (sym_is(symname, "__mod_ap_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct ap_device_id), "ap",
+			 do_ap_entry, mod);
 	else if (sym_is(symname, "__mod_serio_device_table"))
 		do_table(symval, sym->st_size,
 			 sizeof(struct serio_device_id), "serio",
-- 
GitLab


From 2dbc2418bac32a18a372ae9aec386f0fe9174389 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:27 +0200
Subject: [PATCH 0223/1063] [S390] zcrypt user space interface.

The user space interface of the zcrypt device driver implements the old
user space interface as defined by the old z90crypt driver. Everything
is there, the /dev/z90crypt misc character device, all the lovely ioctls
and the /proc file. Even writing to the z90crypt proc file to configure
the crypto device still works. It stands to reason to remove the proc
write function someday since a much cleaner configuration via the sysfs
is now available.

The ap bus device drivers register crypto cards to the zcrypt user
space interface. The request router of the user space interface
picks one of the registered cards based on the predicted latency
for the request and calls the driver via a callback found in the
zcrypt_ops of the device. The request router only knows which
operations the card can do and the minimum / maximum number of bits
a request can have.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Ralph Wuerthner <rwuerthn@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/zcrypt_api.c | 981 +++++++++++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_api.h | 140 +++++
 include/asm-s390/zcrypt.h        | 207 +++++++
 3 files changed, 1328 insertions(+)
 create mode 100644 drivers/s390/crypto/zcrypt_api.c
 create mode 100644 drivers/s390/crypto/zcrypt_api.h
 create mode 100644 include/asm-s390/zcrypt.h

diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
new file mode 100644
index 0000000000000..b3fe003b3d2d5
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -0,0 +1,981 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_api.c
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *	       Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/compat.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "zcrypt_api.h"
+
+/**
+ * Module description.
+ */
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("Cryptographic Coprocessor interface, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+
+static DEFINE_SPINLOCK(zcrypt_device_lock);
+static LIST_HEAD(zcrypt_device_list);
+static int zcrypt_device_count = 0;
+static atomic_t zcrypt_open_count = ATOMIC_INIT(0);
+
+/**
+ * Device attributes common for all crypto devices.
+ */
+static ssize_t zcrypt_type_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
+	return snprintf(buf, PAGE_SIZE, "%s\n", zdev->type_string);
+}
+
+static DEVICE_ATTR(type, 0444, zcrypt_type_show, NULL);
+
+static ssize_t zcrypt_online_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
+	return snprintf(buf, PAGE_SIZE, "%d\n", zdev->online);
+}
+
+static ssize_t zcrypt_online_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
+	int online;
+
+	if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
+		return -EINVAL;
+	zdev->online = online;
+	if (!online)
+		ap_flush_queue(zdev->ap_dev);
+	return count;
+}
+
+static DEVICE_ATTR(online, 0644, zcrypt_online_show, zcrypt_online_store);
+
+static struct attribute * zcrypt_device_attrs[] = {
+	&dev_attr_type.attr,
+	&dev_attr_online.attr,
+	NULL,
+};
+
+static struct attribute_group zcrypt_device_attr_group = {
+	.attrs = zcrypt_device_attrs,
+};
+
+/**
+ * Move the device towards the head of the device list.
+ * Need to be called while holding the zcrypt device list lock.
+ * Note: cards with speed_rating of 0 are kept at the end of the list.
+ */
+static void __zcrypt_increase_preference(struct zcrypt_device *zdev)
+{
+	struct zcrypt_device *tmp;
+	struct list_head *l;
+
+	if (zdev->speed_rating == 0)
+		return;
+	for (l = zdev->list.prev; l != &zcrypt_device_list; l = l->prev) {
+		tmp = list_entry(l, struct zcrypt_device, list);
+		if ((tmp->request_count + 1) * tmp->speed_rating <=
+		    (zdev->request_count + 1) * zdev->speed_rating &&
+		    tmp->speed_rating != 0)
+			break;
+	}
+	if (l == zdev->list.prev)
+		return;
+	/* Move zdev behind l */
+	list_del(&zdev->list);
+	list_add(&zdev->list, l);
+}
+
+/**
+ * Move the device towards the tail of the device list.
+ * Need to be called while holding the zcrypt device list lock.
+ * Note: cards with speed_rating of 0 are kept at the end of the list.
+ */
+static void __zcrypt_decrease_preference(struct zcrypt_device *zdev)
+{
+	struct zcrypt_device *tmp;
+	struct list_head *l;
+
+	if (zdev->speed_rating == 0)
+		return;
+	for (l = zdev->list.next; l != &zcrypt_device_list; l = l->next) {
+		tmp = list_entry(l, struct zcrypt_device, list);
+		if ((tmp->request_count + 1) * tmp->speed_rating >
+		    (zdev->request_count + 1) * zdev->speed_rating ||
+		    tmp->speed_rating == 0)
+			break;
+	}
+	if (l == zdev->list.next)
+		return;
+	/* Move zdev before l */
+	list_del(&zdev->list);
+	list_add_tail(&zdev->list, l);
+}
+
+static void zcrypt_device_release(struct kref *kref)
+{
+	struct zcrypt_device *zdev =
+		container_of(kref, struct zcrypt_device, refcount);
+	zcrypt_device_free(zdev);
+}
+
+void zcrypt_device_get(struct zcrypt_device *zdev)
+{
+	kref_get(&zdev->refcount);
+}
+EXPORT_SYMBOL(zcrypt_device_get);
+
+int zcrypt_device_put(struct zcrypt_device *zdev)
+{
+	return kref_put(&zdev->refcount, zcrypt_device_release);
+}
+EXPORT_SYMBOL(zcrypt_device_put);
+
+struct zcrypt_device *zcrypt_device_alloc(size_t max_response_size)
+{
+	struct zcrypt_device *zdev;
+
+	zdev = kzalloc(sizeof(struct zcrypt_device), GFP_KERNEL);
+	if (!zdev)
+		return NULL;
+	zdev->reply.message = kmalloc(max_response_size, GFP_KERNEL);
+	if (!zdev->reply.message)
+		goto out_free;
+	zdev->reply.length = max_response_size;
+	spin_lock_init(&zdev->lock);
+	INIT_LIST_HEAD(&zdev->list);
+	return zdev;
+
+out_free:
+	kfree(zdev);
+	return NULL;
+}
+EXPORT_SYMBOL(zcrypt_device_alloc);
+
+void zcrypt_device_free(struct zcrypt_device *zdev)
+{
+	kfree(zdev->reply.message);
+	kfree(zdev);
+}
+EXPORT_SYMBOL(zcrypt_device_free);
+
+/**
+ * Register a crypto device.
+ */
+int zcrypt_device_register(struct zcrypt_device *zdev)
+{
+	int rc;
+
+	rc = sysfs_create_group(&zdev->ap_dev->device.kobj,
+				&zcrypt_device_attr_group);
+	if (rc)
+		goto out;
+	get_device(&zdev->ap_dev->device);
+	kref_init(&zdev->refcount);
+	spin_lock_bh(&zcrypt_device_lock);
+	zdev->online = 1;	/* New devices are online by default. */
+	list_add_tail(&zdev->list, &zcrypt_device_list);
+	__zcrypt_increase_preference(zdev);
+	zcrypt_device_count++;
+	spin_unlock_bh(&zcrypt_device_lock);
+out:
+	return rc;
+}
+EXPORT_SYMBOL(zcrypt_device_register);
+
+/**
+ * Unregister a crypto device.
+ */
+void zcrypt_device_unregister(struct zcrypt_device *zdev)
+{
+	spin_lock_bh(&zcrypt_device_lock);
+	zcrypt_device_count--;
+	list_del_init(&zdev->list);
+	spin_unlock_bh(&zcrypt_device_lock);
+	sysfs_remove_group(&zdev->ap_dev->device.kobj,
+			   &zcrypt_device_attr_group);
+	put_device(&zdev->ap_dev->device);
+	zcrypt_device_put(zdev);
+}
+EXPORT_SYMBOL(zcrypt_device_unregister);
+
+/**
+ * zcrypt_read is not supported beyond zcrypt 1.3.1
+ */
+static ssize_t zcrypt_read(struct file *filp, char __user *buf,
+			   size_t count, loff_t *f_pos)
+{
+	return -EPERM;
+}
+
+/**
+ * Write is not allowed.
+ */
+static ssize_t zcrypt_write(struct file *filp, const char __user *buf,
+			    size_t count, loff_t *f_pos)
+{
+	return -EPERM;
+}
+
+/**
+ * Device open/close functions to count number of users.
+ */
+static int zcrypt_open(struct inode *inode, struct file *filp)
+{
+	atomic_inc(&zcrypt_open_count);
+	return 0;
+}
+
+static int zcrypt_release(struct inode *inode, struct file *filp)
+{
+	atomic_dec(&zcrypt_open_count);
+	return 0;
+}
+
+/**
+ * zcrypt ioctls.
+ */
+static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	if (mex->outputdatalength < mex->inputdatalength)
+		return -EINVAL;
+	/**
+	 * As long as outputdatalength is big enough, we can set the
+	 * outputdatalength equal to the inputdatalength, since that is the
+	 * number of bytes we will copy in any case
+	 */
+	mex->outputdatalength = mex->inputdatalength;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		if (!zdev->online ||
+		    !zdev->ops->rsa_modexpo ||
+		    zdev->min_mod_size > mex->inputdatalength ||
+		    zdev->max_mod_size < mex->inputdatalength)
+			continue;
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			rc = zdev->ops->rsa_modexpo(zdev, mex);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		}
+		else
+			rc = -EAGAIN;
+		spin_lock_bh(&zcrypt_device_lock);
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
+static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt)
+{
+	struct zcrypt_device *zdev;
+	unsigned long long z1, z2, z3;
+	int rc, copied;
+
+	if (crt->outputdatalength < crt->inputdatalength ||
+	    (crt->inputdatalength & 1))
+		return -EINVAL;
+	/**
+	 * As long as outputdatalength is big enough, we can set the
+	 * outputdatalength equal to the inputdatalength, since that is the
+	 * number of bytes we will copy in any case
+	 */
+	crt->outputdatalength = crt->inputdatalength;
+
+	copied = 0;
+ restart:
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		if (!zdev->online ||
+		    !zdev->ops->rsa_modexpo_crt ||
+		    zdev->min_mod_size > crt->inputdatalength ||
+		    zdev->max_mod_size < crt->inputdatalength)
+			continue;
+		if (zdev->short_crt && crt->inputdatalength > 240) {
+			/**
+			 * Check inputdata for leading zeros for cards
+			 * that can't handle np_prime, bp_key, or
+			 * u_mult_inv > 128 bytes.
+			 */
+			if (copied == 0) {
+				int len;
+				spin_unlock_bh(&zcrypt_device_lock);
+				/* len is max 256 / 2 - 120 = 8 */
+				len = crt->inputdatalength / 2 - 120;
+				z1 = z2 = z3 = 0;
+				if (copy_from_user(&z1, crt->np_prime, len) ||
+				    copy_from_user(&z2, crt->bp_key, len) ||
+				    copy_from_user(&z3, crt->u_mult_inv, len))
+					return -EFAULT;
+				copied = 1;
+				/**
+				 * We have to restart device lookup -
+				 * the device list may have changed by now.
+				 */
+				goto restart;
+			}
+			if (z1 != 0ULL || z2 != 0ULL || z3 != 0ULL)
+				/* The device can't handle this request. */
+				continue;
+		}
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			rc = zdev->ops->rsa_modexpo_crt(zdev, crt);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		}
+		else
+			rc = -EAGAIN;
+		spin_lock_bh(&zcrypt_device_lock);
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
+static void zcrypt_status_mask(char status[AP_DEVICES])
+{
+	struct zcrypt_device *zdev;
+
+	memset(status, 0, sizeof(char) * AP_DEVICES);
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		status[AP_QID_DEVICE(zdev->ap_dev->qid)] =
+			zdev->online ? zdev->user_space_type : 0x0d;
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static void zcrypt_qdepth_mask(char qdepth[AP_DEVICES])
+{
+	struct zcrypt_device *zdev;
+
+	memset(qdepth, 0, sizeof(char)	* AP_DEVICES);
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		qdepth[AP_QID_DEVICE(zdev->ap_dev->qid)] =
+			zdev->ap_dev->pendingq_count +
+			zdev->ap_dev->requestq_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static void zcrypt_perdev_reqcnt(int reqcnt[AP_DEVICES])
+{
+	struct zcrypt_device *zdev;
+
+	memset(reqcnt, 0, sizeof(int) * AP_DEVICES);
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		reqcnt[AP_QID_DEVICE(zdev->ap_dev->qid)] =
+			zdev->ap_dev->total_request_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static int zcrypt_pendingq_count(void)
+{
+	struct zcrypt_device *zdev;
+	int pendingq_count = 0;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		pendingq_count += zdev->ap_dev->pendingq_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return pendingq_count;
+}
+
+static int zcrypt_requestq_count(void)
+{
+	struct zcrypt_device *zdev;
+	int requestq_count = 0;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		requestq_count += zdev->ap_dev->requestq_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return requestq_count;
+}
+
+static int zcrypt_count_type(int type)
+{
+	struct zcrypt_device *zdev;
+	int device_count = 0;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		if (zdev->user_space_type == type)
+			device_count++;
+	spin_unlock_bh(&zcrypt_device_lock);
+	return device_count;
+}
+
+/**
+ * Old, deprecated combi status call.
+ */
+static long zcrypt_ica_status(struct file *filp, unsigned long arg)
+{
+	struct ica_z90_status *pstat;
+	int ret;
+
+	pstat = kzalloc(sizeof(*pstat), GFP_KERNEL);
+	if (!pstat)
+		return -ENOMEM;
+	pstat->totalcount = zcrypt_device_count;
+	pstat->leedslitecount = zcrypt_count_type(ZCRYPT_PCICA);
+	pstat->leeds2count = zcrypt_count_type(ZCRYPT_PCICC);
+	pstat->requestqWaitCount = zcrypt_requestq_count();
+	pstat->pendingqWaitCount = zcrypt_pendingq_count();
+	pstat->totalOpenCount = atomic_read(&zcrypt_open_count);
+	pstat->cryptoDomain = ap_domain_index;
+	zcrypt_status_mask(pstat->status);
+	zcrypt_qdepth_mask(pstat->qdepth);
+	ret = 0;
+	if (copy_to_user((void __user *) arg, pstat, sizeof(*pstat)))
+		ret = -EFAULT;
+	kfree(pstat);
+	return ret;
+}
+
+static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
+				  unsigned long arg)
+{
+	int rc;
+
+	switch (cmd) {
+	case ICARSAMODEXPO: {
+		struct ica_rsa_modexpo __user *umex = (void __user *) arg;
+		struct ica_rsa_modexpo mex;
+		if (copy_from_user(&mex, umex, sizeof(mex)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_rsa_modexpo(&mex);
+		} while (rc == -EAGAIN);
+		if (rc)
+			return rc;
+		return put_user(mex.outputdatalength, &umex->outputdatalength);
+	}
+	case ICARSACRT: {
+		struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg;
+		struct ica_rsa_modexpo_crt crt;
+		if (copy_from_user(&crt, ucrt, sizeof(crt)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_rsa_crt(&crt);
+		} while (rc == -EAGAIN);
+		if (rc)
+			return rc;
+		return put_user(crt.outputdatalength, &ucrt->outputdatalength);
+	}
+	case Z90STAT_STATUS_MASK: {
+		char status[AP_DEVICES];
+		zcrypt_status_mask(status);
+		if (copy_to_user((char __user *) arg, status,
+				 sizeof(char) * AP_DEVICES))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_QDEPTH_MASK: {
+		char qdepth[AP_DEVICES];
+		zcrypt_qdepth_mask(qdepth);
+		if (copy_to_user((char __user *) arg, qdepth,
+				 sizeof(char) * AP_DEVICES))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_PERDEV_REQCNT: {
+		int reqcnt[AP_DEVICES];
+		zcrypt_perdev_reqcnt(reqcnt);
+		if (copy_to_user((int __user *) arg, reqcnt,
+				 sizeof(int) * AP_DEVICES))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_REQUESTQ_COUNT:
+		return put_user(zcrypt_requestq_count(), (int __user *) arg);
+	case Z90STAT_PENDINGQ_COUNT:
+		return put_user(zcrypt_pendingq_count(), (int __user *) arg);
+	case Z90STAT_TOTALOPEN_COUNT:
+		return put_user(atomic_read(&zcrypt_open_count),
+				(int __user *) arg);
+	case Z90STAT_DOMAIN_INDEX:
+		return put_user(ap_domain_index, (int __user *) arg);
+	/**
+	 * Deprecated ioctls. Don't add another device count ioctl,
+	 * you can count them yourself in the user space with the
+	 * output of the Z90STAT_STATUS_MASK ioctl.
+	 */
+	case ICAZ90STATUS:
+		return zcrypt_ica_status(filp, arg);
+	case Z90STAT_TOTALCOUNT:
+		return put_user(zcrypt_device_count, (int __user *) arg);
+	case Z90STAT_PCICACOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCICA),
+				(int __user *) arg);
+	case Z90STAT_PCICCCOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCICC),
+				(int __user *) arg);
+	case Z90STAT_PCIXCCMCL2COUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2),
+				(int __user *) arg);
+	case Z90STAT_PCIXCCMCL3COUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL3),
+				(int __user *) arg);
+	case Z90STAT_PCIXCCCOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2) +
+				zcrypt_count_type(ZCRYPT_PCIXCC_MCL3),
+				(int __user *) arg);
+	case Z90STAT_CEX2CCOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_CEX2C),
+				(int __user *) arg);
+	case Z90STAT_CEX2ACOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_CEX2A),
+				(int __user *) arg);
+	default:
+		/* unknown ioctl number */
+		return -ENOIOCTLCMD;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+/**
+ * ioctl32 conversion routines
+ */
+struct compat_ica_rsa_modexpo {
+	compat_uptr_t	inputdata;
+	unsigned int	inputdatalength;
+	compat_uptr_t	outputdata;
+	unsigned int	outputdatalength;
+	compat_uptr_t	b_key;
+	compat_uptr_t	n_modulus;
+};
+
+static long trans_modexpo32(struct file *filp, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct compat_ica_rsa_modexpo __user *umex32 = compat_ptr(arg);
+	struct compat_ica_rsa_modexpo mex32;
+	struct ica_rsa_modexpo mex64;
+	long rc;
+
+	if (copy_from_user(&mex32, umex32, sizeof(mex32)))
+		return -EFAULT;
+	mex64.inputdata = compat_ptr(mex32.inputdata);
+	mex64.inputdatalength = mex32.inputdatalength;
+	mex64.outputdata = compat_ptr(mex32.outputdata);
+	mex64.outputdatalength = mex32.outputdatalength;
+	mex64.b_key = compat_ptr(mex32.b_key);
+	mex64.n_modulus = compat_ptr(mex32.n_modulus);
+	do {
+		rc = zcrypt_rsa_modexpo(&mex64);
+	} while (rc == -EAGAIN);
+	if (!rc)
+		rc = put_user(mex64.outputdatalength,
+			      &umex32->outputdatalength);
+	return rc;
+}
+
+struct compat_ica_rsa_modexpo_crt {
+	compat_uptr_t	inputdata;
+	unsigned int	inputdatalength;
+	compat_uptr_t	outputdata;
+	unsigned int	outputdatalength;
+	compat_uptr_t	bp_key;
+	compat_uptr_t	bq_key;
+	compat_uptr_t	np_prime;
+	compat_uptr_t	nq_prime;
+	compat_uptr_t	u_mult_inv;
+};
+
+static long trans_modexpo_crt32(struct file *filp, unsigned int cmd,
+				unsigned long arg)
+{
+	struct compat_ica_rsa_modexpo_crt __user *ucrt32 = compat_ptr(arg);
+	struct compat_ica_rsa_modexpo_crt crt32;
+	struct ica_rsa_modexpo_crt crt64;
+	long rc;
+
+	if (copy_from_user(&crt32, ucrt32, sizeof(crt32)))
+		return -EFAULT;
+	crt64.inputdata = compat_ptr(crt32.inputdata);
+	crt64.inputdatalength = crt32.inputdatalength;
+	crt64.outputdata=  compat_ptr(crt32.outputdata);
+	crt64.outputdatalength = crt32.outputdatalength;
+	crt64.bp_key = compat_ptr(crt32.bp_key);
+	crt64.bq_key = compat_ptr(crt32.bq_key);
+	crt64.np_prime = compat_ptr(crt32.np_prime);
+	crt64.nq_prime = compat_ptr(crt32.nq_prime);
+	crt64.u_mult_inv = compat_ptr(crt32.u_mult_inv);
+	do {
+		rc = zcrypt_rsa_crt(&crt64);
+	} while (rc == -EAGAIN);
+	if (!rc)
+		rc = put_user(crt64.outputdatalength,
+			      &ucrt32->outputdatalength);
+	return rc;
+}
+
+long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
+{
+	if (cmd == ICARSAMODEXPO)
+		return trans_modexpo32(filp, cmd, arg);
+	if (cmd == ICARSACRT)
+		return trans_modexpo_crt32(filp, cmd, arg);
+	return zcrypt_unlocked_ioctl(filp, cmd, arg);
+}
+#endif
+
+/**
+ * Misc device file operations.
+ */
+static struct file_operations zcrypt_fops = {
+	.owner		= THIS_MODULE,
+	.read		= zcrypt_read,
+	.write		= zcrypt_write,
+	.unlocked_ioctl	= zcrypt_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= zcrypt_compat_ioctl,
+#endif
+	.open		= zcrypt_open,
+	.release	= zcrypt_release
+};
+
+/**
+ * Misc device.
+ */
+static struct miscdevice zcrypt_misc_device = {
+	.minor	    = MISC_DYNAMIC_MINOR,
+	.name	    = "z90crypt",
+	.fops	    = &zcrypt_fops,
+};
+
+/**
+ * Deprecated /proc entry support.
+ */
+static struct proc_dir_entry *zcrypt_entry;
+
+static inline int sprintcl(unsigned char *outaddr, unsigned char *addr,
+			   unsigned int len)
+{
+	int hl, i;
+
+	hl = 0;
+	for (i = 0; i < len; i++)
+		hl += sprintf(outaddr+hl, "%01x", (unsigned int) addr[i]);
+	hl += sprintf(outaddr+hl, " ");
+	return hl;
+}
+
+static inline int sprintrw(unsigned char *outaddr, unsigned char *addr,
+			   unsigned int len)
+{
+	int hl, inl, c, cx;
+
+	hl = sprintf(outaddr, "	   ");
+	inl = 0;
+	for (c = 0; c < (len / 16); c++) {
+		hl += sprintcl(outaddr+hl, addr+inl, 16);
+		inl += 16;
+	}
+	cx = len%16;
+	if (cx) {
+		hl += sprintcl(outaddr+hl, addr+inl, cx);
+		inl += cx;
+	}
+	hl += sprintf(outaddr+hl, "\n");
+	return hl;
+}
+
+static inline int sprinthx(unsigned char *title, unsigned char *outaddr,
+			   unsigned char *addr, unsigned int len)
+{
+	int hl, inl, r, rx;
+
+	hl = sprintf(outaddr, "\n%s\n", title);
+	inl = 0;
+	for (r = 0; r < (len / 64); r++) {
+		hl += sprintrw(outaddr+hl, addr+inl, 64);
+		inl += 64;
+	}
+	rx = len % 64;
+	if (rx) {
+		hl += sprintrw(outaddr+hl, addr+inl, rx);
+		inl += rx;
+	}
+	hl += sprintf(outaddr+hl, "\n");
+	return hl;
+}
+
+static inline int sprinthx4(unsigned char *title, unsigned char *outaddr,
+			    unsigned int *array, unsigned int len)
+{
+	int hl, r;
+
+	hl = sprintf(outaddr, "\n%s\n", title);
+	for (r = 0; r < len; r++) {
+		if ((r % 8) == 0)
+			hl += sprintf(outaddr+hl, "    ");
+		hl += sprintf(outaddr+hl, "%08X ", array[r]);
+		if ((r % 8) == 7)
+			hl += sprintf(outaddr+hl, "\n");
+	}
+	hl += sprintf(outaddr+hl, "\n");
+	return hl;
+}
+
+static int zcrypt_status_read(char *resp_buff, char **start, off_t offset,
+			      int count, int *eof, void *data)
+{
+	unsigned char *workarea;
+	int len;
+
+	len = 0;
+
+	/* resp_buff is a page. Use the right half for a work area */
+	workarea = resp_buff + 2000;
+	len += sprintf(resp_buff + len, "\nzcrypt version: %d.%d.%d\n",
+		ZCRYPT_VERSION, ZCRYPT_RELEASE, ZCRYPT_VARIANT);
+	len += sprintf(resp_buff + len, "Cryptographic domain: %d\n",
+		       ap_domain_index);
+	len += sprintf(resp_buff + len, "Total device count: %d\n",
+		       zcrypt_device_count);
+	len += sprintf(resp_buff + len, "PCICA count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCICA));
+	len += sprintf(resp_buff + len, "PCICC count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCICC));
+	len += sprintf(resp_buff + len, "PCIXCC MCL2 count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCIXCC_MCL2));
+	len += sprintf(resp_buff + len, "PCIXCC MCL3 count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCIXCC_MCL3));
+	len += sprintf(resp_buff + len, "CEX2C count: %d\n",
+		       zcrypt_count_type(ZCRYPT_CEX2C));
+	len += sprintf(resp_buff + len, "CEX2A count: %d\n",
+		       zcrypt_count_type(ZCRYPT_CEX2A));
+	len += sprintf(resp_buff + len, "requestq count: %d\n",
+		       zcrypt_requestq_count());
+	len += sprintf(resp_buff + len, "pendingq count: %d\n",
+		       zcrypt_pendingq_count());
+	len += sprintf(resp_buff + len, "Total open handles: %d\n\n",
+		       atomic_read(&zcrypt_open_count));
+	zcrypt_status_mask(workarea);
+	len += sprinthx("Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) "
+			"4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A",
+			resp_buff+len, workarea, AP_DEVICES);
+	zcrypt_qdepth_mask(workarea);
+	len += sprinthx("Waiting work element counts",
+			resp_buff+len, workarea, AP_DEVICES);
+	zcrypt_perdev_reqcnt((unsigned int *) workarea);
+	len += sprinthx4("Per-device successfully completed request counts",
+			 resp_buff+len,(unsigned int *) workarea, AP_DEVICES);
+	*eof = 1;
+	memset((void *) workarea, 0x00, AP_DEVICES * sizeof(unsigned int));
+	return len;
+}
+
+static void zcrypt_disable_card(int index)
+{
+	struct zcrypt_device *zdev;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) {
+			zdev->online = 0;
+			ap_flush_queue(zdev->ap_dev);
+			break;
+		}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static void zcrypt_enable_card(int index)
+{
+	struct zcrypt_device *zdev;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) {
+			zdev->online = 1;
+			break;
+		}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static int zcrypt_status_write(struct file *file, const char __user *buffer,
+			       unsigned long count, void *data)
+{
+	unsigned char *lbuf, *ptr;
+	unsigned long local_count;
+	int j;
+	/* Parses an edited copy of the status text; 'd'/'e' toggle cards. */
+	if (count == 0)
+		return 0;
+
+#define LBUFSIZE 1200UL
+	lbuf = kmalloc(LBUFSIZE, GFP_KERNEL);
+	if (!lbuf) {
+		PRINTK("kmalloc failed!\n");
+		return -ENOMEM;	/* 0 would look like "nothing written" */
+	}
+	/* Only the first LBUFSIZE - 1 bytes are copied in and parsed. */
+	local_count = min(LBUFSIZE - 1, count);
+	if (copy_from_user(lbuf, buffer, local_count) != 0) {
+		kfree(lbuf);
+		return -EFAULT;
+	}
+	lbuf[local_count] = '\0';
+
+	ptr = strstr(lbuf, "Online devices");
+	if (!ptr) {
+		PRINTK("Unable to parse data (missing \"Online devices\")\n");
+		goto out;
+	}
+	ptr = strstr(ptr, "\n");
+	if (!ptr) {
+		PRINTK("Unable to parse data (missing newline "
+		       "after \"Online devices\")\n");
+		goto out;
+	}
+	ptr++;
+
+	if (strstr(ptr, "Waiting work element counts") == NULL) {
+		PRINTK("Unable to parse data (missing "
+		       "\"Waiting work element counts\")\n");
+		goto out;
+	}
+	/* j is the card index; blanks/tabs are skipped, anything else stops. */
+	for (j = 0; j < 64 && *ptr; ptr++) {
+		/**
+		 * '0' for no device, '1' for PCICA, '2' for PCICC,
+		 * '3' for PCIXCC_MCL2, '4' for PCIXCC_MCL3,
+		 * '5' for CEX2C and '6' for CEX2A
+		 */
+		if (*ptr >= '0' && *ptr <= '6')
+			j++;
+		else if (*ptr == 'd' || *ptr == 'D')
+			zcrypt_disable_card(j++);
+		else if (*ptr == 'e' || *ptr == 'E')
+			zcrypt_enable_card(j++);
+		else if (*ptr != ' ' && *ptr != '\t')
+			break;
+	}
+out:
+	kfree(lbuf);
+	return count;
+}
+
+/**
+ * Module init: register misc device and /proc entry; 0 or negative errno.
+ */
+int __init zcrypt_api_init(void)
+{
+	int rc;
+
+	/* Register the request sprayer. */
+	rc = misc_register(&zcrypt_misc_device);
+	if (rc < 0) {
+		PRINTKW("misc_register (minor %d) failed with %d\n",
+			zcrypt_misc_device.minor, rc);
+		goto out;
+	}
+
+	/* Set up the proc file system */
+	zcrypt_entry = create_proc_entry("driver/z90crypt", 0644, NULL);
+	if (!zcrypt_entry) {
+		PRINTK("Couldn't create z90crypt proc entry\n");
+		rc = -ENOMEM;
+		goto out_misc;
+	}
+	zcrypt_entry->nlink = 1;
+	zcrypt_entry->data = NULL;
+	zcrypt_entry->read_proc = zcrypt_status_read;
+	zcrypt_entry->write_proc = zcrypt_status_write;
+
+	return 0;
+
+out_misc:
+	misc_deregister(&zcrypt_misc_device);
+out:
+	return rc;
+}
+
+/**
+ * The module termination code.
+ */
+void zcrypt_api_exit(void)
+{
+	remove_proc_entry("driver/z90crypt", NULL);
+	misc_deregister(&zcrypt_misc_device);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_api_init);
+module_exit(zcrypt_api_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
new file mode 100644
index 0000000000000..1f0e61f2e9b42
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -0,0 +1,140 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_api.h
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *	       Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_API_H_
+#define _ZCRYPT_API_H_
+
+/**
+ * Macro definitions
+ *
+ * PDEBUG debugs in the form "zcrypt: function_name -> message"
+ *
+ * PRINTK is like PDEBUG, except that it is always enabled
+ * PRINTKN is like PRINTK, except that it does not include the function name
+ * PRINTKW is like PRINTK, except that it uses KERN_WARNING
+ * PRINTKC is like PRINTK, except that it uses KERN_CRIT
+ */
+#define DEV_NAME	"zcrypt"
+
+#define PRINTK(fmt, args...) \
+	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+#define PRINTKN(fmt, args...) \
+	printk(KERN_DEBUG DEV_NAME ": " fmt, ## args)
+#define PRINTKW(fmt, args...) \
+	printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+#define PRINTKC(fmt, args...) \
+	printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+
+#ifdef ZCRYPT_DEBUG
+#define PDEBUG(fmt, args...) \
+	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+#else
+#define PDEBUG(fmt, args...) do {} while (0)
+#endif
+
+#include "ap_bus.h"
+#include <asm/zcrypt.h>
+
+/* deprecated status calls */
+#define ICAZ90STATUS		_IOR(ZCRYPT_IOCTL_MAGIC, 0x10, struct ica_z90_status)
+#define Z90STAT_PCIXCCCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x43, int)
+
+/**
+ * This structure is deprecated and the corresponding ioctl() has been
+ * replaced with individual ioctl()s for each piece of data!
+ */
+struct ica_z90_status {
+	int totalcount;
+	int leedslitecount; // PCICA
+	int leeds2count;    // PCICC
+	// int PCIXCCCount; is not in struct for backward compatibility
+	int requestqWaitCount;
+	int pendingqWaitCount;
+	int totalOpenCount;
+	int cryptoDomain;
+	// status: 0=not there, 1=PCICA, 2=PCICC, 3=PCIXCC_MCL2, 4=PCIXCC_MCL3,
+	//	   5=CEX2C
+	unsigned char status[64];
+	// qdepth: # work elements waiting for each device
+	unsigned char qdepth[64];
+};
+
+/**
+ * device type for an actual device is either PCICA, PCICC, PCIXCC_MCL2,
+ * PCIXCC_MCL3, CEX2C, or CEX2A
+ *
+ * NOTE: PCIXCC_MCL3 refers to a PCIXCC with May 2004 version of Licensed
+ *	 Internal Code (LIC) (EC J12220 level 29).
+ *	 PCIXCC_MCL2 refers to any LIC before this level.
+ */
+#define ZCRYPT_PCICA		1
+#define ZCRYPT_PCICC		2
+#define ZCRYPT_PCIXCC_MCL2	3
+#define ZCRYPT_PCIXCC_MCL3	4
+#define ZCRYPT_CEX2C		5
+#define ZCRYPT_CEX2A		6
+
+struct zcrypt_device;
+
+struct zcrypt_ops {
+	long (*rsa_modexpo)(struct zcrypt_device *, struct ica_rsa_modexpo *);
+	long (*rsa_modexpo_crt)(struct zcrypt_device *,
+				struct ica_rsa_modexpo_crt *);
+};
+
+struct zcrypt_device {
+	struct list_head list;		/* Device list. */
+	spinlock_t lock;		/* Per device lock. */
+	struct kref refcount;		/* device refcounting */
+	struct ap_device *ap_dev;	/* The "real" ap device. */
+	struct zcrypt_ops *ops;		/* Crypto operations. */
+	int online;			/* User online/offline */
+
+	int user_space_type;		/* User space device id. */
+	char *type_string;		/* User space device name. */
+	int min_mod_size;		/* Min number of bits. */
+	int max_mod_size;		/* Max number of bits. */
+	int short_crt;			/* Card has crt length restriction. */
+	int speed_rating;		/* Speed of the crypto device. */
+
+	int request_count;		/* # current requests. */
+
+	struct ap_message reply;	/* Per-device reply structure. */
+};
+
+struct zcrypt_device *zcrypt_device_alloc(size_t);
+void zcrypt_device_free(struct zcrypt_device *);
+void zcrypt_device_get(struct zcrypt_device *);
+int zcrypt_device_put(struct zcrypt_device *);
+int zcrypt_device_register(struct zcrypt_device *);
+void zcrypt_device_unregister(struct zcrypt_device *);
+int zcrypt_api_init(void);
+void zcrypt_api_exit(void);
+
+#endif /* _ZCRYPT_API_H_ */
diff --git a/include/asm-s390/zcrypt.h b/include/asm-s390/zcrypt.h
new file mode 100644
index 0000000000000..0d6a3e2a3349c
--- /dev/null
+++ b/include/asm-s390/zcrypt.h
@@ -0,0 +1,207 @@
+/*
+ *  include/asm-s390/zcrypt.h
+ *
+ *  zcrypt 2.0.0 (user-visible header)
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ASM_S390_ZCRYPT_H
+#define __ASM_S390_ZCRYPT_H
+
+#define ZCRYPT_VERSION 2
+#define ZCRYPT_RELEASE 1
+#define ZCRYPT_VARIANT 0
+
+#include <linux/ioctl.h>
+#include <linux/compiler.h>
+
+/**
+ * struct ica_rsa_modexpo
+ *
+ * Requirements:
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(b_key) = inputdatalength
+ * - length(n_modulus) = inputdatalength
+ */
+struct ica_rsa_modexpo {
+	char __user *	inputdata;
+	unsigned int	inputdatalength;
+	char __user *	outputdata;
+	unsigned int	outputdatalength;
+	char __user *	b_key;
+	char __user *	n_modulus;
+};
+
+/**
+ * struct ica_rsa_modexpo_crt
+ *
+ * Requirements:
+ * - inputdatalength is even.
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(bp_key)	= inputdatalength/2 + 8
+ * - length(bq_key)	= inputdatalength/2
+ * - length(np_prime)	= inputdatalength/2 + 8
+ * - length(nq_prime)	= inputdatalength/2
+ * - length(u_mult_inv) = inputdatalength/2 + 8
+ */
+struct ica_rsa_modexpo_crt {
+	char __user *	inputdata;
+	unsigned int	inputdatalength;
+	char __user *	outputdata;
+	unsigned int	outputdatalength;
+	char __user *	bp_key;
+	char __user *	bq_key;
+	char __user *	np_prime;
+	char __user *	nq_prime;
+	char __user *	u_mult_inv;
+};
+
+#define ZCRYPT_IOCTL_MAGIC 'z'
+
+/**
+ * Interface notes:
+ *
+ * The ioctl()s which are implemented (along with relevant details)
+ * are:
+ *
+ *   ICARSAMODEXPO
+ *     Perform an RSA operation using a Modulus-Exponent pair
+ *     This takes an ica_rsa_modexpo struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   ICARSACRT
+ *     Perform an RSA operation using a Chinese-Remainder Theorem key
+ *     This takes an ica_rsa_modexpo_crt struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   Z90STAT_TOTALCOUNT
+ *     Return an integer count of all device types together.
+ *
+ *   Z90STAT_PCICACOUNT
+ *     Return an integer count of all PCICAs.
+ *
+ *   Z90STAT_PCICCCOUNT
+ *     Return an integer count of all PCICCs.
+ *
+ *   Z90STAT_PCIXCCMCL2COUNT
+ *     Return an integer count of all MCL2 PCIXCCs.
+ *
+ *   Z90STAT_PCIXCCMCL3COUNT
+ *     Return an integer count of all MCL3 PCIXCCs.
+ *
+ *   Z90STAT_CEX2CCOUNT
+ *     Return an integer count of all CEX2Cs.
+ *
+ *   Z90STAT_CEX2ACOUNT
+ *     Return an integer count of all CEX2As.
+ *
+ *   Z90STAT_REQUESTQ_COUNT
+ *     Return an integer count of the number of entries waiting to be
+ *     sent to a device.
+ *
+ *   Z90STAT_PENDINGQ_COUNT
+ *     Return an integer count of the number of entries sent to a
+ *     device awaiting the reply.
+ *
+ *   Z90STAT_TOTALOPEN_COUNT
+ *     Return an integer count of the number of open file handles.
+ *
+ *   Z90STAT_DOMAIN_INDEX
+ *     Return the integer value of the Cryptographic Domain.
+ *
+ *   Z90STAT_STATUS_MASK
+ *     Return a 64 element array of unsigned chars for the status of
+ *     all devices.
+ *	 0x01: PCICA
+ *	 0x02: PCICC
+ *	 0x03: PCIXCC_MCL2
+ *	 0x04: PCIXCC_MCL3
+ *	 0x05: CEX2C
+ *	 0x06: CEX2A
+ *	 0x0d: device is disabled via the proc filesystem
+ *
+ *   Z90STAT_QDEPTH_MASK
+ *     Return a 64 element array of unsigned chars for the queue
+ *     depth of all devices.
+ *
+ *   Z90STAT_PERDEV_REQCNT
+ *     Return a 64 element array of unsigned integers for the number
+ *     of successfully completed requests per device since the device
+ *     was detected and made available.
+ *
+ *   ICAZ90STATUS (deprecated)
+ *     Return some device driver status in a ica_z90_status struct
+ *     This takes an ica_z90_status struct as its arg.
+ *
+ *     NOTE: this ioctl() is deprecated, and has been replaced with
+ *	     single ioctl()s for each type of status being requested
+ *
+ *   Z90STAT_PCIXCCCOUNT (deprecated)
+ *     Return an integer count of all PCIXCCs (MCL2 + MCL3).
+ *     This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
+ *     MCL2 PCIXCCs.
+ *
+ *   Z90QUIESCE (not recommended)
+ *     Quiesce the driver.  This is intended to stop all new
+ *     requests from being processed.  Its use is NOT recommended,
+ *     except in circumstances where there is no other way to stop
+ *     callers from accessing the driver.  Its original use was to
+ *     allow the driver to be "drained" of work in preparation for
+ *     a system shutdown.
+ *
+ *     NOTE: once issued, this ban on new work cannot be undone
+ *	     except by unloading and reloading the driver.
+ */
+
+/**
+ * Supported ioctl calls
+ */
+#define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+
+/* New status calls */
+#define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
+#define Z90STAT_PCICACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int)
+#define Z90STAT_PCICCCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int)
+#define Z90STAT_PCIXCCMCL2COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int)
+#define Z90STAT_PCIXCCMCL3COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int)
+#define Z90STAT_CEX2CCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int)
+#define Z90STAT_CEX2ACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int)
+#define Z90STAT_REQUESTQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
+#define Z90STAT_PENDINGQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
+#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
+#define Z90STAT_DOMAIN_INDEX	_IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
+#define Z90STAT_STATUS_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+#define Z90STAT_QDEPTH_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+#define Z90STAT_PERDEV_REQCNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+#endif /* __ASM_S390_ZCRYPT_H */
-- 
GitLab


From 963ed931c3fd18082bfde0e8704a28955663abf4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:29 +0200
Subject: [PATCH 0224/1063] [S390] zcrypt CEX2A, CEX2C, PCICA accelerator card
 ap bus drivers.

Signed-off-by: Ralph Wuerthner <rwuerthn@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/zcrypt_cex2a.c | 435 +++++++++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_cex2a.h | 126 +++++++++
 drivers/s390/crypto/zcrypt_error.h | 133 +++++++++
 drivers/s390/crypto/zcrypt_pcica.c | 418 +++++++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_pcica.h | 117 ++++++++
 5 files changed, 1229 insertions(+)
 create mode 100644 drivers/s390/crypto/zcrypt_cex2a.c
 create mode 100644 drivers/s390/crypto/zcrypt_cex2a.h
 create mode 100644 drivers/s390/crypto/zcrypt_error.h
 create mode 100644 drivers/s390/crypto/zcrypt_pcica.c
 create mode 100644 drivers/s390/crypto/zcrypt_pcica.h

diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
new file mode 100644
index 0000000000000..350248e5cd93d
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -0,0 +1,435 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_cex2a.c
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_cex2a.h"
+
+#define CEX2A_MIN_MOD_SIZE	  1	/*    8 bits	*/
+#define CEX2A_MAX_MOD_SIZE	256	/* 2048 bits	*/
+
+#define CEX2A_SPEED_RATING	970
+
+#define CEX2A_MAX_MESSAGE_SIZE	0x390	/* sizeof(struct type50_crb2_msg)    */
+#define CEX2A_MAX_RESPONSE_SIZE 0x110	/* max outputdatalength + type80_hdr */
+
+#define CEX2A_CLEANUP_TIME	(15*HZ)
+
+static struct ap_device_id zcrypt_cex2a_ids[] = {
+	{ AP_DEVICE(AP_DEVICE_TYPE_CEX2A) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("CEX2A Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif
+
+static int zcrypt_cex2a_probe(struct ap_device *ap_dev);
+static void zcrypt_cex2a_remove(struct ap_device *ap_dev);
+static void zcrypt_cex2a_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+
+static struct ap_driver zcrypt_cex2a_driver = {
+	.probe = zcrypt_cex2a_probe,
+	.remove = zcrypt_cex2a_remove,
+	.receive = zcrypt_cex2a_receive,
+	.ids = zcrypt_cex2a_ids,
+};
+
+/**
+ * Convert a ICAMEX message to a type50 MEX message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message buffer that receives the type50 request
+ * @mex: pointer to user input data
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo *mex)
+{
+	unsigned char *mod, *exp, *inp;
+	int mod_len;
+
+	mod_len = mex->inputdatalength;
+
+	if (mod_len <= 128) {
+		struct type50_meb1_msg *meb1 = ap_msg->message;
+		memset(meb1, 0, sizeof(*meb1));
+		ap_msg->length = sizeof(*meb1);
+		meb1->header.msg_type_code = TYPE50_TYPE_CODE;
+		meb1->header.msg_len = sizeof(*meb1);
+		meb1->keyblock_type = TYPE50_MEB1_FMT;
+		mod = meb1->modulus + sizeof(meb1->modulus) - mod_len;
+		exp = meb1->exponent + sizeof(meb1->exponent) - mod_len;
+		inp = meb1->message + sizeof(meb1->message) - mod_len;
+	} else {
+		struct type50_meb2_msg *meb2 = ap_msg->message;
+		memset(meb2, 0, sizeof(*meb2));
+		ap_msg->length = sizeof(*meb2);
+		meb2->header.msg_type_code = TYPE50_TYPE_CODE;
+		meb2->header.msg_len = sizeof(*meb2);
+		meb2->keyblock_type = TYPE50_MEB2_FMT;
+		mod = meb2->modulus + sizeof(meb2->modulus) - mod_len;
+		exp = meb2->exponent + sizeof(meb2->exponent) - mod_len;
+		inp = meb2->message + sizeof(meb2->message) - mod_len;
+	}
+
+	if (copy_from_user(mod, mex->n_modulus, mod_len) ||
+	    copy_from_user(exp, mex->b_key, mod_len) ||
+	    copy_from_user(inp, mex->inputdata, mod_len))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Convert a ICACRT message to a type50 CRT message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message buffer that receives the type50 request
+ * @crt: pointer to user input data
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo_crt *crt)
+{
+	int mod_len, short_len, long_len, long_offset;
+	unsigned char *p, *q, *dp, *dq, *u, *inp;
+
+	mod_len = crt->inputdatalength;
+	short_len = mod_len / 2;
+	long_len = mod_len / 2 + 8;
+
+	/*
+	 * CEX2A cannot handle p, dp, or U > 128 bytes.
+	 * If we have one of these, we need to do extra checking.
+	 */
+	if (long_len > 128) {
+		/*
+		 * zcrypt_rsa_crt already checked for the leading
+		 * zeroes of np_prime, bp_key and u_mult_inv.
+		 */
+		long_offset = long_len - 128;
+		long_len = 128;
+	} else
+		long_offset = 0;
+
+	/*
+	 * Instead of doing extra work for p, dp, U > 64 bytes, we'll just use
+	 * the larger message structure.
+	 */
+	if (long_len <= 64) {
+		struct type50_crb1_msg *crb1 = ap_msg->message;
+		memset(crb1, 0, sizeof(*crb1));
+		ap_msg->length = sizeof(*crb1);
+		crb1->header.msg_type_code = TYPE50_TYPE_CODE;
+		crb1->header.msg_len = sizeof(*crb1);
+		crb1->keyblock_type = TYPE50_CRB1_FMT;
+		p = crb1->p + sizeof(crb1->p) - long_len;
+		q = crb1->q + sizeof(crb1->q) - short_len;
+		dp = crb1->dp + sizeof(crb1->dp) - long_len;
+		dq = crb1->dq + sizeof(crb1->dq) - short_len;
+		u = crb1->u + sizeof(crb1->u) - long_len;
+		inp = crb1->message + sizeof(crb1->message) - mod_len;
+	} else {
+		struct type50_crb2_msg *crb2 = ap_msg->message;
+		memset(crb2, 0, sizeof(*crb2));
+		ap_msg->length = sizeof(*crb2);
+		crb2->header.msg_type_code = TYPE50_TYPE_CODE;
+		crb2->header.msg_len = sizeof(*crb2);
+		crb2->keyblock_type = TYPE50_CRB2_FMT;
+		p = crb2->p + sizeof(crb2->p) - long_len;
+		q = crb2->q + sizeof(crb2->q) - short_len;
+		dp = crb2->dp + sizeof(crb2->dp) - long_len;
+		dq = crb2->dq + sizeof(crb2->dq) - short_len;
+		u = crb2->u + sizeof(crb2->u) - long_len;
+		inp = crb2->message + sizeof(crb2->message) - mod_len;
+	}
+
+	if (copy_from_user(p, crt->np_prime + long_offset, long_len) ||
+	    copy_from_user(q, crt->nq_prime, short_len) ||
+	    copy_from_user(dp, crt->bp_key + long_offset, long_len) ||
+	    copy_from_user(dq, crt->bq_key, short_len) ||
+	    copy_from_user(u, crt->u_mult_inv + long_offset, long_len) ||
+	    copy_from_user(inp, crt->inputdata, mod_len))
+		return -EFAULT;
+
+
+	return 0;
+}
+
+/**
+ * Copy results from a type 80 reply message back to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @outputdata: pointer to user output data
+ * @outputdatalength: size of user output data
+ *
+ * Returns 0 on success, -EFAULT or -EAGAIN (reply too short, card offlined).
+ */
+static int convert_type80(struct zcrypt_device *zdev,
+			  struct ap_message *reply,
+			  char __user *outputdata,
+			  unsigned int outputdatalength)
+{
+	struct type80_hdr *t80h = reply->message;
+	unsigned char *data;
+
+	if (t80h->len < sizeof(*t80h) + outputdatalength) {
+		/* The result is too short, the CEX2A card may not do that.. */
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+	BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE);
+	data = reply->message + t80h->len - outputdatalength;
+	if (copy_to_user(outputdata, data, outputdatalength))
+		return -EFAULT;
+	return 0;
+}
+
+static int convert_response(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	/* Response type byte is the second byte in the response. */
+	switch (((unsigned char *) reply->message)[1]) {
+	case TYPE82_RSP_CODE:
+	case TYPE88_RSP_CODE:
+		return convert_error(zdev, reply);
+	case TYPE80_RSP_CODE:
+		return convert_type80(zdev, reply,
+				      outputdata, outputdatalength);
+	default: /* Unknown response type, this should NEVER EVER happen */
+		PRINTK("Unrecognized Message Header: %08x%08x\n",
+		       *(unsigned int *) reply->message,
+		       *(unsigned int *) (reply->message+4));
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+}
+
+/**
+ * This function is called from the AP bus code after a crypto request
+ * "msg" has finished with the reply message "reply".
+ * It is called from tasklet context.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message, or an ERR_PTR() on failure
+ */
+static void zcrypt_cex2a_receive(struct ap_device *ap_dev,
+				 struct ap_message *msg,
+				 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	int length = sizeof(error_reply);
+
+	/* An ERR_PTR reply carries no message; report a machine failure. */
+	if (IS_ERR(reply)) {
+		memcpy(msg->message, &error_reply, length);
+	} else {
+		struct type80_hdr *t80h = reply->message;
+		if (t80h->type == TYPE80_RSP_CODE)
+			length = min(CEX2A_MAX_RESPONSE_SIZE, (int) t80h->len);
+		memcpy(msg->message, reply->message, length);
+	}
+	complete((struct completion *) msg->private);
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);
+
+/**
+ * The request distributor calls this function if it picked the CEX2A
+ * device to handle a modexpo request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  CEX2A device to the request distributor
+ * @mex: pointer to the modexpo request buffer
+ */
+static long zcrypt_cex2a_modexpo(struct zcrypt_device *zdev,
+				 struct ica_rsa_modexpo *mex)
+{
+	struct ap_message ap_msg;
+	struct completion work;
+	int rc;
+
+	ap_msg.message = (void *) kmalloc(CEX2A_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICAMEX_msg_to_type50MEX_msg(zdev, &ap_msg, mex);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, CEX2A_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response(zdev, &ap_msg, mex->outputdata,
+				      mex->outputdatalength);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
+ * The request distributor calls this function if it picked the CEX2A
+ * device to handle a modexpo_crt request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  CEX2A device to the request distributor
+ * @crt: pointer to the modexpo_crt request buffer
+ */
+static long zcrypt_cex2a_modexpo_crt(struct zcrypt_device *zdev,
+				     struct ica_rsa_modexpo_crt *crt)
+{
+	struct ap_message ap_msg;
+	struct completion work;
+	int rc;
+
+	ap_msg.message = (void *) kmalloc(CEX2A_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICACRT_msg_to_type50CRT_msg(zdev, &ap_msg, crt);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, CEX2A_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response(zdev, &ap_msg, crt->outputdata,
+				      crt->outputdatalength);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
+ * The crypto operations for a CEX2A card.
+ */
+static struct zcrypt_ops zcrypt_cex2a_ops = {
+	.rsa_modexpo = zcrypt_cex2a_modexpo,
+	.rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt,
+};
+
+/**
+ * Probe function for CEX2A cards. It always accepts the AP device
+ * since the bus_match already checked the hardware type.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_cex2a_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(CEX2A_MAX_RESPONSE_SIZE);
+	if (!zdev)
+		return -ENOMEM;
+	zdev->ap_dev = ap_dev;
+	zdev->ops = &zcrypt_cex2a_ops;
+	zdev->online = 1;
+	zdev->user_space_type = ZCRYPT_CEX2A;
+	zdev->type_string = "CEX2A";
+	zdev->min_mod_size = CEX2A_MIN_MOD_SIZE;
+	zdev->max_mod_size = CEX2A_MAX_MOD_SIZE;
+	zdev->short_crt = 1;
+	zdev->speed_rating = CEX2A_SPEED_RATING;
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (rc)
+		goto out_free;
+	return 0;
+
+out_free:
+	ap_dev->private = NULL;
+	zcrypt_device_free(zdev);
+	return rc;
+}
+
+/**
+ * This is called to remove the extended CEX2A driver information
+ * if an AP device is removed.
+ */
+static void zcrypt_cex2a_remove(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev = ap_dev->private;
+
+	zcrypt_device_unregister(zdev);
+}
+
+int __init zcrypt_cex2a_init(void)
+{
+	return ap_driver_register(&zcrypt_cex2a_driver, THIS_MODULE, "cex2a");
+}
+
+void __exit zcrypt_cex2a_exit(void)
+{
+	ap_driver_unregister(&zcrypt_cex2a_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_cex2a_init);
+module_exit(zcrypt_cex2a_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h
new file mode 100644
index 0000000000000..61a78c32dce46
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_cex2a.h
@@ -0,0 +1,126 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_cex2a.h
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_CEX2A_H_
+#define _ZCRYPT_CEX2A_H_
+
+/**
+ * The type 50 message family is associated with a CEX2A card.
+ *
+ * The four members of the family are described below.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+struct type50_hdr {
+	unsigned char	reserved1;
+	unsigned char	msg_type_code;	/* 0x50 */
+	unsigned short	msg_len;
+	unsigned char	reserved2;
+	unsigned char	ignored;
+	unsigned short	reserved3;
+} __attribute__((packed));
+
+#define TYPE50_TYPE_CODE	0x50
+
+#define TYPE50_MEB1_FMT		0x0001
+#define TYPE50_MEB2_FMT		0x0002
+#define TYPE50_CRB1_FMT		0x0011
+#define TYPE50_CRB2_FMT		0x0012
+
+/* Mod-Exp, with a small modulus */
+struct type50_meb1_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0001 */
+	unsigned char	reserved[6];
+	unsigned char	exponent[128];
+	unsigned char	modulus[128];
+	unsigned char	message[128];
+} __attribute__((packed));
+
+/* Mod-Exp, with a large modulus */
+struct type50_meb2_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0002 */
+	unsigned char	reserved[6];
+	unsigned char	exponent[256];
+	unsigned char	modulus[256];
+	unsigned char	message[256];
+} __attribute__((packed));
+
+/* CRT, with a small modulus */
+struct type50_crb1_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0011 */
+	unsigned char	reserved[6];
+	unsigned char	p[64];
+	unsigned char	q[64];
+	unsigned char	dp[64];
+	unsigned char	dq[64];
+	unsigned char	u[64];
+	unsigned char	message[128];
+} __attribute__((packed));
+
+/* CRT, with a large modulus */
+struct type50_crb2_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0012 */
+	unsigned char	reserved[6];
+	unsigned char	p[128];
+	unsigned char	q[128];
+	unsigned char	dp[128];
+	unsigned char	dq[128];
+	unsigned char	u[128];
+	unsigned char	message[256];
+} __attribute__((packed));
+
+/**
+ * The type 80 response family is associated with a CEX2A card.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+
+#define TYPE80_RSP_CODE 0x80
+
+struct type80_hdr {
+	unsigned char	reserved1;
+	unsigned char	type;		/* 0x80 */
+	unsigned short	len;
+	unsigned char	code;		/* 0x00 */
+	unsigned char	reserved2[3];
+	unsigned char	reserved3[8];
+} __attribute__((packed));
+
+int zcrypt_cex2a_init(void);
+void zcrypt_cex2a_exit(void);
+
+#endif /* _ZCRYPT_CEX2A_H_ */
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
new file mode 100644
index 0000000000000..b22bd055a03ba
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -0,0 +1,133 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_error.h
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_ERROR_H_
+#define _ZCRYPT_ERROR_H_
+
+#include "zcrypt_api.h"
+
+/**
+ * Reply Messages
+ *
+ * Error reply messages are of two types:
+ *    82:  Error (see below)
+ *    88:  Error (see below)
+ * Both type 82 and type 88 have the same structure in the header.
+ *
+ * Request reply messages are of three known types:
+ *    80:  Reply from a Type 50 Request (see CEX2A-RELATED STRUCTS)
+ *    84:  Reply from a Type 4 Request (see PCICA-RELATED STRUCTS)
+ *    86:  Reply from a Type 6 Request (see PCICC/PCIXCC/CEX2C-RELATED STRUCTS)
+ *
+ */
+struct error_hdr {
+	unsigned char reserved1;	/* 0x00			*/
+	unsigned char type;		/* 0x82 or 0x88		*/
+	unsigned char reserved2[2];	/* 0x0000		*/
+	unsigned char reply_code;	/* reply code		*/
+	unsigned char reserved3[3];	/* 0x000000		*/
+};
+
+#define TYPE82_RSP_CODE 0x82
+#define TYPE88_RSP_CODE 0x88
+
+#define REP82_ERROR_MACHINE_FAILURE  0x10
+#define REP82_ERROR_PREEMPT_FAILURE  0x12
+#define REP82_ERROR_CHECKPT_FAILURE  0x14
+#define REP82_ERROR_MESSAGE_TYPE     0x20
+#define REP82_ERROR_INVALID_COMM_CD  0x21	/* Type 84	*/
+#define REP82_ERROR_INVALID_MSG_LEN  0x23
+#define REP82_ERROR_RESERVD_FIELD    0x24	/* was 0x50	*/
+#define REP82_ERROR_FORMAT_FIELD     0x29
+#define REP82_ERROR_INVALID_COMMAND  0x30
+#define REP82_ERROR_MALFORMED_MSG    0x40
+#define REP82_ERROR_RESERVED_FIELDO  0x50	/* old value	*/
+#define REP82_ERROR_WORD_ALIGNMENT   0x60
+#define REP82_ERROR_MESSAGE_LENGTH   0x80
+#define REP82_ERROR_OPERAND_INVALID  0x82
+#define REP82_ERROR_OPERAND_SIZE     0x84
+#define REP82_ERROR_EVEN_MOD_IN_OPND 0x85
+#define REP82_ERROR_RESERVED_FIELD   0x88
+#define REP82_ERROR_TRANSPORT_FAIL   0x90
+#define REP82_ERROR_PACKET_TRUNCATED 0xA0
+#define REP82_ERROR_ZERO_BUFFER_LEN  0xB0
+
+#define REP88_ERROR_MODULE_FAILURE   0x10
+
+#define REP88_ERROR_MESSAGE_TYPE     0x20
+#define REP88_ERROR_MESSAGE_MALFORMD 0x22
+#define REP88_ERROR_MESSAGE_LENGTH   0x23
+#define REP88_ERROR_RESERVED_FIELD   0x24
+#define REP88_ERROR_KEY_TYPE	     0x34
+#define REP88_ERROR_INVALID_KEY      0x82	/* CEX2A	*/
+#define REP88_ERROR_OPERAND	     0x84	/* CEX2A	*/
+#define REP88_ERROR_OPERAND_EVEN_MOD 0x85	/* CEX2A	*/
+
+static inline int convert_error(struct zcrypt_device *zdev,
+				struct ap_message *reply)
+{
+	struct error_hdr *ehdr = reply->message;
+
+	PRINTK("Hardware error : Type %02x Message Header: %08x%08x\n",
+	       ehdr->type, *(unsigned int *) reply->message,
+	       *(unsigned int *) (reply->message + 4));
+
+	switch (ehdr->reply_code) {
+	case REP82_ERROR_OPERAND_INVALID:
+	case REP82_ERROR_OPERAND_SIZE:
+	case REP82_ERROR_EVEN_MOD_IN_OPND:
+	case REP88_ERROR_MESSAGE_MALFORMD:
+	//   REP88_ERROR_INVALID_KEY		// '82' CEX2A
+	//   REP88_ERROR_OPERAND		// '84' CEX2A
+	//   REP88_ERROR_OPERAND_EVEN_MOD	// '85' CEX2A
+		/* Invalid input data. */
+		return -EINVAL;
+	case REP82_ERROR_MESSAGE_TYPE:
+	//   REP88_ERROR_MESSAGE_TYPE		// '20' CEX2A
+		/**
+		 * Sending a message of the wrong type is a bug in the
+		 * device driver. Warn about it, disable the device
+		 * and then repeat the request.
+		 */
+		WARN_ON(1);
+		zdev->online = 0;
+		return -EAGAIN;
+	case REP82_ERROR_TRANSPORT_FAIL:
+	case REP82_ERROR_MACHINE_FAILURE:
+	//   REP88_ERROR_MODULE_FAILURE		// '10' CEX2A
+		/* If a card fails disable it and repeat the request. */
+		zdev->online = 0;
+		return -EAGAIN;
+	default:
+		PRINTKW("unknown type %02x reply code = %d\n",
+			ehdr->type, ehdr->reply_code);
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+}
+
+#endif /* _ZCRYPT_ERROR_H_ */
diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c
new file mode 100644
index 0000000000000..0ff56e86caae6
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcica.c
@@ -0,0 +1,418 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcica.c
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_pcica.h"
+
+#define PCICA_MIN_MOD_SIZE	  1	/*    8 bits	*/
+#define PCICA_MAX_MOD_SIZE	256	/* 2048 bits	*/
+
+#define PCICA_SPEED_RATING	2800
+
+#define PCICA_MAX_MESSAGE_SIZE	0x3a0	/* sizeof(struct type4_lcr)	     */
+#define PCICA_MAX_RESPONSE_SIZE 0x110	/* max outputdatalength + type80_hdr */
+
+#define PCICA_CLEANUP_TIME	(15*HZ)
+
+static struct ap_device_id zcrypt_pcica_ids[] = {
+	{ AP_DEVICE(AP_DEVICE_TYPE_PCICA) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+MODULE_DEVICE_TABLE(ap, zcrypt_pcica_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("PCICA Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif
+
+static int zcrypt_pcica_probe(struct ap_device *ap_dev);
+static void zcrypt_pcica_remove(struct ap_device *ap_dev);
+static void zcrypt_pcica_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+
+static struct ap_driver zcrypt_pcica_driver = {
+	.probe = zcrypt_pcica_probe,
+	.remove = zcrypt_pcica_remove,
+	.receive = zcrypt_pcica_receive,
+	.ids = zcrypt_pcica_ids,
+};
+
+/**
+ * Convert a ICAMEX message to a type4 MEX message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to the AP message to fill in
+ * @mex: pointer to user input data
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static int ICAMEX_msg_to_type4MEX_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo *mex)
+{
+	unsigned char *modulus, *exponent, *message;
+	int mod_len;
+
+	mod_len = mex->inputdatalength;
+
+	if (mod_len <= 128) {
+		struct type4_sme *sme = ap_msg->message;
+		memset(sme, 0, sizeof(*sme));
+		ap_msg->length = sizeof(*sme);
+		sme->header.msg_fmt = TYPE4_SME_FMT;
+		sme->header.msg_len = sizeof(*sme);
+		sme->header.msg_type_code = TYPE4_TYPE_CODE;
+		sme->header.request_code = TYPE4_REQU_CODE;
+		modulus = sme->modulus + sizeof(sme->modulus) - mod_len;
+		exponent = sme->exponent + sizeof(sme->exponent) - mod_len;
+		message = sme->message + sizeof(sme->message) - mod_len;
+	} else {
+		struct type4_lme *lme = ap_msg->message;
+		memset(lme, 0, sizeof(*lme));
+		ap_msg->length = sizeof(*lme);
+		lme->header.msg_fmt = TYPE4_LME_FMT;
+		lme->header.msg_len = sizeof(*lme);
+		lme->header.msg_type_code = TYPE4_TYPE_CODE;
+		lme->header.request_code = TYPE4_REQU_CODE;
+		modulus = lme->modulus + sizeof(lme->modulus) - mod_len;
+		exponent = lme->exponent + sizeof(lme->exponent) - mod_len;
+		message = lme->message + sizeof(lme->message) - mod_len;
+	}
+
+	if (copy_from_user(modulus, mex->n_modulus, mod_len) ||
+	    copy_from_user(exponent, mex->b_key, mod_len) ||
+	    copy_from_user(message, mex->inputdata, mod_len))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Convert a ICACRT message to a type4 CRT message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to the AP message to fill in
+ * @crt: pointer to user input data
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static int ICACRT_msg_to_type4CRT_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo_crt *crt)
+{
+	unsigned char *p, *q, *dp, *dq, *u, *inp;
+	int mod_len, short_len, long_len;
+
+	mod_len = crt->inputdatalength;
+	short_len = mod_len / 2;
+	long_len = mod_len / 2 + 8;
+
+	if (mod_len <= 128) {
+		struct type4_scr *scr = ap_msg->message;
+		memset(scr, 0, sizeof(*scr));
+		ap_msg->length = sizeof(*scr);
+		scr->header.msg_type_code = TYPE4_TYPE_CODE;
+		scr->header.request_code = TYPE4_REQU_CODE;
+		scr->header.msg_fmt = TYPE4_SCR_FMT;
+		scr->header.msg_len = sizeof(*scr);
+		p = scr->p + sizeof(scr->p) - long_len;
+		q = scr->q + sizeof(scr->q) - short_len;
+		dp = scr->dp + sizeof(scr->dp) - long_len;
+		dq = scr->dq + sizeof(scr->dq) - short_len;
+		u = scr->u + sizeof(scr->u) - long_len;
+		inp = scr->message + sizeof(scr->message) - mod_len;
+	} else {
+		struct type4_lcr *lcr = ap_msg->message;
+		memset(lcr, 0, sizeof(*lcr));
+		ap_msg->length = sizeof(*lcr);
+		lcr->header.msg_type_code = TYPE4_TYPE_CODE;
+		lcr->header.request_code = TYPE4_REQU_CODE;
+		lcr->header.msg_fmt = TYPE4_LCR_FMT;
+		lcr->header.msg_len = sizeof(*lcr);
+		p = lcr->p + sizeof(lcr->p) - long_len;
+		q = lcr->q + sizeof(lcr->q) - short_len;
+		dp = lcr->dp + sizeof(lcr->dp) - long_len;
+		dq = lcr->dq + sizeof(lcr->dq) - short_len;
+		u = lcr->u + sizeof(lcr->u) - long_len;
+		inp = lcr->message + sizeof(lcr->message) - mod_len;
+	}
+
+	if (copy_from_user(p, crt->np_prime, long_len) ||
+	    copy_from_user(q, crt->nq_prime, short_len) ||
+	    copy_from_user(dp, crt->bp_key, long_len) ||
+	    copy_from_user(dq, crt->bq_key, short_len) ||
+	    copy_from_user(u, crt->u_mult_inv, long_len) ||
+	    copy_from_user(inp, crt->inputdata, mod_len))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Copy results from a type 84 reply message back to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @data: pointer to user output data
+ * @length: size of user output data
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static inline int convert_type84(struct zcrypt_device *zdev,
+				 struct ap_message *reply,
+				 char __user *outputdata,
+				 unsigned int outputdatalength)
+{
+	struct type84_hdr *t84h = reply->message;
+	char *data;
+
+	if (t84h->len < sizeof(*t84h) + outputdatalength) {
+		/* The result is too short, the PCICA card may not do that.. */
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+	BUG_ON(t84h->len > PCICA_MAX_RESPONSE_SIZE);
+	data = reply->message + t84h->len - outputdatalength;
+	if (copy_to_user(outputdata, data, outputdatalength))
+		return -EFAULT;
+	return 0;
+}
+
+static int convert_response(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	/* Response type byte is the second byte in the response. */
+	switch (((unsigned char *) reply->message)[1]) {
+	case TYPE82_RSP_CODE:
+	case TYPE88_RSP_CODE:
+		return convert_error(zdev, reply);
+	case TYPE84_RSP_CODE:
+		return convert_type84(zdev, reply,
+				      outputdata, outputdatalength);
+	default: /* Unknown response type, this should NEVER EVER happen */
+		PRINTK("Unrecognized Message Header: %08x%08x\n",
+		       *(unsigned int *) reply->message,
+		       *(unsigned int *) (reply->message+4));
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+}
+
+/**
+ * Called from the AP bus code, in tasklet context, after the crypto
+ * request "msg" has finished with the reply message "reply".
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message; its buffer receives the reply
+ * @reply: pointer to the AP reply message, or an ERR_PTR on failure
+ */
+static void zcrypt_pcica_receive(struct ap_device *ap_dev,
+				 struct ap_message *msg,
+				 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct type84_hdr *t84h;
+	int length = sizeof(error_reply);
+
+	/* An ERR_PTR reply has no message: hand back a machine failure. */
+	if (IS_ERR(reply)) {
+		memcpy(msg->message, &error_reply, length);
+	} else {
+		t84h = reply->message;
+		if (t84h->code == TYPE84_RSP_CODE)
+			length = min(PCICA_MAX_RESPONSE_SIZE, (int) t84h->len);
+		memcpy(msg->message, t84h, length);
+	}
+	complete((struct completion *) msg->private);
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);
+
+/**
+ * The request distributor calls this function if it picked the PCICA
+ * device to handle a modexpo request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCICA device to the request distributor
+ * @mex: pointer to the modexpo request buffer
+ */
+static long zcrypt_pcica_modexpo(struct zcrypt_device *zdev,
+				 struct ica_rsa_modexpo *mex)
+{
+	struct ap_message ap_msg;
+	struct completion work;
+	int rc;
+
+	ap_msg.message = (void *) kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICAMEX_msg_to_type4MEX_msg(zdev, &ap_msg, mex);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCICA_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response(zdev, &ap_msg, mex->outputdata,
+				      mex->outputdatalength);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
+ * The request distributor calls this function if it picked the PCICA
+ * device to handle a modexpo_crt request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCICA device to the request distributor
+ * @crt: pointer to the modexpo_crt request buffer
+ */
+static long zcrypt_pcica_modexpo_crt(struct zcrypt_device *zdev,
+				     struct ica_rsa_modexpo_crt *crt)
+{
+	struct ap_message ap_msg;
+	struct completion work;
+	int rc;
+
+	ap_msg.message = (void *) kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICACRT_msg_to_type4CRT_msg(zdev, &ap_msg, crt);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCICA_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response(zdev, &ap_msg, crt->outputdata,
+				      crt->outputdatalength);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
+ * The crypto operations for a PCICA card.
+ */
+static struct zcrypt_ops zcrypt_pcica_ops = {
+	.rsa_modexpo = zcrypt_pcica_modexpo,
+	.rsa_modexpo_crt = zcrypt_pcica_modexpo_crt,
+};
+
+/**
+ * Probe function for PCICA cards. It always accepts the AP device
+ * since the bus_match already checked the hardware type.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_pcica_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(PCICA_MAX_RESPONSE_SIZE);
+	if (!zdev)
+		return -ENOMEM;
+	zdev->ap_dev = ap_dev;
+	zdev->ops = &zcrypt_pcica_ops;
+	zdev->online = 1;
+	zdev->user_space_type = ZCRYPT_PCICA;
+	zdev->type_string = "PCICA";
+	zdev->min_mod_size = PCICA_MIN_MOD_SIZE;
+	zdev->max_mod_size = PCICA_MAX_MOD_SIZE;
+	zdev->speed_rating = PCICA_SPEED_RATING;
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (rc)
+		goto out_free;
+	return 0;
+
+out_free:
+	ap_dev->private = NULL;
+	zcrypt_device_free(zdev);
+	return rc;
+}
+
+/**
+ * This is called to remove the extended PCICA driver information
+ * if an AP device is removed.
+ */
+static void zcrypt_pcica_remove(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev = ap_dev->private;
+
+	zcrypt_device_unregister(zdev);
+}
+
+int __init zcrypt_pcica_init(void)
+{
+	return ap_driver_register(&zcrypt_pcica_driver, THIS_MODULE, "pcica");
+}
+
+void zcrypt_pcica_exit(void)
+{
+	ap_driver_unregister(&zcrypt_pcica_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_pcica_init);
+module_exit(zcrypt_pcica_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_pcica.h b/drivers/s390/crypto/zcrypt_pcica.h
new file mode 100644
index 0000000000000..a08a4f8c33c95
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcica.h
@@ -0,0 +1,117 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcica.h
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_PCICA_H_
+#define _ZCRYPT_PCICA_H_
+
+/**
+ * The type 4 message family is associated with a PCICA card.
+ *
+ * The four members of the family are described below.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+struct type4_hdr {
+	unsigned char  reserved1;
+	unsigned char  msg_type_code;	/* 0x04 */
+	unsigned short msg_len;
+	unsigned char  request_code;	/* 0x40 */
+	unsigned char  msg_fmt;
+	unsigned short reserved2;
+} __attribute__((packed));
+
+#define TYPE4_TYPE_CODE 0x04
+#define TYPE4_REQU_CODE 0x40
+
+#define TYPE4_SME_FMT 0x00
+#define TYPE4_LME_FMT 0x10
+#define TYPE4_SCR_FMT 0x40
+#define TYPE4_LCR_FMT 0x50
+
+/* Mod-Exp, with a small modulus */
+struct type4_sme {
+	struct type4_hdr header;
+	unsigned char	 message[128];
+	unsigned char	 exponent[128];
+	unsigned char	 modulus[128];
+} __attribute__((packed));
+
+/* Mod-Exp, with a large modulus */
+struct type4_lme {
+	struct type4_hdr header;
+	unsigned char	 message[256];
+	unsigned char	 exponent[256];
+	unsigned char	 modulus[256];
+} __attribute__((packed));
+
+/* CRT, with a small modulus */
+struct type4_scr {
+	struct type4_hdr header;
+	unsigned char	 message[128];
+	unsigned char	 dp[72];
+	unsigned char	 dq[64];
+	unsigned char	 p[72];
+	unsigned char	 q[64];
+	unsigned char	 u[72];
+} __attribute__((packed));
+
+/* CRT, with a large modulus */
+struct type4_lcr {
+	struct type4_hdr header;
+	unsigned char	 message[256];
+	unsigned char	 dp[136];
+	unsigned char	 dq[128];
+	unsigned char	 p[136];
+	unsigned char	 q[128];
+	unsigned char	 u[136];
+} __attribute__((packed));
+
+/**
+ * The type 84 response family is associated with a PCICA card.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+
+struct type84_hdr {
+	unsigned char  reserved1;
+	unsigned char  code;
+	unsigned short len;
+	unsigned char  reserved2[4];
+} __attribute__((packed));
+
+#define TYPE84_RSP_CODE 0x84
+
+int zcrypt_pcica_init(void);
+void zcrypt_pcica_exit(void);
+
+#endif /* _ZCRYPT_PCICA_H_ */
-- 
GitLab


From 6684af1a07a1f88f3970bc90e5aed173d39168db Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:32 +0200
Subject: [PATCH 0225/1063] [S390] zcrypt PCICC, PCIXCC coprocessor card ap bus
 drivers.

Signed-off-by: Ralph Wuerthner <rwuerthn@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/zcrypt_cca_key.h | 350 +++++++++++++
 drivers/s390/crypto/zcrypt_pcicc.c   | 630 +++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_pcicc.h   | 176 +++++++
 drivers/s390/crypto/zcrypt_pcixcc.c  | 714 +++++++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_pcixcc.h  |  79 +++
 5 files changed, 1949 insertions(+)
 create mode 100644 drivers/s390/crypto/zcrypt_cca_key.h
 create mode 100644 drivers/s390/crypto/zcrypt_pcicc.c
 create mode 100644 drivers/s390/crypto/zcrypt_pcicc.h
 create mode 100644 drivers/s390/crypto/zcrypt_pcixcc.c
 create mode 100644 drivers/s390/crypto/zcrypt_pcixcc.h

diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h
new file mode 100644
index 0000000000000..c80f40d441976
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_cca_key.h
@@ -0,0 +1,350 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_cca_key.h
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_CCA_KEY_H_
+#define _ZCRYPT_CCA_KEY_H_
+
+struct T6_keyBlock_hdr {
+	unsigned short blen;
+	unsigned short ulen;
+	unsigned short flags;
+};
+
+/**
+ * mapping for the cca private ME key token.
+ * Three parts of interest here: the header, the private section and
+ * the public section.
+ *
+ * mapping for the cca key token header
+ */
+struct cca_token_hdr {
+	unsigned char  token_identifier;
+	unsigned char  version;
+	unsigned short token_length;
+	unsigned char  reserved[4];
+} __attribute__((packed));
+
+#define CCA_TKN_HDR_ID_EXT 0x1E
+
+/**
+ * mapping for the cca private ME section
+ */
+struct cca_private_ext_ME_sec {
+	unsigned char  section_identifier;
+	unsigned char  version;
+	unsigned short section_length;
+	unsigned char  private_key_hash[20];
+	unsigned char  reserved1[4];
+	unsigned char  key_format;
+	unsigned char  reserved2;
+	unsigned char  key_name_hash[20];
+	unsigned char  key_use_flags[4];
+	unsigned char  reserved3[6];
+	unsigned char  reserved4[24];
+	unsigned char  confounder[24];
+	unsigned char  exponent[128];
+	unsigned char  modulus[128];
+} __attribute__((packed));
+
+#define CCA_PVT_USAGE_ALL 0x80
+
+/**
+ * mapping for the cca public section
+ * In a private key, the modulus doesn't appear in the public
+ * section. So, an arbitrary public exponent of 0x010001 will be
+ * used, for a section length of 0x0F always.
+ */
+struct cca_public_sec {
+	unsigned char  section_identifier;
+	unsigned char  version;
+	unsigned short section_length;
+	unsigned char  reserved[2];
+	unsigned short exponent_len;
+	unsigned short modulus_bit_len;
+	unsigned short modulus_byte_len;    /* In a private key, this is 0 */
+} __attribute__((packed));
+
+/**
+ * mapping for the cca private CRT key 'token'
+ * The first three parts (the only parts considered in this release)
+ * are: the header, the private section and the public section.
+ * The header and public section are the same as for the
+ * struct cca_private_ext_ME
+ *
+ * Following the structure are the quantities p, q, dp, dq, u, pad,
+ * and modulus, in that order, where pad_len is the modulo 8
+ * complement of the residue modulo 8 of the sum of
+ * (p_len + q_len + dp_len + dq_len + u_len).
+ */
+struct cca_pvt_ext_CRT_sec {
+	unsigned char  section_identifier;
+	unsigned char  version;
+	unsigned short section_length;
+	unsigned char  private_key_hash[20];
+	unsigned char  reserved1[4];
+	unsigned char  key_format;
+	unsigned char  reserved2;
+	unsigned char  key_name_hash[20];
+	unsigned char  key_use_flags[4];
+	unsigned short p_len;
+	unsigned short q_len;
+	unsigned short dp_len;
+	unsigned short dq_len;
+	unsigned short u_len;
+	unsigned short mod_len;
+	unsigned char  reserved3[4];
+	unsigned short pad_len;
+	unsigned char  reserved4[52];
+	unsigned char  confounder[8];
+} __attribute__((packed));
+
+#define CCA_PVT_EXT_CRT_SEC_ID_PVT 0x08
+#define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40
+
+/**
+ * Set up private key fields of a type6 MEX message.
+ * Note that all numerics in the key token are big-endian,
+ * while the entries in the key block header are little-endian.
+ *
+ * @mex: pointer to user input data
+ * @p: pointer to memory area for the key
+ *
+ * Returns the size of the key area or -EFAULT
+ */
+static inline int zcrypt_type6_mex_key_de(struct ica_rsa_modexpo *mex,
+					  void *p, int big_endian)
+{
+	static struct cca_token_hdr static_pvt_me_hdr = {
+		.token_identifier	=  0x1E,
+		.token_length		=  0x0183,
+	};
+	static struct cca_private_ext_ME_sec static_pvt_me_sec = {
+		.section_identifier	=  0x02,
+		.section_length		=  0x016C,
+		.key_use_flags		= {0x80,0x00,0x00,0x00},
+	};
+	static struct cca_public_sec static_pub_me_sec = {
+		.section_identifier	=  0x04,
+		.section_length		=  0x000F,
+		.exponent_len		=  0x0003,
+	};
+	static char pk_exponent[3] = { 0x01, 0x00, 0x01 };
+	struct {
+		struct T6_keyBlock_hdr t6_hdr;
+		struct cca_token_hdr pvtMeHdr;
+		struct cca_private_ext_ME_sec pvtMeSec;
+		struct cca_public_sec pubMeSec;
+		char exponent[3];
+	} __attribute__((packed)) *key = p;
+	unsigned char *temp;
+
+	memset(key, 0, sizeof(*key));
+
+	if (big_endian) {
+		key->t6_hdr.blen = cpu_to_be16(0x189);
+		key->t6_hdr.ulen = cpu_to_be16(0x189 - 2);
+	} else {
+		key->t6_hdr.blen = cpu_to_le16(0x189);
+		key->t6_hdr.ulen = cpu_to_le16(0x189 - 2);
+	}
+	key->pvtMeHdr = static_pvt_me_hdr;
+	key->pvtMeSec = static_pvt_me_sec;
+	key->pubMeSec = static_pub_me_sec;
+	/**
+	 * In a private key, the modulus doesn't appear in the public
+	 * section. So, an arbitrary public exponent of 0x010001 will be
+	 * used.
+	 */
+	memcpy(key->exponent, pk_exponent, 3);
+
+	/* key parameter block */
+	temp = key->pvtMeSec.exponent +
+		sizeof(key->pvtMeSec.exponent) - mex->inputdatalength;
+	if (copy_from_user(temp, mex->b_key, mex->inputdatalength))
+		return -EFAULT;
+
+	/* modulus */
+	temp = key->pvtMeSec.modulus +
+		sizeof(key->pvtMeSec.modulus) - mex->inputdatalength;
+	if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength))
+		return -EFAULT;
+	key->pubMeSec.modulus_bit_len = 8 * mex->inputdatalength;
+	return sizeof(*key);
+}
+
+/**
+ * Set up public key fields of a type6 MEX message. The _en variant
+ * strips leading zeroes from the b_key.
+ * Note that all numerics in the key token are big-endian,
+ * while the entries in the key block header are little-endian.
+ *
+ * @mex: pointer to user input data
+ * @p: pointer to memory area for the key
+ *
+ * Returns the size of the key area, -EFAULT, or -EINVAL if b_key is all zero
+ */
+static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex,
+					  void *p, int big_endian)
+{
+	static struct cca_token_hdr static_pub_hdr = {
+		.token_identifier	=  0x1E,
+	};
+	static struct cca_public_sec static_pub_sec = {
+		.section_identifier	=  0x04,
+	};
+	struct {
+		struct T6_keyBlock_hdr t6_hdr;
+		struct cca_token_hdr pubHdr;
+		struct cca_public_sec pubSec;
+		char exponent[0];
+	} __attribute__((packed)) *key = p;
+	unsigned char *temp;
+	int i;
+
+	memset(key, 0, sizeof(*key));
+
+	key->pubHdr = static_pub_hdr;
+	key->pubSec = static_pub_sec;
+
+	/* key parameter block */
+	temp = key->exponent;
+	if (copy_from_user(temp, mex->b_key, mex->inputdatalength))
+		return -EFAULT;
+	/* Strip leading zeroes from b_key. */
+	for (i = 0; i < mex->inputdatalength; i++)
+		if (temp[i])
+			break;
+	if (i >= mex->inputdatalength)
+		return -EINVAL;
+	memmove(temp, temp + i, mex->inputdatalength - i);
+	temp += mex->inputdatalength - i;
+	/* modulus */
+	if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength))
+		return -EFAULT;
+
+	key->pubSec.modulus_bit_len = 8 * mex->inputdatalength;
+	key->pubSec.modulus_byte_len = mex->inputdatalength;
+	key->pubSec.exponent_len = mex->inputdatalength - i;
+	key->pubSec.section_length = sizeof(key->pubSec) +
+					2*mex->inputdatalength - i;
+	key->pubHdr.token_length =
+		key->pubSec.section_length + sizeof(key->pubHdr);
+	if (big_endian) {
+		key->t6_hdr.ulen = cpu_to_be16(key->pubHdr.token_length + 4);
+		key->t6_hdr.blen = cpu_to_be16(key->pubHdr.token_length + 6);
+	} else {
+		key->t6_hdr.ulen = cpu_to_le16(key->pubHdr.token_length + 4);
+		key->t6_hdr.blen = cpu_to_le16(key->pubHdr.token_length + 6);
+	}
+	return sizeof(*key) + 2*mex->inputdatalength - i;
+}
+
+/**
+ * Set up private key fields of a type6 CRT message.
+ * Note that all numerics in the key token are big-endian,
+ * while the entries in the key block header are little-endian.
+ *
+ * @crt: pointer to user input data
+ * @p: pointer to memory area for the key
+ *
+ * Returns the size of the key area or -EFAULT
+ */
+static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt,
+				       void *p, int big_endian)
+{
+	static struct cca_public_sec static_cca_pub_sec = {
+		.section_identifier = 4,
+		.section_length = 0x000f,
+		.exponent_len = 0x0003,
+	};
+	static char pk_exponent[3] = { 0x01, 0x00, 0x01 };
+	struct {
+		struct T6_keyBlock_hdr t6_hdr;
+		struct cca_token_hdr token;
+		struct cca_pvt_ext_CRT_sec pvt;
+		char key_parts[0];
+	} __attribute__((packed)) *key = p;
+	struct cca_public_sec *pub;
+	int short_len, long_len, pad_len, key_len, size;
+
+	memset(key, 0, sizeof(*key));
+
+	short_len = crt->inputdatalength / 2;
+	long_len = short_len + 8;
+	pad_len = -(3*long_len + 2*short_len) & 7;
+	key_len = 3*long_len + 2*short_len + pad_len + crt->inputdatalength;
+	size = sizeof(*key) + key_len + sizeof(*pub) + 3;
+
+	/* parameter block.key block */
+	if (big_endian) {
+		key->t6_hdr.blen = cpu_to_be16(size);
+		key->t6_hdr.ulen = cpu_to_be16(size - 2);
+	} else {
+		key->t6_hdr.blen = cpu_to_le16(size);
+		key->t6_hdr.ulen = cpu_to_le16(size - 2);
+	}
+
+	/* key token header */
+	key->token.token_identifier = CCA_TKN_HDR_ID_EXT;
+	key->token.token_length = size - 6;
+
+	/* private section */
+	key->pvt.section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT;
+	key->pvt.section_length = sizeof(key->pvt) + key_len;
+	key->pvt.key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL;
+	key->pvt.key_use_flags[0] = CCA_PVT_USAGE_ALL;
+	key->pvt.p_len = key->pvt.dp_len = key->pvt.u_len = long_len;
+	key->pvt.q_len = key->pvt.dq_len = short_len;
+	key->pvt.mod_len = crt->inputdatalength;
+	key->pvt.pad_len = pad_len;
+
+	/* key parts */
+	if (copy_from_user(key->key_parts, crt->np_prime, long_len) ||
+	    copy_from_user(key->key_parts + long_len,
+					crt->nq_prime, short_len) ||
+	    copy_from_user(key->key_parts + long_len + short_len,
+					crt->bp_key, long_len) ||
+	    copy_from_user(key->key_parts + 2*long_len + short_len,
+					crt->bq_key, short_len) ||
+	    copy_from_user(key->key_parts + 2*long_len + 2*short_len,
+					crt->u_mult_inv, long_len))
+		return -EFAULT;
+	memset(key->key_parts + 3*long_len + 2*short_len + pad_len,
+	       0xff, crt->inputdatalength);
+	pub = (struct cca_public_sec *)(key->key_parts + key_len);
+	*pub = static_cca_pub_sec;
+	pub->modulus_bit_len = 8 * crt->inputdatalength;
+	/**
+	 * In a private key, the modulus doesn't appear in the public
+	 * section. So, an arbitrary public exponent of 0x010001 will be
+	 * used.
+	 */
+	memcpy((char *) (pub + 1), pk_exponent, 3);
+	return size;
+}
+
+#endif /* _ZCRYPT_CCA_KEY_H_ */
diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c
new file mode 100644
index 0000000000000..900362983fec1
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcicc.c
@@ -0,0 +1,630 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcicc.c
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_pcicc.h"
+#include "zcrypt_cca_key.h"
+
+#define PCICC_MIN_MOD_SIZE	 64	/*  512 bits */
+#define PCICC_MAX_MOD_SIZE_OLD	128	/* 1024 bits */
+#define PCICC_MAX_MOD_SIZE	256	/* 2048 bits */
+
+/**
+ * PCICC cards need a speed rating of 0. This keeps them at the end of
+ * the zcrypt device list (see zcrypt_api.c). PCICC cards are only
+ * used if no other cards are present because they are slow and can only
+ * cope with PKCS12 padded requests. The logic is queer. PKCS11 padded
+ * requests are rejected. The modexpo function encrypts PKCS12 padded data
+ * and decrypts any non-PKCS12 padded data (except PKCS11) on the assumption
+ * that it's encrypted PKCS12 data. The modexpo_crt function always decrypts
+ * the data on the assumption that it's PKCS12 encrypted data.
+ */
+#define PCICC_SPEED_RATING	0
+
+#define PCICC_MAX_MESSAGE_SIZE 0x710	/* max size type6 v1 crt message */
+#define PCICC_MAX_RESPONSE_SIZE 0x710	/* max size type86 v1 reply	 */
+
+#define PCICC_CLEANUP_TIME	(15*HZ)
+
+static struct ap_device_id zcrypt_pcicc_ids[] = {
+	{ AP_DEVICE(AP_DEVICE_TYPE_PCICC) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+MODULE_DEVICE_TABLE(ap, zcrypt_pcicc_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("PCICC Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif
+
+static int zcrypt_pcicc_probe(struct ap_device *ap_dev);
+static void zcrypt_pcicc_remove(struct ap_device *ap_dev);
+static void zcrypt_pcicc_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+
+static struct ap_driver zcrypt_pcicc_driver = {
+	.probe = zcrypt_pcicc_probe,
+	.remove = zcrypt_pcicc_remove,
+	.receive = zcrypt_pcicc_receive,
+	.ids = zcrypt_pcicc_ids,
+};
+
+/**
+ * The following is used to initialize the CPRB passed to the PCICC card
+ * in a type6 message. The 3 fields that must be filled in at execution
+ * time are  req_parml, rpl_parml and usage_domain. Note that all three
+ * fields are *little*-endian. Actually, everything about this interface
+ * is ascii/little-endian, since the device has 'Intel inside'.
+ *
+ * The CPRB is followed immediately by the parm block.
+ * The parm block contains:
+ * - function code ('PD' 0x5044 or 'PK' 0x504B)
+ * - rule block (0x0A00 'PKCS-1.2' or 0x0A00 'ZERO-PAD')
+ * - VUD block
+ */
+static struct CPRB static_cprb = {
+	.cprb_len	= __constant_cpu_to_le16(0x0070),
+	.cprb_ver_id	=  0x41,
+	.func_id	= {0x54,0x32},
+	.checkpoint_flag=  0x01,
+	.svr_namel	= __constant_cpu_to_le16(0x0008),
+	.svr_name	= {'I','C','S','F',' ',' ',' ',' '}
+};
+
+/**
+ * Check whether a message carries PKCS11 padding:
+ * 0x00 0x01, at least eight 0xFF bytes and a terminating 0x00.
+ */
+static inline int is_PKCS11_padded(unsigned char *buffer, int length)
+{
+	int pos = 2;
+
+	if (buffer[0] != 0x00 || buffer[1] != 0x01)
+		return 0;
+	while (pos < length && buffer[pos] == 0xFF)
+		pos++;
+	/* Filler too short, or it runs to the very end of the message. */
+	if (pos < 10 || pos == length)
+		return 0;
+	return buffer[pos] == 0x00;
+}
+
+/**
+ * Check whether a message carries PKCS12 padding:
+ * 0x00 0x02, at least eight non-zero bytes and a terminating 0x00.
+ */
+static inline int is_PKCS12_padded(unsigned char *buffer, int length)
+{
+	int pos = 2;
+
+	if (buffer[0] != 0x00 || buffer[1] != 0x02)
+		return 0;
+	while (pos < length && buffer[pos] != 0x00)
+		pos++;
+	/* Padding too short, or no 0x00 separator was found at all. */
+	if (pos < 10 || pos == length)
+		return 0;
+	return buffer[pos] == 0x00;
+}
+
+/**
+ * Convert a ICAMEX message to a type6 MEX message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message whose buffer is filled with the type6 request
+ * @mex: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT, -EINVAL, -ENODEV or a key setup error.
+ */
+static int ICAMEX_msg_to_type6MEX_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo *mex)
+{
+	static struct type6_hdr static_type6_hdr = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
+				   0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
+		.function_code	= {'P','K'},
+	};
+	static struct function_and_rules_block static_pke_function_and_rules ={
+		.function_code	= {'P','K'},
+		.ulen		= __constant_cpu_to_le16(10),
+		.only_rule	= {'P','K','C','S','-','1','.','2'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRB cprb;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int vud_len, pad_len, size;
+
+	/* VUD.ciphertext: copy the user input into the message text */
+	if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength))
+		return -EFAULT;
+
+	if (is_PKCS11_padded(msg->text, mex->inputdatalength))
+		return -EINVAL;	/* PKCS11 padding is rejected */
+
+	/* static message header and f&r, preset for a 'PK' request */
+	msg->hdr = static_type6_hdr;
+	msg->fr = static_pke_function_and_rules;
+
+	if (is_PKCS12_padded(msg->text, mex->inputdatalength)) {
+		/* strip the padding and adjust the data length */
+		pad_len = strnlen(msg->text + 2, mex->inputdatalength - 2) + 3;
+		if (pad_len <= 9 || pad_len >= mex->inputdatalength)
+			return -ENODEV;
+		vud_len = mex->inputdatalength - pad_len;
+		memmove(msg->text, msg->text + pad_len, vud_len);
+		msg->length = cpu_to_le16(vud_len + 2);
+
+		/* Set up key after the variable length text. */
+		size = zcrypt_type6_mex_key_en(mex, msg->text + vud_len, 0);
+		if (size < 0)
+			return size;
+		size += sizeof(*msg) + vud_len;	/* total size of msg */
+	} else {	/* no PKCS12 padding: switch to a 'PD' request */
+		vud_len = mex->inputdatalength;
+		msg->length = cpu_to_le16(2 + vud_len);
+
+		msg->hdr.function_code[1] = 'D';
+		msg->fr.function_code[1] = 'D';
+
+		/* Set up key after the variable length text. */
+		size = zcrypt_type6_mex_key_de(mex, msg->text + vud_len, 0);
+		if (size < 0)
+			return size;
+		size += sizeof(*msg) + vud_len;	/* total size of msg */
+	}
+
+	/* message header and cprb; ToCardLen1 is rounded up to 4 bytes */
+	msg->hdr.ToCardLen1 = (size - sizeof(msg->hdr) + 3) & -4;
+	msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr);
+
+	msg->cprb = static_cprb;
+	msg->cprb.usage_domain[0]= AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprb.req_parml = cpu_to_le16(size - sizeof(msg->hdr) -
+					   sizeof(msg->cprb));
+	msg->cprb.rpl_parml = cpu_to_le16(msg->hdr.FromCardLen1);
+
+	ap_msg->length = (size + 3) & -4;
+	return 0;
+}
+
+/**
+ * Convert a ICACRT message to a type6 CRT message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message whose buffer is filled with the type6 request
+ * @crt: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT, -EINVAL or a key setup error.
+ */
+static int ICACRT_msg_to_type6CRT_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo_crt *crt)
+{
+	static struct type6_hdr static_type6_hdr = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
+				   0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
+		.function_code	= {'P','D'},
+	};
+	static struct function_and_rules_block static_pkd_function_and_rules ={
+		.function_code	= {'P','D'},
+		.ulen		= __constant_cpu_to_le16(10),
+		.only_rule	= {'P','K','C','S','-','1','.','2'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRB cprb;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int size;
+
+	/* VUD.ciphertext: length field first, then the user input */
+	msg->length = cpu_to_le16(2 + crt->inputdatalength);
+	if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength))
+		return -EFAULT;
+
+	if (is_PKCS11_padded(msg->text, crt->inputdatalength))
+		return -EINVAL;	/* PKCS11 padding is rejected */
+
+	/* Set up key after the variable length text. */
+	size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 0);
+	if (size < 0)
+		return size;
+	size += sizeof(*msg) + crt->inputdatalength;	/* total size of msg */
+
+	/* message header, cprb and f&r (always a 'PD' request) */
+	msg->hdr = static_type6_hdr;
+	msg->hdr.ToCardLen1 = (size -  sizeof(msg->hdr) + 3) & -4;
+	msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr);
+
+	msg->cprb = static_cprb;
+	msg->cprb.usage_domain[0] = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprb.req_parml = msg->cprb.rpl_parml =
+		cpu_to_le16(size - sizeof(msg->hdr) - sizeof(msg->cprb));
+
+	msg->fr = static_pkd_function_and_rules;
+
+	ap_msg->length = (size + 3) & -4;
+	return 0;
+}
+
+/**
+ * Copy results from a type 86 reply message back to user space.
+ * (Describes convert_type86() below; struct type86_reply is the layout.)
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @outputdata: pointer to user output data
+ * @outputdatalength: size of user output data
+ *
+ * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
+ */
+struct type86_reply {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+	struct CPRB cprb;
+	unsigned char pad[4];	/* 4 byte function code/rules block ? */
+	unsigned short length;	/* little-endian, see convert_type86() */
+	char text[0];
+} __attribute__((packed));
+
+/* Copy a type 86 reply to user space, restoring the stripped padding. */
+static int convert_type86(struct zcrypt_device *zdev,
+			  struct ap_message *reply,
+			  char __user *outputdata,
+			  unsigned int outputdatalength)
+{
+	static unsigned char static_pad[] = {
+		0x00,0x02,
+		0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,
+		0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57,
+		0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,
+		0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39,
+		0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,
+		0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D,
+		0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,
+		0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F,
+		0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,
+		0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45,
+		0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,
+		0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F,
+		0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,
+		0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D,
+		0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,
+		0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9,
+		0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,
+		0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B,
+		0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,
+		0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD,
+		0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,
+		0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1,
+		0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,
+		0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23,
+		0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,
+		0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43,
+		0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,
+		0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F,
+		0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,
+		0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD,
+		0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,
+		0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09
+	};
+	struct type86_reply *msg = reply->message;
+	unsigned short service_rc, service_rs;
+	unsigned int reply_len, pad_len;
+
+	service_rc = le16_to_cpu(msg->cprb.ccp_rtcode);
+	if (unlikely(service_rc != 0)) {
+		service_rs = le16_to_cpu(msg->cprb.ccp_rscode);
+		if (service_rc == 8 && service_rs == 66) {
+			PDEBUG("Bad block format on PCICC\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 65) {
+			PDEBUG("Probably an even modulus on PCICC\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 770) {
+			PDEBUG("Invalid key length on PCICC\n");
+			zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		if (service_rc == 8 && service_rs == 783) {
+			PDEBUG("Extended bitlengths not enabled on PCICC\n");
+			zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		PRINTK("Unknown service rc/rs (PCICC): %d/%d\n",
+		       service_rc, service_rs);
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+	/* The little-endian length field includes its own two bytes. */
+	reply_len = le16_to_cpu(msg->length) - 2;
+	if (reply_len > outputdatalength)
+		return -EINVAL;
+	/**
+	 * For all encipher requests, the length of the ciphertext (reply_len)
+	 * will always equal the modulus length. For MEX decipher requests
+	 * the output needs to get padded. Minimum pad size is 10.
+	 *
+	 * Currently, the cases where padding will be added is for:
+	 * - PCIXCC_MCL2 using a CRT form token (since PKD didn't support
+	 *   ZERO-PAD and CRT is only supported for PKD requests)
+	 * - PCICC, always
+	 */
+	pad_len = outputdatalength - reply_len;
+	if (pad_len > 0) {
+		/* The padding comes from static_pad: never read beyond it. */
+		if (pad_len < 10 || pad_len - 1 > sizeof(static_pad))
+			return -EINVAL;
+		/* 'restore' padding left in the PCICC/PCIXCC card. */
+		if (copy_to_user(outputdata, static_pad, pad_len - 1) ||
+		    put_user(0, outputdata + pad_len - 1))
+			return -EFAULT;
+	}
+	/* Copy the crypto response to user space. */
+	if (copy_to_user(outputdata + pad_len, msg->text, reply_len))
+		return -EFAULT;
+	return 0;
+}
+
+static int convert_response(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	struct type86_reply *t86 = reply->message;
+	unsigned char rsp_type = t86->hdr.type;
+
+	/* Response type byte is the second byte in the response. */
+	if (rsp_type == TYPE82_RSP_CODE || rsp_type == TYPE88_RSP_CODE)
+		return convert_error(zdev, reply);
+	if (rsp_type == TYPE86_RSP_CODE) {
+		if (t86->hdr.reply_code)
+			return convert_error(zdev, reply);
+		if (t86->cprb.cprb_ver_id == 0x01)
+			return convert_type86(zdev, reply,
+					      outputdata, outputdatalength);
+		/* an incorrect cprb version is an unknown response */
+	}
+
+	/* Unknown response type, this should NEVER EVER happen */
+	PRINTK("Unrecognized Message Header: %08x%08x\n",
+	       *(unsigned int *) reply->message,
+	       *(unsigned int *) (reply->message+4));
+	zdev->online = 0;
+	return -EAGAIN;	/* repeat the request on a different device. */
+}
+
+/**
+ * This function is called from the AP bus code (in tasklet context) after
+ * a crypto request "msg" has finished with the reply message "reply".
+ * If "reply" is an ERR_PTR() value a machine failure is reported instead.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message
+ */
+static void zcrypt_pcicc_receive(struct ap_device *ap_dev,
+				 struct ap_message *msg,
+				 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct type86_reply *t86r = IS_ERR(reply) ? NULL : reply->message;
+	int length;
+
+	/* Copy the reply to the request buffer (length is little-endian). */
+	if (!t86r)	/* never dereference an ERR_PTR reply */
+		memcpy(msg->message, &error_reply, sizeof(error_reply));
+	else if (t86r->hdr.type == TYPE86_RSP_CODE &&
+		 t86r->cprb.cprb_ver_id == 0x01) {
+		length = sizeof(*t86r) + le16_to_cpu(t86r->length) - 2;
+		length = min(PCICC_MAX_RESPONSE_SIZE, length);
+		memcpy(msg->message, reply->message, length);
+	} else
+		memcpy(msg->message, reply->message, sizeof error_reply);
+	complete((struct completion *) msg->private);
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);
+
+/**
+ * Handle a modexpo request on a PCICC device. The request distributor
+ * calls this function after it picked the PCICC device for the request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCICC device to the request distributor
+ * @mex: pointer to the modexpo request buffer
+ */
+static long zcrypt_pcicc_modexpo(struct zcrypt_device *zdev,
+				 struct ica_rsa_modexpo *mex)
+{
+	struct completion done;
+	struct ap_message ap_msg;
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.length = PAGE_SIZE;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &done;
+	rc = ICAMEX_msg_to_type6MEX_msg(zdev, &ap_msg, mex);
+	if (rc)
+		goto out_free;
+	init_completion(&done);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&done, PCICC_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending (rc < 0) or message timed out (rc == 0). */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			rc = -ETIME;
+		goto out_free;
+	}
+	/* zcrypt_pcicc_receive() copied the reply into the request buffer. */
+	rc = convert_response(zdev, &ap_msg, mex->outputdata,
+			      mex->outputdatalength);
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * Handle a modexpo_crt request on a PCICC device. The request distributor
+ * calls this function after it picked the PCICC device for the request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCICC device to the request distributor
+ * @crt: pointer to the modexpo_crt request buffer
+ */
+static long zcrypt_pcicc_modexpo_crt(struct zcrypt_device *zdev,
+				     struct ica_rsa_modexpo_crt *crt)
+{
+	struct completion done;
+	struct ap_message ap_msg;
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.length = PAGE_SIZE;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &done;
+	rc = ICACRT_msg_to_type6CRT_msg(zdev, &ap_msg, crt);
+	if (rc)
+		goto out_free;
+	init_completion(&done);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&done, PCICC_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending (rc < 0) or message timed out (rc == 0). */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			rc = -ETIME;
+		goto out_free;
+	}
+	/* zcrypt_pcicc_receive() copied the reply into the request buffer. */
+	rc = convert_response(zdev, &ap_msg, crt->outputdata,
+			      crt->outputdatalength);
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * The crypto operations for a PCICC card.
+ */
+static struct zcrypt_ops zcrypt_pcicc_ops = {
+	.rsa_modexpo = zcrypt_pcicc_modexpo,
+	.rsa_modexpo_crt = zcrypt_pcicc_modexpo_crt,
+};
+
+/**
+ * Probe function for PCICC cards. The AP device is always accepted
+ * because the bus_match already checked the hardware type.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_pcicc_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(PCICC_MAX_RESPONSE_SIZE);
+	if (!zdev)
+		return -ENOMEM;
+
+	/* Fill in the zcrypt device and link it with the AP device. */
+	zdev->ap_dev = ap_dev;
+	zdev->ops = &zcrypt_pcicc_ops;
+	zdev->online = 1;
+	zdev->user_space_type = ZCRYPT_PCICC;
+	zdev->type_string = "PCICC";
+	zdev->min_mod_size = PCICC_MIN_MOD_SIZE;
+	zdev->max_mod_size = PCICC_MAX_MOD_SIZE;
+	zdev->speed_rating = PCICC_SPEED_RATING;
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (rc) {
+		/* Registration failed: drop the link and free the device. */
+		ap_dev->private = NULL;
+		zcrypt_device_free(zdev);
+	}
+	return rc;
+}
+
+/**
+ * Called when an AP device is removed: unregister the zcrypt device
+ * that is attached to it through ap_dev->private.
+ */
+static void zcrypt_pcicc_remove(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *attached = ap_dev->private;
+
+	zcrypt_device_unregister(attached);
+}
+
+int __init zcrypt_pcicc_init(void)	/* register the PCICC AP driver */
+{
+	return ap_driver_register(&zcrypt_pcicc_driver, THIS_MODULE, "pcicc");
+}
+
+void zcrypt_pcicc_exit(void)		/* unregister the PCICC AP driver */
+{
+	ap_driver_unregister(&zcrypt_pcicc_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_pcicc_init);
+module_exit(zcrypt_pcicc_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_pcicc.h b/drivers/s390/crypto/zcrypt_pcicc.h
new file mode 100644
index 0000000000000..027bafc7312a5
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcicc.h
@@ -0,0 +1,176 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcicc.h
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_PCICC_H_
+#define _ZCRYPT_PCICC_H_
+
+/**
+ * The type 6 message family is associated with PCICC or PCIXCC cards.
+ *
+ * It contains a message header followed by a CPRB, both of which
+ * are described below.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+struct type6_hdr {
+	unsigned char reserved1;	/* 0x00				*/
+	unsigned char type;		/* 0x06				*/
+	unsigned char reserved2[2];	/* 0x0000			*/
+	unsigned char right[4];		/* 0x00000000			*/
+	unsigned char reserved3[2];	/* 0x0000			*/
+	unsigned char reserved4[2];	/* 0x0000			*/
+	unsigned char apfs[4];		/* 0x00000000			*/
+	unsigned int  offset1;		/* 0x00000058 (offset to CPRB)	*/
+	unsigned int  offset2;		/* 0x00000000			*/
+	unsigned int  offset3;		/* 0x00000000			*/
+	unsigned int  offset4;		/* 0x00000000			*/
+	unsigned char agent_id[16];	/* PCICC:			*/
+					/*    0x0100			*/
+					/*    0x4343412d4150504c202020	*/
+					/*    0x010101			*/
+					/* PCIXCC:			*/
+					/*    0x4341000000000000	*/
+					/*    0x0000000000000000	*/
+	unsigned char rqid[2];		/* rqid.  internal to 603	*/
+	unsigned char reserved5[2];	/* 0x0000			*/
+	unsigned char function_code[2];	/* 'PD' 0x5044 or 'PK' 0x504B	*/
+	unsigned char reserved6[2];	/* 0x0000			*/
+	unsigned int  ToCardLen1;	/* (request CPRB len + 3) & -4	*/
+	unsigned int  ToCardLen2;	/* db len 0x00000000 for PKD	*/
+	unsigned int  ToCardLen3;	/* 0x00000000			*/
+	unsigned int  ToCardLen4;	/* 0x00000000			*/
+	unsigned int  FromCardLen1;	/* response buffer length	*/
+	unsigned int  FromCardLen2;	/* db len 0x00000000 for PKD	*/
+	unsigned int  FromCardLen3;	/* 0x00000000			*/
+	unsigned int  FromCardLen4;	/* 0x00000000			*/
+} __attribute__((packed));
+
+/**
+ * CPRB
+ *	  Note that all shorts, ints and longs are little-endian.
+ *	  All pointer fields are 32-bits long, and mean nothing
+ *
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code (in a request followed by the rule block)
+ *	    VUD block
+ *	    key block
+ */
+struct CPRB {
+	unsigned short cprb_len;	/* CPRB length			 */
+	unsigned char cprb_ver_id;	/* CPRB version id.		 */
+	unsigned char pad_000;		/* Alignment pad byte.		 */
+	unsigned char srpi_rtcode[4];	/* SRPI return code LELONG	 */
+	unsigned char srpi_verb;	/* SRPI verb type		 */
+	unsigned char flags;		/* flags			 */
+	unsigned char func_id[2];	/* function id			 */
+	unsigned char checkpoint_flag;	/*				 */
+	unsigned char resv2;		/* reserved			 */
+	unsigned short req_parml;	/* request parameter buffer	 */
+					/* length 16-bit little endian	 */
+	unsigned char req_parmp[4];	/* request parameter buffer	 *
+					 * pointer (means nothing: the	 *
+					 * parameter buffer follows	 *
+					 * the CPRB).			 */
+	unsigned char req_datal[4];	/* request data buffer		 */
+					/* length	  ULELONG	 */
+	unsigned char req_datap[4];	/* request data buffer		 */
+					/* pointer			 */
+	unsigned short rpl_parml;	/* reply  parameter buffer	 */
+					/* length 16-bit little endian	 */
+	unsigned char pad_001[2];	/* Alignment pad bytes. ULESHORT */
+	unsigned char rpl_parmp[4];	/* reply parameter buffer	 *
+					 * pointer (means nothing: the	 *
+					 * parameter buffer follows	 *
+					 * the CPRB).			 */
+	unsigned char rpl_datal[4];	/* reply data buffer len ULELONG */
+	unsigned char rpl_datap[4];	/* reply data buffer		 */
+					/* pointer			 */
+	unsigned short ccp_rscode;	/* server reason code	ULESHORT */
+	unsigned short ccp_rtcode;	/* server return code	ULESHORT */
+	unsigned char repd_parml[2];	/* replied parameter len ULESHORT*/
+	unsigned char mac_data_len[2];	/* Mac Data Length	ULESHORT */
+	unsigned char repd_datal[4];	/* replied data length	ULELONG	 */
+	unsigned char req_pc[2];	/* PC identifier		 */
+	unsigned char res_origin[8];	/* resource origin		 */
+	unsigned char mac_value[8];	/* Mac Value			 */
+	unsigned char logon_id[8];	/* Logon Identifier		 */
+	unsigned char usage_domain[2];	/* cdx, [0] = AP queue number	 */
+	unsigned char resv3[18];	/* reserved for requestor	 */
+	unsigned short svr_namel;	/* server name length  ULESHORT	 */
+	unsigned char svr_name[8];	/* server name			 */
+} __attribute__((packed));
+
+/**
+ * The type 86 message family is associated with PCICC and PCIXCC cards.
+ *
+ * It contains a message header followed by a CPRB.  The CPRB is
+ * the same as the request CPRB, which is described above.
+ *
+ * If format is 1, an error condition exists and no data beyond
+ * the 8-byte message header is of interest.
+ *
+ * The non-error message is shown below.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+struct type86_hdr {
+	unsigned char reserved1;	/* 0x00				*/
+	unsigned char type;		/* 0x86				*/
+	unsigned char format;		/* 0x01 (error) or 0x02 (ok)	*/
+	unsigned char reserved2;	/* 0x00				*/
+	unsigned char reply_code;	/* non-zero: error reply	*/
+	unsigned char reserved3[3];	/* 0x000000			*/
+} __attribute__((packed));
+
+#define TYPE86_RSP_CODE 0x86
+#define TYPE86_FMT2	0x02
+
+struct type86_fmt2_ext {	/* placed right after struct type86_hdr */
+	unsigned char	  reserved[4];	/* 0x00000000			*/
+	unsigned char	  apfs[4];	/* final status			*/
+	unsigned int	  count1;	/* length of CPRB + parameters	*/
+	unsigned int	  offset1;	/* offset to CPRB		*/
+	unsigned int	  count2;	/* 0x00000000			*/
+	unsigned int	  offset2;	/* db offset 0x00000000 for PKD	*/
+	unsigned int	  count3;	/* 0x00000000			*/
+	unsigned int	  offset3;	/* 0x00000000			*/
+	unsigned int	  count4;	/* 0x00000000			*/
+	unsigned int	  offset4;	/* 0x00000000			*/
+} __attribute__((packed));
+
+struct function_and_rules_block {
+	unsigned char function_code[2];	/* 'PK' or 'PD'			*/
+	unsigned short ulen;		/* little-endian, preset to 10	*/
+	unsigned char only_rule[8];	/* e.g. 'PKCS-1.2'		*/
+} __attribute__((packed));
+
+int zcrypt_pcicc_init(void);
+void zcrypt_pcicc_exit(void);
+
+#endif /* _ZCRYPT_PCICC_H_ */
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c
new file mode 100644
index 0000000000000..6064cf58be43b
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcixcc.c
@@ -0,0 +1,714 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcixcc.c
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_pcicc.h"
+#include "zcrypt_pcixcc.h"
+#include "zcrypt_cca_key.h"
+
+#define PCIXCC_MIN_MOD_SIZE	 16	/*  128 bits	*/
+#define PCIXCC_MIN_MOD_SIZE_OLD	 64	/*  512 bits	*/
+#define PCIXCC_MAX_MOD_SIZE	256	/* 2048 bits	*/
+
+#define PCIXCC_MCL2_SPEED_RATING	7870	/* FIXME: needs finetuning */
+#define PCIXCC_MCL3_SPEED_RATING	7870
+#define CEX2C_SPEED_RATING		8540
+
+#define PCIXCC_MAX_ICA_MESSAGE_SIZE 0x77c  /* max size type6 v2 crt message */
+#define PCIXCC_MAX_ICA_RESPONSE_SIZE 0x77c /* max size type86 v2 reply	    */
+
+#define PCIXCC_MAX_XCRB_MESSAGE_SIZE (12*1024)
+#define PCIXCC_MAX_XCRB_RESPONSE_SIZE PCIXCC_MAX_XCRB_MESSAGE_SIZE
+#define PCIXCC_MAX_XCRB_DATA_SIZE (11*1024)
+#define PCIXCC_MAX_XCRB_REPLY_SIZE (5*1024)
+
+#define PCIXCC_MAX_RESPONSE_SIZE PCIXCC_MAX_XCRB_RESPONSE_SIZE
+
+#define PCIXCC_CLEANUP_TIME	(15*HZ)
+
+static struct ap_device_id zcrypt_pcixcc_ids[] = {
+	{ AP_DEVICE(AP_DEVICE_TYPE_PCIXCC) },
+	{ AP_DEVICE(AP_DEVICE_TYPE_CEX2C) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("PCIXCC Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif
+
+static int zcrypt_pcixcc_probe(struct ap_device *ap_dev);
+static void zcrypt_pcixcc_remove(struct ap_device *ap_dev);
+static void zcrypt_pcixcc_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+
+static struct ap_driver zcrypt_pcixcc_driver = {
+	.probe = zcrypt_pcixcc_probe,
+	.remove = zcrypt_pcixcc_remove,
+	.receive = zcrypt_pcixcc_receive,
+	.ids = zcrypt_pcixcc_ids,
+};
+
+/**
+ * The following is used to initialize the CPRBX passed to the PCIXCC/CEX2C
+ * card in a type6 message. The 3 fields that must be filled in at execution
+ * time are  req_parml, rpl_parml and usage_domain.
+ * Everything about this interface is ascii/big-endian, since the
+ * device does *not* have 'Intel inside'.
+ *
+ * The CPRBX is followed immediately by the parm block.
+ * The parm block contains:
+ * - function code ('PD' 0x5044 or 'PK' 0x504B)
+ * - rule block (one of:)
+ *   + 0x000A 'PKCS-1.2' (MCL2 'PD')
+ *   + 0x000A 'ZERO-PAD' (MCL2 'PK')
+ *   + 0x000A 'ZERO-PAD' (MCL3 'PD' or CEX2C 'PD')
+ *   + 0x000A 'MRP     ' (MCL3 'PK' or CEX2C 'PK')
+ * - VUD block
+ */
+static struct CPRBX static_cprbx = {
+	.cprb_len	=  0x00DC,
+	.cprb_ver_id	=  0x02,
+	.func_id	= {0x54,0x32},
+};
+
+/**
+ * Convert an ICAMEX message to a type6 MEX message.
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to AP message
+ * @mex: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT if the input data cannot be copied from
+ * user space, or the negative value returned by zcrypt_type6_mex_key_en().
+ */
+static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo *mex)
+{
+	static struct type6_hdr static_type6_hdrX = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {'C','A',},
+		.function_code	= {'P','K'},
+	};
+	static struct function_and_rules_block static_pke_fnr = {
+		.function_code	= {'P','K'},
+		.ulen		= 10,
+		.only_rule	= {'M','R','P',' ',' ',' ',' ',' '}
+	};
+	static struct function_and_rules_block static_pke_fnr_MCL2 = {
+		.function_code	= {'P','K'},
+		.ulen		= 10,
+		.only_rule	= {'Z','E','R','O','-','P','A','D'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRBX cprbx;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int size;
+
+	/* VUD.ciphertext: length field followed by the user input data */
+	msg->length = mex->inputdatalength + 2;
+	if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength))
+		return -EFAULT;
+
+	/* Set up the key, which is located after the variable length text. */
+	size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength, 1);
+	if (size < 0)
+		return size;
+	size += sizeof(*msg) + mex->inputdatalength;
+
+	/* Fill in message header, CPRBX and function & rules block (f&r). */
+	msg->hdr = static_type6_hdrX;
+	msg->hdr.ToCardLen1 = size - sizeof(msg->hdr);
+	msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
+
+	msg->cprbx = static_cprbx;
+	msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprbx.rpl_msgbl = msg->hdr.FromCardLen1;
+
+	msg->fr = (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2) ?
+		static_pke_fnr_MCL2 : static_pke_fnr;
+
+	msg->cprbx.req_parml = size - sizeof(msg->hdr) - sizeof(msg->cprbx);
+
+	ap_msg->length = size;
+	return 0;
+}
+
+/**
+ * Convert an ICACRT message to a type6 CRT message.
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to AP message
+ * @crt: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT if the input data cannot be copied from
+ * user space, or the negative value returned by zcrypt_type6_crt_key().
+ */
+static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo_crt *crt)
+{
+	static struct type6_hdr static_type6_hdrX = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {'C','A',},
+		.function_code	= {'P','D'},
+	};
+	static struct function_and_rules_block static_pkd_fnr = {
+		.function_code	= {'P','D'},
+		.ulen		= 10,
+		.only_rule	= {'Z','E','R','O','-','P','A','D'}
+	};
+
+	static struct function_and_rules_block static_pkd_fnr_MCL2 = {
+		.function_code	= {'P','D'},
+		.ulen		= 10,
+		.only_rule	= {'P','K','C','S','-','1','.','2'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRBX cprbx;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int size;
+
+	/* VUD.ciphertext: length field followed by the user input data */
+	msg->length = crt->inputdatalength + 2;
+	if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength))
+		return -EFAULT;
+
+	/* Set up the key, which is located after the variable length text. */
+	size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 1);
+	if (size < 0)
+		return size;
+	size += sizeof(*msg) + crt->inputdatalength;	/* total size of msg */
+
+	/* Fill in message header, CPRBX and function & rules block (f&r). */
+	msg->hdr = static_type6_hdrX;
+	msg->hdr.ToCardLen1 = size -  sizeof(msg->hdr);
+	msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
+
+	msg->cprbx = static_cprbx;
+	msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprbx.req_parml = msg->cprbx.rpl_msgbl =
+		size - sizeof(msg->hdr) - sizeof(msg->cprbx);
+
+	msg->fr = (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2) ?
+		static_pkd_fnr_MCL2 : static_pkd_fnr;
+
+	ap_msg->length = size;
+	return 0;
+}
+
+/**
+ * Copy results from a type 86 ICA reply message back to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @outputdata: pointer to user output data
+ * @outputdatalength: size of user output data
+ *
+ * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
+ */
+struct type86x_reply {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+	struct CPRBX cprbx;
+	unsigned char pad[4];	/* 4 byte function code/rules block ? */
+	unsigned short length;
+	char text[0];
+} __attribute__((packed));
+
+static int convert_type86_ica(struct zcrypt_device *zdev,
+			  struct ap_message *reply,
+			  char __user *outputdata,
+			  unsigned int outputdatalength)
+{
+	static unsigned char static_pad[] = {
+		0x00,0x02,
+		0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,
+		0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57,
+		0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,
+		0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39,
+		0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,
+		0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D,
+		0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,
+		0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F,
+		0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,
+		0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45,
+		0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,
+		0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F,
+		0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,
+		0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D,
+		0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,
+		0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9,
+		0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,
+		0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B,
+		0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,
+		0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD,
+		0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,
+		0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1,
+		0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,
+		0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23,
+		0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,
+		0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43,
+		0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,
+		0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F,
+		0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,
+		0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD,
+		0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,
+		0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09
+	};
+	struct type86x_reply *msg = reply->message;
+	unsigned short service_rc, service_rs;
+	unsigned int reply_len, pad_len;
+	char *data;
+
+	service_rc = msg->cprbx.ccp_rtcode;
+	if (unlikely(service_rc != 0)) {
+		service_rs = msg->cprbx.ccp_rscode;
+		if (service_rc == 8 && service_rs == 66) {
+			PDEBUG("Bad block format on PCIXCC/CEX2C\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 65) {
+			PDEBUG("Probably an even modulus on PCIXCC/CEX2C\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 770) {
+			PDEBUG("Invalid key length on PCIXCC/CEX2C\n");
+			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		if (service_rc == 8 && service_rs == 783) {
+			PDEBUG("Extended bitlengths not enabled on PCIXCC/CEX2C\n");
+			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		PRINTK("Unknown service rc/rs (PCIXCC/CEX2C): %d/%d\n",
+		       service_rc, service_rs);
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+	data = msg->text;
+	reply_len = msg->length - 2;
+	if (reply_len > outputdatalength)
+		return -EINVAL;
+	/**
+	 * For all encipher requests, the length of the ciphertext (reply_len)
+	 * will always equal the modulus length. For MEX decipher requests
+	 * the output needs to get padded. Minimum pad size is 10.
+	 *
+	 * Currently, the cases where padding will be added is for:
+	 * - PCIXCC_MCL2 using a CRT form token (since PKD didn't support
+	 *   ZERO-PAD and CRT is only supported for PKD requests)
+	 * - PCICC, always
+	 */
+	pad_len = outputdatalength - reply_len;
+	if (pad_len > 0) {
+		if (pad_len < 10)
+			return -EINVAL;
+		/* 'restore' padding left in the PCICC/PCIXCC card. */
+		if (copy_to_user(outputdata, static_pad, pad_len - 1))
+			return -EFAULT;
+		if (put_user(0, outputdata + pad_len - 1))
+			return -EFAULT;
+	}
+	/* Copy the crypto response to user space. */
+	if (copy_to_user(outputdata + pad_len, data, reply_len))
+		return -EFAULT;
+	return 0;
+}
+
+static int convert_response_ica(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	struct type86x_reply *msg = reply->message;
+
+	/* Response type byte is the second byte in the response. */
+	switch (((unsigned char *) reply->message)[1]) {
+	case TYPE82_RSP_CODE:
+	case TYPE88_RSP_CODE:
+		return convert_error(zdev, reply);
+	case TYPE86_RSP_CODE:
+		if (msg->hdr.reply_code)
+			return convert_error(zdev, reply);
+		if (msg->cprbx.cprb_ver_id == 0x02)
+			return convert_type86_ica(zdev, reply,
+						  outputdata, outputdatalength);
+		/* no break, incorrect cprb version is an unknown response */
+	default: /* Unknown response type, this should NEVER EVER happen */
+		PRINTK("Unrecognized Message Header: %08x%08x\n",
+		       *(unsigned int *) reply->message,
+		       *(unsigned int *) (reply->message+4));
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+}
+
+/**
+ * Called from the AP bus code, in tasklet context, after the crypto request
+ * "msg" has finished with the reply message "reply". The reply (or a machine
+ * failure header if "reply" is an ERR_PTR) is copied to the request buffer.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message, or an ERR_PTR() value
+ */
+static void zcrypt_pcixcc_receive(struct ap_device *ap_dev,
+				  struct ap_message *msg,
+				  struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct type86x_reply *t86r = IS_ERR(reply) ? NULL : reply->message;
+	int length;
+
+	/* Copy the reply to the request buffer; never touch an ERR_PTR. */
+	if (IS_ERR(reply))
+		memcpy(msg->message, &error_reply, sizeof(error_reply));
+	else if (t86r->hdr.type == TYPE86_RSP_CODE &&
+		 t86r->cprbx.cprb_ver_id == 0x02) {
+		length = sizeof(struct type86x_reply) + t86r->length - 2;
+		length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length);
+		memcpy(msg->message, reply->message, length);
+	} else
+		memcpy(msg->message, reply->message, sizeof error_reply);
+	complete((struct completion *) msg->private);
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);
+
+/**
+ * The request distributor calls this function if it picked the PCIXCC/CEX2C
+ * device to handle a modexpo request.
+ * @zdev: zcrypt_device that identifies the PCIXCC/CEX2C device
+ * @mex: pointer to the modexpo request buffer
+ * Returns 0 on success, -ENOMEM, -ETIME on timeout or another error code.
+ */
+static long zcrypt_pcixcc_modexpo(struct zcrypt_device *zdev,
+				  struct ica_rsa_modexpo *mex)
+{
+	struct ap_message ap_msg;
+	struct completion work;
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICAMEX_msg_to_type6MEX_msgX(zdev, &ap_msg, mex);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCIXCC_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response_ica(zdev, &ap_msg, mex->outputdata,
+					  mex->outputdatalength);
+	else {
+		/* Signal pending or message timed out: cancel the request. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * The request distributor calls this function if it picked the PCIXCC/CEX2C
+ * device to handle a modexpo_crt request.
+ * @zdev: zcrypt_device that identifies the PCIXCC/CEX2C device
+ * @crt: pointer to the modexpo_crt request buffer
+ * Returns 0 on success, -ENOMEM, -ETIME on timeout or another error code.
+ */
+static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev,
+				      struct ica_rsa_modexpo_crt *crt)
+{
+	struct ap_message ap_msg;
+	struct completion work;
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICACRT_msg_to_type6CRT_msgX(zdev, &ap_msg, crt);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCIXCC_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response_ica(zdev, &ap_msg, crt->outputdata,
+					  crt->outputdatalength);
+	else {
+		/* Signal pending or message timed out: cancel the request. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * The crypto operations for a PCIXCC/CEX2C card.
+ */
+static struct zcrypt_ops zcrypt_pcixcc_ops = {
+	.rsa_modexpo = zcrypt_pcixcc_modexpo,
+	.rsa_modexpo_crt = zcrypt_pcixcc_modexpo_crt,
+};
+
+/**
+ * Micro-code detection function. It sends a message to a pcixcc card
+ * to find out the microcode level.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_pcixcc_mcl(struct ap_device *ap_dev)
+{
+	static unsigned char msg[] = {
+		0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x00,
+		0x00,0x00,0x01,0xC4,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x07,0x24,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0xDC,0x02,0x00,0x00,0x00,0x54,0x32,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xE8,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x0A,
+		0x4D,0x52,0x50,0x20,0x20,0x20,0x20,0x20,
+		0x00,0x42,0x00,0x01,0x02,0x03,0x04,0x05,
+		0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,
+		0x0E,0x0F,0x00,0x11,0x22,0x33,0x44,0x55,
+		0x66,0x77,0x88,0x99,0xAA,0xBB,0xCC,0xDD,
+		0xEE,0xFF,0xFF,0xEE,0xDD,0xCC,0xBB,0xAA,
+		0x99,0x88,0x77,0x66,0x55,0x44,0x33,0x22,
+		0x11,0x00,0x01,0x23,0x45,0x67,0x89,0xAB,
+		0xCD,0xEF,0xFE,0xDC,0xBA,0x98,0x76,0x54,
+		0x32,0x10,0x00,0x9A,0x00,0x98,0x00,0x00,
+		0x1E,0x00,0x00,0x94,0x00,0x00,0x00,0x00,
+		0x04,0x00,0x00,0x8C,0x00,0x00,0x00,0x40,
+		0x02,0x00,0x00,0x40,0xBA,0xE8,0x23,0x3C,
+		0x75,0xF3,0x91,0x61,0xD6,0x73,0x39,0xCF,
+		0x7B,0x6D,0x8E,0x61,0x97,0x63,0x9E,0xD9,
+		0x60,0x55,0xD6,0xC7,0xEF,0xF8,0x1E,0x63,
+		0x95,0x17,0xCC,0x28,0x45,0x60,0x11,0xC5,
+		0xC4,0x4E,0x66,0xC6,0xE6,0xC3,0xDE,0x8A,
+		0x19,0x30,0xCF,0x0E,0xD7,0xAA,0xDB,0x01,
+		0xD8,0x00,0xBB,0x8F,0x39,0x9F,0x64,0x28,
+		0xF5,0x7A,0x77,0x49,0xCC,0x6B,0xA3,0x91,
+		0x97,0x70,0xE7,0x60,0x1E,0x39,0xE1,0xE5,
+		0x33,0xE1,0x15,0x63,0x69,0x08,0x80,0x4C,
+		0x67,0xC4,0x41,0x8F,0x48,0xDF,0x26,0x98,
+		0xF1,0xD5,0x8D,0x88,0xD9,0x6A,0xA4,0x96,
+		0xC5,0x84,0xD9,0x30,0x49,0x67,0x7D,0x19,
+		0xB1,0xB3,0x45,0x4D,0xB2,0x53,0x9A,0x47,
+		0x3C,0x7C,0x55,0xBF,0xCC,0x85,0x00,0x36,
+		0xF1,0x3D,0x93,0x53
+	};
+	unsigned long long psmid;
+	struct CPRBX *cprbx;
+	char *reply;
+	int rc, i;
+
+	reply = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reply)
+		return -ENOMEM;
+
+	rc = ap_send(ap_dev->qid, 0x0102030405060708ULL, msg, sizeof(msg));
+	if (rc)
+		goto out_free;
+
+	/* Wait for the test message to complete. */
+	for (i = 0; i < 6; i++) {
+		mdelay(300);
+		rc = ap_recv(ap_dev->qid, &psmid, reply, 4096);
+		if (rc == 0 && psmid == 0x0102030405060708ULL)
+			break;
+	}
+
+	if (i >= 6) {
+		/* Got no answer. */
+		rc = -ENODEV;
+		goto out_free;
+	}
+
+	cprbx = (struct CPRBX *) (reply + 48);
+	if (cprbx->ccp_rtcode == 8 && cprbx->ccp_rscode == 33)
+		rc = ZCRYPT_PCIXCC_MCL2;
+	else
+		rc = ZCRYPT_PCIXCC_MCL3;
+out_free:
+	free_page((unsigned long) reply);
+	return rc;
+}
+
+/**
+ * Probe function for PCIXCC/CEX2C cards. No device is rejected because of
+ * its hardware type since the bus_match already checked that. PCIXCC cards
+ * come in two flavours, micro code level 2 and 3, which is determined by
+ * sending a test message. Returns 0 or the error code of the failed step.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_pcixcc_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(PCIXCC_MAX_RESPONSE_SIZE);
+	if (!zdev)
+		return -ENOMEM;
+	zdev->ap_dev = ap_dev;
+	zdev->ops = &zcrypt_pcixcc_ops;
+	zdev->online = 1;
+	if (ap_dev->device_type == AP_DEVICE_TYPE_PCIXCC) {
+		rc = zcrypt_pcixcc_mcl(ap_dev);
+		if (rc < 0) {
+			zcrypt_device_free(zdev);
+			return rc;
+		}
+		zdev->user_space_type = rc;
+		if (rc == ZCRYPT_PCIXCC_MCL2) {
+			zdev->type_string = "PCIXCC_MCL2";
+			zdev->speed_rating = PCIXCC_MCL2_SPEED_RATING;
+			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
+			zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
+		} else {
+			zdev->type_string = "PCIXCC_MCL3";
+			zdev->speed_rating = PCIXCC_MCL3_SPEED_RATING;
+			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE;
+			zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
+		}
+	} else {
+		zdev->user_space_type = ZCRYPT_CEX2C;
+		zdev->type_string = "CEX2C";
+		zdev->speed_rating = CEX2C_SPEED_RATING;
+		zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE;
+		zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
+	}
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (rc)
+		goto out_free;
+	return 0;
+
+ out_free:
+	ap_dev->private = NULL;
+	zcrypt_device_free(zdev);
+	return rc;
+}
+
+/**
+ * This is called to remove the extended PCIXCC/CEX2C driver information
+ * if an AP device is removed.
+ */
+static void zcrypt_pcixcc_remove(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev = ap_dev->private;
+
+	zcrypt_device_unregister(zdev);
+}
+
+int __init zcrypt_pcixcc_init(void)
+{
+	return ap_driver_register(&zcrypt_pcixcc_driver, THIS_MODULE, "pcixcc");
+}
+
+void zcrypt_pcixcc_exit(void)
+{
+	ap_driver_unregister(&zcrypt_pcixcc_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_pcixcc_init);
+module_exit(zcrypt_pcixcc_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h
new file mode 100644
index 0000000000000..d4c44c4d7ad09
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcixcc.h
@@ -0,0 +1,79 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcixcc.h
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_PCIXCC_H_
+#define _ZCRYPT_PCIXCC_H_
+
+/**
+ * CPRBX
+ *	  Note that all shorts and ints are big-endian.
+ *	  All pointer fields are 16 bytes long, and mean nothing.
+ *
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code
+ *	    VUD block
+ *	    key block
+ */
+struct CPRBX {
+	unsigned short cprb_len;	/* CPRB length	      220	 */
+	unsigned char  cprb_ver_id;	/* CPRB version id.   0x02	 */
+	unsigned char  pad_000[3];	/* Alignment pad bytes		 */
+	unsigned char  func_id[2];	/* function id	      0x5432	 */
+	unsigned char  cprb_flags[4];	/* Flags			 */
+	unsigned int   req_parml;	/* request parameter buffer len	 */
+	unsigned int   req_datal;	/* request data buffer		 */
+	unsigned int   rpl_msgbl;	/* reply  message block length	 */
+	unsigned int   rpld_parml;	/* replied parameter block len	 */
+	unsigned int   rpl_datal;	/* reply data block len		 */
+	unsigned int   rpld_datal;	/* replied data block len	 */
+	unsigned int   req_extbl;	/* request extension block len	 */
+	unsigned char  pad_001[4];	/* reserved			 */
+	unsigned int   rpld_extbl;	/* replied extension block len	 */
+	unsigned char  req_parmb[16];	/* request parm block 'address'	 */
+	unsigned char  req_datab[16];	/* request data block 'address'	 */
+	unsigned char  rpl_parmb[16];	/* reply parm block 'address'	 */
+	unsigned char  rpl_datab[16];	/* reply data block 'address'	 */
+	unsigned char  req_extb[16];	/* request extension block 'addr'*/
+	unsigned char  rpl_extb[16];	/* reply extension block 'addres'*/
+	unsigned short ccp_rtcode;	/* server return code		 */
+	unsigned short ccp_rscode;	/* server reason code		 */
+	unsigned int   mac_data_len;	/* Mac Data Length		 */
+	unsigned char  logon_id[8];	/* Logon Identifier		 */
+	unsigned char  mac_value[8];	/* Mac Value			 */
+	unsigned char  mac_content_flgs;/* Mac content flag byte	 */
+	unsigned char  pad_002;		/* Alignment			 */
+	unsigned short domain;		/* Domain			 */
+	unsigned char  pad_003[12];	/* Domain masks			 */
+	unsigned char  pad_004[36];	/* reserved			 */
+} __attribute__((packed));
+
+int zcrypt_pcixcc_init(void);
+void zcrypt_pcixcc_exit(void);
+
+#endif /* _ZCRYPT_PCIXCC_H_ */
-- 
GitLab


From fe3a1be59c851aba2330387596c6134bc5ec8397 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:34 +0200
Subject: [PATCH 0226/1063] [S390] zcrypt driver Makefile, Kconfig and
 monolithic build.

The Makefile and Kconfig changes should be obvious. The monolithic
build option is there to create an old-style z90crypt module for
backward compatibility with older distributions.

Signed-off-by: Ralph Wuerthner <rwuerthn@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/Kconfig              |  21 +++++++
 drivers/s390/crypto/Makefile      |  13 ++++
 drivers/s390/crypto/zcrypt_mono.c | 100 ++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 drivers/s390/crypto/zcrypt_mono.c

diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig
index f0ea550d39bcd..bc4261e8b6060 100644
--- a/drivers/s390/Kconfig
+++ b/drivers/s390/Kconfig
@@ -217,4 +217,25 @@ endmenu
 
 menu "Cryptographic devices"
 
+config ZCRYPT
+	tristate "Support for PCI-attached cryptographic adapters"
+	select ZCRYPT_MONOLITHIC if ZCRYPT="y"
+	default "m"
+	help
+	  Select this option if you want to use a PCI-attached cryptographic
+	  adapter like:
+	  + PCI Cryptographic Accelerator (PCICA)
+	  + PCI Cryptographic Coprocessor (PCICC)
+	  + PCI-X Cryptographic Coprocessor (PCIXCC)
+	  + Crypto Express2 Coprocessor (CEX2C)
+	  + Crypto Express2 Accelerator (CEX2A)
+
+config ZCRYPT_MONOLITHIC
+	bool "Monolithic zcrypt module"
+	depends on ZCRYPT="m"
+	help
+	  Select this option if you want to have a single module z90crypt.ko
+	  that contains all parts of the crypto device driver (ap bus,
+	  request router and all the card drivers).
+
 endmenu
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index 67e75be8e4e4f..f0a12d2eb7806 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -2,3 +2,16 @@
 # S/390 crypto devices
 #
 
+ifdef CONFIG_ZCRYPT_MONOLITHIC
+
+z90crypt-objs := zcrypt_mono.o ap_bus.o zcrypt_api.o \
+		zcrypt_pcica.o zcrypt_pcicc.o zcrypt_pcixcc.o zcrypt_cex2a.o
+obj-$(CONFIG_ZCRYPT) += z90crypt.o
+
+else
+
+ap-objs := ap_bus.o
+obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o zcrypt_pcicc.o zcrypt_pcixcc.o
+obj-$(CONFIG_ZCRYPT) += zcrypt_pcica.o zcrypt_cex2a.o
+
+endif
diff --git a/drivers/s390/crypto/zcrypt_mono.c b/drivers/s390/crypto/zcrypt_mono.c
new file mode 100644
index 0000000000000..f48b61a6126c9
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_mono.c
@@ -0,0 +1,100 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_mono.c
+ *
+ *  zcrypt 2.0.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/compat.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_pcica.h"
+#include "zcrypt_pcicc.h"
+#include "zcrypt_pcixcc.h"
+#include "zcrypt_cex2a.h"
+
+/**
+ * Module init: start AP bus, API and card drivers; unwind on any failure.
+ */
+int __init zcrypt_init(void)
+{
+	int rc;
+
+	rc = ap_module_init();
+	if (rc)
+		goto out;
+	rc = zcrypt_api_init();
+	if (rc)
+		goto out_ap;
+	rc = zcrypt_pcica_init();
+	if (rc)
+		goto out_api;
+	rc = zcrypt_pcicc_init();
+	if (rc)
+		goto out_pcica;
+	rc = zcrypt_pcixcc_init();
+	if (rc)
+		goto out_pcicc;
+	rc = zcrypt_cex2a_init();
+	if (rc)
+		goto out_pcixcc;
+	return 0;
+
+out_pcixcc:
+	zcrypt_pcixcc_exit();
+out_pcicc:
+	zcrypt_pcicc_exit();
+out_pcica:
+	zcrypt_pcica_exit();
+out_api:
+	zcrypt_api_exit();
+out_ap:
+	ap_module_exit();
+out:
+	return rc;
+}
+
+/**
+ * Module exit: stop the card drivers, then the API, then the AP bus.
+ */
+void __exit zcrypt_exit(void)
+{
+	zcrypt_cex2a_exit();
+	zcrypt_pcixcc_exit();
+	zcrypt_pcicc_exit();
+	zcrypt_pcica_exit();
+	zcrypt_api_exit();
+	ap_module_exit();
+}
+
+module_init(zcrypt_init);
+module_exit(zcrypt_exit);
-- 
GitLab


From 5432114baf0300286a6ca1b0aea549492a379432 Mon Sep 17 00:00:00 2001
From: Ralph Wuerthner <rwuerthn@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:36 +0200
Subject: [PATCH 0227/1063] [S390] zcrypt secure key cryptography extension.

Allow the user space to send extended cprb messages directly to the
PCIXCC / CEX2C cards. This allows the CCA library to construct special
crypto requests that use "secure" keys that are stored on the card.

Signed-off-by: Ralph Wuerthner <rwuerthn@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/zcrypt_api.c     | 112 +++++++++++-
 drivers/s390/crypto/zcrypt_api.h     |   3 +-
 drivers/s390/crypto/zcrypt_cca_key.h |   2 +-
 drivers/s390/crypto/zcrypt_cex2a.c   |   2 +-
 drivers/s390/crypto/zcrypt_cex2a.h   |   2 +-
 drivers/s390/crypto/zcrypt_error.h   |   2 +-
 drivers/s390/crypto/zcrypt_mono.c    |   2 +-
 drivers/s390/crypto/zcrypt_pcica.c   |   2 +-
 drivers/s390/crypto/zcrypt_pcica.h   |   2 +-
 drivers/s390/crypto/zcrypt_pcicc.c   |   2 +-
 drivers/s390/crypto/zcrypt_pcicc.h   |   2 +-
 drivers/s390/crypto/zcrypt_pcixcc.c  | 263 +++++++++++++++++++++++++--
 drivers/s390/crypto/zcrypt_pcixcc.h  |   2 +-
 include/asm-s390/zcrypt.h            |  80 +++++++-
 14 files changed, 452 insertions(+), 26 deletions(-)

diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index b3fe003b3d2d5..1edc10a7a6f2d 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_api.c
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
@@ -392,6 +392,41 @@ static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt)
 	return -ENODEV;
 }
 
+static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		if (!zdev->online || !zdev->ops->send_cprb ||
+		    (xcRB->user_defined != AUTOSELECT &&
+			AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined)
+		    )
+			continue;	/* unusable or not requested */
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		spin_unlock_bh(&zcrypt_device_lock); /* not held during the call */
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			rc = zdev->ops->send_cprb(zdev, xcRB);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		}
+		else
+			rc = -EAGAIN;	/* ioctl callers retry on -EAGAIN */
+		spin_lock_bh(&zcrypt_device_lock);
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;	/* only the first match is tried */
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;	/* no usable device matched */
+}
+
 static void zcrypt_status_mask(char status[AP_DEVICES])
 {
 	struct zcrypt_device *zdev;
@@ -535,6 +570,18 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 			return rc;
 		return put_user(crt.outputdatalength, &ucrt->outputdatalength);
 	}
+	case ZSECSENDCPRB: {
+		struct ica_xcRB __user *uxcRB = (void __user *) arg;
+		struct ica_xcRB xcRB;
+		if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_send_cprb(&xcRB);
+		} while (rc == -EAGAIN);
+		if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB)))
+			return -EFAULT;
+		return rc;
+	}
 	case Z90STAT_STATUS_MASK: {
 		char status[AP_DEVICES];
 		zcrypt_status_mask(status);
@@ -683,6 +730,67 @@ static long trans_modexpo_crt32(struct file *filp, unsigned int cmd,
 	return rc;
 }
 
+struct compat_ica_xcRB {	/* struct ica_xcRB with compat_uptr_t pointers */
+	unsigned short	agent_ID;
+	unsigned int	user_defined;
+	unsigned short	request_ID;
+	unsigned int	request_control_blk_length;
+	unsigned char	padding1[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	request_control_blk_addr;
+	unsigned int	request_data_length;
+	char		padding2[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	request_data_address;
+	unsigned int	reply_control_blk_length;
+	char		padding3[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	reply_control_blk_addr;
+	unsigned int	reply_data_length;
+	char		padding4[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	reply_data_addr;
+	unsigned short	priority_window;
+	unsigned int	status;
+} __attribute__((packed));
+
+static long trans_xcRB32(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
+{
+	struct compat_ica_xcRB __user *uxcRB32 = compat_ptr(arg);
+	struct compat_ica_xcRB xcRB32;
+	struct ica_xcRB xcRB64;	/* native copy handed to the driver */
+	long rc;
+
+	if (copy_from_user(&xcRB32, uxcRB32, sizeof(xcRB32)))
+		return -EFAULT;
+	xcRB64.agent_ID = xcRB32.agent_ID;
+	xcRB64.user_defined = xcRB32.user_defined;
+	xcRB64.request_ID = xcRB32.request_ID;
+	xcRB64.request_control_blk_length =
+		xcRB32.request_control_blk_length;
+	xcRB64.request_control_blk_addr =
+		compat_ptr(xcRB32.request_control_blk_addr);
+	xcRB64.request_data_length =
+		xcRB32.request_data_length;
+	xcRB64.request_data_address =
+		compat_ptr(xcRB32.request_data_address);
+	xcRB64.reply_control_blk_length =
+		xcRB32.reply_control_blk_length;
+	xcRB64.reply_control_blk_addr =
+		compat_ptr(xcRB32.reply_control_blk_addr);
+	xcRB64.reply_data_length = xcRB32.reply_data_length;
+	xcRB64.reply_data_addr =
+		compat_ptr(xcRB32.reply_data_addr);
+	xcRB64.priority_window = xcRB32.priority_window;
+	xcRB64.status = xcRB32.status;
+	do {
+		rc = zcrypt_send_cprb(&xcRB64);
+	} while (rc == -EAGAIN);	/* retry until a definite result */
+	xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length;
+	xcRB32.reply_data_length = xcRB64.reply_data_length;
+	xcRB32.status = xcRB64.status;	/* only lengths and status flow back */
+	if (copy_to_user(uxcRB32, &xcRB32, sizeof(xcRB32)))
+			return -EFAULT;
+	return rc;	/* result of the request itself */
+}
+
 long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd,
 			 unsigned long arg)
 {
@@ -690,6 +798,8 @@ long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd,
 		return trans_modexpo32(filp, cmd, arg);
 	if (cmd == ICARSACRT)
 		return trans_modexpo_crt32(filp, cmd, arg);
+	if (cmd == ZSECSENDCPRB)
+		return trans_xcRB32(filp, cmd, arg);
 	return zcrypt_unlocked_ioctl(filp, cmd, arg);
 }
 #endif
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 1f0e61f2e9b42..de4877ee618f9 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_api.h
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
@@ -106,6 +106,7 @@ struct zcrypt_ops {
 	long (*rsa_modexpo)(struct zcrypt_device *, struct ica_rsa_modexpo *);
 	long (*rsa_modexpo_crt)(struct zcrypt_device *,
 				struct ica_rsa_modexpo_crt *);
+	long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *);
 };
 
 struct zcrypt_device {
diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h
index c80f40d441976..8dbcf0eef3e55 100644
--- a/drivers/s390/crypto/zcrypt_cca_key.h
+++ b/drivers/s390/crypto/zcrypt_cca_key.h
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_cca_key.h
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index 350248e5cd93d..a62b00083d0cb 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_cex2a.c
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h
index 61a78c32dce46..8f69d1dacab8a 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.h
+++ b/drivers/s390/crypto/zcrypt_cex2a.h
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_cex2a.h
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index b22bd055a03ba..2cb616ba8becd 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_error.h
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_mono.c b/drivers/s390/crypto/zcrypt_mono.c
index f48b61a6126c9..2a9349ad68b7e 100644
--- a/drivers/s390/crypto/zcrypt_mono.c
+++ b/drivers/s390/crypto/zcrypt_mono.c
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_mono.c
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c
index 0ff56e86caae6..b6a4ecdc80257 100644
--- a/drivers/s390/crypto/zcrypt_pcica.c
+++ b/drivers/s390/crypto/zcrypt_pcica.c
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_pcica.c
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_pcica.h b/drivers/s390/crypto/zcrypt_pcica.h
index a08a4f8c33c95..3be11187f6df3 100644
--- a/drivers/s390/crypto/zcrypt_pcica.h
+++ b/drivers/s390/crypto/zcrypt_pcica.h
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_pcica.h
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c
index 900362983fec1..f295a403b29a9 100644
--- a/drivers/s390/crypto/zcrypt_pcicc.c
+++ b/drivers/s390/crypto/zcrypt_pcicc.c
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_pcicc.c
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_pcicc.h b/drivers/s390/crypto/zcrypt_pcicc.h
index 027bafc7312a5..6d4454846c8f9 100644
--- a/drivers/s390/crypto/zcrypt_pcicc.h
+++ b/drivers/s390/crypto/zcrypt_pcicc.h
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_pcicc.h
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c
index 6064cf58be43b..2da8b9381407e 100644
--- a/drivers/s390/crypto/zcrypt_pcixcc.c
+++ b/drivers/s390/crypto/zcrypt_pcixcc.c
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_pcixcc.c
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
@@ -60,6 +60,15 @@
 
 #define PCIXCC_CLEANUP_TIME	(15*HZ)
 
+#define CEIL4(x) ((((x)+3)/4)*4)	/* round x up to a multiple of 4 */
+
+struct response_type {
+	struct completion work;	/* completed by zcrypt_pcixcc_receive() */
+	int type;		/* one of PCIXCC_RESPONSE_TYPE_* below */
+};
+#define PCIXCC_RESPONSE_TYPE_ICA  0
+#define PCIXCC_RESPONSE_TYPE_XCRB 1
+
 static struct ap_device_id zcrypt_pcixcc_ids[] = {
 	{ AP_DEVICE(AP_DEVICE_TYPE_PCIXCC) },
 	{ AP_DEVICE(AP_DEVICE_TYPE_CEX2C) },
@@ -243,6 +252,108 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev,
 	return 0;
 }
 
+struct type86_fmt2_msg {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+} __attribute__((packed));
+
+/**
+ * Convert a XCRB message to a type6 CPRB message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to AP message
+ * @xcRB: pointer to user input data
+ *
+ * Returns 0 on success or -EFAULT (bad length or failed user copy).
+ */
+static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_xcRB *xcRB)
+{
+	static struct type6_hdr static_type6_hdrX = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct ica_CPRBX cprbx;
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int rcblen, replylen;
+
+	/*
+	 * Length checks. Bound each user supplied length on its own so that
+	 * neither CEIL4() nor the sum of the parts can wrap around.
+	 */
+	ap_msg->length = 0;	/* the caller wipes ->length bytes on exit */
+	if (xcRB->request_control_blk_length > PCIXCC_MAX_XCRB_MESSAGE_SIZE ||
+	    xcRB->request_data_length > PCIXCC_MAX_XCRB_MESSAGE_SIZE ||
+	    sizeof(msg->hdr) + CEIL4(xcRB->request_control_blk_length) +
+	    xcRB->request_data_length > PCIXCC_MAX_XCRB_MESSAGE_SIZE) {
+		PRINTK("Combined message is too large (%ld/%d/%d).\n",
+		    sizeof(struct type6_hdr), xcRB->request_control_blk_length,
+		    xcRB->request_data_length);
+		return -EFAULT;
+	}
+	rcblen = CEIL4(xcRB->request_control_blk_length);
+	ap_msg->length = sizeof(msg->hdr) + rcblen + xcRB->request_data_length;
+	if (CEIL4(xcRB->reply_control_blk_length) >
+	    PCIXCC_MAX_XCRB_REPLY_SIZE) {
+		PDEBUG("Reply CPRB length is too large (%d).\n",
+		    xcRB->reply_control_blk_length);
+		return -EFAULT;
+	}
+	if (CEIL4(xcRB->reply_data_length) > PCIXCC_MAX_XCRB_DATA_SIZE) {
+		PDEBUG("Reply data block length is too large (%d).\n",
+		    xcRB->reply_data_length);
+		return -EFAULT;
+	}
+	replylen = CEIL4(xcRB->reply_control_blk_length) +
+		CEIL4(xcRB->reply_data_length) +
+		sizeof(struct type86_fmt2_msg);
+	if (replylen > PCIXCC_MAX_XCRB_RESPONSE_SIZE) {
+		PDEBUG("Reply CPRB + data block > PCIXCC_MAX_XCRB_RESPONSE_SIZE"
+		       " (%d/%d/%d).\n",
+		       (int) sizeof(struct type86_fmt2_msg),
+		       xcRB->reply_control_blk_length, xcRB->reply_data_length);
+		xcRB->reply_control_blk_length = PCIXCC_MAX_XCRB_RESPONSE_SIZE -
+			(sizeof(struct type86_fmt2_msg) +
+			    CEIL4(xcRB->reply_data_length));
+		PDEBUG("Capping Reply CPRB length at %d\n",
+		       xcRB->reply_control_blk_length);
+	}
+
+	/* prepare type6 header */
+	msg->hdr = static_type6_hdrX;
+	memcpy(msg->hdr.agent_id , &(xcRB->agent_ID), sizeof(xcRB->agent_ID));
+	msg->hdr.ToCardLen1 = xcRB->request_control_blk_length;
+	if (xcRB->request_data_length) {
+		msg->hdr.offset2 = msg->hdr.offset1 + rcblen;
+		msg->hdr.ToCardLen2 = xcRB->request_data_length;
+	}
+	msg->hdr.FromCardLen1 = xcRB->reply_control_blk_length;
+	msg->hdr.FromCardLen2 = xcRB->reply_data_length;
+
+	/* prepare CPRB */
+	if (copy_from_user(&(msg->cprbx), xcRB->request_control_blk_addr,
+		    xcRB->request_control_blk_length))
+		return -EFAULT;
+	if (msg->cprbx.cprb_len + sizeof(msg->hdr.function_code) >
+	    xcRB->request_control_blk_length) {
+		PDEBUG("cprb_len too large (%d/%d)\n", msg->cprbx.cprb_len,
+		    xcRB->request_control_blk_length);
+		return -EFAULT;
+	}
+	memcpy(msg->hdr.function_code, (char *) &msg->cprbx +
+	       msg->cprbx.cprb_len, sizeof(msg->hdr.function_code));
+
+	/* copy data block */
+	if (xcRB->request_data_length &&
+	    copy_from_user(ap_msg->message + sizeof(msg->hdr) + rcblen,
+		xcRB->request_data_address, xcRB->request_data_length))
+		return -EFAULT;
+	return 0;
+}
+
 /**
  * Copy results from a type 86 ICA reply message back to user space.
  *
@@ -363,6 +474,37 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
 	return 0;
 }
 
+/**
+ * Copy results from a type 86 XCRB reply message back to user space.
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @xcRB: pointer to XCRB
+ * Returns 0 on success, -EINVAL if the reply exceeds the buffer lengths
+ * given in @xcRB, or -EFAULT if a copy to user space fails.
+ */
+static int convert_type86_xcrb(struct zcrypt_device *zdev,
+			       struct ap_message *reply, struct ica_xcRB *xcRB)
+{
+	struct type86_fmt2_msg *msg = reply->message;
+
+	/* The counts come from the card: never overrun the user's buffers. */
+	if (msg->fmt2.count1 > xcRB->reply_control_blk_length ||
+	    msg->fmt2.count2 > xcRB->reply_data_length)
+		return -EINVAL;
+	/* Copy CPRB to user */
+	if (copy_to_user(xcRB->reply_control_blk_addr,
+		(char *) msg + msg->fmt2.offset1, msg->fmt2.count1))
+		return -EFAULT;
+	xcRB->reply_control_blk_length = msg->fmt2.count1;
+	/* Copy data buffer to user */
+	if (msg->fmt2.count2 &&
+	    copy_to_user(xcRB->reply_data_addr,
+		(char *) msg + msg->fmt2.offset2, msg->fmt2.count2))
+		return -EFAULT;
+	xcRB->reply_data_length = msg->fmt2.count2;
+	return 0;
+}
+
 static int convert_response_ica(struct zcrypt_device *zdev,
 			    struct ap_message *reply,
 			    char __user *outputdata,
@@ -391,6 +533,36 @@ static int convert_response_ica(struct zcrypt_device *zdev,
 	}
 }
 
+static int convert_response_xcrb(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    struct ica_xcRB *xcRB)
+{
+	struct type86x_reply *msg = reply->message;
+
+	/* Response type byte is the second byte in the response. */
+	switch (((unsigned char *) reply->message)[1]) {
+	case TYPE82_RSP_CODE:
+	case TYPE88_RSP_CODE:
+		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
+		return convert_error(zdev, reply);
+	case TYPE86_RSP_CODE:
+		if (msg->hdr.reply_code) {	/* nonzero reply code: error path */
+			memcpy(&(xcRB->status), msg->fmt2.apfs, sizeof(u32));
+			return convert_error(zdev, reply);
+		}
+		if (msg->cprbx.cprb_ver_id == 0x02)
+			return convert_type86_xcrb(zdev, reply, xcRB);
+		/* no break, incorrect cprb version is an unknown response */
+	default: /* Unknown response type, this should NEVER EVER happen */
+		PRINTK("Unrecognized Message Header: %08x%08x\n",
+		       *(unsigned int *) reply->message,
+		       *(unsigned int *) (reply->message+4));
+		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
+		zdev->online = 0;	/* no longer picked for requests */
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+}
+
 /**
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
@@ -407,6 +579,8 @@ static void zcrypt_pcixcc_receive(struct ap_device *ap_dev,
 		.type = TYPE82_RSP_CODE,
 		.reply_code = REP82_ERROR_MACHINE_FAILURE,
 	};
+	struct response_type *resp_type =
+		(struct response_type *) msg->private;
 	struct type86x_reply *t86r = reply->message;
 	int length;
 
@@ -415,12 +589,27 @@ static void zcrypt_pcixcc_receive(struct ap_device *ap_dev,
 		memcpy(msg->message, &error_reply, sizeof(error_reply));
 	else if (t86r->hdr.type == TYPE86_RSP_CODE &&
 		 t86r->cprbx.cprb_ver_id == 0x02) {
-		length = sizeof(struct type86x_reply) + t86r->length - 2;
-		length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length);
-		memcpy(msg->message, reply->message, length);
+		switch (resp_type->type) {
+		case PCIXCC_RESPONSE_TYPE_ICA:
+			length = sizeof(struct type86x_reply)
+				+ t86r->length - 2;
+			length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length);
+			memcpy(msg->message, reply->message, length);
+			break;
+		case PCIXCC_RESPONSE_TYPE_XCRB:
+			length = t86r->fmt2.offset2 + t86r->fmt2.count2;
+			length = min(PCIXCC_MAX_XCRB_RESPONSE_SIZE, length);
+			memcpy(msg->message, reply->message, length);
+			break;
+		default:
+			PRINTK("Invalid internal response type: %i\n",
+			    resp_type->type);
+			memcpy(msg->message, &error_reply,
+			    sizeof error_reply);
+		}
 	} else
 		memcpy(msg->message, reply->message, sizeof error_reply);
-	complete((struct completion *) msg->private);
+	complete(&(resp_type->work));
 }
 
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
@@ -436,7 +625,9 @@ static long zcrypt_pcixcc_modexpo(struct zcrypt_device *zdev,
 				  struct ica_rsa_modexpo *mex)
 {
 	struct ap_message ap_msg;
-	struct completion work;
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_ICA,
+	};
 	int rc;
 
 	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
@@ -444,14 +635,14 @@ static long zcrypt_pcixcc_modexpo(struct zcrypt_device *zdev,
 		return -ENOMEM;
 	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
-	ap_msg.private = &work;
+	ap_msg.private = &resp_type;
 	rc = ICAMEX_msg_to_type6MEX_msgX(zdev, &ap_msg, mex);
 	if (rc)
 		goto out_free;
-	init_completion(&work);
+	init_completion(&resp_type.work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible_timeout(
-				&work, PCIXCC_CLEANUP_TIME);
+				&resp_type.work, PCIXCC_CLEANUP_TIME);
 	if (rc > 0)
 		rc = convert_response_ica(zdev, &ap_msg, mex->outputdata,
 					  mex->outputdatalength);
@@ -478,7 +669,9 @@ static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev,
 				      struct ica_rsa_modexpo_crt *crt)
 {
 	struct ap_message ap_msg;
-	struct completion work;
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_ICA,
+	};
 	int rc;
 
 	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
@@ -486,14 +679,14 @@ static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev,
 		return -ENOMEM;
 	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
-	ap_msg.private = &work;
+	ap_msg.private = &resp_type;
 	rc = ICACRT_msg_to_type6CRT_msgX(zdev, &ap_msg, crt);
 	if (rc)
 		goto out_free;
-	init_completion(&work);
+	init_completion(&resp_type.work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible_timeout(
-				&work, PCIXCC_CLEANUP_TIME);
+				&resp_type.work, PCIXCC_CLEANUP_TIME);
 	if (rc > 0)
 		rc = convert_response_ica(zdev, &ap_msg, crt->outputdata,
 					  crt->outputdatalength);
@@ -509,12 +702,56 @@ static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev,
 	return rc;
 }
 
+/**
+ * The request distributor calls this function if it picked the PCIXCC/CEX2C
+ * device to handle a send_cprb request. Returns 0 on success, else an error.
+ * @zdev: the PCIXCC/CEX2C zcrypt device picked by the request distributor
+ * @xcRB: pointer to the send_cprb request buffer
+ */
+long zcrypt_pcixcc_send_cprb(struct zcrypt_device *zdev, struct ica_xcRB *xcRB)
+{
+	struct ap_message ap_msg;
+	struct response_type resp_type = { .type = PCIXCC_RESPONSE_TYPE_XCRB };
+	int rc;
+
+	ap_msg.message = kmalloc(PCIXCC_MAX_XCRB_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &resp_type;
+	rc = XCRB_msg_to_type6CPRB_msgX(zdev, &ap_msg, xcRB);
+	if (rc)
+		goto out_free;
+	init_completion(&resp_type.work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&resp_type.work, PCIXCC_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response_xcrb(zdev, &ap_msg, xcRB);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			rc = -ETIME;	/* Message timed out. */
+	}
+out_free:
+	/*
+	 * Wipe the whole buffer: ap_msg.length exceeds the allocation when the
+	 * request was rejected as too large, and the reply is stored here too.
+	 */
+	memset(ap_msg.message, 0x0, PCIXCC_MAX_XCRB_MESSAGE_SIZE);
+	kfree(ap_msg.message);
+	return rc;
+}
+
 /**
  * The crypto operations for a PCIXCC/CEX2C card.
  */
 static struct zcrypt_ops zcrypt_pcixcc_ops = {
 	.rsa_modexpo = zcrypt_pcixcc_modexpo,
 	.rsa_modexpo_crt = zcrypt_pcixcc_modexpo_crt,
+	.send_cprb = zcrypt_pcixcc_send_cprb,
 };
 
 /**
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h
index d4c44c4d7ad09..a78ff307fd191 100644
--- a/drivers/s390/crypto/zcrypt_pcixcc.h
+++ b/drivers/s390/crypto/zcrypt_pcixcc.h
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/s390/crypto/zcrypt_pcixcc.h
  *
- *  zcrypt 2.0.0
+ *  zcrypt 2.1.0
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
diff --git a/include/asm-s390/zcrypt.h b/include/asm-s390/zcrypt.h
index 0d6a3e2a3349c..7244c68464f24 100644
--- a/include/asm-s390/zcrypt.h
+++ b/include/asm-s390/zcrypt.h
@@ -1,7 +1,7 @@
 /*
  *  include/asm-s390/zcrypt.h
  *
- *  zcrypt 2.0.0 (user-visible header)
+ *  zcrypt 2.1.0 (user-visible header)
  *
  *  Copyright (C)  2001, 2006 IBM Corporation
  *  Author(s): Robert Burroughs
@@ -79,6 +79,83 @@ struct ica_rsa_modexpo_crt {
 	char __user *	u_mult_inv;
 };
 
+/**
+ * CPRBX
+ *	  Note that all shorts and ints are big-endian.
+ *	  All pointer fields are 16 bytes long, and mean nothing.
+ *
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code
+ *	    VUD block
+ *	    key block
+ */
+struct ica_CPRBX {
+	unsigned short	cprb_len;	/* CPRB length	      220	 */
+	unsigned char	cprb_ver_id;	/* CPRB version id.   0x02	 */
+	unsigned char	pad_000[3];	/* Alignment pad bytes		 */
+	unsigned char	func_id[2];	/* function id	      0x5432	 */
+	unsigned char	cprb_flags[4];	/* Flags			 */
+	unsigned int	req_parml;	/* request parameter buffer len	 */
+	unsigned int	req_datal;	/* request data buffer len	 */
+	unsigned int	rpl_msgbl;	/* reply  message block length	 */
+	unsigned int	rpld_parml;	/* replied parameter block len	 */
+	unsigned int	rpl_datal;	/* reply data block len		 */
+	unsigned int	rpld_datal;	/* replied data block len	 */
+	unsigned int	req_extbl;	/* request extension block len	 */
+	unsigned char	pad_001[4];	/* reserved			 */
+	unsigned int	rpld_extbl;	/* replied extension block len	 */
+	unsigned char	padx000[16 - sizeof (char *)];
+	unsigned char *	req_parmb;	/* request parm block 'address'	 */
+	unsigned char	padx001[16 - sizeof (char *)];
+	unsigned char *	req_datab;	/* request data block 'address'	 */
+	unsigned char	padx002[16 - sizeof (char *)];
+	unsigned char *	rpl_parmb;	/* reply parm block 'address'	 */
+	unsigned char	padx003[16 - sizeof (char *)];
+	unsigned char *	rpl_datab;	/* reply data block 'address'	 */
+	unsigned char	padx004[16 - sizeof (char *)];
+	unsigned char *	req_extb;	/* request extension block 'addr'*/
+	unsigned char	padx005[16 - sizeof (char *)];
+	unsigned char *	rpl_extb;	/* reply extension block 'addr'	 */
+	unsigned short	ccp_rtcode;	/* server return code		 */
+	unsigned short	ccp_rscode;	/* server reason code		 */
+	unsigned int	mac_data_len;	/* Mac Data Length		 */
+	unsigned char	logon_id[8];	/* Logon Identifier		 */
+	unsigned char	mac_value[8];	/* Mac Value			 */
+	unsigned char	mac_content_flgs;/* Mac content flag byte	 */
+	unsigned char	pad_002;	/* Alignment			 */
+	unsigned short	domain;		/* Domain			 */
+	unsigned char	usage_domain[4];/* Usage domain			 */
+	unsigned char	cntrl_domain[4];/* Control domain		 */
+	unsigned char	S390enf_mask[4];/* S/390 enforcement mask	 */
+	unsigned char	pad_004[36];	/* reserved			 */
+};
+
+/**
+ * xcRB - request block passed to the ZSECSENDCPRB ioctl
+ */
+struct ica_xcRB {
+	unsigned short	agent_ID;
+	unsigned int	user_defined;	/* AP device to use, or AUTOSELECT */
+	unsigned short	request_ID;
+	unsigned int	request_control_blk_length;
+	unsigned char	padding1[16 - sizeof (char *)];
+	char __user *	request_control_blk_addr;
+	unsigned int	request_data_length;
+	char		padding2[16 - sizeof (char *)];
+	char __user *	request_data_address;
+	unsigned int	reply_control_blk_length;
+	char		padding3[16 - sizeof (char *)];
+	char __user *	reply_control_blk_addr;
+	unsigned int	reply_data_length;
+	char		padding4[16 - sizeof (char *)];
+	char __user *	reply_data_addr;
+	unsigned short	priority_window;
+	unsigned int	status;		/* set by the driver on error replies */
+} __attribute__((packed));
+#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
+
 #define ZCRYPT_IOCTL_MAGIC 'z'
 
 /**
@@ -187,6 +264,7 @@ struct ica_rsa_modexpo_crt {
  */
 #define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
 #define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
 
 /* New status calls */
 #define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
-- 
GitLab


From 4ba069b802c29eee066385f9826e2d83716626b4 Mon Sep 17 00:00:00 2001
From: Michael Grundy <grundym@us.ibm.com>
Date: Wed, 20 Sep 2006 15:58:39 +0200
Subject: [PATCH 0228/1063] [S390] add kprobes support.

Signed-off-by: Michael Grundy <grundym@us.ibm.com>
Signed-off-by: David Wilder <dwilder@us.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig              |  14 +
 arch/s390/kernel/Makefile      |   1 +
 arch/s390/kernel/entry.S       |  12 +
 arch/s390/kernel/entry64.S     |  12 +
 arch/s390/kernel/kprobes.c     | 657 +++++++++++++++++++++++++++++++++
 arch/s390/kernel/traps.c       |  31 +-
 arch/s390/kernel/vmlinux.lds.S |   1 +
 arch/s390/mm/fault.c           |  40 +-
 include/asm-s390/kdebug.h      |  59 +++
 include/asm-s390/kprobes.h     | 114 ++++++
 10 files changed, 937 insertions(+), 4 deletions(-)
 create mode 100644 arch/s390/kernel/kprobes.c
 create mode 100644 include/asm-s390/kdebug.h
 create mode 100644 include/asm-s390/kprobes.h

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2f4f70c4dbb29..76122ce1e6cbc 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -487,8 +487,22 @@ source "drivers/net/Kconfig"
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
 source "arch/s390/oprofile/Kconfig"
 
+config KPROBES
+	bool "Kprobes (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && MODULES
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+
+endmenu
+
 source "arch/s390/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9a33ed6ca6960..33a5069c0e161 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o \
 
 obj-$(CONFIG_VIRT_TIMER)	+= vtime.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 5b5799ac8f839..0c712b78a7e82 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -505,6 +505,8 @@ pgm_no_vtime2:
 	mvc	__THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
 	oi	__TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
+	bz	BASED(kernel_per)
 	l	%r3,__LC_PGM_ILC	 # load program interruption code
 	la	%r8,0x7f
 	nr	%r8,%r3                  # clear per-event-bit and ilc
@@ -536,6 +538,16 @@ pgm_no_vtime3:
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	b	BASED(sysc_do_svc)
 
+#
+# per was called from kernel, must be kprobes
+#
+kernel_per:
+	mvi	SP_TRAP+1(%r15),0x28	# set trap indication to pgm check
+	la	%r2,SP_PTREGS(%r15)	# address of register-save area
+	l	%r1,BASED(.Lhandle_per)	# load adr. of per handler
+	la	%r14,BASED(sysc_leave)	# load adr. of system return
+	br	%r1			# branch to do_single_step
+
 /*
  * IO interrupt handler routine
  */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 56f5f613b868f..8b956d1538f55 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -518,6 +518,8 @@ pgm_no_vtime2:
 #endif
 	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	lg	%r1,__TI_task(%r9)
+	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
+	jz	kernel_per
 	mvc	__THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
 	mvc	__THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
@@ -553,6 +555,16 @@ pgm_no_vtime3:
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	j	sysc_do_svc
 
+#
+# per was called from kernel, must be kprobes
+#
+kernel_per:
+	lhi	%r0,__LC_PGM_OLD_PSW
+	sth	%r0,SP_TRAP(%r15)	# set trap indication to pgm check
+	la	%r2,SP_PTREGS(%r15)	# address of register-save area
+	larl	%r14,sysc_leave		# load adr. of system ret, no work
+	jg	do_single_step		# branch to do_single_step
+
 /*
  * IO interrupt handler routine
  */
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
new file mode 100644
index 0000000000000..ca28fb0b3790f
--- /dev/null
+++ b/arch/s390/kernel/kprobes.c
@@ -0,0 +1,657 @@
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2006
+ *
+ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/stop_machine.h>
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/sections.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	/* Make sure the probe isn't going on a difficult instruction */
+	if (is_prohibited_opcode((kprobe_opcode_t *) p->addr))
+		return -EINVAL;
+
+	if ((unsigned long)p->addr & 0x01) {
+		printk("Attempt to register kprobe at an unaligned address\n");
+		return -EINVAL;
+		}
+
+	/* Use the get_insn_slot() facility for correctness */
+	if (!(p->ainsn.insn = get_insn_slot()))
+		return -ENOMEM;
+
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+
+	get_instruction_type(&p->ainsn);
+	p->opcode = *p->addr;
+	return 0;
+}
+
+int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction)
+{
+	switch (*(__u8 *) instruction) {
+	case 0x0c:	/* bassm */
+	case 0x0b:	/* bsm	 */
+	case 0x83:	/* diag  */
+	case 0x44:	/* ex	 */
+		return -EINVAL;
+	}
+	switch (*(__u16 *) instruction) {
+	case 0x0101:	/* pr	 */
+	case 0xb25a:	/* bsa	 */
+	case 0xb240:	/* bakr  */
+	case 0xb258:	/* bsg	 */
+	case 0xb218:	/* pc	 */
+	case 0xb228:	/* pt	 */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void __kprobes get_instruction_type(struct arch_specific_insn *ainsn)
+{
+	/* default fixup method */
+	ainsn->fixup = FIXUP_PSW_NORMAL;
+
+	/* save r1 operand */
+	ainsn->reg = (*ainsn->insn & 0xf0) >> 4;
+
+	/* instruction length in bytes from opcode bits 0-1 (PoP 5-5) */
+	switch (*(__u8 *) (ainsn->insn) >> 6) {
+	case 0:
+		ainsn->ilen = 2;
+		break;
+	case 1:
+	case 2:
+		ainsn->ilen = 4;
+		break;
+	case 3:
+		ainsn->ilen = 6;
+		break;
+	}
+
+	switch (*(__u8 *) ainsn->insn) {
+	case 0x05:	/* balr	*/
+	case 0x0d:	/* basr */
+		ainsn->fixup = FIXUP_RETURN_REGISTER;
+		/* if r2 = 0, no branch will be taken */
+		if ((*ainsn->insn & 0x0f) == 0)
+			ainsn->fixup |= FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x06:	/* bctr	*/
+	case 0x07:	/* bcr	*/
+		ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x45:	/* bal	*/
+	case 0x4d:	/* bas	*/
+		ainsn->fixup = FIXUP_RETURN_REGISTER;
+		break;
+	case 0x47:	/* bc	*/
+	case 0x46:	/* bct	*/
+	case 0x86:	/* bxh	*/
+	case 0x87:	/* bxle	*/
+		ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x82:	/* lpsw	*/
+		ainsn->fixup = FIXUP_NOT_REQUIRED;
+		break;
+	case 0xb2:	/* lpswe */
+		if (*(((__u8 *) ainsn->insn) + 1) == 0xb2) {
+			ainsn->fixup = FIXUP_NOT_REQUIRED;
+		}
+		break;
+	case 0xa7:	/* bras	*/
+		if ((*ainsn->insn & 0x0f) == 0x05) {
+			ainsn->fixup |= FIXUP_RETURN_REGISTER;
+		}
+		break;
+	case 0xc0:
+		if ((*ainsn->insn & 0x0f) == 0x00  /* larl  */
+			|| (*ainsn->insn & 0x0f) == 0x05) /* brasl */
+			ainsn->fixup |= FIXUP_RETURN_REGISTER;
+		break;
+	case 0xeb:
+		if (*(((__u8 *) ainsn->insn) + 5 ) == 0x44 ||	/* bxhg  */
+			*(((__u8 *) ainsn->insn) + 5) == 0x45) {/* bxleg */
+			ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		}
+		break;
+	case 0xe3:	/* bctg	*/
+		if (*(((__u8 *) ainsn->insn) + 5) == 0x46) {
+			ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		}
+		break;
+	}
+}
+
+static int __kprobes swap_instruction(void *aref)
+{
+	struct ins_replace_args *args = aref;
+	int err = -EFAULT;
+
+	asm volatile(
+		"0: mvc  0(2,%2),0(%3)\n"
+		"1: la   %0,0\n"
+		"2:\n"
+		EX_TABLE(0b,2b)
+		: "+d" (err), "=m" (*args->ptr)
+		: "a" (args->ptr), "a" (&args->new), "m" (args->new));
+	return err;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long status = kcb->kprobe_status;
+	struct ins_replace_args args;
+
+	args.ptr = p->addr;
+	args.old = p->opcode;
+	args.new = BREAKPOINT_INSTRUCTION;
+
+	kcb->kprobe_status = KPROBE_SWAP_INST;
+	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	kcb->kprobe_status = status;
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long status = kcb->kprobe_status;
+	struct ins_replace_args args;
+
+	args.ptr = p->addr;
+	args.old = BREAKPOINT_INSTRUCTION;
+	args.new = p->opcode;
+
+	kcb->kprobe_status = KPROBE_SWAP_INST;
+	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	kcb->kprobe_status = status;
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	mutex_lock(&kprobe_mutex);
+	free_insn_slot(p->ainsn.insn);
+	mutex_unlock(&kprobe_mutex);
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	per_cr_bits kprobe_per_regs[1];
+
+	memset(kprobe_per_regs, 0, sizeof(per_cr_bits));
+	regs->psw.addr = (unsigned long)p->ainsn.insn | PSW_ADDR_AMODE;
+
+	/* Set up the per control reg info, will pass to lctl */
+	kprobe_per_regs[0].em_instruction_fetch = 1;
+	kprobe_per_regs[0].starting_addr = (unsigned long)p->ainsn.insn;
+	kprobe_per_regs[0].ending_addr = (unsigned long)p->ainsn.insn + 1;
+
+	/* Set the PER control regs, turns on single step for this address */
+	__ctl_load(kprobe_per_regs, 9, 11);
+	regs->psw.mask |= PSW_MASK_PER;
+	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	kcb->prev_kprobe.kprobe_saved_imask = kcb->kprobe_saved_imask;
+	memcpy(kcb->prev_kprobe.kprobe_saved_ctl, kcb->kprobe_saved_ctl,
+					sizeof(kcb->kprobe_saved_ctl));
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->kprobe_saved_imask = kcb->prev_kprobe.kprobe_saved_imask;
+	memcpy(kcb->kprobe_saved_ctl, kcb->prev_kprobe.kprobe_saved_ctl,
+					sizeof(kcb->kprobe_saved_ctl));
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+						struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = p;
+	/* Save the interrupt and per flags */
+	kcb->kprobe_saved_imask = regs->psw.mask &
+	    (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
+	/* Save the control regs that govern PER */
+	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+					struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
+
+		/* Replace the return addr with trampoline addr */
+		regs->gprs[14] = (unsigned long)&kretprobe_trampoline;
+
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	kprobe_opcode_t *addr = (kprobe_opcode_t *)
+		((regs->psw.addr & PSW_ADDR_INSN) - 2);
+	struct kprobe_ctlblk *kcb;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kcb->kprobe_status == KPROBE_HIT_SS &&
+			    *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
+				regs->psw.mask &= ~PSW_MASK_PER;
+				regs->psw.mask |= kcb->kprobe_saved_imask;
+				goto no_kprobe;
+			}
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
+			kprobes_inc_nmissed_count(p);
+			prepare_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
+		} else {
+			p = __get_cpu_var(current_kprobe);
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		}
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		if (*addr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 *
+			 */
+			ret = 1;
+		}
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p, regs, kcb);
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * Function return probe trampoline:
+ *	- init_kprobes() establishes a probepoint here
+ *	- When the probed function returns, this probe
+ *		causes the handlers to fire
+ */
+void __kprobes kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+		     "kretprobe_trampoline: bcr 0,0\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because multiple functions in the call path
+	 * have a return probe installed on them, and/or more than one
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *	 function, the first instance's ret_addr will point to the
+	 *	 real return address, and all the rest will point to
+	 *	 kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address) {
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+		}
+	}
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+
+	reset_current_kprobe();
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	preempt_enable_no_resched();
+
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
+	return 1;
+}
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first halfword has been replaced by the "breakpoint"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	regs->psw.addr &= PSW_ADDR_INSN;
+
+	if (p->ainsn.fixup & FIXUP_PSW_NORMAL)
+		regs->psw.addr = (unsigned long)p->addr +
+				((unsigned long)regs->psw.addr -
+				 (unsigned long)p->ainsn.insn);
+
+	if (p->ainsn.fixup & FIXUP_BRANCH_NOT_TAKEN)
+		if ((unsigned long)regs->psw.addr -
+		    (unsigned long)p->ainsn.insn == p->ainsn.ilen)
+			regs->psw.addr = (unsigned long)p->addr + p->ainsn.ilen;
+
+	if (p->ainsn.fixup & FIXUP_RETURN_REGISTER)
+		regs->gprs[p->ainsn.reg] = ((unsigned long)p->addr +
+						(regs->gprs[p->ainsn.reg] -
+						(unsigned long)p->ainsn.insn))
+						| PSW_ADDR_AMODE;
+
+	regs->psw.addr |= PSW_ADDR_AMODE;
+	/* turn off PER mode */
+	regs->psw.mask &= ~PSW_MASK_PER;
+	/* Restore the original per control regs */
+	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+	regs->psw.mask |= kcb->kprobe_saved_imask;
+}
+
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs);
+
+	/* Restore the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+	reset_current_kprobe();
+out:
+	preempt_enable_no_resched();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, psw mask
+	 * will have PER set, in which case, continue the remaining processing
+	 * of do_single_step, as if this is not a probe hit.
+	 */
+	if (regs->psw.mask & PSW_MASK_PER) {
+		return 0;
+	}
+
+	return 1;
+}
+
+static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	const struct exception_table_entry *entry;
+
+	switch(kcb->kprobe_status) {
+	case KPROBE_SWAP_INST:
+		/* We are here because the instruction replacement failed */
+		return 0;
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the psw points back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		regs->psw.addr = (unsigned long)cur->addr | PSW_ADDR_AMODE;
+		regs->psw.mask &= ~PSW_MASK_PER;
+		regs->psw.mask |= kcb->kprobe_saved_imask;
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		if (entry) {
+			regs->psw.addr = entry->fixup | PSW_ADDR_AMODE;
+			return 1;
+		}
+
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_BPT:
+		if (kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (post_kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_TRAP:
+	case DIE_PAGE_FAULT:
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() &&
+		    kprobe_fault_handler(args->regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		preempt_enable();
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long addr;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+	/* setup return addr to the jprobe handler routine */
+	regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE;
+
+	/* r14 is the function return address */
+	kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14];
+	/* r15 is the stack pointer */
+	kcb->jprobe_saved_r15 = (unsigned long)regs->gprs[15];
+	addr = (unsigned long)kcb->jprobe_saved_r15;
+
+	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr,
+	       MIN_STACK_SIZE(addr));
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	asm volatile(".word 0x0002");
+}
+
+void __kprobes jprobe_return_end(void)
+{
+	asm volatile("bcr 0,0");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_r15);
+
+	/* Put the regs back */
+	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* put the stack back */
+	memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
+	       MIN_STACK_SIZE(stack_addr));
+	preempt_enable_no_resched();
+	return 1;
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) & kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline_p);
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index bde1d1d598586..c4982c9634249 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/reboot.h>
+#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -39,6 +40,7 @@
 #include <asm/s390_ext.h>
 #include <asm/lowcore.h>
 #include <asm/debug.h>
+#include <asm/kdebug.h>
 
 /* Called from entry.S only */
 extern void handle_per_exception(struct pt_regs *regs);
@@ -74,6 +76,20 @@ static int kstack_depth_to_print = 12;
 static int kstack_depth_to_print = 20;
 #endif /* CONFIG_64BIT */
 
+ATOMIC_NOTIFIER_HEAD(s390die_chain);
+
+int register_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&s390die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&s390die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
 /*
  * For show_trace we have tree different stack to consider:
  *   - the panic stack which is used if the kernel stack has overflown
@@ -305,8 +321,9 @@ report_user_fault(long interruption_code, struct pt_regs *regs)
 #endif
 }
 
-static void inline do_trap(long interruption_code, int signr, char *str,
-                           struct pt_regs *regs, siginfo_t *info)
+static void __kprobes inline do_trap(long interruption_code, int signr,
+					char *str, struct pt_regs *regs,
+					siginfo_t *info)
 {
 	/*
 	 * We got all needed information from the lowcore and can
@@ -315,6 +332,10 @@ static void inline do_trap(long interruption_code, int signr, char *str,
         if (regs->psw.mask & PSW_MASK_PSTATE)
 		local_irq_enable();
 
+	if (notify_die(DIE_TRAP, str, regs, interruption_code,
+				interruption_code, signr) == NOTIFY_STOP)
+		return;
+
         if (regs->psw.mask & PSW_MASK_PSTATE) {
                 struct task_struct *tsk = current;
 
@@ -336,8 +357,12 @@ static inline void __user *get_check_address(struct pt_regs *regs)
 	return (void __user *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN);
 }
 
-void do_single_step(struct pt_regs *regs)
+void __kprobes do_single_step(struct pt_regs *regs)
 {
+	if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0,
+					SIGTRAP) == NOTIFY_STOP){
+		return;
+	}
 	if ((current->ptrace & PT_PTRACED) != 0)
 		force_sig(SIGTRAP, current);
 }
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index ff5f7bb34f75b..df0c16ab8e920 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -24,6 +24,7 @@ SECTIONS
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT
+	KPROBES_TEXT
 	*(.fixup)
 	*(.gnu.warning)
 	} = 0x0700
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 7cd82575813d1..44f0cda7e72e4 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -25,10 +25,12 @@
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/kdebug.h>
 
 #ifndef CONFIG_64BIT
 #define __FAIL_ADDR_MASK 0x7ffff000
@@ -48,6 +50,38 @@ extern int sysctl_userprocess_debug;
 
 extern void die(const char *,struct pt_regs *,long);
 
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+			struct pt_regs *regs, long err, int trap, int sig)
+{
+	struct die_args args = {
+		.regs = regs,
+		.str = str,
+		.err = err,
+		.trapnr = trap,
+		.signr = sig
+	};
+	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+			struct pt_regs *regs, long err, int trap, int sig)
+{
+	return NOTIFY_DONE;
+}
+#endif
+
 extern spinlock_t timerlist_lock;
 
 /*
@@ -159,7 +193,7 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
  *   11       Page translation     ->  Not present       (nullification)
  *   3b       Region third trans.  ->  Not present       (nullification)
  */
-static inline void
+static inline void __kprobes
 do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
 {
         struct task_struct *tsk;
@@ -173,6 +207,10 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
         tsk = current;
         mm = tsk->mm;
 	
+	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+					SIGSEGV) == NOTIFY_STOP)
+		return;
+
 	/* 
          * Check for low-address protection.  This needs to be treated
 	 * as a special case because the translation exception code 
diff --git a/include/asm-s390/kdebug.h b/include/asm-s390/kdebug.h
new file mode 100644
index 0000000000000..40cc68025e016
--- /dev/null
+++ b/include/asm-s390/kdebug.h
@@ -0,0 +1,59 @@
+#ifndef _S390_KDEBUG_H
+#define _S390_KDEBUG_H
+
+/*
+ * Feb 2006 Ported to s390 <grundym@us.ibm.com>
+ */
+#include <linux/notifier.h>
+
+struct pt_regs;
+
+struct die_args {
+	struct pt_regs *regs;
+	const char *str;
+	long err;
+	int trapnr;
+	int signr;
+};
+
+/* Note - you should never unregister because that can race with NMIs.
+ * If you really want to do it first unregister - then synchronize_sched
+ *  - then free.
+ */
+extern int register_die_notifier(struct notifier_block *);
+extern int unregister_die_notifier(struct notifier_block *);
+extern int register_page_fault_notifier(struct notifier_block *);
+extern int unregister_page_fault_notifier(struct notifier_block *);
+extern struct atomic_notifier_head s390die_chain;
+
+
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_BPT,
+	DIE_SSTEP,
+	DIE_PANIC,
+	DIE_NMI,
+	DIE_DIE,
+	DIE_NMIWATCHDOG,
+	DIE_KERNELDEBUG,
+	DIE_TRAP,
+	DIE_GPF,
+	DIE_CALL,
+	DIE_NMI_IPI,
+	DIE_PAGE_FAULT,
+};
+
+static inline int notify_die(enum die_val val, const char *str,
+			struct pt_regs *regs, long err, int trap, int sig)
+{
+	struct die_args args = {
+		.regs = regs,
+		.str = str,
+		.err = err,
+		.trapnr = trap,
+		.signr = sig
+	};
+	return atomic_notifier_call_chain(&s390die_chain, val, &args);
+}
+
+#endif
diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h
new file mode 100644
index 0000000000000..b847ff0ec3fad
--- /dev/null
+++ b/include/asm-s390/kprobes.h
@@ -0,0 +1,114 @@
+#ifndef _ASM_S390_KPROBES_H
+#define _ASM_S390_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2006
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes suggestions from
+ *		Rusty Russell).
+ * 2004-Nov	Modified for PPC64 by Ananth N Mavinakayanahalli
+ *		<ananth@in.ibm.com>
+ * 2005-Dec	Used as a template for s390 by Mike Grundy
+ *		<grundym@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define  __ARCH_WANT_KPROBES_INSN_SLOT
+struct pt_regs;
+struct kprobe;
+
+typedef u16 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0x0002
+
+/* Maximum instruction size is 3 (16bit) halfwords: */
+#define MAX_INSN_SIZE		0x0003
+#define MAX_STACK_SIZE		64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	? (MAX_STACK_SIZE) \
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+
+#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)(pentry)
+
+#define ARCH_SUPPORTS_KRETPROBES
+#define ARCH_INACTIVE_KPROBE_COUNT 0
+
+#define KPROBE_SWAP_INST	0x10
+
+#define FIXUP_PSW_NORMAL	0x08
+#define FIXUP_BRANCH_NOT_TAKEN	0x04
+#define FIXUP_RETURN_REGISTER	0x02
+#define FIXUP_NOT_REQUIRED	0x01
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of original instruction */
+	kprobe_opcode_t *insn;
+	int fixup;
+	int ilen;
+	int reg;
+};
+
+struct ins_replace_args {
+	kprobe_opcode_t *ptr;
+	kprobe_opcode_t old;
+	kprobe_opcode_t new;
+};
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+	unsigned long saved_psw;
+	unsigned long kprobe_saved_imask;
+	unsigned long kprobe_saved_ctl[3];
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long kprobe_saved_imask;
+	unsigned long kprobe_saved_ctl[3];
+	struct pt_regs jprobe_saved_regs;
+	unsigned long jprobe_saved_r14;
+	unsigned long jprobe_saved_r15;
+	struct prev_kprobe prev_kprobe;
+	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+int  is_prohibited_opcode(kprobe_opcode_t *instruction);
+void get_instruction_type(struct arch_specific_insn *ainsn);
+
+#define flush_insn_slot(p)	do { } while (0)
+
+#ifdef CONFIG_KPROBES
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+					unsigned long val, void *data);
+#else	/* !CONFIG_KPROBES */
+static inline int kprobe_exceptions_notify(struct notifier_block *self,
+						unsigned long val, void *data)
+{
+	return 0;
+}
+#endif
+
+#endif	/* _ASM_S390_KPROBES_H */
-- 
GitLab


From 65912a84c0f33304fa5ea004c7b6ee58d5f5572e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:41 +0200
Subject: [PATCH 0229/1063] [S390] initrd vs. bootmem bitmap.

Move initrd if the bitmap of the bootmem allocator would overwrite it.
In addition this patch sets the default size and address of the initrd to 0.
Therefore all boot loaders must set the initrd size and address correctly.
This is especially relevant for ftp boot via HMC/SE, where this change
requires a special patch file entry in the .ins file which sets these two
values contained at address 0x10408 and 0x10410.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/head.S   | 10 ++++++----
 arch/s390/kernel/head31.S |  4 ++--
 arch/s390/kernel/head64.S |  4 ++--
 arch/s390/kernel/setup.c  | 41 ++++++++++++++++++++++++++++++++++++---
 include/asm-s390/setup.h  |  2 --
 5 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index adad8863ee2f8..a6e9bdb535915 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -272,7 +272,7 @@ iplstart:
 # load parameter file from ipl device
 #
 .Lagain1:
- 	l     %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # ramdisk loc. is temp
+	l     %r2,.Linitrd		       # ramdisk loc. is temp
         bas   %r14,.Lloader                    # load parameter file
         ltr   %r2,%r2                          # got anything ?
         bz    .Lnopf
@@ -280,7 +280,7 @@ iplstart:
 	bnh   .Lnotrunc
 	la    %r2,895
 .Lnotrunc:
-	l     %r4,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
+	l     %r4,.Linitrd
 	clc   0(3,%r4),.L_hdr		       # if it is HDRx
 	bz    .Lagain1			       # skip dataset header
 	clc   0(3,%r4),.L_eof		       # if it is EOFx
@@ -323,14 +323,15 @@ iplstart:
 # load ramdisk from ipl device
 #	
 .Lagain2:
- 	l     %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # addr of ramdisk
+	l     %r2,.Linitrd		       # addr of ramdisk
+	st    %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
         bas   %r14,.Lloader                    # load ramdisk
  	st    %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of ramdisk
         ltr   %r2,%r2
         bnz   .Lrdcont
         st    %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
 .Lrdcont:
-	l     %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
+	l     %r2,.Linitrd
 
 	clc   0(3,%r2),.L_hdr		       # skip HDRx and EOFx 
 	bz    .Lagain2
@@ -379,6 +380,7 @@ iplstart:
         l     %r1,.Lstartup
         br    %r1
 
+.Linitrd:.long _end + 0x400000		       # default address of initrd
 .Lparm:	.long  PARMAREA
 .Lstartup: .long startup
 .Lcvtab:.long  _ebcasc                         # ebcdic to ascii table
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index a4dc61f3285e3..0e46077d71405 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -26,8 +26,8 @@ startup:basr	%r13,0			# get base
 #
 	.org	PARMAREA
 	.long	0,0			# IPL_DEVICE
-	.long	0,RAMDISK_ORIGIN	# INITRD_START
-	.long	0,RAMDISK_SIZE		# INITRD_SIZE
+	.long	0,0			# INITRD_START
+	.long	0,0			# INITRD_SIZE
 
 	.org	COMMAND_LINE
 	.byte	"root=/dev/ram0 ro"
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 9d80c5b1ef958..3e0341acd04e4 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,8 +26,8 @@ startup:basr  %r13,0			 # get base
 #
 	.org   PARMAREA
 	.quad  0			# IPL_DEVICE
-	.quad  RAMDISK_ORIGIN		# INITRD_START
-	.quad  RAMDISK_SIZE		# INITRD_SIZE
+	.quad  0			# INITRD_START
+	.quad  0			# INITRD_SIZE
 
 	.org   COMMAND_LINE
 	.byte  "root=/dev/ram0 ro"
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c902f059c7aab..89051e8a5d8dd 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -37,6 +37,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/device.h>
 #include <linux/notifier.h>
+#include <linux/pfn.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -501,13 +502,47 @@ setup_memory(void)
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	end_pfn = max_pfn = memory_end >> PAGE_SHIFT;
+	start_pfn = PFN_UP(__pa(&_end));
+	end_pfn = max_pfn = PFN_DOWN(memory_end);
 
 	/* Initialize storage key for kernel pages */
 	for (init_pfn = 0 ; init_pfn < start_pfn; init_pfn++)
 		page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY);
 
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * Move the initrd in case the bitmap of the bootmem allocator
+	 * would overwrite it.
+	 */
+
+	if (INITRD_START && INITRD_SIZE) {
+		unsigned long bmap_size;
+		unsigned long start;
+
+		bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
+		bmap_size = PFN_PHYS(bmap_size);
+
+		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
+			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
+
+			if (start + INITRD_SIZE > memory_end) {
+				printk("initrd extends beyond end of memory "
+				       "(0x%08lx > 0x%08lx)\n"
+				       "disabling initrd\n",
+				       start + INITRD_SIZE, memory_end);
+				INITRD_START = INITRD_SIZE = 0;
+			} else {
+				printk("Moving initrd (0x%08lx -> 0x%08lx, "
+				       "size: %ld)\n",
+				       INITRD_START, start, INITRD_SIZE);
+				memmove((void *) start, (void *) INITRD_START,
+					INITRD_SIZE);
+				INITRD_START = start;
+			}
+		}
+	}
+#endif
+
 	/*
 	 * Initialize the boot-time allocator (with low memory only):
 	 */
@@ -559,7 +594,7 @@ setup_memory(void)
 	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
 
 #ifdef CONFIG_BLK_DEV_INITRD
-	if (INITRD_START) {
+	if (INITRD_START && INITRD_SIZE) {
 		if (INITRD_START + INITRD_SIZE <= memory_end) {
 			reserve_bootmem(INITRD_START, INITRD_SIZE);
 			initrd_start = INITRD_START;
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index 19e31979309a1..02c96d57f0cf2 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -14,8 +14,6 @@
 
 #define PARMAREA		0x10400
 #define COMMAND_LINE_SIZE 	896
-#define RAMDISK_ORIGIN		0x800000
-#define RAMDISK_SIZE		0x800000
 #define MEMORY_CHUNKS		16	/* max 0x7fff */
 #define IPL_PARMBLOCK_ORIGIN	0x2000
 
-- 
GitLab


From f19bfb2c9b8675590fbecb43e5ce3b34ee321185 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:44 +0200
Subject: [PATCH 0230/1063] [S390] hypfs comment cleanup.

Correct some comments in the hypervisor filesystem.

Signed-off-by: Michael Holzheu <holzheu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/hypfs/hypfs.h      | 2 +-
 arch/s390/hypfs/hypfs_diag.c | 2 +-
 arch/s390/hypfs/hypfs_diag.h | 2 +-
 arch/s390/hypfs/inode.c      | 2 +-
 drivers/base/hypervisor.c    | 3 ++-
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index ea5567be00fcf..f3dbd91965c6e 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/hypfs.h
+ *  arch/s390/hypfs/hypfs.h
  *    Hypervisor filesystem for Linux on s390.
  *
  *    Copyright (C) IBM Corp. 2006
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 1785bce2b9196..874d761c9810a 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/hypfs_diag.c
+ *  arch/s390/hypfs/hypfs_diag.c
  *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
  *    implementation.
  *
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
index 793dea6b9bb63..256b384aebe13 100644
--- a/arch/s390/hypfs/hypfs_diag.h
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/hypfs_diag.h
+ *  arch/s390/hypfs/hypfs_diag.h
  *    Hypervisor filesystem for Linux on s390.
  *
  *    Copyright (C) IBM Corp. 2006
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 18c091925ea5f..bdcad2ea1ff4b 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/inode.c
+ *  arch/s390/hypfs/inode.c
  *    Hypervisor filesystem for Linux on s390.
  *
  *    Copyright (C) IBM Corp. 2006
diff --git a/drivers/base/hypervisor.c b/drivers/base/hypervisor.c
index 0c85e9d6a4485..7080b413ddc9f 100644
--- a/drivers/base/hypervisor.c
+++ b/drivers/base/hypervisor.c
@@ -1,8 +1,9 @@
 /*
  * hypervisor.c - /sys/hypervisor subsystem.
  *
- * This file is released under the GPLv2
+ * Copyright (C) IBM Corp. 2006
  *
+ * This file is released under the GPLv2
  */
 
 #include <linux/kobject.h>
-- 
GitLab


From 331c982d4a6b43cdc0d056956a1cae8a7d6237bf Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:47 +0200
Subject: [PATCH 0231/1063] [S390] hypfs compiler warnings.

Add casts to avoid compiler warnings.

Signed-off-by: Michael Holzheu <holzheu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/hypfs/hypfs_diag.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 874d761c9810a..fee5aee605f6b 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -432,12 +432,14 @@ static int diag204_probe(void)
 
 	buf = diag204_get_buffer(INFO_EXT, &pages);
 	if (!IS_ERR(buf)) {
-		if (diag204(SUBC_STIB7 | INFO_EXT, pages, buf) >= 0) {
+		if (diag204((unsigned long)SUBC_STIB7 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
 			diag204_store_sc = SUBC_STIB7;
 			diag204_info_type = INFO_EXT;
 			goto out;
 		}
-		if (diag204(SUBC_STIB6 | INFO_EXT, pages, buf) >= 0) {
+		if (diag204((unsigned long)SUBC_STIB6 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
 			diag204_store_sc = SUBC_STIB7;
 			diag204_info_type = INFO_EXT;
 			goto out;
@@ -452,7 +454,8 @@ static int diag204_probe(void)
 		rc = PTR_ERR(buf);
 		goto fail_alloc;
 	}
-	if (diag204(SUBC_STIB4 | INFO_SIMPLE, pages, buf) >= 0) {
+	if (diag204((unsigned long)SUBC_STIB4 |
+		    (unsigned long)INFO_SIMPLE, pages, buf) >= 0) {
 		diag204_store_sc = SUBC_STIB4;
 		diag204_info_type = INFO_SIMPLE;
 		goto out;
@@ -476,7 +479,8 @@ static void *diag204_store(void)
 	buf = diag204_get_buffer(diag204_info_type, &pages);
 	if (IS_ERR(buf))
 		goto out;
-	if (diag204(diag204_store_sc | diag204_info_type, pages, buf) < 0)
+	if (diag204((unsigned long)diag204_store_sc |
+		    (unsigned long)diag204_info_type, pages, buf) < 0)
 		return ERR_PTR(-ENOSYS);
 out:
 	return buf;
-- 
GitLab


From ff6b8ea68f4b7353f88b97024f28127e2148aa00 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:49 +0200
Subject: [PATCH 0232/1063] [S390] ipl/dump on panic.

It is now possible to specify a ccw/fcp dump device which is used to
automatically create a system dump in case of a kernel panic. The dump
device can be configured under /sys/firmware/dump.
In addition it is now possible to specify a ccw/fcp device which is used
for the next reboot of Linux. The reipl device can be configured under
/sys/firmware/reipl.

Signed-off-by: Michael Holzheu <holzheu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/Makefile     |   2 +-
 arch/s390/kernel/head31.S     |   3 +
 arch/s390/kernel/head64.S     |   3 +
 arch/s390/kernel/ipl.c        | 942 ++++++++++++++++++++++++++++++++++
 arch/s390/kernel/reipl.S      |  33 +-
 arch/s390/kernel/reipl64.S    |  34 +-
 arch/s390/kernel/reipl_diag.c |  39 --
 arch/s390/kernel/setup.c      | 220 +-------
 arch/s390/kernel/smp.c        |  10 +-
 drivers/s390/cio/cio.c        |  50 +-
 include/asm-s390/cio.h        |   7 +
 include/asm-s390/lowcore.h    |  13 +-
 include/asm-s390/setup.h      |  54 +-
 13 files changed, 1099 insertions(+), 311 deletions(-)
 create mode 100644 arch/s390/kernel/ipl.c
 delete mode 100644 arch/s390/kernel/reipl_diag.c

diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 33a5069c0e161..aa978978d3d1a 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -6,7 +6,7 @@ EXTRA_AFLAGS	:= -traditional
 
 obj-y	:=  bitmap.o traps.o time.o process.o \
             setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
-            semaphore.o s390_ext.o debug.o profile.o irq.o reipl_diag.o
+	    semaphore.o s390_ext.o debug.o profile.o irq.o ipl.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index 0e46077d71405..d8bb68a725273 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -38,6 +38,7 @@ startup:basr	%r13,0			# get base
 startup_continue:
 	basr	%r13,0			# get base
 .LPG1:	GET_IPL_DEVICE
+	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
 	lctl	%c0,%c15,.Lctl-.LPG1(%r13) # load control registers
 	l	%r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
 					# move IPL device to lowcore
@@ -274,6 +275,8 @@ startup_continue:
 .Lparmaddr: .long PARMAREA
 .Lsccbaddr: .long .Lsccb
 	.org	0x12000
+.globl s390_readinfo_sccb
+s390_readinfo_sccb:
 .Lsccb:
 	.hword	0x1000			# length, one page
 	.byte	0x00,0x00,0x00
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3e0341acd04e4..c2005101fee1b 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -41,6 +41,7 @@ startup_continue:
         srl   %r13,1
 	GET_IPL_DEVICE
         lhi   %r1,1                      # mode 1 = esame
+	mvi   __LC_AR_MODE_ID,1		 # set esame flag
         slr   %r0,%r0                    # set cpuid to zero
         sigp  %r1,%r0,0x12               # switch to esame mode
 	sam64				 # switch to 64 bit mode
@@ -269,6 +270,8 @@ startup_continue:
 	.quad	PARMAREA
 
 	.org	0x12000
+.globl s390_readinfo_sccb
+s390_readinfo_sccb:
 .Lsccb:
 	.hword 0x1000			# length, one page
 	.byte 0x00,0x00,0x00
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
new file mode 100644
index 0000000000000..105ee15a2b316
--- /dev/null
+++ b/arch/s390/kernel/ipl.c
@@ -0,0 +1,942 @@
+/*
+ *  arch/s390/kernel/ipl.c
+ *    ipl/reipl/dump support for Linux on s390.
+ *
+ *    Copyright (C) IBM Corp. 2005,2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>
+ *		 Volker Sameske <sameske@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
+#include <asm/cpcmd.h>
+#include <asm/cio.h>
+
+#define IPL_PARM_BLOCK_VERSION 0
+
+enum ipl_type {
+	IPL_TYPE_NONE	 = 1,
+	IPL_TYPE_UNKNOWN = 2,
+	IPL_TYPE_CCW	 = 4,
+	IPL_TYPE_FCP	 = 8,
+};
+
+#define IPL_NONE_STR	 "none"
+#define IPL_UNKNOWN_STR  "unknown"
+#define IPL_CCW_STR	 "ccw"
+#define IPL_FCP_STR	 "fcp"
+
+static char *ipl_type_str(enum ipl_type type)
+{
+	switch (type) {
+	case IPL_TYPE_NONE:
+		return IPL_NONE_STR;
+	case IPL_TYPE_CCW:
+		return IPL_CCW_STR;
+	case IPL_TYPE_FCP:
+		return IPL_FCP_STR;
+	case IPL_TYPE_UNKNOWN:
+	default:
+		return IPL_UNKNOWN_STR;
+	}
+}
+
+enum ipl_method {
+	IPL_METHOD_NONE,
+	IPL_METHOD_CCW_CIO,
+	IPL_METHOD_CCW_DIAG,
+	IPL_METHOD_CCW_VM,
+	IPL_METHOD_FCP_RO_DIAG,
+	IPL_METHOD_FCP_RW_DIAG,
+	IPL_METHOD_FCP_RO_VM,
+};
+
+enum shutdown_action {
+	SHUTDOWN_REIPL,
+	SHUTDOWN_DUMP,
+	SHUTDOWN_STOP,
+};
+
+#define SHUTDOWN_REIPL_STR "reipl"
+#define SHUTDOWN_DUMP_STR  "dump"
+#define SHUTDOWN_STOP_STR  "stop"
+
+static char *shutdown_action_str(enum shutdown_action action)
+{
+	switch (action) {
+	case SHUTDOWN_REIPL:
+		return SHUTDOWN_REIPL_STR;
+	case SHUTDOWN_DUMP:
+		return SHUTDOWN_DUMP_STR;
+	case SHUTDOWN_STOP:
+		return SHUTDOWN_STOP_STR;
+	default:
+		BUG();
+	}
+}
+
+enum diag308_subcode  {
+	DIAG308_IPL   = 3,
+	DIAG308_DUMP  = 4,
+	DIAG308_SET   = 5,
+	DIAG308_STORE = 6,
+};
+
+enum diag308_ipl_type {
+	DIAG308_IPL_TYPE_FCP = 0,
+	DIAG308_IPL_TYPE_CCW = 2,
+};
+
+enum diag308_opt {
+	DIAG308_IPL_OPT_IPL  = 0x10,
+	DIAG308_IPL_OPT_DUMP = 0x20,
+};
+
+enum diag308_rc {
+	DIAG308_RC_OK = 1,
+};
+
+static int diag308_set_works = 0;
+
+static int reipl_capabilities = IPL_TYPE_UNKNOWN;
+static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
+static enum ipl_method reipl_method = IPL_METHOD_NONE;
+static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_ccw;
+
+static int dump_capabilities = IPL_TYPE_NONE;
+static enum ipl_type dump_type = IPL_TYPE_NONE;
+static enum ipl_method dump_method = IPL_METHOD_NONE;
+static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_ccw;
+
+static enum shutdown_action on_panic_action = SHUTDOWN_STOP;
+
+static int diag308(unsigned long subcode, void *addr)
+{
+	register unsigned long _addr asm("0") = (unsigned long)addr;
+	register unsigned long _rc asm("1") = 0;
+
+	asm volatile (
+		"   diag %0,%2,0x308\n"
+		"0: \n"
+		".section __ex_table,\"a\"\n"
+#ifdef CONFIG_64BIT
+		"   .align 8\n"
+		"   .quad 0b, 0b\n"
+#else
+		"   .align 4\n"
+		"   .long 0b, 0b\n"
+#endif
+		".previous\n"
+		: "+d" (_addr), "+d" (_rc)
+		: "d" (subcode) : "cc", "memory" );
+
+	return _rc;
+}
+
+/* SYSFS */
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
+static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys,	\
+		char *page)						\
+{									\
+	return sprintf(page, _format, _value);				\
+}									\
+static struct subsys_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL);
+
+#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)	\
+static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys,	\
+		char *page)						\
+{									\
+	return sprintf(page, _fmt_out,					\
+			(unsigned long long) _value);			\
+}									\
+static ssize_t sys_##_prefix##_##_name##_store(struct subsystem *subsys,\
+		const char *buf, size_t len)				\
+{									\
+	unsigned long long value;					\
+	if (sscanf(buf, _fmt_in, &value) != 1)				\
+		return -EINVAL;						\
+	_value = value;							\
+	return len;							\
+}									\
+static struct subsys_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
+			sys_##_prefix##_##_name##_show,			\
+			sys_##_prefix##_##_name##_store);
+
+static void make_attrs_ro(struct attribute **attrs)
+{
+	while (*attrs) {
+		(*attrs)->mode = S_IRUGO;
+		attrs++;
+	}
+}
+
+/*
+ * ipl section
+ */
+
+static enum ipl_type ipl_get_type(void)
+{
+	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
+
+	if (!IPL_DEVNO_VALID)
+		return IPL_TYPE_UNKNOWN;
+	if (!IPL_PARMBLOCK_VALID)
+		return IPL_TYPE_CCW;
+	if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION)
+		return IPL_TYPE_UNKNOWN;
+	if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP)
+		return IPL_TYPE_UNKNOWN;
+	return IPL_TYPE_FCP;
+}
+
+static ssize_t ipl_type_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(ipl_get_type()));
+}
+
+static struct subsys_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+
+static ssize_t sys_ipl_device_show(struct subsystem *subsys, char *page)
+{
+	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
+
+	switch (ipl_get_type()) {
+	case IPL_TYPE_CCW:
+		return sprintf(page, "0.0.%04x\n", ipl_devno);
+	case IPL_TYPE_FCP:
+		return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
+	default:
+		return 0;
+	}
+}
+
+static struct subsys_attribute sys_ipl_device_attr =
+	__ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+
+static ssize_t ipl_parameter_read(struct kobject *kobj, char *buf, loff_t off,
+				  size_t count)
+{
+	unsigned int size = IPL_PARMBLOCK_SIZE;
+
+	if (off > size)
+		return 0;
+	if (off + count > size)
+		count = size - off;
+	memcpy(buf, (void *)IPL_PARMBLOCK_START + off, count);
+	return count;
+}
+
+static struct bin_attribute ipl_parameter_attr = {
+	.attr = {
+		.name = "binary_parameter",
+		.mode = S_IRUGO,
+		.owner = THIS_MODULE,
+	},
+	.size = PAGE_SIZE,
+	.read = &ipl_parameter_read,
+};
+
+static ssize_t ipl_scp_data_read(struct kobject *kobj, char *buf, loff_t off,
+	size_t count)
+{
+	unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
+	void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
+
+	if (off > size)
+		return 0;
+	if (off + count > size)
+		count = size - off;
+	memcpy(buf, scp_data + off, count);
+	return count;
+}
+
+static struct bin_attribute ipl_scp_data_attr = {
+	.attr = {
+		.name = "scp_data",
+		.mode = S_IRUGO,
+		.owner = THIS_MODULE,
+	},
+	.size = PAGE_SIZE,
+	.read = &ipl_scp_data_read,
+};
+
+/* FCP ipl device attributes */
+
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
+
+static struct attribute *ipl_fcp_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_fcp_wwpn_attr.attr,
+	&sys_ipl_fcp_lun_attr.attr,
+	&sys_ipl_fcp_bootprog_attr.attr,
+	&sys_ipl_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_fcp_attr_group = {
+	.attrs = ipl_fcp_attrs,
+};
+
+/* CCW ipl device attributes */
+
+static struct attribute *ipl_ccw_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_ccw_attr_group = {
+	.attrs = ipl_ccw_attrs,
+};
+
+/* UNKNOWN ipl device attributes */
+
+static struct attribute *ipl_unknown_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_unknown_attr_group = {
+	.attrs = ipl_unknown_attrs,
+};
+
+static decl_subsys(ipl, NULL, NULL);
+
+/*
+ * reipl section
+ */
+
+/* FCP reipl device attributes */
+
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+		   reipl_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
+		   reipl_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
+		   reipl_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   reipl_block_fcp->ipl_info.fcp.devno);
+
+static struct attribute *reipl_fcp_attrs[] = {
+	&sys_reipl_fcp_device_attr.attr,
+	&sys_reipl_fcp_wwpn_attr.attr,
+	&sys_reipl_fcp_lun_attr.attr,
+	&sys_reipl_fcp_bootprog_attr.attr,
+	&sys_reipl_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_fcp_attr_group = {
+	.name  = IPL_FCP_STR,
+	.attrs = reipl_fcp_attrs,
+};
+
+/* CCW reipl device attributes */
+
+DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+	reipl_block_ccw->ipl_info.ccw.devno);
+
+static struct attribute *reipl_ccw_attrs[] = {
+	&sys_reipl_ccw_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_ccw_attr_group = {
+	.name  = IPL_CCW_STR,
+	.attrs = reipl_ccw_attrs,
+};
+
+/* reipl type */
+
+static int reipl_set_type(enum ipl_type type)
+{
+	if (!(reipl_capabilities & type))
+		return -EINVAL;
+
+	switch(type) {
+	case IPL_TYPE_CCW:
+		if (MACHINE_IS_VM)
+			reipl_method = IPL_METHOD_CCW_VM;
+		else
+			reipl_method = IPL_METHOD_CCW_CIO;
+		break;
+	case IPL_TYPE_FCP:
+		if (diag308_set_works)
+			reipl_method = IPL_METHOD_FCP_RW_DIAG;
+		else if (MACHINE_IS_VM)
+			reipl_method = IPL_METHOD_FCP_RO_VM;
+		else
+			reipl_method = IPL_METHOD_FCP_RO_DIAG;
+		break;
+	default:
+		reipl_method = IPL_METHOD_NONE;
+	}
+	reipl_type = type;
+	return 0;
+}
+
+static ssize_t reipl_type_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+}
+
+static ssize_t reipl_type_store(struct subsystem *subsys, const char *buf,
+				size_t len)
+{
+	int rc = -EINVAL;
+
+	if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_CCW);
+	else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_FCP);
+	return (rc != 0) ? rc : len;
+}
+
+static struct subsys_attribute reipl_type_attr =
+		__ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
+
+static decl_subsys(reipl, NULL, NULL);
+
+/*
+ * dump section
+ */
+
+/* FCP dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+		   dump_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n",
+		   dump_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+		   dump_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
+		   dump_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   dump_block_fcp->ipl_info.fcp.devno);
+
+static struct attribute *dump_fcp_attrs[] = {
+	&sys_dump_fcp_device_attr.attr,
+	&sys_dump_fcp_wwpn_attr.attr,
+	&sys_dump_fcp_lun_attr.attr,
+	&sys_dump_fcp_bootprog_attr.attr,
+	&sys_dump_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_fcp_attr_group = {
+	.name  = IPL_FCP_STR,
+	.attrs = dump_fcp_attrs,
+};
+
+/* CCW dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   dump_block_ccw->ipl_info.ccw.devno);
+
+static struct attribute *dump_ccw_attrs[] = {
+	&sys_dump_ccw_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_ccw_attr_group = {
+	.name  = IPL_CCW_STR,
+	.attrs = dump_ccw_attrs,
+};
+
+/* dump type */
+
+static int dump_set_type(enum ipl_type type)
+{
+	if (!(dump_capabilities & type))
+		return -EINVAL;
+	switch(type) {
+	case IPL_TYPE_CCW:
+		if (MACHINE_IS_VM)
+			dump_method = IPL_METHOD_CCW_VM;
+		else
+			dump_method = IPL_METHOD_CCW_CIO;
+		break;
+	case IPL_TYPE_FCP:
+		dump_method = IPL_METHOD_FCP_RW_DIAG;
+		break;
+	default:
+		dump_method = IPL_METHOD_NONE;
+	}
+	dump_type = type;
+	return 0;
+}
+
+static ssize_t dump_type_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(dump_type));
+}
+
+static ssize_t dump_type_store(struct subsystem *subsys, const char *buf,
+			       size_t len)
+{
+	int rc = -EINVAL;
+
+	if (strncmp(buf, IPL_NONE_STR, strlen(IPL_NONE_STR)) == 0)
+		rc = dump_set_type(IPL_TYPE_NONE);
+	else if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
+		rc = dump_set_type(IPL_TYPE_CCW);
+	else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
+		rc = dump_set_type(IPL_TYPE_FCP);
+	return (rc != 0) ? rc : len;
+}
+
+static struct subsys_attribute dump_type_attr =
+		__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+
+static decl_subsys(dump, NULL, NULL);
+
+#ifdef CONFIG_SMP
+static void dump_smp_stop_all(void)
+{
+	int cpu;
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+		while (signal_processor(cpu, sigp_stop) == sigp_busy)
+			udelay(10);
+	}
+	preempt_enable();
+}
+#else
+#define dump_smp_stop_all() do { } while (0)
+#endif
+
+/*
+ * Shutdown actions section
+ */
+
+static decl_subsys(shutdown_actions, NULL, NULL);
+
+/* on panic */
+
+static ssize_t on_panic_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", shutdown_action_str(on_panic_action));
+}
+
+static ssize_t on_panic_store(struct subsystem *subsys, const char *buf,
+			      size_t len)
+{
+	if (strncmp(buf, SHUTDOWN_REIPL_STR, strlen(SHUTDOWN_REIPL_STR)) == 0)
+		on_panic_action = SHUTDOWN_REIPL;
+	else if (strncmp(buf, SHUTDOWN_DUMP_STR,
+			 strlen(SHUTDOWN_DUMP_STR)) == 0)
+		on_panic_action = SHUTDOWN_DUMP;
+	else if (strncmp(buf, SHUTDOWN_STOP_STR,
+			 strlen(SHUTDOWN_STOP_STR)) == 0)
+		on_panic_action = SHUTDOWN_STOP;
+	else
+		return -EINVAL;
+
+	return len;
+}
+
+static struct subsys_attribute on_panic_attr =
+		__ATTR(on_panic, 0644, on_panic_show, on_panic_store);
+
+static void print_fcp_block(struct ipl_parameter_block *fcp_block)
+{
+	printk(KERN_EMERG "wwpn:      %016llx\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.wwpn);
+	printk(KERN_EMERG "lun:       %016llx\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.lun);
+	printk(KERN_EMERG "bootprog:  %lld\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.bootprog);
+	printk(KERN_EMERG "br_lba:    %lld\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.br_lba);
+	printk(KERN_EMERG "device:    %llx\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.devno);
+	printk(KERN_EMERG "opt:       %x\n", fcp_block->ipl_info.fcp.opt);
+}
+
+void do_reipl(void)
+{
+	struct ccw_dev_id devid;
+	static char buf[100];
+
+	switch (reipl_type) {
+	case IPL_TYPE_CCW:
+		printk(KERN_EMERG "reboot on ccw device: 0.0.%04x\n",
+			reipl_block_ccw->ipl_info.ccw.devno);
+		break;
+	case IPL_TYPE_FCP:
+		printk(KERN_EMERG "reboot on fcp device:\n");
+		print_fcp_block(reipl_block_fcp);
+		break;
+	default:
+		break;
+	}
+
+	switch (reipl_method) {
+	case IPL_METHOD_CCW_CIO:
+		devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
+		devid.ssid  = 0;
+		reipl_ccw_dev(&devid);
+		break;
+	case IPL_METHOD_CCW_VM:
+		sprintf(buf, "IPL %X", reipl_block_ccw->ipl_info.ccw.devno);
+		cpcmd(buf, NULL, 0, NULL);
+		break;
+	case IPL_METHOD_CCW_DIAG:
+		diag308(DIAG308_SET, reipl_block_ccw);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case IPL_METHOD_FCP_RW_DIAG:
+		diag308(DIAG308_SET, reipl_block_fcp);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case IPL_METHOD_FCP_RO_DIAG:
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case IPL_METHOD_FCP_RO_VM:
+		cpcmd("IPL", NULL, 0, NULL);
+		break;
+	case IPL_METHOD_NONE:
+	default:
+		if (MACHINE_IS_VM)
+			cpcmd("IPL", NULL, 0, NULL);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	}
+	panic("reipl failed!\n");
+}
+
+static void do_dump(void)
+{
+	struct ccw_dev_id devid;
+	static char buf[100];
+
+	switch (dump_type) {
+	case IPL_TYPE_CCW:
+		printk(KERN_EMERG "Automatic dump on ccw device: 0.0.%04x\n",
+		       dump_block_ccw->ipl_info.ccw.devno);
+		break;
+	case IPL_TYPE_FCP:
+		printk(KERN_EMERG "Automatic dump on fcp device:\n");
+		print_fcp_block(dump_block_fcp);
+		break;
+	default:
+		return;
+	}
+
+	switch (dump_method) {
+	case IPL_METHOD_CCW_CIO:
+		dump_smp_stop_all();
+		devid.devno = dump_block_ccw->ipl_info.ccw.devno;
+		devid.ssid  = 0;
+		reipl_ccw_dev(&devid);
+		break;
+	case IPL_METHOD_CCW_VM:
+		dump_smp_stop_all();
+		sprintf(buf, "STORE STATUS");
+		cpcmd(buf, NULL, 0, NULL);
+		sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
+		cpcmd(buf, NULL, 0, NULL);
+		break;
+	case IPL_METHOD_CCW_DIAG:
+		diag308(DIAG308_SET, dump_block_ccw);
+		diag308(DIAG308_DUMP, NULL);
+		break;
+	case IPL_METHOD_FCP_RW_DIAG:
+		diag308(DIAG308_SET, dump_block_fcp);
+		diag308(DIAG308_DUMP, NULL);
+		break;
+	case IPL_METHOD_NONE:
+	default:
+		return;
+	}
+	printk(KERN_EMERG "Dump failed!\n");
+}
+
+/* init functions */
+
+static int __init ipl_register_fcp_files(void)
+{
+	int rc;
+
+	rc = sysfs_create_group(&ipl_subsys.kset.kobj,
+				&ipl_fcp_attr_group);
+	if (rc)
+		goto out;
+	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
+				   &ipl_parameter_attr);
+	if (rc)
+		goto out_ipl_parm;
+	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
+				   &ipl_scp_data_attr);
+	if (!rc)
+		goto out;
+
+	sysfs_remove_bin_file(&ipl_subsys.kset.kobj, &ipl_parameter_attr);
+
+out_ipl_parm:
+	sysfs_remove_group(&ipl_subsys.kset.kobj, &ipl_fcp_attr_group);
+out:
+	return rc;
+}
+
+static int __init ipl_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&ipl_subsys);
+	if (rc)
+		return rc;
+	switch (ipl_get_type()) {
+	case IPL_TYPE_CCW:
+		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
+					&ipl_ccw_attr_group);
+		break;
+	case IPL_TYPE_FCP:
+		rc = ipl_register_fcp_files();
+		break;
+	default:
+		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
+					&ipl_unknown_attr_group);
+		break;
+	}
+	if (rc)
+		firmware_unregister(&ipl_subsys);
+	return rc;
+}
+
+static void __init reipl_probe(void)
+{
+	void *buffer;
+
+	buffer = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!buffer)
+		return;
+	if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK)
+		diag308_set_works = 1;
+	free_page((unsigned long)buffer);
+}
+
+static int __init reipl_ccw_init(void)
+{
+	int rc;
+
+	reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_ccw)
+		return -ENOMEM;
+	rc = sysfs_create_group(&reipl_subsys.kset.kobj, &reipl_ccw_attr_group);
+	if (rc) {
+		free_page((unsigned long)reipl_block_ccw);
+		return rc;
+	}
+	reipl_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+	reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+	reipl_block_ccw->hdr.blk0_len = sizeof(reipl_block_ccw->ipl_info.ccw);
+	reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+	if (ipl_get_type() == IPL_TYPE_CCW)
+		reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+	reipl_capabilities |= IPL_TYPE_CCW;
+	return 0;
+}
+
+static int __init reipl_fcp_init(void)
+{
+	int rc;
+
+	if ((!diag308_set_works) && (ipl_get_type() != IPL_TYPE_FCP))
+		return 0;
+	if ((!diag308_set_works) && (ipl_get_type() == IPL_TYPE_FCP))
+		make_attrs_ro(reipl_fcp_attrs);
+
+	reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_fcp)
+		return -ENOMEM;
+	rc = sysfs_create_group(&reipl_subsys.kset.kobj, &reipl_fcp_attr_group);
+	if (rc) {
+		free_page((unsigned long)reipl_block_fcp);
+		return rc;
+	}
+	if (ipl_get_type() == IPL_TYPE_FCP) {
+		memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
+	} else {
+		reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+		reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_fcp->hdr.blk0_len =
+			sizeof(reipl_block_fcp->ipl_info.fcp);
+		reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+		reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+	}
+	reipl_capabilities |= IPL_TYPE_FCP;
+	return 0;
+}
+
+static int __init reipl_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&reipl_subsys);
+	if (rc)
+		return rc;
+	rc = subsys_create_file(&reipl_subsys, &reipl_type_attr);
+	if (rc) {
+		firmware_unregister(&reipl_subsys);
+		return rc;
+	}
+	rc = reipl_ccw_init();
+	if (rc)
+		return rc;
+	rc = reipl_fcp_init();
+	if (rc)
+		return rc;
+	rc = reipl_set_type(ipl_get_type());
+	if (rc)
+		return rc;
+	return 0;
+}
+
+static int __init dump_ccw_init(void)
+{
+	int rc;
+
+	dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_ccw)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_subsys.kset.kobj, &dump_ccw_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_ccw);
+		return rc;
+	}
+	dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+	dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_ccw->hdr.blk0_len = sizeof(reipl_block_ccw->ipl_info.ccw);
+	dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+	dump_capabilities |= IPL_TYPE_CCW;
+	return 0;
+}
+
+extern char s390_readinfo_sccb[];
+
+static int __init dump_fcp_init(void)
+{
+	int rc;
+
+	if(!(s390_readinfo_sccb[91] & 0x2))
+		return 0; /* LDIPL DUMP is not installed */
+	if (!diag308_set_works)
+		return 0;
+	dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_fcp)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_subsys.kset.kobj, &dump_fcp_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_fcp);
+		return rc;
+	}
+	dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+	dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_fcp->hdr.blk0_len = sizeof(dump_block_fcp->ipl_info.fcp);
+	dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+	dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+	dump_capabilities |= IPL_TYPE_FCP;
+	return 0;
+}
+
+#define SHUTDOWN_ON_PANIC_PRIO 0
+
+static int shutdown_on_panic_notify(struct notifier_block *self,
+				    unsigned long event, void *data)
+{
+	if (on_panic_action == SHUTDOWN_DUMP)
+		do_dump();
+	else if (on_panic_action == SHUTDOWN_REIPL)
+		do_reipl();
+	return NOTIFY_OK;
+}
+
+static struct notifier_block shutdown_on_panic_nb = {
+	.notifier_call = shutdown_on_panic_notify,
+	.priority = SHUTDOWN_ON_PANIC_PRIO
+};
+
+static int __init dump_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&dump_subsys);
+	if (rc)
+		return rc;
+	rc = subsys_create_file(&dump_subsys, &dump_type_attr);
+	if (rc) {
+		firmware_unregister(&dump_subsys);
+		return rc;
+	}
+	rc = dump_ccw_init();
+	if (rc)
+		return rc;
+	rc = dump_fcp_init();
+	if (rc)
+		return rc;
+	dump_set_type(IPL_TYPE_NONE);
+	return 0;
+}
+
+static int __init shutdown_actions_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&shutdown_actions_subsys);
+	if (rc)
+		return rc;
+	rc = subsys_create_file(&shutdown_actions_subsys, &on_panic_attr);
+	if (rc) {
+		firmware_unregister(&shutdown_actions_subsys);
+		return rc;
+	}
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &shutdown_on_panic_nb);
+	return 0;
+}
+
+static int __init s390_ipl_init(void)
+{
+	int rc;
+
+	reipl_probe();
+	rc = ipl_init();
+	if (rc)
+		return rc;
+	rc = reipl_init();
+	if (rc)
+		return rc;
+	rc = dump_init();
+	if (rc)
+		return rc;
+	rc = shutdown_actions_init();
+	if (rc)
+		return rc;
+	return 0;
+}
+
+__initcall(s390_ipl_init);
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 658e5ac484f94..4562cdbce8eb4 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -8,13 +8,30 @@
 
 #include <asm/lowcore.h>
 
-		.globl	do_reipl
-do_reipl:	basr	%r13,0
+		.globl	do_reipl_asm
+do_reipl_asm:	basr	%r13,0
 .Lpg0:		lpsw	.Lnewpsw-.Lpg0(%r13)
-.Lpg1:		lctl	%c6,%c6,.Lall-.Lpg0(%r13)
-                stctl   %c0,%c0,.Lctlsave-.Lpg0(%r13)
-                ni      .Lctlsave-.Lpg0(%r13),0xef
-                lctl    %c0,%c0,.Lctlsave-.Lpg0(%r13)
+
+		# switch off lowcore protection
+
+.Lpg1:		stctl	%c0,%c0,.Lctlsave1-.Lpg0(%r13)
+		stctl	%c0,%c0,.Lctlsave2-.Lpg0(%r13)
+		ni	.Lctlsave1-.Lpg0(%r13),0xef
+		lctl	%c0,%c0,.Lctlsave1-.Lpg0(%r13)
+
+		# do store status of all registers
+
+		stm	%r0,%r15,__LC_GPREGS_SAVE_AREA
+		stctl	%c0,%c15,__LC_CREGS_SAVE_AREA
+		mvc	__LC_CREGS_SAVE_AREA(4),.Lctlsave2-.Lpg0(%r13)
+		stam	%a0,%a15,__LC_AREGS_SAVE_AREA
+		stpx	__LC_PREFIX_SAVE_AREA
+		stckc	.Lclkcmp-.Lpg0(%r13)
+		mvc	__LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13)
+		stpt	__LC_CPU_TIMER_SAVE_AREA
+		st	%r13, __LC_PSW_SAVE_AREA+4
+
+		lctl	%c6,%c6,.Lall-.Lpg0(%r13)
                 lr      %r1,%r2
         	mvc     __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13)
                 stsch   .Lschib-.Lpg0(%r13)                                    
@@ -46,9 +63,11 @@ do_reipl:	basr	%r13,0
 .Ldisab:	st      %r14,.Ldispsw+4-.Lpg0(%r13)
 		lpsw	.Ldispsw-.Lpg0(%r13)
                 .align 	8
+.Lclkcmp:	.quad	0x0000000000000000
 .Lall:		.long	0xff000000
 .Lnull:		.long   0x00000000
-.Lctlsave:      .long   0x00000000
+.Lctlsave1:	.long	0x00000000
+.Lctlsave2:	.long	0x00000000
                 .align 	8
 .Lnewpsw:	.long   0x00080000,0x80000000+.Lpg1
 .Lpcnew:  	.long   0x00080000,0x80000000+.Lecs
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
index 4d090d60f3efd..95bd1e234f632 100644
--- a/arch/s390/kernel/reipl64.S
+++ b/arch/s390/kernel/reipl64.S
@@ -8,13 +8,30 @@
  */
 
 #include <asm/lowcore.h>
-		.globl	do_reipl
-do_reipl:	basr	%r13,0
-.Lpg0:		lpswe   .Lnewpsw-.Lpg0(%r13)
+		.globl	do_reipl_asm
+do_reipl_asm:	basr	%r13,0
+
+		# do store status of all registers
+
+.Lpg0:		stg	%r1,.Lregsave-.Lpg0(%r13)
+		lghi	%r1,0x1000
+		stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1)
+		lg	%r0,.Lregsave-.Lpg0(%r13)
+		stg	%r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1)
+		stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1)
+		stam	%a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1)
+		stpx	__LC_PREFIX_SAVE_AREA-0x1000(%r1)
+		stfpc	__LC_FP_CREG_SAVE_AREA-0x1000(%r1)
+		stckc	.Lclkcmp-.Lpg0(%r13)
+		mvc	__LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13) /* 7 bytes: save area is 0x1331-0x1337 */
+		stpt	__LC_CPU_TIMER_SAVE_AREA-0x1000(%r1)
+		stg	%r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1)
+
+		lpswe	.Lnewpsw-.Lpg0(%r13)
 .Lpg1:		lctlg	%c6,%c6,.Lall-.Lpg0(%r13)
-                stctg   %c0,%c0,.Lctlsave-.Lpg0(%r13)
-                ni      .Lctlsave+4-.Lpg0(%r13),0xef
-                lctlg   %c0,%c0,.Lctlsave-.Lpg0(%r13)
+		stctg	%c0,%c0,.Lregsave-.Lpg0(%r13)
+		ni	.Lregsave+4-.Lpg0(%r13),0xef
+		lctlg	%c0,%c0,.Lregsave-.Lpg0(%r13)
                 lgr     %r1,%r2
         	mvc     __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
                 stsch   .Lschib-.Lpg0(%r13)                                    
@@ -50,8 +67,9 @@ do_reipl:	basr	%r13,0
 		st     %r14,.Ldispsw+12-.Lpg0(%r13)
 		lpswe	.Ldispsw-.Lpg0(%r13)
                 .align 	8
+.Lclkcmp:	.quad	0x0000000000000000
 .Lall:		.quad	0x00000000ff000000
-.Lctlsave:      .quad   0x0000000000000000
+.Lregsave:	.quad	0x0000000000000000
 .Lnull:		.long   0x0000000000000000
                 .align 	16
 /*
@@ -92,5 +110,3 @@ do_reipl:	basr	%r13,0
 		.long	0x00000000,0x00000000
 		.long	0x00000000,0x00000000
 	
-
-	
diff --git a/arch/s390/kernel/reipl_diag.c b/arch/s390/kernel/reipl_diag.c
deleted file mode 100644
index 1f33951ba4390..0000000000000
--- a/arch/s390/kernel/reipl_diag.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * This file contains the implementation of the
- * Linux re-IPL support
- *
- * (C) Copyright IBM Corp. 2005
- *
- * Author(s): Volker Sameske (sameske@de.ibm.com)
- *
- */
-
-#include <linux/kernel.h>
-
-static unsigned int reipl_diag_rc1;
-static unsigned int reipl_diag_rc2;
-
-/*
- * re-IPL the system using the last used IPL parameters
- */
-void reipl_diag(void)
-{
-        asm volatile (
-		"   la   %%r4,0\n"
-		"   la   %%r5,0\n"
-                "   diag %%r4,%2,0x308\n"
-                "0:\n"
-		"   st   %%r4,%0\n"
-		"   st   %%r5,%1\n"
-                ".section __ex_table,\"a\"\n"
-#ifdef CONFIG_64BIT
-                "   .align 8\n"
-                "   .quad 0b, 0b\n"
-#else
-                "   .align 4\n"
-                "   .long 0b, 0b\n"
-#endif
-                ".previous\n"
-                : "=m" (reipl_diag_rc1), "=m" (reipl_diag_rc2)
-		: "d" (3) : "cc", "4", "5" );
-}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 89051e8a5d8dd..f2a9165ca4f86 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -285,16 +285,9 @@ void (*_machine_power_off)(void) = machine_power_off_smp;
 /*
  * Reboot, halt and power_off routines for non SMP.
  */
-extern void reipl(unsigned long devno);
-extern void reipl_diag(void);
 static void do_machine_restart_nonsmp(char * __unused)
 {
-	reipl_diag();
-
-	if (MACHINE_IS_VM)
-		cpcmd ("IPL", NULL, 0, NULL);
-	else
-		reipl (0x10000 | S390_lowcore.ipl_device);
+	do_reipl();
 }
 
 static void do_machine_halt_nonsmp(void)
@@ -755,214 +748,3 @@ struct seq_operations cpuinfo_op = {
 	.show	= show_cpuinfo,
 };
 
-#define DEFINE_IPL_ATTR(_name, _format, _value)			\
-static ssize_t ipl_##_name##_show(struct subsystem *subsys,	\
-		char *page)					\
-{								\
-	return sprintf(page, _format, _value);			\
-}								\
-static struct subsys_attribute ipl_##_name##_attr =		\
-	__ATTR(_name, S_IRUGO, ipl_##_name##_show, NULL);
-
-DEFINE_IPL_ATTR(wwpn, "0x%016llx\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.wwpn);
-DEFINE_IPL_ATTR(lun, "0x%016llx\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.lun);
-DEFINE_IPL_ATTR(bootprog, "%lld\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.bootprog);
-DEFINE_IPL_ATTR(br_lba, "%lld\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.br_lba);
-
-enum ipl_type_type {
-	ipl_type_unknown,
-	ipl_type_ccw,
-	ipl_type_fcp,
-};
-
-static enum ipl_type_type
-get_ipl_type(void)
-{
-	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
-	if (!IPL_DEVNO_VALID)
-		return ipl_type_unknown;
-	if (!IPL_PARMBLOCK_VALID)
-		return ipl_type_ccw;
-	if (ipl->hdr.header.version > IPL_MAX_SUPPORTED_VERSION)
-		return ipl_type_unknown;
-	if (ipl->fcp.pbt != IPL_TYPE_FCP)
-		return ipl_type_unknown;
-	return ipl_type_fcp;
-}
-
-static ssize_t
-ipl_type_show(struct subsystem *subsys, char *page)
-{
-	switch (get_ipl_type()) {
-	case ipl_type_ccw:
-		return sprintf(page, "ccw\n");
-	case ipl_type_fcp:
-		return sprintf(page, "fcp\n");
-	default:
-		return sprintf(page, "unknown\n");
-	}
-}
-
-static struct subsys_attribute ipl_type_attr = __ATTR_RO(ipl_type);
-
-static ssize_t
-ipl_device_show(struct subsystem *subsys, char *page)
-{
-	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
-	switch (get_ipl_type()) {
-	case ipl_type_ccw:
-		return sprintf(page, "0.0.%04x\n", ipl_devno);
-	case ipl_type_fcp:
-		return sprintf(page, "0.0.%04x\n", ipl->fcp.devno);
-	default:
-		return 0;
-	}
-}
-
-static struct subsys_attribute ipl_device_attr =
-	__ATTR(device, S_IRUGO, ipl_device_show, NULL);
-
-static struct attribute *ipl_fcp_attrs[] = {
-	&ipl_type_attr.attr,
-	&ipl_device_attr.attr,
-	&ipl_wwpn_attr.attr,
-	&ipl_lun_attr.attr,
-	&ipl_bootprog_attr.attr,
-	&ipl_br_lba_attr.attr,
-	NULL,
-};
-
-static struct attribute_group ipl_fcp_attr_group = {
-	.attrs = ipl_fcp_attrs,
-};
-
-static struct attribute *ipl_ccw_attrs[] = {
-	&ipl_type_attr.attr,
-	&ipl_device_attr.attr,
-	NULL,
-};
-
-static struct attribute_group ipl_ccw_attr_group = {
-	.attrs = ipl_ccw_attrs,
-};
-
-static struct attribute *ipl_unknown_attrs[] = {
-	&ipl_type_attr.attr,
-	NULL,
-};
-
-static struct attribute_group ipl_unknown_attr_group = {
-	.attrs = ipl_unknown_attrs,
-};
-
-static ssize_t
-ipl_parameter_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
-{
-	unsigned int size = IPL_PARMBLOCK_SIZE;
-
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
-
-	memcpy(buf, (void *) IPL_PARMBLOCK_START + off, count);
-	return count;
-}
-
-static struct bin_attribute ipl_parameter_attr = {
-	.attr = {
-		.name = "binary_parameter",
-		.mode = S_IRUGO,
-		.owner = THIS_MODULE,
-	},
-	.size = PAGE_SIZE,
-	.read = &ipl_parameter_read,
-};
-
-static ssize_t
-ipl_scp_data_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
-{
-	unsigned int size =  IPL_PARMBLOCK_START->fcp.scp_data_len;
-	void *scp_data = &IPL_PARMBLOCK_START->fcp.scp_data;
-
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
-
-	memcpy(buf, scp_data + off, count);
-	return count;
-}
-
-static struct bin_attribute ipl_scp_data_attr = {
-	.attr = {
-		.name = "scp_data",
-		.mode = S_IRUGO,
-		.owner = THIS_MODULE,
-	},
-	.size = PAGE_SIZE,
-	.read = &ipl_scp_data_read,
-};
-
-static decl_subsys(ipl, NULL, NULL);
-
-static int ipl_register_fcp_files(void)
-{
-	int rc;
-
-	rc = sysfs_create_group(&ipl_subsys.kset.kobj,
-				&ipl_fcp_attr_group);
-	if (rc)
-		goto out;
-	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
-				   &ipl_parameter_attr);
-	if (rc)
-		goto out_ipl_parm;
-	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
-				   &ipl_scp_data_attr);
-	if (!rc)
-		goto out;
-
-	sysfs_remove_bin_file(&ipl_subsys.kset.kobj, &ipl_parameter_attr);
-
-out_ipl_parm:
-	sysfs_remove_group(&ipl_subsys.kset.kobj, &ipl_fcp_attr_group);
-out:
-	return rc;
-}
-
-static int __init
-ipl_device_sysfs_register(void) {
-	int rc;
-
-	rc = firmware_register(&ipl_subsys);
-	if (rc)
-		goto out;
-
-	switch (get_ipl_type()) {
-	case ipl_type_ccw:
-		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
-					&ipl_ccw_attr_group);
-		break;
-	case ipl_type_fcp:
-		rc = ipl_register_fcp_files();
-		break;
-	default:
-		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
-					&ipl_unknown_attr_group);
-		break;
-	}
-
-	if (rc)
-		firmware_unregister(&ipl_subsys);
-out:
-	return rc;
-}
-
-__initcall(ipl_device_sysfs_register);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8e03219eea760..b2e6f4c8d382a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,9 +59,6 @@ static struct task_struct *current_set[NR_CPUS];
 extern char vmhalt_cmd[];
 extern char vmpoff_cmd[];
 
-extern void reipl(unsigned long devno);
-extern void reipl_diag(void);
-
 static void smp_ext_bitcall(int, ec_bit_sig);
 static void smp_ext_bitcall_others(ec_bit_sig);
 
@@ -279,12 +276,7 @@ static void do_machine_restart(void * __unused)
 	 * interrupted by an external interrupt and s390irq
 	 * locks are always held disabled).
 	 */
-	reipl_diag();
-
-	if (MACHINE_IS_VM)
-		cpcmd ("IPL", NULL, 0, NULL);
-	else
-		reipl (0x10000 | S390_lowcore.ipl_device);
+	do_reipl();
 }
 
 void machine_restart_smp(char * __unused) 
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 89320c1ad8251..050963f158025 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -841,14 +841,26 @@ __clear_subchannel_easy(struct subchannel_id schid)
 	return -EBUSY;
 }
 
-extern void do_reipl(unsigned long devno);
-static int
-__shutdown_subchannel_easy(struct subchannel_id schid, void *data)
+struct sch_match_id {
+	struct subchannel_id schid;	/* out: id of the subchannel that matched */
+	struct ccw_dev_id devid;	/* in: device (ssid + devno) to look for */
+	int rc;				/* set to 0 when a match is recorded */
+};
+
+static int __shutdown_subchannel_easy_and_match(struct subchannel_id schid,
+	void *data)
 {
 	struct schib schib;
+	struct sch_match_id *match_id = data;
 
 	if (stsch_err(schid, &schib))
 		return -ENXIO;
+	if (match_id && schib.pmcw.dnv &&
+		(schib.pmcw.dev == match_id->devid.devno) &&
+		(schid.ssid == match_id->devid.ssid)) {
+		match_id->schid = schid;
+		match_id->rc = 0;
+	}
 	if (!schib.pmcw.ena)
 		return 0;
 	switch(__disable_subchannel_easy(schid, &schib)) {
@@ -864,18 +876,36 @@ __shutdown_subchannel_easy(struct subchannel_id schid, void *data)
 	return 0;
 }
 
-void
-clear_all_subchannels(void)
+static int clear_all_subchannels_and_match(struct ccw_dev_id *devid,
+	struct subchannel_id *schid)
 {
+	struct sch_match_id match_id;
+
+	match_id.devid = *devid;
+	match_id.rc = -ENODEV;
 	local_irq_disable();
-	for_each_subchannel(__shutdown_subchannel_easy, NULL);
+	for_each_subchannel(__shutdown_subchannel_easy_and_match, &match_id);
+	if (match_id.rc == 0)
+		*schid = match_id.schid;
+	return match_id.rc;
 }
 
+
+void clear_all_subchannels(void)
+{
+	local_irq_disable();
+	for_each_subchannel(__shutdown_subchannel_easy_and_match, NULL);
+}
+
+extern void do_reipl_asm(__u32 schid);
+
 /* Make sure all subchannels are quiet before we re-ipl an lpar. */
-void
-reipl(unsigned long devno)
+void reipl_ccw_dev(struct ccw_dev_id *devid)
 {
-	clear_all_subchannels();
+	struct subchannel_id schid;
+
+	if (clear_all_subchannels_and_match(devid, &schid))
+		panic("IPL Device not found\n");
 	cio_reset_channel_paths();
-	do_reipl(devno);
+	do_reipl_asm(*((__u32*)&schid));
 }
diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h
index 28fdd6e2b8bad..da063cd5f0a00 100644
--- a/include/asm-s390/cio.h
+++ b/include/asm-s390/cio.h
@@ -270,6 +270,11 @@ struct diag210 {
 	__u32 vrdccrft : 8;    /* real device feature (output) */
 } __attribute__ ((packed,aligned(4)));
 
+struct ccw_dev_id {
+	u8 ssid;	/* compared with subchannel_id.ssid when matching */
+	u16 devno;	/* compared with schib.pmcw.dev when matching */
+};
+
 extern int diag210(struct diag210 *addr);
 
 extern void wait_cons_dev(void);
@@ -280,6 +285,8 @@ extern void cio_reset_channel_paths(void);
 
 extern void css_schedule_reprobe(void);
 
+extern void reipl_ccw_dev(struct ccw_dev_id *id);
+
 #endif
 
 #endif
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 596c8b1721049..2e3d4cca5e211 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -47,6 +47,7 @@
 #define __LC_PER_ATMID			0x096
 #define __LC_PER_ADDRESS		0x098
 #define __LC_PER_ACCESS_ID		0x0A1
+#define __LC_AR_MODE_ID			0x0A3
 
 #define __LC_SUBCHANNEL_ID              0x0B8
 #define __LC_SUBCHANNEL_NR              0x0BA
@@ -106,18 +107,28 @@
 #define __LC_INT_CLOCK			0xDE8
 #endif /* __s390x__ */
 
-#define __LC_PANIC_MAGIC                0xE00
 
+#define __LC_PANIC_MAGIC		0xE00
 #ifndef __s390x__
 #define __LC_PFAULT_INTPARM             0x080
 #define __LC_CPU_TIMER_SAVE_AREA        0x0D8
+#define __LC_CLOCK_COMP_SAVE_AREA	0x0E0
+#define __LC_PSW_SAVE_AREA		0x100
+#define __LC_PREFIX_SAVE_AREA		0x108
 #define __LC_AREGS_SAVE_AREA            0x120
+#define __LC_FPREGS_SAVE_AREA		0x160
 #define __LC_GPREGS_SAVE_AREA           0x180
 #define __LC_CREGS_SAVE_AREA            0x1C0
 #else /* __s390x__ */
 #define __LC_PFAULT_INTPARM             0x11B8
+#define __LC_FPREGS_SAVE_AREA		0x1200
 #define __LC_GPREGS_SAVE_AREA           0x1280
+#define __LC_PSW_SAVE_AREA		0x1300
+#define __LC_PREFIX_SAVE_AREA		0x1318
+#define __LC_FP_CREG_SAVE_AREA		0x131C
+#define __LC_TODREG_SAVE_AREA		0x1324
 #define __LC_CPU_TIMER_SAVE_AREA        0x1328
+#define __LC_CLOCK_COMP_SAVE_AREA	0x1331
 #define __LC_AREGS_SAVE_AREA            0x1340
 #define __LC_CREGS_SAVE_AREA            0x1380
 #endif /* __s390x__ */
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index 02c96d57f0cf2..4a1126d8439a0 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -68,39 +68,59 @@ extern unsigned int console_irq;
 #define SET_CONSOLE_3215	do { console_mode = 2; } while (0)
 #define SET_CONSOLE_3270	do { console_mode = 3; } while (0)
 
-struct ipl_list_header {
-	u32 length;
-	u8  reserved[3];
+
+struct ipl_list_hdr {
+	u32 len;
+	u8  reserved1[3];
 	u8  version;
+	u32 blk0_len;
+	u8  pbt;
+	u8  flags;
+	u16 reserved2;
 } __attribute__((packed));
 
 struct ipl_block_fcp {
-	u32 length;
-	u8  pbt;
-	u8  reserved1[322-1];
+	u8  reserved1[313-1];
+	u8  opt;
+	u8  reserved2[3];
+	u16 reserved3;
 	u16 devno;
-	u8  reserved2[4];
+	u8  reserved4[4];
 	u64 wwpn;
 	u64 lun;
 	u32 bootprog;
-	u8  reserved3[12];
+	u8  reserved5[12];
 	u64 br_lba;
 	u32 scp_data_len;
-	u8  reserved4[260];
+	u8  reserved6[260];
 	u8  scp_data[];
 } __attribute__((packed));
 
+struct ipl_block_ccw {
+	u8  load_param[8];
+	u8  reserved1[84];
+	u8  reserved2[2];
+	u16 devno;
+	u8  vm_flags;
+	u8  reserved3[3];
+	u32 vm_parm_len;
+} __attribute__((packed));
+
 struct ipl_parameter_block {
+	struct ipl_list_hdr hdr;
 	union {
-		u32 length;
-		struct ipl_list_header header;
-	} hdr;
-	struct ipl_block_fcp fcp;
+		struct ipl_block_fcp fcp;
+		struct ipl_block_ccw ccw;
+	} ipl_info;
 } __attribute__((packed));
 
-#define IPL_MAX_SUPPORTED_VERSION (0)
+#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
+			      sizeof(struct ipl_block_fcp))
 
-#define IPL_TYPE_FCP (0)
+#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
+			      sizeof(struct ipl_block_ccw))
+
+#define IPL_MAX_SUPPORTED_VERSION (0)
 
 /*
  * IPL validity flags and parameters as detected in head.S
@@ -108,12 +128,14 @@ struct ipl_parameter_block {
 extern u32 ipl_parameter_flags;
 extern u16 ipl_devno;
 
+void do_reipl(void);
+
 #define IPL_DEVNO_VALID		(ipl_parameter_flags & 1)
 #define IPL_PARMBLOCK_VALID	(ipl_parameter_flags & 2)
 
 #define IPL_PARMBLOCK_START	((struct ipl_parameter_block *) \
 				 IPL_PARMBLOCK_ORIGIN)
-#define IPL_PARMBLOCK_SIZE	(IPL_PARMBLOCK_START->hdr.length)
+#define IPL_PARMBLOCK_SIZE	(IPL_PARMBLOCK_START->hdr.len)
 
 #else /* __ASSEMBLY__ */
 
-- 
GitLab


From 39b083fe1c3c7b88939f6fa1b0b96e579f12e96f Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:51 +0200
Subject: [PATCH 0233/1063] [S390] empty function defines.

Use do { } while (0) constructs instead of empty defines to avoid
subtle compile bugs.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/qdio.h      | 16 ++++++++--------
 drivers/s390/scsi/zfcp_def.h |  8 ++++----
 include/asm-s390/dma.h       |  2 +-
 include/asm-s390/io.h        |  2 +-
 include/asm-s390/smp.h       |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index ceb3ab31ee088..124569362f022 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -191,49 +191,49 @@ enum qdio_irq_states {
 #if QDIO_VERBOSE_LEVEL>8
 #define QDIO_PRINT_STUPID(x...) printk( KERN_DEBUG QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_STUPID(x...)
+#define QDIO_PRINT_STUPID(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>7
 #define QDIO_PRINT_ALL(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_ALL(x...)
+#define QDIO_PRINT_ALL(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>6
 #define QDIO_PRINT_INFO(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_INFO(x...)
+#define QDIO_PRINT_INFO(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>5
 #define QDIO_PRINT_WARN(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_WARN(x...)
+#define QDIO_PRINT_WARN(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>4
 #define QDIO_PRINT_ERR(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_ERR(x...)
+#define QDIO_PRINT_ERR(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>3
 #define QDIO_PRINT_CRIT(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_CRIT(x...)
+#define QDIO_PRINT_CRIT(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>2
 #define QDIO_PRINT_ALERT(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_ALERT(x...)
+#define QDIO_PRINT_ALERT(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>1
 #define QDIO_PRINT_EMERG(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_EMERG(x...)
+#define QDIO_PRINT_EMERG(x...) do { } while (0)
 #endif
 
 #define HEXDUMP16(importance,header,ptr) \
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 94d1b74db356e..7c84b3d4bd94f 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -543,7 +543,7 @@ do { \
 } while (0)
 	
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_NORMAL
-# define ZFCP_LOG_NORMAL(fmt, args...)
+# define ZFCP_LOG_NORMAL(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_NORMAL(fmt, args...) \
 do { \
@@ -553,7 +553,7 @@ do { \
 #endif
 
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_INFO
-# define ZFCP_LOG_INFO(fmt, args...)
+# define ZFCP_LOG_INFO(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_INFO(fmt, args...) \
 do { \
@@ -563,14 +563,14 @@ do { \
 #endif
 
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_DEBUG
-# define ZFCP_LOG_DEBUG(fmt, args...)
+# define ZFCP_LOG_DEBUG(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_DEBUG(fmt, args...) \
 	ZFCP_LOG(ZFCP_LOG_LEVEL_DEBUG, fmt , ##args)
 #endif
 
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_TRACE
-# define ZFCP_LOG_TRACE(fmt, args...)
+# define ZFCP_LOG_TRACE(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_TRACE(fmt, args...) \
 	ZFCP_LOG(ZFCP_LOG_LEVEL_TRACE, fmt , ##args)
diff --git a/include/asm-s390/dma.h b/include/asm-s390/dma.h
index 02720c449cd82..7425c6af6cd4c 100644
--- a/include/asm-s390/dma.h
+++ b/include/asm-s390/dma.h
@@ -11,6 +11,6 @@
 
 #define MAX_DMA_ADDRESS         0x80000000
 
-#define free_dma(x)
+#define free_dma(x)	do { } while (0)
 
 #endif /* _ASM_DMA_H */
diff --git a/include/asm-s390/io.h b/include/asm-s390/io.h
index d4614b35f4232..a6cc27e770074 100644
--- a/include/asm-s390/io.h
+++ b/include/asm-s390/io.h
@@ -116,7 +116,7 @@ extern void iounmap(void *addr);
 #define outb(x,addr) ((void) writeb(x,addr))
 #define outb_p(x,addr) outb(x,addr)
 
-#define mmiowb()
+#define mmiowb()	do { } while (0)
 
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 657646054c5ed..9fb02e9779c9f 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -104,7 +104,7 @@ smp_call_function_on(void (*func) (void *info), void *info,
 #define smp_cpu_not_running(cpu)	1
 #define smp_get_cpu(cpu) ({ 0; })
 #define smp_put_cpu(cpu) ({ 0; })
-#define smp_setup_cpu_possible_map()
+#define smp_setup_cpu_possible_map()	do { } while (0)
 #endif
 
 #endif
-- 
GitLab


From 8427082a506f7ae0abf82ce0047a045ec4309e59 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:54 +0200
Subject: [PATCH 0234/1063] [S390] fix syscall restart handling.

If do_signal() gets called several times before returning to user space
and no signal is pending (e.g. cancelled by a debugger) syscall restart
handling could be done several times. This would change the user space
PSW to an address prior to the syscall instruction.
Fix this by making sure that syscall restart handling is only done once.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/signal.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index a887b686f2792..dd05423f87a84 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -457,6 +457,7 @@ void do_signal(struct pt_regs *regs)
 		case -ERESTART_RESTARTBLOCK:
 			regs->gprs[2] = -EINTR;
 		}
+		regs->trap = -1;	/* Don't deal with this again. */
 	}
 
 	/* Get signal to deliver.  When running under ptrace, at this point
-- 
GitLab


From ba8ce5c6f0a15f08eae39880a0de296007f4a4e7 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:56 +0200
Subject: [PATCH 0235/1063] [S390] #undef in unistd.h

Avoid using #undef in unistd.h.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/asm-s390/unistd.h | 170 +++++++++++++-------------------------
 1 file changed, 59 insertions(+), 111 deletions(-)

diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index aa7a243862e1b..02b942d85c377 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -25,17 +25,12 @@
 #define __NR_unlink              10
 #define __NR_execve              11
 #define __NR_chdir               12
-#define __NR_time                13
 #define __NR_mknod               14
 #define __NR_chmod               15
-#define __NR_lchown              16
 #define __NR_lseek               19
 #define __NR_getpid              20
 #define __NR_mount               21
 #define __NR_umount              22
-#define __NR_setuid              23
-#define __NR_getuid              24
-#define __NR_stime               25
 #define __NR_ptrace              26
 #define __NR_alarm               27
 #define __NR_pause               29
@@ -51,11 +46,7 @@
 #define __NR_pipe                42
 #define __NR_times               43
 #define __NR_brk                 45
-#define __NR_setgid              46
-#define __NR_getgid              47
 #define __NR_signal              48
-#define __NR_geteuid             49
-#define __NR_getegid             50
 #define __NR_acct                51
 #define __NR_umount2             52
 #define __NR_ioctl               54
@@ -69,18 +60,13 @@
 #define __NR_getpgrp             65
 #define __NR_setsid              66
 #define __NR_sigaction           67
-#define __NR_setreuid            70
-#define __NR_setregid            71
 #define __NR_sigsuspend          72
 #define __NR_sigpending          73
 #define __NR_sethostname         74
 #define __NR_setrlimit           75
-#define __NR_getrlimit           76
 #define __NR_getrusage           77
 #define __NR_gettimeofday        78
 #define __NR_settimeofday        79
-#define __NR_getgroups           80
-#define __NR_setgroups           81
 #define __NR_symlink             83
 #define __NR_readlink            85
 #define __NR_uselib              86
@@ -92,12 +78,10 @@
 #define __NR_truncate            92
 #define __NR_ftruncate           93
 #define __NR_fchmod              94
-#define __NR_fchown              95
 #define __NR_getpriority         96
 #define __NR_setpriority         97
 #define __NR_statfs              99
 #define __NR_fstatfs            100
-#define __NR_ioperm             101
 #define __NR_socketcall         102
 #define __NR_syslog             103
 #define __NR_setitimer          104
@@ -131,11 +115,7 @@
 #define __NR_sysfs              135
 #define __NR_personality        136
 #define __NR_afs_syscall        137 /* Syscall for Andrew File System */
-#define __NR_setfsuid           138
-#define __NR_setfsgid           139
-#define __NR__llseek            140
 #define __NR_getdents           141
-#define __NR__newselect         142
 #define __NR_flock              143
 #define __NR_msync              144
 #define __NR_readv              145
@@ -157,13 +137,9 @@
 #define __NR_sched_rr_get_interval      161
 #define __NR_nanosleep          162
 #define __NR_mremap             163
-#define __NR_setresuid          164
-#define __NR_getresuid          165
 #define __NR_query_module       167
 #define __NR_poll               168
 #define __NR_nfsservctl         169
-#define __NR_setresgid          170
-#define __NR_getresgid          171
 #define __NR_prctl              172
 #define __NR_rt_sigreturn       173
 #define __NR_rt_sigaction       174
@@ -174,7 +150,6 @@
 #define __NR_rt_sigsuspend      179
 #define __NR_pread64            180
 #define __NR_pwrite64           181
-#define __NR_chown              182
 #define __NR_getcwd             183
 #define __NR_capget             184
 #define __NR_capset             185
@@ -183,39 +158,11 @@
 #define __NR_getpmsg		188
 #define __NR_putpmsg		189
 #define __NR_vfork		190
-#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
 #define __NR_pivot_root         217
 #define __NR_mincore            218
 #define __NR_madvise            219
 #define __NR_getdents64		220
-#define __NR_fcntl64		221
 #define __NR_readahead		222
-#define __NR_sendfile64		223
 #define __NR_setxattr		224
 #define __NR_lsetxattr		225
 #define __NR_fsetxattr		226
@@ -256,7 +203,6 @@
 #define __NR_clock_getres	(__NR_timer_create+7)
 #define __NR_clock_nanosleep	(__NR_timer_create+8)
 /* Number 263 is reserved for vserver */
-#define __NR_fadvise64_64	264
 #define __NR_statfs64		265
 #define __NR_fstatfs64		266
 #define __NR_remap_file_pages	267
@@ -285,7 +231,6 @@
 #define __NR_mknodat		290
 #define __NR_fchownat		291
 #define __NR_futimesat		292
-#define __NR_fstatat64		293
 #define __NR_unlinkat		294
 #define __NR_renameat		295
 #define __NR_linkat		296
@@ -310,62 +255,65 @@
  * have a different name although they do the same (e.g. __NR_chown32
  * is __NR_chown on 64 bit).
  */
-#ifdef __s390x__
-#undef  __NR_time
-#undef  __NR_lchown
-#undef  __NR_setuid
-#undef  __NR_getuid
-#undef  __NR_stime
-#undef  __NR_setgid
-#undef  __NR_getgid
-#undef  __NR_geteuid
-#undef  __NR_getegid
-#undef  __NR_setreuid
-#undef  __NR_setregid
-#undef  __NR_getrlimit
-#undef  __NR_getgroups
-#undef  __NR_setgroups
-#undef  __NR_fchown
-#undef  __NR_ioperm
-#undef  __NR_setfsuid
-#undef  __NR_setfsgid
-#undef  __NR__llseek
-#undef  __NR__newselect
-#undef  __NR_setresuid
-#undef  __NR_getresuid
-#undef  __NR_setresgid
-#undef  __NR_getresgid
-#undef  __NR_chown
-#undef  __NR_ugetrlimit
-#undef  __NR_mmap2
-#undef  __NR_truncate64
-#undef  __NR_ftruncate64
-#undef  __NR_stat64
-#undef  __NR_lstat64
-#undef  __NR_fstat64
-#undef  __NR_lchown32
-#undef  __NR_getuid32
-#undef  __NR_getgid32
-#undef  __NR_geteuid32
-#undef  __NR_getegid32
-#undef  __NR_setreuid32
-#undef  __NR_setregid32
-#undef  __NR_getgroups32
-#undef  __NR_setgroups32
-#undef  __NR_fchown32
-#undef  __NR_setresuid32
-#undef  __NR_getresuid32
-#undef  __NR_setresgid32
-#undef  __NR_getresgid32
-#undef  __NR_chown32
-#undef  __NR_setuid32
-#undef  __NR_setgid32
-#undef  __NR_setfsuid32
-#undef  __NR_setfsgid32
-#undef  __NR_fcntl64
-#undef  __NR_sendfile64
-#undef  __NR_fadvise64_64
-#undef  __NR_fstatat64
+#ifndef __s390x__
+
+#define __NR_time		 13
+#define __NR_lchown		 16
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+#define __NR_getrlimit		 76
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_fchown		 95
+#define __NR_ioperm		101
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR__newselect 	142
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_chown		182
+#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_fcntl64		221
+#define __NR_sendfile64		223
+#define __NR_fadvise64_64	264
+#define __NR_fstatat64		293
+
+#else
 
 #define __NR_select		142
 #define __NR_getrlimit		191	/* SuS compliant getrlimit */
-- 
GitLab


From 8301425534b87bae9990261f3008f39999be738c Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:58:58 +0200
Subject: [PATCH 0236/1063] [S390] architecture co-maintainer.

Add Heiko Carstens as co-maintainer for the s390 architecture.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index a34c53c087423..576a7f2308401 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2445,6 +2445,8 @@ S:      Maintained
 S390
 P:	Martin Schwidefsky
 M:	schwidefsky@de.ibm.com
+P:	Heiko Carstens
+M:	heiko.carstens@de.ibm.com
 M:	linux390@de.ibm.com
 L:	linux-390@vm.marist.edu
 W:	http://www.ibm.com/developerworks/linux/linux390/
-- 
GitLab


From 7d5d688f724dd5a651d1ce7bc3ea7c03d28137a1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:00 +0200
Subject: [PATCH 0237/1063] [S390] Use simple_strtoul instead of own
 cmm_strtoul wrapper.

Fix compile warning with some configurations:

arch/s390/mm/cmm.c:58: warning: 'cmm_strtoul' defined but not used

Originally cmm_strtoul was introduced because simple_strtoul couldn't
handle strings with hexadecimal numbers that contained a capital 'X'.
Since this is no longer true cmm_strtoul can be removed.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/cmm.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index ceea51cff03be..786a44dba5bf8 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -52,22 +52,6 @@ static struct timer_list cmm_timer;
 static void cmm_timer_fn(unsigned long);
 static void cmm_set_timer(void);
 
-static long
-cmm_strtoul(const char *cp, char **endp)
-{
-	unsigned int base = 10;
-
-	if (*cp == '0') {
-		base = 8;
-		cp++;
-		if ((*cp == 'x' || *cp == 'X') && isxdigit(cp[1])) {
-			base = 16;
-			cp++;
-		}
-	}
-	return simple_strtoul(cp, endp, base);
-}
-
 static long
 cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list)
 {
@@ -276,7 +260,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
 			return -EFAULT;
 		buf[sizeof(buf) - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
-		pages = cmm_strtoul(p, &p);
+		pages = simple_strtoul(p, &p, 0);
 		if (ctl == &cmm_table[0])
 			cmm_set_pages(pages);
 		else
@@ -317,9 +301,9 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
 			return -EFAULT;
 		buf[sizeof(buf) - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
-		pages = cmm_strtoul(p, &p);
+		pages = simple_strtoul(p, &p, 0);
 		cmm_skip_blanks(p, &p);
-		seconds = cmm_strtoul(p, &p);
+		seconds = simple_strtoul(p, &p, 0);
 		cmm_set_timeout(pages, seconds);
 	} else {
 		len = sprintf(buf, "%ld %ld\n",
@@ -382,24 +366,24 @@ cmm_smsg_target(char *from, char *msg)
 	if (strncmp(msg, "SHRINK", 6) == 0) {
 		if (!cmm_skip_blanks(msg + 6, &msg))
 			return;
-		pages = cmm_strtoul(msg, &msg);
+		pages = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
 			cmm_set_pages(pages);
 	} else if (strncmp(msg, "RELEASE", 7) == 0) {
 		if (!cmm_skip_blanks(msg + 7, &msg))
 			return;
-		pages = cmm_strtoul(msg, &msg);
+		pages = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
 			cmm_add_timed_pages(pages);
 	} else if (strncmp(msg, "REUSE", 5) == 0) {
 		if (!cmm_skip_blanks(msg + 5, &msg))
 			return;
-		pages = cmm_strtoul(msg, &msg);
+		pages = simple_strtoul(msg, &msg, 0);
 		if (!cmm_skip_blanks(msg, &msg))
 			return;
-		seconds = cmm_strtoul(msg, &msg);
+		seconds = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
 			cmm_set_timeout(pages, seconds);
-- 
GitLab


From 47addc84b450fd5e391ab118e178645cb0bbd89d Mon Sep 17 00:00:00 2001
From: Frank Pavlic <fpavlic@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:03 +0200
Subject: [PATCH 0238/1063] [S390] qdio_get_micros return value.

qdio_get_micros is supposed to return microseconds. The get_clock()
return value needs to be shifted by 12 to get to microseconds.

Signed-off-by: Frank Pavlic <fpavlic@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/qdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c
index 7c93a8798d234..16e3715d5c0d7 100644
--- a/drivers/s390/cio/qdio.c
+++ b/drivers/s390/cio/qdio.c
@@ -115,7 +115,7 @@ qdio_min(int a,int b)
 static inline __u64 
 qdio_get_micros(void)
 {
-        return (get_clock() >> 10); /* time>>12 is microseconds */
+	return (get_clock() >> 12); /* time>>12 is microseconds */
 }
 
 /* 
-- 
GitLab


From a00bfd7147c0c5c04a59f7adcb0e6d8948b90a6e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:05 +0200
Subject: [PATCH 0239/1063] [S390] dasd deadlock after state change pending
 interrupt.

The dasd_device_from_cdev function is called from interrupt context
to get the struct dasd_device associated with a ccw device. The
driver_data of the ccw device points to the dasd_devmap structure
which contains the pointer to the dasd_device structure. The lock
that protects the dasd_devmap structure is acquired without irqsave.
To prevent the deadlock in dasd_device_from_cdev if it is called
from interrupt context the dependency to the dasd_devmap structure
needs to be removed. Let the driver_data of the ccw device point
to the dasd_device structure directly and use the ccw device lock
to protect the access.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c        |  4 +-
 drivers/s390/block/dasd_devmap.c | 74 +++++++++++++++++++++-----------
 drivers/s390/block/dasd_int.h    |  1 +
 3 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 25c1ef6dfd44e..3cd87f85f7024 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -893,7 +893,7 @@ dasd_handle_killed_request(struct ccw_device *cdev, unsigned long intparm)
 
 	device = (struct dasd_device *) cqr->device;
 	if (device == NULL ||
-	    device != dasd_device_from_cdev(cdev) ||
+	    device != dasd_device_from_cdev_locked(cdev) ||
 	    strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) {
 		MESSAGE(KERN_DEBUG, "invalid device in request: bus_id %s",
 			cdev->dev.bus_id);
@@ -970,7 +970,7 @@ dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
 	/* first of all check for state change pending interrupt */
 	mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP;
 	if ((irb->scsw.dstat & mask) == mask) {
-		device = dasd_device_from_cdev(cdev);
+		device = dasd_device_from_cdev_locked(cdev);
 		if (!IS_ERR(device)) {
 			dasd_handle_state_change_pending(device);
 			dasd_put_device(device);
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 9af02c79ce8af..80cf0999465a4 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -523,17 +523,17 @@ dasd_create_device(struct ccw_device *cdev)
 {
 	struct dasd_devmap *devmap;
 	struct dasd_device *device;
+	unsigned long flags;
 	int rc;
 
 	devmap = dasd_devmap_from_cdev(cdev);
 	if (IS_ERR(devmap))
 		return (void *) devmap;
-	cdev->dev.driver_data = devmap;
 
 	device = dasd_alloc_device();
 	if (IS_ERR(device))
 		return device;
-	atomic_set(&device->ref_count, 2);
+	atomic_set(&device->ref_count, 3);
 
 	spin_lock(&dasd_devmap_lock);
 	if (!devmap->device) {
@@ -552,6 +552,11 @@ dasd_create_device(struct ccw_device *cdev)
 		dasd_free_device(device);
 		return ERR_PTR(rc);
 	}
+
+	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	cdev->dev.driver_data = device;
+	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+
 	return device;
 }
 
@@ -569,6 +574,7 @@ dasd_delete_device(struct dasd_device *device)
 {
 	struct ccw_device *cdev;
 	struct dasd_devmap *devmap;
+	unsigned long flags;
 
 	/* First remove device pointer from devmap. */
 	devmap = dasd_find_busid(device->cdev->dev.bus_id);
@@ -582,9 +588,16 @@ dasd_delete_device(struct dasd_device *device)
 	devmap->device = NULL;
 	spin_unlock(&dasd_devmap_lock);
 
-	/* Drop ref_count by 2, one for the devmap reference and
-	 * one for the passed reference. */
-	atomic_sub(2, &device->ref_count);
+	/* Disconnect dasd_device structure from ccw_device structure. */
+	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
+	device->cdev->dev.driver_data = NULL;
+	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
+
+	/*
+	 * Drop ref_count by 3, one for the devmap reference, one for
+	 * the cdev reference and one for the passed reference.
+	 */
+	atomic_sub(3, &device->ref_count);
 
 	/* Wait for reference counter to drop to zero. */
 	wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0);
@@ -593,9 +606,6 @@ dasd_delete_device(struct dasd_device *device)
 	cdev = device->cdev;
 	device->cdev = NULL;
 
-	/* Disconnect dasd_devmap structure from ccw_device structure. */
-	cdev->dev.driver_data = NULL;
-
 	/* Put ccw_device structure. */
 	put_device(&cdev->dev);
 
@@ -613,23 +623,34 @@ dasd_put_device_wake(struct dasd_device *device)
 	wake_up(&dasd_delete_wq);
 }
 
+/*
+ * Return dasd_device structure associated with cdev.
+ * This function needs to be called with the ccw device
+ * lock held. It can be used from interrupt context.
+ */
+struct dasd_device *
+dasd_device_from_cdev_locked(struct ccw_device *cdev)
+{
+	struct dasd_device *device = cdev->dev.driver_data;
+
+	if (!device)
+		return ERR_PTR(-ENODEV);
+	dasd_get_device(device);
+	return device;
+}
+
 /*
  * Return dasd_device structure associated with cdev.
  */
 struct dasd_device *
 dasd_device_from_cdev(struct ccw_device *cdev)
 {
-	struct dasd_devmap *devmap;
 	struct dasd_device *device;
+	unsigned long flags;
 
-	device = ERR_PTR(-ENODEV);
-	spin_lock(&dasd_devmap_lock);
-	devmap = cdev->dev.driver_data;
-	if (devmap && devmap->device) {
-		device = devmap->device;
-		dasd_get_device(device);
-	}
-	spin_unlock(&dasd_devmap_lock);
+	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	device = dasd_device_from_cdev_locked(cdev);
+	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
 	return device;
 }
 
@@ -730,16 +751,17 @@ static ssize_t
 dasd_discipline_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
-	struct dasd_devmap *devmap;
-	char *dname;
+	struct dasd_device *device;
+	ssize_t len;
 
-	spin_lock(&dasd_devmap_lock);
-	dname = "none";
-	devmap = dev->driver_data;
-	if (devmap && devmap->device && devmap->device->discipline)
-		dname = devmap->device->discipline->name;
-	spin_unlock(&dasd_devmap_lock);
-	return snprintf(buf, PAGE_SIZE, "%s\n", dname);
+	device = dasd_device_from_cdev(to_ccwdev(dev));
+	if (!IS_ERR(device) && device->discipline) {
+		len = snprintf(buf, PAGE_SIZE, "%s\n",
+			       device->discipline->name);
+		dasd_put_device(device);
+	} else
+		len = snprintf(buf, PAGE_SIZE, "none\n");
+	return len;
 }
 
 static DEVICE_ATTR(discipline, 0444, dasd_discipline_show, NULL);
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 3ccf06d28ba11..9f52004f6fc2f 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -534,6 +534,7 @@ int dasd_add_sysfs_files(struct ccw_device *);
 void dasd_remove_sysfs_files(struct ccw_device *);
 
 struct dasd_device *dasd_device_from_cdev(struct ccw_device *);
+struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *);
 struct dasd_device *dasd_device_from_devindex(int);
 
 int dasd_parse(void);
-- 
GitLab


From b0035f127e007ea0afc8baad740093eb124f7b0b Mon Sep 17 00:00:00 2001
From: Horst Hummel <horst.hummel@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:07 +0200
Subject: [PATCH 0240/1063] [S390] dasd default debug level.

Enhanced default DBF level to get most important messages
in debug feature files.

Signed-off-by: Horst Hummel <horst.hummel@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 3cd87f85f7024..d0647d116eaa1 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -184,7 +184,7 @@ dasd_state_known_to_basic(struct dasd_device * device)
 	device->debug_area = debug_register(device->cdev->dev.bus_id, 1, 2,
 					    8 * sizeof (long));
 	debug_register_view(device->debug_area, &debug_sprintf_view);
-	debug_set_level(device->debug_area, DBF_EMERG);
+	debug_set_level(device->debug_area, DBF_WARNING);
 	DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created");
 
 	device->state = DASD_STATE_BASIC;
@@ -2169,7 +2169,7 @@ dasd_init(void)
 		goto failed;
 	}
 	debug_register_view(dasd_debug_area, &debug_sprintf_view);
-	debug_set_level(dasd_debug_area, DBF_EMERG);
+	debug_set_level(dasd_debug_area, DBF_WARNING);
 
 	DBF_EVENT(DBF_EMERG, "%s", "debug area created");
 
-- 
GitLab


From 0fee644ada12c524abbf723132fbea6a082ecfc2 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:10 +0200
Subject: [PATCH 0241/1063] [S390] cleanup sysinfo and add system z9 specific
 extensions.

With System z9 additional fields have been added to the output of the
store system information instruction. This patch adds the new model
information field and the alternate cpu capability fields to the
output of /proc/sysinfo. While we are at it, clean up the code as well.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/sysinfo.c | 455 +++++++++++++++++++++--------------------
 1 file changed, 231 insertions(+), 224 deletions(-)

diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c
index d1c1e75bfd60d..1e788e815ce7f 100644
--- a/drivers/s390/sysinfo.c
+++ b/drivers/s390/sysinfo.c
@@ -11,19 +11,18 @@
 #include <linux/init.h>
 #include <asm/ebcdic.h>
 
-struct sysinfo_1_1_1
-{
+struct sysinfo_1_1_1 {
 	char reserved_0[32];
 	char manufacturer[16];
 	char type[4];
 	char reserved_1[12];
-	char model[16];
+	char model_capacity[16];
 	char sequence[16];
 	char plant[4];
+	char model[16];
 };
 
-struct sysinfo_1_2_1
-{
+struct sysinfo_1_2_1 {
 	char reserved_0[80];
 	char sequence[16];
 	char plant[4];
@@ -31,9 +30,12 @@ struct sysinfo_1_2_1
 	unsigned short cpu_address;
 };
 
-struct sysinfo_1_2_2
-{
-	char reserved_0[32];
+struct sysinfo_1_2_2 {
+	char format;
+	char reserved_0[1];
+	unsigned short acc_offset;
+	char reserved_1[24];
+	unsigned int secondary_capability;
 	unsigned int capability;
 	unsigned short cpus_total;
 	unsigned short cpus_configured;
@@ -42,8 +44,12 @@ struct sysinfo_1_2_2
 	unsigned short adjustment[0];
 };
 
-struct sysinfo_2_2_1
-{
+struct sysinfo_1_2_2_extension {
+	unsigned int alt_capability;
+	unsigned short alt_adjustment[0];
+};
+
+struct sysinfo_2_2_1 {
 	char reserved_0[80];
 	char sequence[16];
 	char plant[4];
@@ -51,15 +57,11 @@ struct sysinfo_2_2_1
 	unsigned short cpu_address;
 };
 
-struct sysinfo_2_2_2
-{
+struct sysinfo_2_2_2 {
 	char reserved_0[32];
 	unsigned short lpar_number;
 	char reserved_1;
 	unsigned char characteristics;
-	#define LPAR_CHAR_DEDICATED	(1 << 7)
-	#define LPAR_CHAR_SHARED	(1 << 6)
-	#define LPAR_CHAR_LIMITED	(1 << 5)
 	unsigned short cpus_total;
 	unsigned short cpus_configured;
 	unsigned short cpus_standby;
@@ -71,12 +73,14 @@ struct sysinfo_2_2_2
 	unsigned short cpus_shared;
 };
 
-struct sysinfo_3_2_2
-{
+#define LPAR_CHAR_DEDICATED	(1 << 7)
+#define LPAR_CHAR_SHARED	(1 << 6)
+#define LPAR_CHAR_LIMITED	(1 << 5)
+
+struct sysinfo_3_2_2 {
 	char reserved_0[31];
 	unsigned char count;
-	struct
-	{
+	struct {
 		char reserved_0[4];
 		unsigned short cpus_total;
 		unsigned short cpus_configured;
@@ -90,136 +94,223 @@ struct sysinfo_3_2_2
 	} vm[8];
 };
 
-union s390_sysinfo
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
 {
-	struct sysinfo_1_1_1 sysinfo_1_1_1;
-	struct sysinfo_1_2_1 sysinfo_1_2_1;
-	struct sysinfo_1_2_2 sysinfo_1_2_2;
-	struct sysinfo_2_2_1 sysinfo_2_2_1;
-	struct sysinfo_2_2_2 sysinfo_2_2_2;
-	struct sysinfo_3_2_2 sysinfo_3_2_2;
-};
-
-static inline int stsi (void *sysinfo, 
-                        int fc, int sel1, int sel2)
-{
-	int cc, retv;
-
-#ifndef CONFIG_64BIT
-	__asm__ __volatile__ (	"lr\t0,%2\n"
-				"\tlr\t1,%3\n"
-				"\tstsi\t0(%4)\n"
-				"0:\tipm\t%0\n"
-				"\tsrl\t%0,28\n"
-				"1:lr\t%1,0\n"
-				".section .fixup,\"ax\"\n"
-				"2:\tlhi\t%0,3\n"
-				"\tbras\t1,3f\n"
-				"\t.long 1b\n"
-				"3:\tl\t1,0(1)\n"
-				"\tbr\t1\n"
-				".previous\n"
-				".section __ex_table,\"a\"\n"
-				"\t.align 4\n"
-				"\t.long 0b,2b\n"
-				".previous\n"
-				: "=d" (cc), "=d" (retv)
-				: "d" ((fc << 28) | sel1), "d" (sel2), "a" (sysinfo) 
-				: "cc", "memory", "0", "1" );
-#else
-	__asm__ __volatile__ (	"lr\t0,%2\n"
-				"lr\t1,%3\n"
-				"\tstsi\t0(%4)\n"
-				"0:\tipm\t%0\n"
-				"\tsrl\t%0,28\n"
-				"1:lr\t%1,0\n"
-				".section .fixup,\"ax\"\n"
-				"2:\tlhi\t%0,3\n"
-				"\tjg\t1b\n"
-				".previous\n"
-				".section __ex_table,\"a\"\n"
-				"\t.align 8\n"
-				"\t.quad 0b,2b\n"
-				".previous\n"
-				: "=d" (cc), "=d" (retv)
-				: "d" ((fc << 28) | sel1), "d" (sel2), "a" (sysinfo) 
-				: "cc", "memory", "0", "1" );
-#endif
-
-	return cc? -1 : retv;
+	register int r0 asm("0") = (fc << 28) | sel1;
+	register int r1 asm("1") = sel2;
+
+	asm volatile(
+		"   stsi 0(%2)\n"
+		"0: jz   2f\n"
+		"1: lhi  %0,%3\n"
+		"2:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
+		: "cc", "memory" );
+	return r0;
 }
 
-static inline int stsi_0 (void)
+static inline int stsi_0(void)
 {
 	int rc = stsi (NULL, 0, 0, 0);
-	return rc == -1 ? rc : (((unsigned int)rc) >> 28);
+	return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28);
 }
 
-static inline int stsi_1_1_1 (struct sysinfo_1_1_1 *info)
+static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len)
 {
-	int rc = stsi (info, 1, 1, 1);
-	if (rc != -1)
-	{
-		EBCASC (info->manufacturer, sizeof(info->manufacturer));
-		EBCASC (info->type, sizeof(info->type));
-		EBCASC (info->model, sizeof(info->model));
-		EBCASC (info->sequence, sizeof(info->sequence));
-		EBCASC (info->plant, sizeof(info->plant));
-	}
-	return rc == -1 ? rc : 0;
+	if (stsi(info, 1, 1, 1) == -ENOSYS)
+		return len;
+
+	EBCASC(info->manufacturer, sizeof(info->manufacturer));
+	EBCASC(info->type, sizeof(info->type));
+	EBCASC(info->model, sizeof(info->model));
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	EBCASC(info->model_capacity, sizeof(info->model_capacity));
+	len += sprintf(page + len, "Manufacturer:         %-16.16s\n",
+		       info->manufacturer);
+	len += sprintf(page + len, "Type:                 %-4.4s\n",
+		       info->type);
+	if (info->model[0] != '\0')
+		/*
+		 * Sigh: the model field has been renamed with System z9
+		 * to model_capacity and a new model field has been added
+		 * after the plant field. To avoid confusing older programs
+		 * the "Model:" prints "model_capacity model" or just
+		 * "model_capacity" if the model string is empty .
+		 */
+		len += sprintf(page + len,
+			       "Model:                %-16.16s %-16.16s\n",
+			       info->model_capacity, info->model);
+	else
+		len += sprintf(page + len, "Model:                %-16.16s\n",
+			       info->model_capacity);
+	len += sprintf(page + len, "Sequence Code:        %-16.16s\n",
+		       info->sequence);
+	len += sprintf(page + len, "Plant:                %-4.4s\n",
+		       info->plant);
+	len += sprintf(page + len, "Model Capacity:       %-16.16s\n",
+		       info->model_capacity);
+	return len;
 }
 
-static inline int stsi_1_2_1 (struct sysinfo_1_2_1 *info)
+#if 0 /* Currently unused */
+static int stsi_1_2_1(struct sysinfo_1_2_1 *info, char *page, int len)
 {
-	int rc = stsi (info, 1, 2, 1);
-	if (rc != -1)
-	{
-		EBCASC (info->sequence, sizeof(info->sequence));
-		EBCASC (info->plant, sizeof(info->plant));
-	}
-	return rc == -1 ? rc : 0;
+	if (stsi(info, 1, 2, 1) == -ENOSYS)
+		return len;
+
+	len += sprintf(page + len, "\n");
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	len += sprintf(page + len, "Sequence Code of CPU: %-16.16s\n",
+		       info->sequence);
+	len += sprintf(page + len, "Plant of CPU:         %-16.16s\n",
+		       info->plant);
+	return len;
 }
+#endif
 
-static inline int stsi_1_2_2 (struct sysinfo_1_2_2 *info)
+static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len)
 {
-	int rc = stsi (info, 1, 2, 2);
-	return rc == -1 ? rc : 0;
+	struct sysinfo_1_2_2_extension *ext;
+	int i;
+
+	if (stsi(info, 1, 2, 2) == -ENOSYS)
+		return len;
+	ext = (struct sysinfo_1_2_2_extension *)
+		((unsigned long) info + info->acc_offset);
+
+	len += sprintf(page + len, "\n");
+	len += sprintf(page + len, "CPUs Total:           %d\n",
+		       info->cpus_total);
+	len += sprintf(page + len, "CPUs Configured:      %d\n",
+		       info->cpus_configured);
+	len += sprintf(page + len, "CPUs Standby:         %d\n",
+		       info->cpus_standby);
+	len += sprintf(page + len, "CPUs Reserved:        %d\n",
+		       info->cpus_reserved);
+
+	if (info->format == 1) {
+		/*
+		 * Sigh 2. According to the specification the alternate
+		 * capability field is a 32 bit floating point number
+		 * if the higher order 8 bits are not zero. Printing
+		 * a floating point number in the kernel is a no-no,
+		 * always print the number as 32 bit unsigned integer.
+		 * The user-space needs to know about the stange
+		 * encoding of the alternate cpu capability.
+		 */
+		len += sprintf(page + len, "Capability:           %u %u\n",
+			       info->capability, ext->alt_capability);
+		for (i = 2; i <= info->cpus_total; i++)
+			len += sprintf(page + len,
+				       "Adjustment %02d-way:    %u %u\n",
+				       i, info->adjustment[i-2],
+				       ext->alt_adjustment[i-2]);
+
+	} else {
+		len += sprintf(page + len, "Capability:           %u\n",
+			       info->capability);
+		for (i = 2; i <= info->cpus_total; i++)
+			len += sprintf(page + len,
+				       "Adjustment %02d-way:    %u\n",
+				       i, info->adjustment[i-2]);
+	}
+
+	if (info->secondary_capability != 0)
+		len += sprintf(page + len, "Secondary Capability: %d\n",
+			       info->secondary_capability);
+
+	return len;
 }
 
-static inline int stsi_2_2_1 (struct sysinfo_2_2_1 *info)
+#if 0 /* Currently unused */
+static int stsi_2_2_1(struct sysinfo_2_2_1 *info, char *page, int len)
 {
-	int rc = stsi (info, 2, 2, 1);
-	if (rc != -1)
-	{
-		EBCASC (info->sequence, sizeof(info->sequence));
-		EBCASC (info->plant, sizeof(info->plant));
-	}
-	return rc == -1 ? rc : 0;
+	if (stsi(info, 2, 2, 1) == -ENOSYS)
+		return len;
+
+	len += sprintf(page + len, "\n");
+	EBCASC (info->sequence, sizeof(info->sequence));
+	EBCASC (info->plant, sizeof(info->plant));
+	len += sprintf(page + len, "Sequence Code of logical CPU: %-16.16s\n",
+		       info->sequence);
+	len += sprintf(page + len, "Plant of logical CPU: %-16.16s\n",
+		       info->plant);
+	return len;
 }
+#endif
 
-static inline int stsi_2_2_2 (struct sysinfo_2_2_2 *info)
+static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len)
 {
-	int rc = stsi (info, 2, 2, 2);
-	if (rc != -1)
-	{
-		EBCASC (info->name, sizeof(info->name));
-  	}
-	return rc == -1 ? rc : 0;
+	if (stsi(info, 2, 2, 2) == -ENOSYS)
+		return len;
+
+	EBCASC (info->name, sizeof(info->name));
+
+	len += sprintf(page + len, "\n");
+	len += sprintf(page + len, "LPAR Number:          %d\n",
+		       info->lpar_number);
+
+	len += sprintf(page + len, "LPAR Characteristics: ");
+	if (info->characteristics & LPAR_CHAR_DEDICATED)
+		len += sprintf(page + len, "Dedicated ");
+	if (info->characteristics & LPAR_CHAR_SHARED)
+		len += sprintf(page + len, "Shared ");
+	if (info->characteristics & LPAR_CHAR_LIMITED)
+		len += sprintf(page + len, "Limited ");
+	len += sprintf(page + len, "\n");
+
+	len += sprintf(page + len, "LPAR Name:            %-8.8s\n",
+		       info->name);
+
+	len += sprintf(page + len, "LPAR Adjustment:      %d\n",
+		       info->caf);
+
+	len += sprintf(page + len, "LPAR CPUs Total:      %d\n",
+		       info->cpus_total);
+	len += sprintf(page + len, "LPAR CPUs Configured: %d\n",
+		       info->cpus_configured);
+	len += sprintf(page + len, "LPAR CPUs Standby:    %d\n",
+		       info->cpus_standby);
+	len += sprintf(page + len, "LPAR CPUs Reserved:   %d\n",
+		       info->cpus_reserved);
+	len += sprintf(page + len, "LPAR CPUs Dedicated:  %d\n",
+		       info->cpus_dedicated);
+	len += sprintf(page + len, "LPAR CPUs Shared:     %d\n",
+		       info->cpus_shared);
+	return len;
 }
 
-static inline int stsi_3_2_2 (struct sysinfo_3_2_2 *info)
+static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len)
 {
-	int rc = stsi (info, 3, 2, 2);
-	if (rc != -1)
-	{
-		int i;
-		for (i = 0; i < info->count; i++)
-		{
-			EBCASC (info->vm[i].name, sizeof(info->vm[i].name));
-			EBCASC (info->vm[i].cpi, sizeof(info->vm[i].cpi));
-		}
+	int i;
+
+	if (stsi(info, 3, 2, 2) == -ENOSYS)
+		return len;
+	for (i = 0; i < info->count; i++) {
+		EBCASC (info->vm[i].name, sizeof(info->vm[i].name));
+		EBCASC (info->vm[i].cpi, sizeof(info->vm[i].cpi));
+		len += sprintf(page + len, "\n");
+		len += sprintf(page + len, "VM%02d Name:            %-8.8s\n",
+			       i, info->vm[i].name);
+		len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n",
+			       i, info->vm[i].cpi);
+
+		len += sprintf(page + len, "VM%02d Adjustment:      %d\n",
+			       i, info->vm[i].caf);
+
+		len += sprintf(page + len, "VM%02d CPUs Total:      %d\n",
+			       i, info->vm[i].cpus_total);
+		len += sprintf(page + len, "VM%02d CPUs Configured: %d\n",
+			       i, info->vm[i].cpus_configured);
+		len += sprintf(page + len, "VM%02d CPUs Standby:    %d\n",
+			       i, info->vm[i].cpus_standby);
+		len += sprintf(page + len, "VM%02d CPUs Reserved:   %d\n",
+			       i, info->vm[i].cpus_reserved);
 	}
-	return rc == -1 ? rc : 0;
+	return len;
 }
 
 
@@ -227,118 +318,34 @@ static int proc_read_sysinfo(char *page, char **start,
                              off_t off, int count,
                              int *eof, void *data)
 {
-	unsigned long info_page = get_zeroed_page (GFP_KERNEL); 
-	union s390_sysinfo *info = (union s390_sysinfo *) info_page;
-	int len = 0;
-	int level;
-	int i;
+	unsigned long info = get_zeroed_page (GFP_KERNEL);
+	int level, len;
 	
 	if (!info)
 		return 0;
 
-	level = stsi_0 ();
-
-	if (level >= 1 && stsi_1_1_1 (&info->sysinfo_1_1_1) == 0)
-	{
-		len += sprintf (page+len, "Manufacturer:         %-16.16s\n",
-				info->sysinfo_1_1_1.manufacturer);
-		len += sprintf (page+len, "Type:                 %-4.4s\n",
-				info->sysinfo_1_1_1.type);
-		len += sprintf (page+len, "Model:                %-16.16s\n",
-				info->sysinfo_1_1_1.model);
-		len += sprintf (page+len, "Sequence Code:        %-16.16s\n",
-				info->sysinfo_1_1_1.sequence);
-		len += sprintf (page+len, "Plant:                %-4.4s\n",
-				info->sysinfo_1_1_1.plant);
-	}
-
-	if (level >= 1 && stsi_1_2_2 (&info->sysinfo_1_2_2) == 0)
-	{
-		len += sprintf (page+len, "\n");
-		len += sprintf (page+len, "CPUs Total:           %d\n",
-				info->sysinfo_1_2_2.cpus_total);
-		len += sprintf (page+len, "CPUs Configured:      %d\n",
-				info->sysinfo_1_2_2.cpus_configured);
-		len += sprintf (page+len, "CPUs Standby:         %d\n",
-				info->sysinfo_1_2_2.cpus_standby);
-		len += sprintf (page+len, "CPUs Reserved:        %d\n",
-				info->sysinfo_1_2_2.cpus_reserved);
-	
-		len += sprintf (page+len, "Capability:           %d\n",
-				info->sysinfo_1_2_2.capability);
+	len = 0;
+	level = stsi_0();
+	if (level >= 1)
+		len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len);
 
-		for (i = 2; i <= info->sysinfo_1_2_2.cpus_total; i++)
-			len += sprintf (page+len, "Adjustment %02d-way:    %d\n",
-					i, info->sysinfo_1_2_2.adjustment[i-2]);
-	}
+	if (level >= 1)
+		len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len);
 
-	if (level >= 2 && stsi_2_2_2 (&info->sysinfo_2_2_2) == 0)
-	{
-		len += sprintf (page+len, "\n");
-		len += sprintf (page+len, "LPAR Number:          %d\n",
-				info->sysinfo_2_2_2.lpar_number);
-
-		len += sprintf (page+len, "LPAR Characteristics: ");
-		if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_DEDICATED)
-			len += sprintf (page+len, "Dedicated ");
-		if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_SHARED)
-			len += sprintf (page+len, "Shared ");
-		if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_LIMITED)
-			len += sprintf (page+len, "Limited ");
-		len += sprintf (page+len, "\n");
-	
-		len += sprintf (page+len, "LPAR Name:            %-8.8s\n",
-				info->sysinfo_2_2_2.name);
-	
-		len += sprintf (page+len, "LPAR Adjustment:      %d\n",
-				info->sysinfo_2_2_2.caf);
-	
-		len += sprintf (page+len, "LPAR CPUs Total:      %d\n",
-				info->sysinfo_2_2_2.cpus_total);
-		len += sprintf (page+len, "LPAR CPUs Configured: %d\n",
-				info->sysinfo_2_2_2.cpus_configured);
-		len += sprintf (page+len, "LPAR CPUs Standby:    %d\n",
-				info->sysinfo_2_2_2.cpus_standby);
-		len += sprintf (page+len, "LPAR CPUs Reserved:   %d\n",
-				info->sysinfo_2_2_2.cpus_reserved);
-		len += sprintf (page+len, "LPAR CPUs Dedicated:  %d\n",
-				info->sysinfo_2_2_2.cpus_dedicated);
-		len += sprintf (page+len, "LPAR CPUs Shared:     %d\n",
-				info->sysinfo_2_2_2.cpus_shared);
-	}
+	if (level >= 2)
+		len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len);
 
-	if (level >= 3 && stsi_3_2_2 (&info->sysinfo_3_2_2) == 0)
-	{
-		for (i = 0; i < info->sysinfo_3_2_2.count; i++)
-		{
-			len += sprintf (page+len, "\n");
-			len += sprintf (page+len, "VM%02d Name:            %-8.8s\n",
-					i, info->sysinfo_3_2_2.vm[i].name);
-			len += sprintf (page+len, "VM%02d Control Program: %-16.16s\n",
-					i, info->sysinfo_3_2_2.vm[i].cpi);
-	
-			len += sprintf (page+len, "VM%02d Adjustment:      %d\n",
-					i, info->sysinfo_3_2_2.vm[i].caf);
-	
-			len += sprintf (page+len, "VM%02d CPUs Total:      %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_total);
-			len += sprintf (page+len, "VM%02d CPUs Configured: %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_configured);
-			len += sprintf (page+len, "VM%02d CPUs Standby:    %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_standby);
-			len += sprintf (page+len, "VM%02d CPUs Reserved:   %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_reserved);
-		}
-	}
+	if (level >= 3)
+		len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len);
 
-	free_page (info_page);
+	free_page (info);
         return len;
 }
 
 static __init int create_proc_sysinfo(void)
 {
-	create_proc_read_entry ("sysinfo", 0444, NULL, 
-				proc_read_sysinfo, NULL);
+	create_proc_read_entry("sysinfo", 0444, NULL,
+			       proc_read_sysinfo, NULL);
 	return 0;
 }
 
-- 
GitLab


From 1375fc1fb0434a26f93c59b1b9f3fdb8bf90bba5 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:12 +0200
Subject: [PATCH 0242/1063] [S390] __exit cleanup.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/hypfs/hypfs_diag.c   | 2 +-
 arch/s390/kernel/vmlinux.lds.S | 2 +-
 drivers/s390/block/dasd_eer.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index fee5aee605f6b..75144efbb92b6 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -535,7 +535,7 @@ __init int hypfs_diag_init(void)
 	return rc;
 }
 
-__exit void hypfs_diag_exit(void)
+void hypfs_diag_exit(void)
 {
 	diag224_delete_name_table();
 	diag204_free_buffer();
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index df0c16ab8e920..af9e69a030112 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -118,7 +118,7 @@ SECTIONS
 
   /* Sections to be discarded */
   /DISCARD/ : {
-	*(.exitcall.exit)
+	*(.exit.text) *(.exit.data) *(.exitcall.exit)
 	}
 
   /* Stabs debugging sections.  */
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index da65f1b032f5a..e0bf30ebb2152 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -678,7 +678,7 @@ int __init dasd_eer_init(void)
 	return 0;
 }
 
-void __exit dasd_eer_exit(void)
+void dasd_eer_exit(void)
 {
 	WARN_ON(misc_deregister(&dasd_eer_dev) != 0);
 }
-- 
GitLab


From e87bfe51b5ca2db99dd680bbb1e8fe3c94b607df Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:15 +0200
Subject: [PATCH 0243/1063] [S390] convert some assembler to C.

Convert GET_IPL_DEVICE assembler macro to C function.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/head.S    | 59 --------------------------------------
 arch/s390/kernel/head31.S  | 43 ++++++++++++++++++---------
 arch/s390/kernel/head64.S  | 39 ++++++++++++++++---------
 arch/s390/kernel/ipl.c     |  4 +--
 drivers/s390/cio/cio.c     | 38 ++++++++++++++++++++++--
 include/asm-s390/lowcore.h |  1 +
 include/asm-s390/setup.h   |  8 ++++--
 7 files changed, 99 insertions(+), 93 deletions(-)

diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index a6e9bdb535915..0f1db268a8a96 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -481,65 +481,6 @@ start:
 	.byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 
 	.byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
 
-.macro GET_IPL_DEVICE
-.Lget_ipl_device:
-	l     %r1,0xb8			# get sid
-	sll   %r1,15			# test if subchannel is enabled
-	srl   %r1,31
-	ltr   %r1,%r1
-	bz    2f-.LPG1(%r13)		# subchannel disabled
-	l     %r1,0xb8
-	la    %r5,.Lipl_schib-.LPG1(%r13)
-	stsch 0(%r5)		        # get schib of subchannel
-	bnz   2f-.LPG1(%r13)		# schib not available
-	tm    5(%r5),0x01		# devno valid?
-	bno   2f-.LPG1(%r13)
-	la    %r6,ipl_parameter_flags-.LPG1(%r13)
-	oi    3(%r6),0x01		# set flag
-	la    %r2,ipl_devno-.LPG1(%r13)
-	mvc   0(2,%r2),6(%r5)		# store devno
-	tm    4(%r5),0x80		# qdio capable device?
-	bno   2f-.LPG1(%r13)
-	oi    3(%r6),0x02		# set flag
-
-	# copy ipl parameters
-
-	lhi   %r0,4096
-	l     %r2,20(%r0)		# get address of parameter list
-	lhi   %r3,IPL_PARMBLOCK_ORIGIN
-	st    %r3,20(%r0)
-	lhi   %r4,1
-	cr    %r2,%r3			# start parameters < destination ?
-	jl    0f
-	lhi   %r1,1			# copy direction is upwards
-	j     1f
-0:	lhi   %r1,-1			# copy direction is downwards
-	ar    %r2,%r0
-	ar    %r3,%r0
-	ar    %r2,%r1
-	ar    %r3,%r1
-1:	mvc   0(1,%r3),0(%r2)		# finally copy ipl parameters
-	ar    %r3,%r1
-	ar    %r2,%r1
-	sr    %r0,%r4
-	jne   1b
-	b     2f-.LPG1(%r13)
-
-	.align 4
-.Lipl_schib:
-	.rept 13
-	.long 0
-	.endr
-
-	.globl ipl_parameter_flags
-ipl_parameter_flags:
-	.long 0
-	.globl ipl_devno
-ipl_devno:
-	.word 0
-2:
-.endm
-
 #ifdef CONFIG_64BIT
 #include "head64.S"
 #else
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index d8bb68a725273..1fa9fa1ca740a 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -37,13 +37,23 @@ startup:basr	%r13,0			# get base
 
 startup_continue:
 	basr	%r13,0			# get base
-.LPG1:	GET_IPL_DEVICE
-	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
+.LPG1:	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
 	lctl	%c0,%c15,.Lctl-.LPG1(%r13) # load control registers
 	l	%r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
 					# move IPL device to lowcore
 	mvc	__LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12)
+#
+# Setup stack
+#
+	l	%r15,.Linittu-.LPG1(%r13)
+	mvc	__LC_CURRENT(4),__TI_task(%r15)
+	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
+	st	%r15,__LC_KERNEL_STACK	# set end of kernel stack
+	ahi	%r15,-96
+	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
 
+	l	%r14,.Lipl_save_parameters-.LPG1(%r13)
+	basr	%r14,%r14
 #
 # clear bss memory
 #
@@ -115,6 +125,10 @@ startup_continue:
 	b	.Lfchunk-.LPG1(%r13)
 
 	.align 4
+.Lipl_save_parameters:
+	.long	ipl_save_parameters
+.Linittu:
+	.long	init_thread_union
 .Lpmask:
 	.byte	0
 .align 8
@@ -274,6 +288,20 @@ startup_continue:
 .Lbss_end:  .long _end
 .Lparmaddr: .long PARMAREA
 .Lsccbaddr: .long .Lsccb
+
+	.globl ipl_schib
+ipl_schib:
+	.rept 13
+	.long 0
+	.endr
+
+	.globl ipl_flags
+ipl_flags:
+	.long 0
+	.globl ipl_devno
+ipl_devno:
+	.word 0
+
 	.org	0x12000
 .globl s390_readinfo_sccb
 s390_readinfo_sccb:
@@ -305,16 +333,6 @@ s390_readinfo_sccb:
 	.globl	_stext
 _stext:	basr	%r13,0			# get base
 .LPG3:
-#
-# Setup stack
-#
-	l	%r15,.Linittu-.LPG3(%r13)
-	mvc	__LC_CURRENT(4),__TI_task(%r15)
-	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
-	st	%r15,__LC_KERNEL_STACK	# set end of kernel stack
-	ahi	%r15,-96
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
-
 # check control registers
 	stctl	%c0,%c15,0(%r15)
 	oi	2(%r15),0x40		# enable sigp emergency signal
@@ -333,6 +351,5 @@ _stext:	basr	%r13,0			# get base
 #
 	.align	8
 .Ldw:	.long	0x000a0000,0x00000000
-.Linittu:.long	init_thread_union
 .Lstart:.long	start_kernel
 .Laregs:.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index c2005101fee1b..1ebaa338aa7e1 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -39,7 +39,6 @@ startup_continue:
 	basr  %r13,0			 # get base
 .LPG1:  sll   %r13,1                     # remove high order bit
         srl   %r13,1
-	GET_IPL_DEVICE
         lhi   %r1,1                      # mode 1 = esame
 	mvi   __LC_AR_MODE_ID,1		 # set esame flag
         slr   %r0,%r0                    # set cpuid to zero
@@ -49,7 +48,18 @@ startup_continue:
 	lg    %r12,.Lparmaddr-.LPG1(%r13)# pointer to parameter area
 					 # move IPL device to lowcore
         mvc   __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
+#
+# Setup stack
+#
+	larl  %r15,init_thread_union
+	lg    %r14,__TI_task(%r15)	# cache current in lowcore
+	stg   %r14,__LC_CURRENT
+	aghi  %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
+	stg   %r15,__LC_KERNEL_STACK	# set end of kernel stack
+	aghi  %r15,-160
+	xc    __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
 
+	brasl %r14,ipl_save_parameters
 #
 # clear bss memory
 #
@@ -269,6 +279,19 @@ startup_continue:
 .Lparmaddr:
 	.quad	PARMAREA
 
+	.globl ipl_schib
+ipl_schib:
+	.rept 13
+	.long 0
+	.endr
+
+	.globl ipl_flags
+ipl_flags:
+	.long 0
+	.globl ipl_devno
+ipl_devno:
+	.word 0
+
 	.org	0x12000
 .globl s390_readinfo_sccb
 s390_readinfo_sccb:
@@ -300,24 +323,12 @@ s390_readinfo_sccb:
         .globl _stext
 _stext:	basr  %r13,0                    # get base
 .LPG3:
-#
-# Setup stack
-#
-	larl  %r15,init_thread_union
-	lg    %r14,__TI_task(%r15)      # cache current in lowcore
-	stg   %r14,__LC_CURRENT
-        aghi  %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
-        stg   %r15,__LC_KERNEL_STACK    # set end of kernel stack
-        aghi  %r15,-160
-        xc    __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
-
 # check control registers
         stctg  %c0,%c15,0(%r15)
 	oi     6(%r15),0x40             # enable sigp emergency signal
 	oi     4(%r15),0x10             # switch on low address proctection
         lctlg  %c0,%c15,0(%r15)
 
-#
         lam    0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess
         brasl  %r14,start_kernel        # go to C code
 #
@@ -325,7 +336,7 @@ _stext:	basr  %r13,0                    # get base
 #
         basr  %r13,0
 	lpswe .Ldw-.(%r13)           # load disabled wait psw
-#
+
             .align 8
 .Ldw:       .quad  0x0002000180000000,0x0000000000000000
 .Laregs:    .long  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 105ee15a2b316..6555cc48e28f8 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -189,9 +189,9 @@ static enum ipl_type ipl_get_type(void)
 {
 	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
 
-	if (!IPL_DEVNO_VALID)
+	if (!(ipl_flags & IPL_DEVNO_VALID))
 		return IPL_TYPE_UNKNOWN;
-	if (!IPL_PARMBLOCK_VALID)
+	if (!(ipl_flags & IPL_PARMBLOCK_VALID))
 		return IPL_TYPE_CCW;
 	if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION)
 		return IPL_TYPE_UNKNOWN;
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 050963f158025..61eb7caa1567b 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -16,11 +16,10 @@
 #include <linux/device.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
-
 #include <asm/cio.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
-
+#include <asm/setup.h>
 #include "airq.h"
 #include "cio.h"
 #include "css.h"
@@ -909,3 +908,38 @@ void reipl_ccw_dev(struct ccw_dev_id *devid)
 	cio_reset_channel_paths();
 	do_reipl_asm(*((__u32*)&schid));
 }
+
+extern struct schib ipl_schib;
+
+/*
+ * ipl_save_parameters gets called very early. It is not allowed to access
+ * anything in the bss section at all. The bss section is not cleared yet,
+ * but may contain some ipl parameters written by the firmware.
+ * These parameters (if present) are copied to 0x2000.
+ * To avoid corruption of the ipl parameters, all variables used by this
+ * function must reside on the stack or in the data section.
+ */
+void ipl_save_parameters(void)
+{
+	struct subchannel_id schid;
+	unsigned int *ipl_ptr;
+	void *src, *dst;
+
+	schid = *(struct subchannel_id *)__LC_SUBCHANNEL_ID;
+	if (!schid.one)
+		return;
+	if (stsch(schid, &ipl_schib))		/* schib not available */
+		return;
+	if (!ipl_schib.pmcw.dnv)		/* devno not valid */
+		return;
+	ipl_devno = ipl_schib.pmcw.dev;
+	ipl_flags |= IPL_DEVNO_VALID;
+	if (!ipl_schib.pmcw.qf)			/* not a qdio capable device */
+		return;
+	ipl_flags |= IPL_PARMBLOCK_VALID;
+	ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR;
+	src = (void *)(unsigned long)*ipl_ptr;
+	dst = (void *)IPL_PARMBLOCK_ORIGIN;
+	memmove(dst, src, PAGE_SIZE);		/* src and dst may overlap */
+	*ipl_ptr = IPL_PARMBLOCK_ORIGIN;	/* point at the relocated copy */
+}
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 2e3d4cca5e211..18695d10dedfd 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -35,6 +35,7 @@
 #define __LC_IO_NEW_PSW                 0x01f0
 #endif /* !__s390x__ */
 
+#define __LC_IPL_PARMBLOCK_PTR		0x014
 #define __LC_EXT_PARAMS                 0x080
 #define __LC_CPU_ADDRESS                0x084
 #define __LC_EXT_INT_CODE               0x086
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index 4a1126d8439a0..00c03e46689b1 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -125,13 +125,15 @@ struct ipl_parameter_block {
 /*
  * IPL validity flags and parameters as detected in head.S
  */
-extern u32 ipl_parameter_flags;
+extern u32 ipl_flags;
 extern u16 ipl_devno;
 
 void do_reipl(void);
 
-#define IPL_DEVNO_VALID		(ipl_parameter_flags & 1)
-#define IPL_PARMBLOCK_VALID	(ipl_parameter_flags & 2)
+enum {
+	IPL_DEVNO_VALID	= 1,
+	IPL_PARMBLOCK_VALID = 2,
+};
 
 #define IPL_PARMBLOCK_START	((struct ipl_parameter_block *) \
 				 IPL_PARMBLOCK_ORIGIN)
-- 
GitLab


From 81388d2a45b89c890b981cfc83b01ec15ae3483b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:17 +0200
Subject: [PATCH 0244/1063] [S390] Missing initialization in common i/o layer.

A previous patch that was intended to reduce stack usage within the
common i/o layer didn't consider the implicit memset(..., 0, ...) that
the initializers used before had performed.
Add these missing memsets wherever it's not obvious that the
concerned memory region is zeroed. This should give the same semantics
as before.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device_fsm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 35e162ba6d54a..7756f324fb6f4 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -267,6 +267,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
 			notify = 1;
 		}
 		/* fill out sense information */
+		memset(&cdev->id, 0, sizeof(cdev->id));
 		cdev->id.cu_type   = cdev->private->senseid.cu_type;
 		cdev->id.cu_model  = cdev->private->senseid.cu_model;
 		cdev->id.dev_type  = cdev->private->senseid.dev_type;
-- 
GitLab


From 6981e936aa156c747bb3e6aea414bba673457115 Mon Sep 17 00:00:00 2001
From: Frank Pavlic <fpavlic@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:19 +0200
Subject: [PATCH 0245/1063] [S390] qdio slsb processing state.

The last SLSB has to be set to STATE_PROCESSING if we really want to
use the PROCESSING feature.

Signed-off-by: Frank Pavlic <fpavlic@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/qdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c
index 16e3715d5c0d7..cde822d8b5c82 100644
--- a/drivers/s390/cio/qdio.c
+++ b/drivers/s390/cio/qdio.c
@@ -1129,7 +1129,7 @@ qdio_get_inbound_buffer_frontier(struct qdio_q *q)
 
 #ifdef QDIO_USE_PROCESSING_STATE
 	if (last_position>=0)
-		set_slsb(q, &last_position, SLSB_P_INPUT_NOT_INIT, &count);
+		set_slsb(q, &last_position, SLSB_P_INPUT_PROCESSING, &count);
 #endif /* QDIO_USE_PROCESSING_STATE */
 
 	QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
-- 
GitLab


From 9514e2311be97a01e8669c4de78e9fea37489f09 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:22 +0200
Subject: [PATCH 0246/1063] [S390] Kernel stack overflow handling.

Subtract the size of the initial stack frame from the correct
register. Otherwise we will end up in a program check loop.
Fix the offset into the save area as well.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 8b956d1538f55..29bbfbab73327 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -827,7 +827,7 @@ restart_go:
  */
 stack_overflow:
 	lg	%r15,__LC_PANIC_STACK	# change to panic stack
-	aghi	%r1,-SP_SIZE
+	aghi	%r15,-SP_SIZE
 	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
 	stmg	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
 	la	%r1,__LC_SAVE_AREA
@@ -835,7 +835,7 @@ stack_overflow:
 	je	0f
 	chi	%r12,__LC_PGM_OLD_PSW
 	je	0f
-	la	%r1,__LC_SAVE_AREA+16
+	la	%r1,__LC_SAVE_AREA+32
 0:	mvc	SP_R12(32,%r15),0(%r1)  # move %r12-%r15 to stack
         xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
         la      %r2,SP_PTREGS(%r15)	# load pt_regs
-- 
GitLab


From 45af3af8761a3f790fe414c017de039a08ccd780 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <cborntra@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:24 +0200
Subject: [PATCH 0247/1063] [S390] fix typo in vmcp.

Fix comment typo in vmcp, it is z/VM and not v/VM.

Signed-off-by: Christian Borntraeger <cborntra@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/char/vmcp.c | 2 +-
 drivers/s390/char/vmcp.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 19762f3476aad..1678b6c757ec7 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2004,2005 IBM Corporation
- * Interface implementation for communication with the v/VM control program
+ * Interface implementation for communication with the z/VM control program
  * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
  *
  *
diff --git a/drivers/s390/char/vmcp.h b/drivers/s390/char/vmcp.h
index 87389e730465f..8a5975f3dad7c 100644
--- a/drivers/s390/char/vmcp.h
+++ b/drivers/s390/char/vmcp.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2004, 2005 IBM Corporation
- * Interface implementation for communication with the v/VM control program
+ * Interface implementation for communication with the z/VM control program
  * Version 1.0
  * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
  *
-- 
GitLab


From 1f38d61347203055b55e34083cce7a9cd8c529a9 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <geraldsc@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:26 +0200
Subject: [PATCH 0248/1063] [S390] cleanup appldata.

Introduce asm header that contains the appldata data structures and
the diag inline assembly.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/appldata/appldata.h      | 16 ------
 arch/s390/appldata/appldata_base.c | 81 ++++-----------------------
 arch/s390/appldata/appldata_os.c   |  1 +
 include/asm-s390/appldata.h        | 90 ++++++++++++++++++++++++++++++
 4 files changed, 103 insertions(+), 85 deletions(-)
 create mode 100644 include/asm-s390/appldata.h

diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h
index 71d65eb306504..0429481dea633 100644
--- a/arch/s390/appldata/appldata.h
+++ b/arch/s390/appldata/appldata.h
@@ -29,22 +29,6 @@
 #define CTL_APPLDATA_NET_SUM	2125
 #define CTL_APPLDATA_PROC	2126
 
-#ifndef CONFIG_64BIT
-
-#define APPLDATA_START_INTERVAL_REC 0x00	/* Function codes for */
-#define APPLDATA_STOP_REC	    0x01	/* DIAG 0xDC	  */
-#define APPLDATA_GEN_EVENT_RECORD   0x02
-#define APPLDATA_START_CONFIG_REC   0x03
-
-#else
-
-#define APPLDATA_START_INTERVAL_REC 0x80
-#define APPLDATA_STOP_REC	    0x81
-#define APPLDATA_GEN_EVENT_RECORD   0x82
-#define APPLDATA_START_CONFIG_REC   0x83
-
-#endif /* CONFIG_64BIT */
-
 #define P_INFO(x...)	printk(KERN_INFO MY_PRINT_NAME " info: " x)
 #define P_ERROR(x...)	printk(KERN_ERR MY_PRINT_NAME " error: " x)
 #define P_WARNING(x...)	printk(KERN_WARNING MY_PRINT_NAME " status: " x)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index a0a94e0ef8d12..b69ed742f9817 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -14,20 +14,20 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/smp.h>
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/page-flags.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/sysctl.h>
-#include <asm/timer.h>
-//#include <linux/kernel_stat.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/workqueue.h>
+#include <asm/appldata.h>
+#include <asm/timer.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/smp.h>
 
 #include "appldata.h"
 
@@ -39,34 +39,6 @@
 
 #define TOD_MICRO	0x01000			/* nr. of TOD clock units
 						   for 1 microsecond */
-
-/*
- * Parameter list for DIAGNOSE X'DC'
- */
-#ifndef CONFIG_64BIT
-struct appldata_parameter_list {
-	u16 diag;		/* The DIAGNOSE code X'00DC'          */
-	u8  function;		/* The function code for the DIAGNOSE */
-	u8  parlist_length;	/* Length of the parameter list       */
-	u32 product_id_addr;	/* Address of the 16-byte product ID  */
-	u16 reserved;
-	u16 buffer_length;	/* Length of the application data buffer  */
-	u32 buffer_addr;	/* Address of the application data buffer */
-};
-#else
-struct appldata_parameter_list {
-	u16 diag;
-	u8  function;
-	u8  parlist_length;
-	u32 unused01;
-	u16 reserved;
-	u16 buffer_length;
-	u32 unused02;
-	u64 product_id_addr;
-	u64 buffer_addr;
-};
-#endif /* CONFIG_64BIT */
-
 /*
  * /proc entries (sysctl)
  */
@@ -181,46 +153,17 @@ static void appldata_work_fn(void *data)
 int appldata_diag(char record_nr, u16 function, unsigned long buffer,
 			u16 length, char *mod_lvl)
 {
-	unsigned long ry;
-	struct appldata_product_id {
-		char prod_nr[7];			/* product nr. */
-		char prod_fn[2];			/* product function */
-		char record_nr;				/* record nr. */
-		char version_nr[2];			/* version */
-		char release_nr[2];			/* release */
-		char mod_lvl[2];			/* modification lvl. */
-	} appldata_product_id = {
-	/* all strings are EBCDIC, record_nr is byte */
+	struct appldata_product_id id = {
 		.prod_nr    = {0xD3, 0xC9, 0xD5, 0xE4,
-				0xE7, 0xD2, 0xD9},	/* "LINUXKR" */
-		.prod_fn    = {0xD5, 0xD3},		/* "NL" */
+			       0xE7, 0xD2, 0xD9},	/* "LINUXKR" */
+		.prod_fn    = 0xD5D3,			/* "NL" */
 		.record_nr  = record_nr,
-		.version_nr = {0xF2, 0xF6},		/* "26" */
-		.release_nr = {0xF0, 0xF1},		/* "01" */
-		.mod_lvl    = {mod_lvl[0], mod_lvl[1]},
-	};
-	struct appldata_parameter_list appldata_parameter_list = {
-				.diag = 0xDC,
-				.function = function,
-				.parlist_length =
-					sizeof(appldata_parameter_list),
-				.buffer_length = length,
-				.product_id_addr =
-					(unsigned long) &appldata_product_id,
-				.buffer_addr = virt_to_phys((void *) buffer)
+		.version_nr = 0xF2F6,			/* "26" */
+		.release_nr = 0xF0F1,			/* "01" */
+		.mod_lvl    = (mod_lvl[0]) << 8 | mod_lvl[1],
 	};
 
-	if (!MACHINE_IS_VM)
-		return -ENOSYS;
-	ry = -1;
-	asm volatile(
-			"diag %1,%0,0xDC\n\t"
-			: "=d" (ry)
-			: "d" (&appldata_parameter_list),
-			  "m" (appldata_parameter_list),
-			  "m" (appldata_product_id)
-			: "cc");
-	return (int) ry;
+	return appldata_asm(&id, function, (void *) buffer, length);
 }
 /************************ timer, work, DIAG <END> ****************************/
 
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 161acc5c8a1b8..76a15523ae9e0 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -16,6 +16,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/netdevice.h>
 #include <linux/sched.h>
+#include <asm/appldata.h>
 #include <asm/smp.h>
 
 #include "appldata.h"
diff --git a/include/asm-s390/appldata.h b/include/asm-s390/appldata.h
new file mode 100644
index 0000000000000..b1770703b7060
--- /dev/null
+++ b/include/asm-s390/appldata.h
@@ -0,0 +1,90 @@
+/*
+ * include/asm-s390/appldata.h
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ */
+
+#ifndef _ASM_S390_APPLDATA_H
+#define _ASM_S390_APPLDATA_H
+
+#include <asm/io.h>
+
+#ifndef CONFIG_64BIT
+
+#define APPLDATA_START_INTERVAL_REC	0x00	/* Function codes for */
+#define APPLDATA_STOP_REC		0x01	/* DIAG 0xDC	      */
+#define APPLDATA_GEN_EVENT_REC		0x02
+#define APPLDATA_START_CONFIG_REC	0x03
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+	u16 diag;		/* The DIAGNOSE code X'00DC'	      */
+	u8  function;		/* The function code for the DIAGNOSE */
+	u8  parlist_length;	/* Length of the parameter list       */
+	u32 product_id_addr;	/* Address of the 16-byte product ID  */
+	u16 reserved;
+	u16 buffer_length;	/* Length of the application data buffer  */
+	u32 buffer_addr;	/* Address of the application data buffer */
+} __attribute__ ((packed));
+
+#else /* CONFIG_64BIT */
+
+#define APPLDATA_START_INTERVAL_REC	0x80
+#define APPLDATA_STOP_REC		0x81
+#define APPLDATA_GEN_EVENT_REC		0x82
+#define APPLDATA_START_CONFIG_REC	0x83
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+	u16 diag;
+	u8  function;
+	u8  parlist_length;
+	u32 unused01;
+	u16 reserved;
+	u16 buffer_length;
+	u32 unused02;
+	u64 product_id_addr;
+	u64 buffer_addr;
+} __attribute__ ((packed));
+
+#endif /* CONFIG_64BIT */
+
+struct appldata_product_id {
+	char prod_nr[7];	/* product number */
+	u16  prod_fn;		/* product function */
+	u8   record_nr; 	/* record number */
+	u16  version_nr;	/* version */
+	u16  release_nr;	/* release */
+	u16  mod_lvl;		/* modification level */
+} __attribute__ ((packed));
+
+static inline int appldata_asm(struct appldata_product_id *id,
+			       unsigned short fn, void *buffer,
+			       unsigned short length)
+{
+	struct appldata_parameter_list parm_list = { 0 }; /* clears reserved */
+	int ry;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+	parm_list.diag = 0xdc;			/* DIAGNOSE code X'00DC' */
+	parm_list.function = fn;		/* function code for the DIAGNOSE */
+	parm_list.parlist_length = sizeof(parm_list);
+	parm_list.buffer_length = length;
+	parm_list.product_id_addr = (unsigned long) id;
+	parm_list.buffer_addr = virt_to_phys(buffer);
+	asm volatile(
+		"diag %1,%0,0xdc"
+		: "=d" (ry)
+		: "d" (&parm_list), "m" (parm_list), "m" (*id)
+		: "cc");
+	return ry;	/* value left in the output register by the diag */
+}
+
+#endif /* _ASM_S390_APPLDATA_H */
-- 
GitLab


From 07d43ce6a2ba0bb914078c3b066a7a3bab57599d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:29 +0200
Subject: [PATCH 0249/1063] [S390] Remove kexec experimental flag.

Follow other architectures and remove kexec experimental flag.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 76122ce1e6cbc..b216ca659cdff 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -460,8 +460,7 @@ config S390_HYPFS_FS
 	  information in an s390 hypervisor environment.
 
 config KEXEC
-	bool "kexec system call (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "kexec system call"
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
-- 
GitLab


From e620c4940002348417e8d317d65bc7b152646493 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <cborntra@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:32 +0200
Subject: [PATCH 0250/1063] [S390] xpram off by one error.

The xpram driver shows and uses 4096 bytes less than available.

Signed-off-by: Christian Borntraeger <cborntra@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/xpram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index ca7d51f7eccc2..cab2c736683a0 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -453,7 +453,7 @@ static int __init xpram_init(void)
 		PRINT_WARN("No expanded memory available\n");
 		return -ENODEV;
 	}
-	xpram_pages = xpram_highest_page_index();
+	xpram_pages = xpram_highest_page_index() + 1;
 	PRINT_INFO("  %u pages expanded memory found (%lu KB).\n",
 		   xpram_pages, (unsigned long) xpram_pages*4);
 	rc = xpram_setup_sizes(xpram_pages);
-- 
GitLab


From 31b58088292c7f00f0b81088bfb557285b0b6247 Mon Sep 17 00:00:00 2001
From: Melissa Howland <melissah@us.ibm.com>
Date: Wed, 20 Sep 2006 15:59:34 +0200
Subject: [PATCH 0251/1063] [S390] Linux API for writing z/VM APPLDATA Monitor
 records.

This patch delivers a new Linux API in the form of a misc char
device that is useable from user space and allows write access
to the z/VM APPLDATA Monitor Records collected by the *MONITOR
System Service of z/VM.

Signed-off-by: Melissa Howland <melissah@us.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/defconfig           |   1 +
 drivers/s390/Kconfig          |   6 +
 drivers/s390/char/Makefile    |   1 +
 drivers/s390/char/monwriter.c | 292 ++++++++++++++++++++++++++++++++++
 include/asm-s390/Kbuild       |   2 +-
 include/asm-s390/monwriter.h  |  33 ++++
 6 files changed, 334 insertions(+), 1 deletion(-)
 create mode 100644 drivers/s390/char/monwriter.c
 create mode 100644 include/asm-s390/monwriter.h

diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index f1d4591eddbbd..35da53986b1b8 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -428,6 +428,7 @@ CONFIG_S390_TAPE_34XX=m
 # CONFIG_VMLOGRDR is not set
 # CONFIG_VMCP is not set
 # CONFIG_MONREADER is not set
+CONFIG_MONWRITER=m
 
 #
 # Cryptographic devices
diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig
index bc4261e8b6060..ae89b9b887434 100644
--- a/drivers/s390/Kconfig
+++ b/drivers/s390/Kconfig
@@ -213,6 +213,12 @@ config MONREADER
 	help
 	  Character device driver for reading z/VM monitor service records
 
+config MONWRITER
+	tristate "API for writing z/VM monitor service records"
+	default "m"
+	help
+	  Character device driver for writing z/VM monitor service records
+
 endmenu
 
 menu "Cryptographic devices"
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 0c0162ff6c0c0..c3e97b4fc1860 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -28,3 +28,4 @@ obj-$(CONFIG_S390_TAPE) += tape.o tape_class.o
 obj-$(CONFIG_S390_TAPE_34XX) += tape_34xx.o
 obj-$(CONFIG_S390_TAPE_3590) += tape_3590.o
 obj-$(CONFIG_MONREADER) += monreader.o
+obj-$(CONFIG_MONWRITER) += monwriter.o
diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c
new file mode 100644
index 0000000000000..1e3939aeb8ab7
--- /dev/null
+++ b/drivers/s390/char/monwriter.c
@@ -0,0 +1,292 @@
+/*
+ * drivers/s390/char/monwriter.c
+ *
+ * Character device driver for writing z/VM *MONITOR service records.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <Melissa.Howland@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/ctype.h>
+#include <linux/poll.h>
+#include <asm/uaccess.h>
+#include <asm/ebcdic.h>
+#include <asm/io.h>
+#include <asm/appldata.h>
+#include <asm/monwriter.h>
+
+#define MONWRITE_MAX_DATALEN	4024
+
+static int mon_max_bufs = 255;
+
+struct mon_buf {
+	struct list_head list;		/* entry in mon_private.list */
+	struct monwrite_hdr hdr;	/* header the buffer was created for */
+	int diag_done;			/* set once a start DIAG was issued */
+	char *data;			/* record data passed to the DIAG */
+};
+
+struct mon_private {
+	struct list_head list;		/* the mon_buf's of this open file */
+	struct monwrite_hdr hdr;	/* header being assembled by write() */
+	size_t hdr_to_read;		/* header bytes still missing */
+	size_t data_to_read;		/* data bytes still missing */
+	struct mon_buf *current_buf;	/* buffer the data is copied into */
+	int mon_buf_count;		/* checked against mon_max_bufs */
+};
+
+/*
+ * helper functions
+ */
+
+static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn)
+{
+	struct appldata_product_id id;
+	int rc;
+
+	strcpy(id.prod_nr, "LNXAPPL");	/* NOTE(review): NUL fits prod_nr? */
+	id.prod_fn = myhdr->applid;
+	id.record_nr = myhdr->record_num;
+	id.version_nr = myhdr->version;
+	id.release_nr = myhdr->release;
+	id.mod_lvl = myhdr->mod_level;
+	rc = appldata_asm(&id, fcn, (void *) buffer, myhdr->datalen);
+	if (rc <= 0)		/* zero or negative: hand back unchanged */
+		return rc;
+	if (rc == 5)		/* code 5 is reported as -EPERM */
+		return -EPERM;
+	printk("DIAG X'DC' error with return code: %i\n", rc);
+	return -EINVAL;		/* any other positive code */
+}
+
+static inline struct mon_buf *monwrite_find_hdr(struct mon_private *monpriv,
+						struct monwrite_hdr *monhdr)
+{
+	struct mon_buf *buf;	/* plain walk: nothing is unlinked here */
+
+	list_for_each_entry(buf, &monpriv->list, list)
+		if (buf->hdr.applid == monhdr->applid &&
+		    buf->hdr.record_num == monhdr->record_num &&
+		    buf->hdr.version == monhdr->version &&
+		    buf->hdr.release == monhdr->release &&
+		    buf->hdr.mod_level == monhdr->mod_level)
+			return buf;
+	return NULL;
+}
+
+/* Check a completed header; find, create or stop-and-free its buffer. */
+static int monwrite_new_hdr(struct mon_private *monpriv)
+{
+	struct monwrite_hdr *monhdr = &monpriv->hdr;
+	struct mon_buf *monbuf;
+	int rc = 0;		/* result of the stop DIAG, if one is issued */
+
+	if (monhdr->datalen > MONWRITE_MAX_DATALEN ||
+	    monhdr->mon_function > MONWRITE_START_CONFIG ||
+	    monhdr->hdrlen != sizeof(struct monwrite_hdr))
+		return -EINVAL;
+	monbuf = monwrite_find_hdr(monpriv, monhdr);
+	if (monbuf) {
+		monhdr->datalen = monbuf->hdr.datalen;	/* actual buffer size */
+		if (monhdr->mon_function == MONWRITE_STOP_INTERVAL) {
+			rc = monwrite_diag(monhdr, monbuf->data,
+					   APPLDATA_STOP_REC);
+			list_del(&monbuf->list);
+			monpriv->mon_buf_count--;
+			kfree(monbuf->data);
+			kfree(monbuf);
+			monbuf = NULL;	/* write() then expects no data */
+		}
+	} else if (monhdr->mon_function != MONWRITE_STOP_INTERVAL) {
+		if (monpriv->mon_buf_count >= mon_max_bufs)
+			return -ENOSPC;
+		monbuf = kzalloc(sizeof(struct mon_buf), GFP_KERNEL);
+		if (!monbuf)
+			return -ENOMEM;
+		monbuf->data = kzalloc(monhdr->datalen, GFP_KERNEL | GFP_DMA);
+		if (!monbuf->data) {
+			kfree(monbuf);
+			return -ENOMEM;
+		}
+		monbuf->hdr = *monhdr;	/* monbuf->hdr was zeroed until now */
+		list_add_tail(&monbuf->list, &monpriv->list);
+		monpriv->mon_buf_count++;
+	}
+	monpriv->current_buf = monbuf;
+	return rc;
+}
+
+static int monwrite_new_data(struct mon_private *monpriv)
+{
+	struct monwrite_hdr *monhdr = &monpriv->hdr;
+	struct mon_buf *monbuf = monpriv->current_buf;
+	int rc = 0;
+
+	switch (monhdr->mon_function) {
+	case MONWRITE_START_INTERVAL:
+		if (!monbuf->diag_done) {
+			rc = monwrite_diag(monhdr, monbuf->data,
+					   APPLDATA_START_INTERVAL_REC);
+			monbuf->diag_done = 1;	/* even if the DIAG failed */
+		}
+		break;
+	case MONWRITE_START_CONFIG:
+		if (!monbuf->diag_done) {
+			rc = monwrite_diag(monhdr, monbuf->data,
+					   APPLDATA_START_CONFIG_REC);
+			monbuf->diag_done = 1;	/* even if the DIAG failed */
+		}
+		break;
+	case MONWRITE_GEN_EVENT:
+		rc = monwrite_diag(monhdr, monbuf->data,
+				   APPLDATA_GEN_EVENT_REC);
+		list_del(&monbuf->list);
+		monpriv->mon_buf_count--;	/* buffer is freed below */
+		kfree(monbuf->data);
+		kfree(monbuf);
+		monpriv->current_buf = NULL;
+		break;
+	default:
+		BUG();	/* mon_function is checked in monwrite_new_hdr */
+	}
+	return rc;
+}
+
+/*
+ * file operations
+ */
+
+static int monwrite_open(struct inode *inode, struct file *filp)
+{
+	struct mon_private *monpriv;
+
+	monpriv = kzalloc(sizeof(struct mon_private), GFP_KERNEL);
+	if (!monpriv)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&monpriv->list);
+	monpriv->hdr_to_read = sizeof(monpriv->hdr);	/* expect a header */
+	filp->private_data = monpriv;	/* freed in monwrite_close */
+	return nonseekable_open(inode, filp);
+}
+
+static int monwrite_close(struct inode *inode, struct file *filp)
+{
+	struct mon_private *monpriv = filp->private_data;
+	struct mon_buf *entry, *next;	/* _safe walk: entries are freed */
+
+	list_for_each_entry_safe(entry, next, &monpriv->list, list) {
+		if (entry->hdr.mon_function != MONWRITE_GEN_EVENT)
+			monwrite_diag(&entry->hdr, entry->data,
+				      APPLDATA_STOP_REC);
+		monpriv->mon_buf_count--;
+		list_del(&entry->list);
+		kfree(entry->data);
+		kfree(entry);
+	}
+	kfree(monpriv);
+	return 0;	/* results of the stop DIAGs are not reported */
+}
+
+static ssize_t monwrite_write(struct file *filp, const char __user *data,
+			      size_t count, loff_t *ppos)
+{
+	struct mon_private *monpriv = filp->private_data;
+	size_t len, written;
+	void *to;
+	int rc;
+
+	for (written = 0; written < count; ) {
+		if (monpriv->hdr_to_read) {	/* still collecting a header */
+			len = min(count - written, monpriv->hdr_to_read);
+			to = (char *) &monpriv->hdr +
+				sizeof(monpriv->hdr) - monpriv->hdr_to_read;
+			if (copy_from_user(to, data + written, len)) {
+				rc = -EFAULT;
+				goto out_error;
+			}
+			monpriv->hdr_to_read -= len;
+			written += len;
+			if (monpriv->hdr_to_read > 0)	/* header incomplete */
+				continue;
+			rc = monwrite_new_hdr(monpriv);
+			if (rc)
+				goto out_error;
+			monpriv->data_to_read = monpriv->current_buf ?
+				monpriv->current_buf->hdr.datalen : 0;
+		}
+
+		if (monpriv->data_to_read) {	/* collecting record data */
+			len = min(count - written, monpriv->data_to_read);
+			to = monpriv->current_buf->data +
+				monpriv->hdr.datalen - monpriv->data_to_read;
+			if (copy_from_user(to, data + written, len)) {
+				rc = -EFAULT;
+				goto out_error;
+			}
+			monpriv->data_to_read -= len;
+			written += len;
+			if (monpriv->data_to_read > 0)	/* data incomplete */
+				continue;
+			rc = monwrite_new_data(monpriv);
+			if (rc)
+				goto out_error;
+		}
+		monpriv->hdr_to_read = sizeof(monpriv->hdr);
+	}
+	return written;
+
+out_error:
+	monpriv->data_to_read = 0;	/* give up on the current record */
+	monpriv->hdr_to_read = sizeof(struct monwrite_hdr);
+	return rc;
+}
+
+static struct file_operations monwrite_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = &monwrite_open,
+	.release = &monwrite_close,
+	.write	 = &monwrite_write,
+};
+
+static struct miscdevice mon_dev = {
+	.name	= "monwriter",
+	.fops	= &monwrite_fops,
+	.minor	= MISC_DYNAMIC_MINOR,
+};
+
+/*
+ * module init/exit
+ */
+
+static int __init mon_init(void)
+{
+	/* The device is only offered when running as a z/VM guest. */
+	if (!MACHINE_IS_VM)
+		return -ENODEV;
+	return misc_register(&mon_dev);
+}
+
+static void __exit mon_exit(void)
+{
+	WARN_ON(misc_deregister(&mon_dev) != 0);
+}
+
+module_init(mon_init);
+module_exit(mon_exit);
+
+module_param_named(max_bufs, mon_max_bufs, int, 0644);
+MODULE_PARM_DESC(max_bufs, "Maximum number of sample monitor data buffers "
+		 "that can be active at one time");
+
+MODULE_AUTHOR("Melissa Howland <Melissa.Howland@us.ibm.com>");
+MODULE_DESCRIPTION("Character device driver for writing z/VM "
+		   "APPLDATA monitor records.");
+MODULE_LICENSE("GPL");
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild
index ed8955f49e476..979145026a293 100644
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -1,4 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
 unifdef-y += cmb.h debug.h
-header-y += dasd.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h
+header-y += dasd.h monwriter.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h
diff --git a/include/asm-s390/monwriter.h b/include/asm-s390/monwriter.h
new file mode 100644
index 0000000000000..f0cbf96c52e61
--- /dev/null
+++ b/include/asm-s390/monwriter.h
@@ -0,0 +1,33 @@
+/*
+ * include/asm-s390/monwriter.h
+ *
+ * Copyright (C) IBM Corp. 2006
+ * Character device driver for writing z/VM APPLDATA monitor records
+ * Version 1.0
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ *
+ */
+
+#ifndef _ASM_390_MONWRITER_H
+#define _ASM_390_MONWRITER_H
+
+/* mon_function values */
+#define MONWRITE_START_INTERVAL	0x00 /* start interval recording */
+#define MONWRITE_STOP_INTERVAL	0x01 /* stop interval or config recording */
+#define MONWRITE_GEN_EVENT	0x02 /* generate event record */
+#define MONWRITE_START_CONFIG	0x03 /* start configuration recording */
+
+/* the header the app uses in its write() data */
+struct monwrite_hdr {
+	unsigned char mon_function;	/* one of the MONWRITE_* values */
+	unsigned short applid;		/* product function of the record */
+	unsigned char record_num;	/* record number */
+	unsigned short version;		/* product version */
+	unsigned short release;		/* product release */
+	unsigned short mod_level;	/* modification level */
+	unsigned short datalen;		/* bytes of data after the header */
+	unsigned char hdrlen;		/* sizeof(struct monwrite_hdr) */
+
+} __attribute__((packed));
+
+#endif /* _ASM_390_MONWRITER_H */
-- 
GitLab


From 9282ed929758b82f448a40d3c17319d794970624 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <geraldsc@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:37 +0200
Subject: [PATCH 0252/1063] [S390] Cleanup in page table related code.

Changed and simplified some page table related #defines and code.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/init.c        |  36 +++++------
 include/asm-s390/pgalloc.h |  67 ++++++++++----------
 include/asm-s390/pgtable.h | 124 +++++++++++++++++--------------------
 3 files changed, 106 insertions(+), 121 deletions(-)

diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6e6b6de777706..cfd9b8f7a5239 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -108,16 +108,23 @@ void __init paging_init(void)
         unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
         static const int ssm_mask = 0x04000000L;
 	unsigned long ro_start_pfn, ro_end_pfn;
+	unsigned long zones_size[MAX_NR_ZONES];
 
 	ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
 	ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
 
+	memset(zones_size, 0, sizeof(zones_size));
+	zones_size[ZONE_DMA] = max_low_pfn;
+	free_area_init_node(0, &contig_page_data, zones_size,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
+			    zholes_size);
+
 	/* unmap whole virtual address space */
 	
         pg_dir = swapper_pg_dir;
 
-	for (i=0;i<KERNEL_PGD_PTRS;i++) 
-	        pmd_clear((pmd_t*)pg_dir++);
+	for (i = 0; i < PTRS_PER_PGD; i++)
+		pmd_clear((pmd_t *) pg_dir++);
 		
 	/*
 	 * map whole physical memory to virtual memory (identity mapping) 
@@ -131,10 +138,7 @@ void __init paging_init(void)
                  */
 		pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 
-                pg_dir->pgd0 =  (_PAGE_TABLE | __pa(pg_table));
-                pg_dir->pgd1 =  (_PAGE_TABLE | (__pa(pg_table)+1024));
-                pg_dir->pgd2 =  (_PAGE_TABLE | (__pa(pg_table)+2048));
-                pg_dir->pgd3 =  (_PAGE_TABLE | (__pa(pg_table)+3072));
+		pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table);
                 pg_dir++;
 
                 for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
@@ -143,8 +147,8 @@ void __init paging_init(void)
 			else
 				pte = pfn_pte(pfn, PAGE_KERNEL);
                         if (pfn >= max_low_pfn)
-                                pte_clear(&init_mm, 0, &pte);
-                        set_pte(pg_table, pte);
+				pte_val(pte) = _PAGE_TYPE_EMPTY;
+			set_pte(pg_table, pte);
                         pfn++;
                 }
         }
@@ -159,16 +163,6 @@ void __init paging_init(void)
 			     : : "m" (pgdir_k), "m" (ssm_mask));
 
         local_flush_tlb();
-
-	{
-		unsigned long zones_size[MAX_NR_ZONES];
-
-		memset(zones_size, 0, sizeof(zones_size));
-		zones_size[ZONE_DMA] = max_low_pfn;
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
-				    zholes_size);
-	}
         return;
 }
 
@@ -236,10 +230,8 @@ void __init paging_init(void)
 					pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
 				else
 					pte = pfn_pte(pfn, PAGE_KERNEL);
-                                if (pfn >= max_low_pfn) {
-                                        pte_clear(&init_mm, 0, &pte); 
-                                        continue;
-                                }
+				if (pfn >= max_low_pfn)
+					pte_val(pte) = _PAGE_TYPE_EMPTY;
                                 set_pte(pt_dir, pte);
                                 pfn++;
                         }
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index a78e853e0dd52..803bc7064418c 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -21,6 +21,16 @@
 
 extern void diag10(unsigned long addr);
 
+/*
+ * Page allocation orders.
+ */
+#ifndef __s390x__
+# define PGD_ALLOC_ORDER	1
+#else /* __s390x__ */
+# define PMD_ALLOC_ORDER	2
+# define PGD_ALLOC_ORDER	2
+#endif /* __s390x__ */
+
 /*
  * Allocate and free page tables. The xxx_kernel() versions are
  * used to allocate a kernel page table - this turns on ASN bits
@@ -29,30 +39,23 @@ extern void diag10(unsigned long addr);
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd;
+	pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
 	int i;
 
+	if (!pgd)
+		return NULL;
+	for (i = 0; i < PTRS_PER_PGD; i++)
 #ifndef __s390x__
-	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,1);
-        if (pgd != NULL)
-		for (i = 0; i < USER_PTRS_PER_PGD; i++)
-			pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
-#else /* __s390x__ */
-	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,2);
-        if (pgd != NULL)
-		for (i = 0; i < PTRS_PER_PGD; i++)
-			pgd_clear(pgd + i);
-#endif /* __s390x__ */
+		pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
+#else
+		pgd_clear(pgd + i);
+#endif
 	return pgd;
 }
 
 static inline void pgd_free(pgd_t *pgd)
 {
-#ifndef __s390x__
-        free_pages((unsigned long) pgd, 1);
-#else /* __s390x__ */
-        free_pages((unsigned long) pgd, 2);
-#endif /* __s390x__ */
+	free_pages((unsigned long) pgd, PGD_ALLOC_ORDER);
 }
 
 #ifndef __s390x__
@@ -68,20 +71,19 @@ static inline void pgd_free(pgd_t *pgd)
 #else /* __s390x__ */
 static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pmd_t *pmd;
-        int i;
+	pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
+	int i;
 
-	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 2);
-	if (pmd != NULL) {
-		for (i=0; i < PTRS_PER_PMD; i++)
-			pmd_clear(pmd+i);
-	}
+	if (!pmd)
+		return NULL;
+	for (i=0; i < PTRS_PER_PMD; i++)
+		pmd_clear(pmd + i);
 	return pmd;
 }
 
 static inline void pmd_free (pmd_t *pmd)
 {
-	free_pages((unsigned long) pmd, 2);
+	free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
 }
 
 #define __pmd_free_tlb(tlb,pmd)			\
@@ -123,15 +125,14 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pte_t *pte;
-        int i;
-
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-	if (pte != NULL) {
-		for (i=0; i < PTRS_PER_PTE; i++) {
-			pte_clear(mm, vmaddr, pte+i);
-			vmaddr += PAGE_SIZE;
-		}
+	pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	int i;
+
+	if (!pte)
+		return NULL;
+	for (i=0; i < PTRS_PER_PTE; i++) {
+		pte_clear(mm, vmaddr, pte + i);
+		vmaddr += PAGE_SIZE;
 	}
 	return pte;
 }
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 24312387fa244..1a07028d575e5 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -89,19 +89,6 @@ extern char empty_zero_page[PAGE_SIZE];
 # define PTRS_PER_PGD    2048
 #endif /* __s390x__ */
 
-/*
- * pgd entries used up by user/kernel:
- */
-#ifndef __s390x__
-# define USER_PTRS_PER_PGD  512
-# define USER_PGD_PTRS      512
-# define KERNEL_PGD_PTRS    512
-#else /* __s390x__ */
-# define USER_PTRS_PER_PGD  2048
-# define USER_PGD_PTRS      2048
-# define KERNEL_PGD_PTRS    2048
-#endif /* __s390x__ */
-
 #define FIRST_USER_ADDRESS  0
 
 #define pte_ERROR(e) \
@@ -216,12 +203,14 @@ extern char empty_zero_page[PAGE_SIZE];
 #define _PAGE_RO        0x200          /* HW read-only                     */
 #define _PAGE_INVALID   0x400          /* HW invalid                       */
 
-/* Mask and four different kinds of invalid pages. */
-#define _PAGE_INVALID_MASK	0x601
-#define _PAGE_INVALID_EMPTY	0x400
-#define _PAGE_INVALID_NONE	0x401
-#define _PAGE_INVALID_SWAP	0x600
-#define _PAGE_INVALID_FILE	0x601
+/* Mask and six different types of pages. */
+#define _PAGE_TYPE_MASK		0x601
+#define _PAGE_TYPE_EMPTY	0x400
+#define _PAGE_TYPE_NONE		0x401
+#define _PAGE_TYPE_SWAP		0x600
+#define _PAGE_TYPE_FILE		0x601
+#define _PAGE_TYPE_RO		0x200
+#define _PAGE_TYPE_RW		0x000
 
 #ifndef __s390x__
 
@@ -280,15 +269,14 @@ extern char empty_zero_page[PAGE_SIZE];
 #endif /* __s390x__ */
 
 /*
- * No mapping available
+ * Page protection definitions.
  */
-#define PAGE_NONE_SHARED  __pgprot(_PAGE_INVALID_NONE)
-#define PAGE_NONE_PRIVATE __pgprot(_PAGE_INVALID_NONE)
-#define PAGE_RO_SHARED	  __pgprot(_PAGE_RO)
-#define PAGE_RO_PRIVATE	  __pgprot(_PAGE_RO)
-#define PAGE_COPY	  __pgprot(_PAGE_RO)
-#define PAGE_SHARED	  __pgprot(0)
-#define PAGE_KERNEL	  __pgprot(0)
+#define PAGE_NONE	__pgprot(_PAGE_TYPE_NONE)
+#define PAGE_RO		__pgprot(_PAGE_TYPE_RO)
+#define PAGE_RW		__pgprot(_PAGE_TYPE_RW)
+
+#define PAGE_KERNEL	PAGE_RW
+#define PAGE_COPY	PAGE_RO
 
 /*
  * The S390 can't do page protection for execute, and considers that the
@@ -296,23 +284,23 @@ extern char empty_zero_page[PAGE_SIZE];
  * the closest we can get..
  */
          /*xwr*/
-#define __P000  PAGE_NONE_PRIVATE
-#define __P001  PAGE_RO_PRIVATE
-#define __P010  PAGE_COPY
-#define __P011  PAGE_COPY
-#define __P100  PAGE_RO_PRIVATE
-#define __P101  PAGE_RO_PRIVATE
-#define __P110  PAGE_COPY
-#define __P111  PAGE_COPY
-
-#define __S000  PAGE_NONE_SHARED
-#define __S001  PAGE_RO_SHARED
-#define __S010  PAGE_SHARED
-#define __S011  PAGE_SHARED
-#define __S100  PAGE_RO_SHARED
-#define __S101  PAGE_RO_SHARED
-#define __S110  PAGE_SHARED
-#define __S111  PAGE_SHARED
+#define __P000	PAGE_NONE
+#define __P001	PAGE_RO
+#define __P010	PAGE_RO
+#define __P011	PAGE_RO
+#define __P100	PAGE_RO
+#define __P101	PAGE_RO
+#define __P110	PAGE_RO
+#define __P111	PAGE_RO
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_RO
+#define __S010	PAGE_RW
+#define __S011	PAGE_RW
+#define __S100	PAGE_RO
+#define __S101	PAGE_RO
+#define __S110	PAGE_RW
+#define __S111	PAGE_RW
 
 /*
  * Certain architectures need to do special things when PTEs
@@ -377,18 +365,18 @@ static inline int pmd_bad(pmd_t pmd)
 
 static inline int pte_none(pte_t pte)
 {
-	return (pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_EMPTY;
+	return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_EMPTY;
 }
 
 static inline int pte_present(pte_t pte)
 {
 	return !(pte_val(pte) & _PAGE_INVALID) ||
-		(pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_NONE;
+		(pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_NONE;
 }
 
 static inline int pte_file(pte_t pte)
 {
-	return (pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_FILE;
+	return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_FILE;
 }
 
 #define pte_same(a,b)	(pte_val(a) == pte_val(b))
@@ -461,7 +449,7 @@ static inline void pmd_clear(pmd_t * pmdp)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	pte_val(*ptep) = _PAGE_INVALID_EMPTY;
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 }
 
 /*
@@ -477,7 +465,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	/* Do not clobber _PAGE_INVALID_NONE pages!  */
+	/* Do not clobber _PAGE_TYPE_NONE pages!  */
 	if (!(pte_val(pte) & _PAGE_INVALID))
 		pte_val(pte) |= _PAGE_RO;
 	return pte;
@@ -556,26 +544,30 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	return pte;
 }
 
-static inline pte_t
-ptep_clear_flush(struct vm_area_struct *vma,
-		 unsigned long address, pte_t *ptep)
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
 {
-	pte_t pte = *ptep;
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
 #ifndef __s390x__
-	if (!(pte_val(pte) & _PAGE_INVALID)) {
 		/* S390 has 1mb segments, we are emulating 4MB segments */
 		pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
-		__asm__ __volatile__ ("ipte %2,%3"
-				      : "=m" (*ptep) : "m" (*ptep),
-				        "a" (pto), "a" (address) );
+#else
+		/* ipte in zarch mode can do the math */
+		pte_t *pto = ptep;
+#endif
+		asm volatile ("ipte %2,%3"
+			      : "=m" (*ptep) : "m" (*ptep),
+				"a" (pto), "a" (address) );
 	}
-#else /* __s390x__ */
-	if (!(pte_val(pte) & _PAGE_INVALID)) 
-		__asm__ __volatile__ ("ipte %2,%3"
-				      : "=m" (*ptep) : "m" (*ptep),
-				        "a" (ptep), "a" (address) );
-#endif /* __s390x__ */
-	pte_val(*ptep) = _PAGE_INVALID_EMPTY;
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+}
+
+static inline pte_t
+ptep_clear_flush(struct vm_area_struct *vma,
+		 unsigned long address, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+
+	__ptep_ipte(address, ptep);
 	return pte;
 }
 
@@ -755,7 +747,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 {
 	pte_t pte;
 	offset &= __SWP_OFFSET_MASK;
-	pte_val(pte) = _PAGE_INVALID_SWAP | ((type & 0x1f) << 2) |
+	pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) |
 		((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
 	return pte;
 }
@@ -778,7 +770,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 
 #define pgoff_to_pte(__off) \
 	((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
-		   | _PAGE_INVALID_FILE })
+		   | _PAGE_TYPE_FILE })
 
 #endif /* !__ASSEMBLY__ */
 
-- 
GitLab


From 6837a8c352efcc5efc70424e9bfd94ff9bfa9a47 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <geraldsc@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:39 +0200
Subject: [PATCH 0253/1063] [S390] Cleanup in signal handling code.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/signal.c | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index dd05423f87a84..642095ec7c077 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -114,29 +114,26 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 {
 	unsigned long old_mask = regs->psw.mask;
-	int err;
-  
+	_sigregs user_sregs;
+
 	save_access_regs(current->thread.acrs);
 
 	/* Copy a 'clean' PSW mask to the user to avoid leaking
 	   information about whether PER is currently on.  */
 	regs->psw.mask = PSW_MASK_MERGE(PSW_USER_BITS, regs->psw.mask);
-	err = __copy_to_user(&sregs->regs.psw, &regs->psw,
-			     sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs));
+	memcpy(&user_sregs.regs.psw, &regs->psw, sizeof(sregs->regs.psw) +
+	       sizeof(sregs->regs.gprs));
 	regs->psw.mask = old_mask;
-	if (err != 0)
-		return err;
-	err = __copy_to_user(&sregs->regs.acrs, current->thread.acrs,
-			     sizeof(sregs->regs.acrs));
-	if (err != 0)
-		return err;
+	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+	       sizeof(sregs->regs.acrs));
 	/* 
 	 * We have to store the fp registers to current->thread.fp_regs
 	 * to merge them with the emulated registers.
 	 */
 	save_fp_regs(&current->thread.fp_regs);
-	return __copy_to_user(&sregs->fpregs, &current->thread.fp_regs,
-			      sizeof(s390_fp_regs));
+	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
+	       sizeof(s390_fp_regs));
+	return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs));
 }
 
 /* Returns positive number on error */
@@ -144,27 +141,25 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 {
 	unsigned long old_mask = regs->psw.mask;
 	int err;
+	_sigregs user_sregs;
 
 	/* Alwys make any pending restarted system call return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-	err = __copy_from_user(&regs->psw, &sregs->regs.psw,
-			       sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs));
+	err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs));
 	regs->psw.mask = PSW_MASK_MERGE(old_mask, regs->psw.mask);
 	regs->psw.addr |= PSW_ADDR_AMODE;
 	if (err)
 		return err;
-	err = __copy_from_user(&current->thread.acrs, &sregs->regs.acrs,
-			       sizeof(sregs->regs.acrs));
-	if (err)
-		return err;
+	memcpy(&regs->psw, &user_sregs.regs.psw, sizeof(sregs->regs.psw) +
+	       sizeof(sregs->regs.gprs));
+	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+	       sizeof(sregs->regs.acrs));
 	restore_access_regs(current->thread.acrs);
 
-	err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs,
-			       sizeof(s390_fp_regs));
+	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+	       sizeof(s390_fp_regs));
 	current->thread.fp_regs.fpc &= FPC_VALID_MASK;
-	if (err)
-		return err;
 
 	restore_fp_regs(&current->thread.fp_regs);
 	regs->trap = -1;	/* disable syscall checks */
-- 
GitLab


From d02765d1af743567398eb6d523dea0ba5e5e7e8e Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <geraldsc@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:42 +0200
Subject: [PATCH 0254/1063] [S390] Make user-copy operations run-time
 configurable.

Introduces a struct uaccess_ops which allows setting user-copy
operations at run-time.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/s390_ksyms.c |   6 -
 arch/s390/kernel/setup.c      |   7 +
 arch/s390/lib/Makefile        |   3 +-
 arch/s390/lib/uaccess.S       | 211 ---------------------
 arch/s390/lib/uaccess64.S     | 207 ---------------------
 arch/s390/lib/uaccess_std.c   | 340 ++++++++++++++++++++++++++++++++++
 include/asm-s390/futex.h      |  87 +--------
 include/asm-s390/uaccess.h    | 171 ++++++-----------
 8 files changed, 411 insertions(+), 621 deletions(-)
 delete mode 100644 arch/s390/lib/uaccess.S
 delete mode 100644 arch/s390/lib/uaccess64.S
 create mode 100644 arch/s390/lib/uaccess_std.c

diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index c73a45467fa45..9f19e833a5625 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -25,12 +25,6 @@ EXPORT_SYMBOL(_oi_bitmap);
 EXPORT_SYMBOL(_ni_bitmap);
 EXPORT_SYMBOL(_zb_findmap);
 EXPORT_SYMBOL(_sb_findmap);
-EXPORT_SYMBOL(__copy_from_user_asm);
-EXPORT_SYMBOL(__copy_to_user_asm);
-EXPORT_SYMBOL(__copy_in_user_asm);
-EXPORT_SYMBOL(__clear_user_asm);
-EXPORT_SYMBOL(__strncpy_from_user_asm);
-EXPORT_SYMBOL(__strnlen_user_asm);
 EXPORT_SYMBOL(diag10);
 
 /*
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f2a9165ca4f86..e229af59976c5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -50,6 +50,12 @@
 #include <asm/ptrace.h>
 #include <asm/sections.h>
 
+/*
+ * User copy operations.
+ */
+struct uaccess_ops uaccess;
+EXPORT_SYMBOL_GPL(uaccess);
+
 /*
  * Machine setup..
  */
@@ -641,6 +647,7 @@ setup_arch(char **cmdline_p)
 
 	memory_end = memory_size;
 
+	memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
 	parse_early_param();
 
 #ifndef CONFIG_64BIT
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index e05d087a6eae3..96c82424d88be 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -4,6 +4,5 @@
 
 EXTRA_AFLAGS := -traditional
 
-lib-y += delay.o string.o
-lib-y += $(if $(CONFIG_64BIT),uaccess64.o,uaccess.o)
+lib-y += delay.o string.o uaccess_std.o
 lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/uaccess.S b/arch/s390/lib/uaccess.S
deleted file mode 100644
index 837275284d9fa..0000000000000
--- a/arch/s390/lib/uaccess.S
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- *  arch/s390/lib/uaccess.S
- *    __copy_{from|to}_user functions.
- *
- *  s390
- *    Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- *  These functions have standard call interface
- */
-
-#include <linux/errno.h>
-#include <asm/lowcore.h>
-#include <asm/asm-offsets.h>
-
-        .text
-        .align 4
-        .globl __copy_from_user_asm
-	# %r2 = to, %r3 = n, %r4 = from
-__copy_from_user_asm:
-	slr	%r0,%r0
-0:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1f
-	slr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	ahi	%r3,-256
-2:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1b
-3:	slr	%r2,%r2
-	br	%r14
-4:	lhi	%r0,-4096
-	lr	%r5,%r4
-	slr	%r5,%r0
-	nr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcp	0(%r5,%r2),0(%r4),%r0
-	slr	%r3,%r5
-6:	lr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.long	0b,4b
-	.long	2b,4b
-	.long	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_to_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_to_user_asm:
-	slr	%r0,%r0
-0:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1f
-	slr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	ahi	%r3,-256
-2:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1b
-3:	slr	%r2,%r2
-	br	%r14
-4:	lhi	%r0,-4096
-	lr	%r5,%r4
-	slr	%r5,%r0
-	nr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcs	0(%r5,%r4),0(%r2),%r0
-	slr	%r3,%r5
-6:	lr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.long	0b,4b
-	.long	2b,4b
-	.long	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_in_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_in_user_asm:
-	ahi	%r3,-1
-	jo	6f
-	sacf	256
-	bras	%r1,4f
-0:	ahi	%r3,257
-1:	mvc	0(1,%r4),0(%r2)
-	la	%r2,1(%r2)
-	la	%r4,1(%r4)
-	ahi	%r3,-1
-	jnz	1b
-2:	lr	%r2,%r3
-	br	%r14
-3:	mvc	0(256,%r4),0(%r2)
-	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-4:	ahi	%r3,-256
-	jnm	3b
-5:	ex	%r3,4(%r1)
-	sacf	0
-6:	slr	%r2,%r2
-	br	%r14
-        .section __ex_table,"a"
-	.long	1b,2b
-	.long	3b,0b
-	.long	5b,0b
-        .previous
-
-        .align 4
-        .text
-        .globl __clear_user_asm
-	# %r2 = to, %r3 = n
-__clear_user_asm:
-	bras	%r5,0f
-	.long	empty_zero_page
-0:	l	%r5,0(%r5)
-	slr	%r0,%r0
-1:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2f
-	slr	%r2,%r2
-	br	%r14
-2:	la	%r2,256(%r2)
-	ahi	%r3,-256
-3:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2b
-4:	slr	%r2,%r2
-	br	%r14
-5:	lhi	%r0,-4096
-	lr	%r4,%r2
-	slr	%r4,%r0
-	nr	%r4,%r0		# %r4 = (%r2 + 4096) & -4096
-	slr	%r4,%r2		# %r4 = #bytes to next user page boundary
-	clr	%r3,%r4		# clear crosses next page boundary ?
-	jnh	7f		# no, the current page faulted
-	# clear with the reduced length which is < 256
-6:	mvcs	0(%r4,%r2),0(%r5),%r0
-	slr	%r3,%r4
-7:	lr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.long	1b,5b
-	.long	3b,5b
-	.long	6b,7b
-        .previous
-
-        .align 4
-        .text
-        .globl __strncpy_from_user_asm
-	# %r2 = count, %r3 = dst, %r4 = src
-__strncpy_from_user_asm:
-	lhi	%r0,0
-	lr	%r1,%r4
-	la	%r4,0(%r4)	# clear high order bit from %r4
-	la	%r2,0(%r2,%r4)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	lr	%r1,%r2
-	jh	1f		# \0 found in string ?
-	ahi	%r1,1		# include \0 in copy
-1:	slr	%r1,%r4		# %r1 = copy length (without \0)
-	slr	%r2,%r4		# %r2 = return length (including \0)
-2:	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3f
-	br	%r14
-3:	la	%r3,256(%r3)
-	la	%r4,256(%r4)
-	ahi	%r1,-256
-	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3b
-	br	%r14
-4:	sacf	0
-	lhi	%r2,-EFAULT
-	br	%r14
-	.section __ex_table,"a"
-	.long	0b,4b
-	.previous
-
-        .align 4
-        .text
-        .globl __strnlen_user_asm
-	# %r2 = count, %r3 = src
-__strnlen_user_asm:
-	lhi	%r0,0
-	lr	%r1,%r3
-	la	%r3,0(%r3)	# clear high order bit from %r4
-	la	%r2,0(%r2,%r3)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	ahi	%r2,1		# strnlen_user result includes the \0
-				# or return count+1 if \0 not found
-	slr	%r2,%r3
-	br	%r14
-2:	sacf	0
-	slr	%r2,%r2		# return 0 on exception
-	br	%r14
-	.section __ex_table,"a"
-	.long	0b,2b
-	.previous
diff --git a/arch/s390/lib/uaccess64.S b/arch/s390/lib/uaccess64.S
deleted file mode 100644
index 1f755be22f927..0000000000000
--- a/arch/s390/lib/uaccess64.S
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- *  arch/s390x/lib/uaccess.S
- *    __copy_{from|to}_user functions.
- *
- *  s390
- *    Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- *  These functions have standard call interface
- */
-
-#include <linux/errno.h>
-#include <asm/lowcore.h>
-#include <asm/asm-offsets.h>
-
-        .text
-        .align 4
-        .globl __copy_from_user_asm
-	# %r2 = to, %r3 = n, %r4 = from
-__copy_from_user_asm:
-	slgr	%r0,%r0
-0:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1f
-	slgr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	aghi	%r3,-256
-2:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1b
-3:	slgr	%r2,%r2
-	br	%r14
-4:	lghi	%r0,-4096
-	lgr	%r5,%r4
-	slgr	%r5,%r0
-	ngr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slgr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clgr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcp	0(%r5,%r2),0(%r4),%r0
-	slgr	%r3,%r5
-6:	lgr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.quad	0b,4b
-	.quad	2b,4b
-	.quad	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_to_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_to_user_asm:
-	slgr	%r0,%r0
-0:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1f
-	slgr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	aghi	%r3,-256
-2:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1b
-3:	slgr	%r2,%r2
-	br	%r14
-4:	lghi	%r0,-4096
-	lgr	%r5,%r4
-	slgr	%r5,%r0
-	ngr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slgr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clgr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcs	0(%r5,%r4),0(%r2),%r0
-	slgr	%r3,%r5
-6:	lgr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.quad	0b,4b
-	.quad	2b,4b
-	.quad	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_in_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_in_user_asm:
-	aghi	%r3,-1
-	jo	6f
-	sacf	256
-	bras	%r1,4f
-0:	aghi	%r3,257
-1:	mvc	0(1,%r4),0(%r2)
-	la	%r2,1(%r2)
-	la	%r4,1(%r4)
-	aghi	%r3,-1
-	jnz	1b
-2:	lgr	%r2,%r3
-	br	%r14
-3:	mvc	0(256,%r4),0(%r2)
-	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-4:	aghi	%r3,-256
-	jnm	3b
-5:	ex	%r3,4(%r1)
-	sacf	0
-6:	slgr	%r2,%r2
-	br	14
-        .section __ex_table,"a"
-	.quad	1b,2b
-	.quad	3b,0b
-	.quad	5b,0b
-        .previous
-
-        .align 4
-        .text
-        .globl __clear_user_asm
-	# %r2 = to, %r3 = n
-__clear_user_asm:
-	slgr	%r0,%r0
-	larl	%r5,empty_zero_page
-1:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2f
-	slgr	%r2,%r2
-	br	%r14
-2:	la	%r2,256(%r2)
-	aghi	%r3,-256
-3:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2b
-4:	slgr	%r2,%r2
-	br	%r14
-5:	lghi	%r0,-4096
-	lgr	%r4,%r2
-	slgr	%r4,%r0
-	ngr	%r4,%r0		# %r4 = (%r2 + 4096) & -4096
-	slgr	%r4,%r2		# %r4 = #bytes to next user page boundary
-	clgr	%r3,%r4		# clear crosses next page boundary ?
-	jnh	7f		# no, the current page faulted
-	# clear with the reduced length which is < 256
-6:	mvcs	0(%r4,%r2),0(%r5),%r0
-	slgr	%r3,%r4
-7:	lgr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.quad	1b,5b
-	.quad	3b,5b
-	.quad	6b,7b
-        .previous
-
-        .align 4
-        .text
-        .globl __strncpy_from_user_asm
-	# %r2 = count, %r3 = dst, %r4 = src
-__strncpy_from_user_asm:
-	lghi	%r0,0
-	lgr	%r1,%r4
-	la	%r2,0(%r2,%r4)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	lgr	%r1,%r2
-	jh	1f		# \0 found in string ?
-	aghi	%r1,1		# include \0 in copy
-1:	slgr	%r1,%r4		# %r1 = copy length (without \0)
-	slgr	%r2,%r4		# %r2 = return length (including \0)
-2:	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3f
-	br	%r14
-3:	la	%r3,256(%r3)
-	la	%r4,256(%r4)
-	aghi	%r1,-256
-	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3b
-	br	%r14
-4:	sacf	0
-	lghi	%r2,-EFAULT
-	br	%r14
-	.section __ex_table,"a"
-	.quad	0b,4b
-	.previous
-
-        .align 4
-        .text
-        .globl __strnlen_user_asm
-	# %r2 = count, %r3 = src
-__strnlen_user_asm:
-	lghi	%r0,0
-	lgr	%r1,%r3
-	la	%r2,0(%r2,%r3)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	aghi	%r2,1		# strnlen_user result includes the \0
-				# or return count+1 if \0 not found
-	slgr	%r2,%r3
-	br	%r14
-2:	sacf	0
-	slgr	%r2,%r2		# return 0 on exception
-	br	%r14
-	.section __ex_table,"a"
-	.quad	0b,2b
-	.previous
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
new file mode 100644
index 0000000000000..9a4d4a29ea79d
--- /dev/null
+++ b/arch/s390/lib/uaccess_std.c
@@ -0,0 +1,340 @@
+/*
+ *  arch/s390/lib/uaccess_std.c
+ *
+ *  Standard user space access functions based on mvcp/mvcs and doing
+ *  interesting things in the secondary space mode.
+ *
+ *    Copyright (C) IBM Corp. 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+#include <asm/futex.h>
+
+#ifndef __s390x__
+#define AHI	"ahi"
+#define ALR	"alr"
+#define CLR	"clr"
+#define LHI	"lhi"
+#define SLR	"slr"
+#else
+#define AHI	"aghi"
+#define ALR	"algr"
+#define CLR	"clgr"
+#define LHI	"lghi"
+#define SLR	"slgr"
+#endif
+
+size_t copy_from_user_std(size_t size, const void __user *ptr, void *x)
+{
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -256UL;
+	asm volatile(
+		"0: mvcp  0(%0,%2),0(%1),%3\n"
+		"   jz    5f\n"
+		"1:"ALR"  %0,%3\n"
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"2: mvcp  0(%0,%2),0(%1),%3\n"
+		"   jnz   1b\n"
+		"   j     5f\n"
+		"3: la    %4,255(%1)\n"	/* %4 = ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"  "SLR"  %4,%1\n"
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   6f\n"
+		"4: mvcp  0(%4,%2),0(%1),%3\n"
+		"  "SLR"  %0,%4\n"
+		"   j     6f\n"
+		"5:"SLR"  %0,%0\n"
+		"6: \n"
+		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_from_user_std_small(size_t size, const void __user *ptr, void *x)
+{
+	unsigned long tmp1, tmp2;
+
+	tmp1 = 0UL;
+	asm volatile(
+		"0: mvcp  0(%0,%2),0(%1),%3\n"
+		"  "SLR"  %0,%0\n"
+		"   j     3f\n"
+		"1: la    %4,255(%1)\n" /* %4 = ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"  "SLR"  %4,%1\n"
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   3f\n"
+		"2: mvcp  0(%4,%2),0(%1),%3\n"
+		"  "SLR"  %0,%4\n"
+		"3:\n"
+		EX_TABLE(0b,1b) EX_TABLE(2b,3b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_to_user_std(size_t size, void __user *ptr, const void *x)
+{
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -256UL;
+	asm volatile(
+		"0: mvcs  0(%0,%1),0(%2),%3\n"
+		"   jz    5f\n"
+		"1:"ALR"  %0,%3\n"
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"2: mvcs  0(%0,%1),0(%2),%3\n"
+		"   jnz   1b\n"
+		"   j     5f\n"
+		"3: la    %4,255(%1)\n" /* %4 = ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"  "SLR"  %4,%1\n"
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   6f\n"
+		"4: mvcs  0(%4,%1),0(%2),%3\n"
+		"  "SLR"  %0,%4\n"
+		"   j     6f\n"
+		"5:"SLR"  %0,%0\n"
+		"6: \n"
+		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_to_user_std_small(size_t size, void __user *ptr, const void *x)
+{
+	unsigned long tmp1, tmp2;
+
+	tmp1 = 0UL;
+	asm volatile(
+		"0: mvcs  0(%0,%1),0(%2),%3\n"
+		"  "SLR"  %0,%0\n"
+		"   j     3f\n"
+		"1: la    %4,255(%1)\n" /* ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* (ptr + 255) & -4096UL */
+		"  "SLR"  %4,%1\n"
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   3f\n"
+		"2: mvcs  0(%4,%1),0(%2),%3\n"
+		"  "SLR"  %0,%4\n"
+		"3:\n"
+		EX_TABLE(0b,1b) EX_TABLE(2b,3b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_in_user_std(size_t size, void __user *to, const void __user *from)
+{
+	unsigned long tmp1;
+
+	/* User-to-user copy in secondary space mode; returns the bytes left uncopied. */
+	asm volatile(
+		"   sacf  256\n"	/* entered first: every exit passes 6: */
+		"  "AHI"  %0,-1\n"
+		"   jo    5f\n"
+		"   bras  %3,3f\n"	/* %3 = address of label 0, used by ex */
+		"0:"AHI"  %0,257\n"	/* fault fixup: undo the -1/-256 bias */
+		"1: mvc   0(1,%1),0(%2)\n"	/* retry byte-wise until it faults */
+		"   la    %1,1(%1)\n"
+		"   la    %2,1(%2)\n"
+		"  "AHI"  %0,-1\n"
+		"   jnz   1b\n"
+		"   j     5f\n"
+		"2: mvc   0(256,%1),0(%2)\n"
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"3:"AHI"  %0,-256\n"
+		"   jnm   2b\n"
+		"4: ex    %0,1b-0b(%3)\n"	/* tail: mvc at 1: with length from %0 */
+		"5: "SLR"  %0,%0\n"
+		"6: sacf  0\n"		/* fault exit too: leave secondary mode */
+		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+		: "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t clear_user_std(size_t size, void __user *to)
+{
+	unsigned long tmp1, tmp2;
+
+	/* Zero user memory in secondary space mode; returns the bytes left uncleared. */
+	asm volatile(
+		"   sacf  256\n"	/* entered first: every exit passes 6: */
+		"  "AHI"  %0,-1\n"
+		"   jo    5f\n"
+		"   bras  %3,3f\n"	/* %3 = address of the xc template below */
+		"   xc    0(1,%1),0(%1)\n"
+		"0:"AHI"  %0,257\n"	/* fault fixup: undo the -1/-256 bias */
+		"   la    %2,255(%1)\n" /* %2 = ptr + 255 */
+		"   srl   %2,12\n"
+		"   sll   %2,12\n"	/* %2 = (ptr + 255) & -4096 */
+		"  "SLR"  %2,%1\n"
+		"  "CLR"  %0,%2\n"	/* clear crosses next page boundary? */
+		"   jnh   6f\n"		/* no, current page faulted: keep count */
+		"  "AHI"  %2,-1\n"
+		"1: ex    %2,0(%3)\n"	/* clear up to the page boundary */
+		"  "AHI"  %2,1\n"
+		"  "SLR"  %0,%2\n"	/* %0 = bytes that could not be cleared */
+		"   j     6f\n"
+		"2: xc    0(256,%1),0(%1)\n"
+		"   la    %1,256(%1)\n"
+		"3:"AHI"  %0,-256\n"
+		"   jnm   2b\n"
+		"4: ex    %0,0(%3)\n"
+		"5: "SLR"  %0,%0\n"
+		"6: sacf  0\n"		/* fault exit too: leave secondary mode */
+		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+		: "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t strnlen_user_std(size_t size, const char __user *src)
+{
+	register unsigned long reg0 asm("0") = 0UL;	/* srst compares against the byte in %r0 */
+	unsigned long tmp1, tmp2;
+
+	asm volatile(
+		"   la    %2,0(%1)\n"
+		"   la    %3,0(%0,%1)\n"	/* %3 = src + size, end of the scan */
+		"  "SLR"  %0,%0\n"	/* result stays 0 if the scan faults */
+		"   sacf  256\n"
+		"0: srst  %3,%2\n"
+		"   jo    0b\n"
+		"   la    %0,1(%3)\n"	/* strnlen_user result includes the \0 */
+		"  "SLR"  %0,%1\n"
+		"1: sacf  0\n"
+		EX_TABLE(0b,1b)
+		: "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
+{
+	register unsigned long reg0 asm("0") = 0UL;
+	unsigned long tmp1, tmp2;
+
+	asm volatile(
+		"   la    %3,0(%1)\n"
+		"   la    %4,0(%0,%1)\n"
+		"   sacf  256\n"
+		"0: srst  %4,%3\n"
+		"   jo    0b\n"
+		"   sacf  0\n"
+		"   la    %0,0(%4)\n"
+		"   jh    1f\n"		/* found \0 in string ? */
+		"  "AHI"  %4,1\n"	/* include \0 in copy */
+		"1:"SLR"  %0,%1\n"	/* %0 = return length (without \0) */
+		"  "SLR"  %4,%1\n"	/* %4 = copy length (including \0) */
+		"2: mvcp  0(%4,%2),0(%1),%5\n"
+		"   jz    9f\n"
+		"3:"AHI"  %4,-256\n"
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"4: mvcp  0(%4,%2),0(%1),%5\n"
+		"   jnz   3b\n"
+		"   j     9f\n"
+		"7: sacf  0\n"
+		"8:"LHI"  %0,%6\n"
+		"9:\n"
+		EX_TABLE(0b,7b) EX_TABLE(2b,8b) EX_TABLE(4b,8b)
+		: "+a" (size), "+a" (src), "+d" (dst), "=a" (tmp1), "=a" (tmp2)
+		: "d" (reg0), "K" (-EFAULT) : "cc", "memory");
+	return size;
+}
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
+	asm volatile(							\
+		"   sacf  256\n"					\
+		"0: l     %1,0(%6)\n"					\
+		"1:"insn						\
+		"2: cs    %1,%2,0(%6)\n"				\
+		"3: jl    1b\n"						\
+		"   lhi   %0,0\n"					\
+		"4: sacf  0\n"						\
+		EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b)		\
+		: "=d" (ret), "=&d" (oldval), "=&d" (newval),		\
+		  "=m" (*uaddr)						\
+		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
+		  "m" (*uaddr) : "cc");
+
+int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old)
+{
+	int oldval = 0, newval, ret;
+
+	inc_preempt_count();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("lr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+	dec_preempt_count();
+	*old = oldval;
+	return ret;
+}
+
+int futex_atomic_cmpxchg(int __user *uaddr, int oldval, int newval)
+{
+	int ret;	/* value found at *uaddr, or -EFAULT if the access faulted */
+
+	asm volatile(
+		"   sacf 256\n"
+		"0: cs   %1,%4,0(%5)\n"	/* fault may be reported at the cs ... */
+		"1: lr   %0,%1\n"	/* ... or at the instruction after it */
+		"2: sacf 0\n"		/* on a fault ret keeps its initial -EFAULT */
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+		: "cc", "memory" );
+	return ret;
+}
+
+struct uaccess_ops uaccess_std = {
+	.copy_from_user = copy_from_user_std,
+	.copy_from_user_small = copy_from_user_std_small,
+	.copy_to_user = copy_to_user_std,
+	.copy_to_user_small = copy_to_user_std_small,
+	.copy_in_user = copy_in_user_std,
+	.clear_user = clear_user_std,
+	.strnlen_user = strnlen_user_std,
+	.strncpy_from_user = strncpy_from_user_std,
+	.futex_atomic_op = futex_atomic_op,
+	.futex_atomic_cmpxchg = futex_atomic_cmpxchg,
+};
diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h
index ffedf14f89f68..5e261e1de6719 100644
--- a/include/asm-s390/futex.h
+++ b/include/asm-s390/futex.h
@@ -7,75 +7,21 @@
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
-#ifndef __s390x__
-#define __futex_atomic_fixup \
-		     ".section __ex_table,\"a\"\n"			\
-		     "   .align 4\n"					\
-		     "   .long  0b,4b,2b,4b,3b,4b\n"			\
-		     ".previous"
-#else /* __s390x__ */
-#define __futex_atomic_fixup \
-		     ".section __ex_table,\"a\"\n"			\
-		     "   .align 8\n"					\
-		     "   .quad  0b,4b,2b,4b,3b,4b\n"			\
-		     ".previous"
-#endif /* __s390x__ */
-
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
-	asm volatile("   sacf 256\n"					\
-		     "0: l   %1,0(%6)\n"				\
-		     "1: " insn						\
-		     "2: cs  %1,%2,0(%6)\n"				\
-		     "3: jl  1b\n"					\
-		     "   lhi %0,0\n"					\
-		     "4: sacf 0\n"					\
-		     __futex_atomic_fixup				\
-		     : "=d" (ret), "=&d" (oldval), "=&d" (newval),	\
-		       "=m" (*uaddr)					\
-		     : "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
-		       "m" (*uaddr) : "cc" );
-
 static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, newval, ret;
+	int oldval, ret;
+
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-		__futex_atomic_op("lr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ADD:
-		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_OR:
-		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ANDN:
-		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_XOR:
-		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	default:
-		ret = -ENOSYS;
-	}
-
-	dec_preempt_count();
+	ret = uaccess.futex_atomic_op(op, uaddr, oparg, &oldval);
 
 	if (!ret) {
 		switch (cmp) {
@@ -91,32 +37,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	return ret;
 }
 
-static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr,
+						int oldval, int newval)
 {
-	int ret;
-
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
-	asm volatile("   sacf 256\n"
-		     "   cs   %1,%4,0(%5)\n"
-		     "0: lr   %0,%1\n"
-		     "1: sacf 0\n"
-#ifndef __s390x__
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 4\n"
-		     "   .long  0b,1b\n"
-		     ".previous"
-#else /* __s390x__ */
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 8\n"
-		     "   .quad  0b,1b\n"
-		     ".previous"
-#endif /* __s390x__ */
-		     : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
-		     : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
-		     : "cc", "memory" );
-	return oldval;
+
+	return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval);
 }
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h
index 0b7c0ca4c3d77..39a2716ae1884 100644
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -47,7 +47,7 @@
 		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
 	asm volatile ("lctlg 7,7,%0" : : "m" (__pto) );			\
 })
-#else
+#else /* __s390x__ */
 #define set_fs(x) \
 ({									\
 	unsigned long __pto;						\
@@ -56,7 +56,7 @@
 		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
 	asm volatile ("lctl  7,7,%0" : : "m" (__pto) );			\
 })
-#endif
+#endif /* __s390x__ */
 
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)
 
@@ -85,76 +85,50 @@ struct exception_table_entry
         unsigned long insn, fixup;
 };
 
-#ifndef __s390x__
-#define __uaccess_fixup \
-	".section .fixup,\"ax\"\n"	\
-	"2: lhi    %0,%4\n"		\
-	"   bras   1,3f\n"		\
-	"   .long  1b\n"		\
-	"3: l      1,0(1)\n"		\
-	"   br     1\n"			\
-	".previous\n"			\
-	".section __ex_table,\"a\"\n"	\
-	"   .align 4\n"			\
-	"   .long  0b,2b\n"		\
-	".previous"
-#define __uaccess_clobber "cc", "1"
-#else /* __s390x__ */
-#define __uaccess_fixup \
-	".section .fixup,\"ax\"\n"	\
-	"2: lghi   %0,%4\n"		\
-	"   jg     1b\n"		\
-	".previous\n"			\
-	".section __ex_table,\"a\"\n"	\
-	"   .align 8\n"			\
-	"   .quad  0b,2b\n"		\
-	".previous"
-#define __uaccess_clobber "cc"
-#endif /* __s390x__ */
+struct uaccess_ops {
+	size_t (*copy_from_user)(size_t, const void __user *, void *);
+	size_t (*copy_from_user_small)(size_t, const void __user *, void *);
+	size_t (*copy_to_user)(size_t, void __user *, const void *);
+	size_t (*copy_to_user_small)(size_t, void __user *, const void *);
+	size_t (*copy_in_user)(size_t, void __user *, const void __user *);
+	size_t (*clear_user)(size_t, void __user *);
+	size_t (*strnlen_user)(size_t, const char __user *);
+	size_t (*strncpy_from_user)(size_t, const char __user *, char *);
+	int (*futex_atomic_op)(int op, int __user *, int oparg, int *old);
+	int (*futex_atomic_cmpxchg)(int __user *, int old, int new);
+};
+
+extern struct uaccess_ops uaccess;
+extern struct uaccess_ops uaccess_std;
+
+static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
+{
+	size = uaccess.copy_to_user_small(size, ptr, x);
+	return size ? -EFAULT : size;
+}
+
+static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
+{
+	size = uaccess.copy_from_user_small(size, ptr, x);
+	return size ? -EFAULT : size;
+}
 
 /*
  * These are the main single-value transfer routines.  They automatically
  * use the right size if we just have the right pointer type.
  */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
-#define __put_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile(						\
-		"0: mvcs  0(%1,%2),%3,%0\n"			\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err)					\
-		: "d" (sizeof(*(ptr))), "a" (ptr), "Q" (x),	\
-		  "K" (-EFAULT)					\
-		: __uaccess_clobber );				\
-})
-#else
-#define __put_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile(						\
-		"0: mvcs  0(%1,%2),0(%3),%0\n"			\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err)					\
-		: "d" (sizeof(*(ptr))), "a" (ptr), "a" (&(x)),	\
-		  "K" (-EFAULT), "m" (x)			\
-		: __uaccess_clobber );				\
-})
-#endif
-
 #define __put_user(x, ptr) \
 ({								\
 	__typeof__(*(ptr)) __x = (x);				\
-	int __pu_err;						\
+	int __pu_err = -EFAULT;					\
         __chk_user_ptr(ptr);                                    \
 	switch (sizeof (*(ptr))) {				\
 	case 1:							\
 	case 2:							\
 	case 4:							\
 	case 8:							\
-		__put_user_asm(__x, ptr, __pu_err);		\
+		__pu_err = __put_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		break;						\
 	default:						\
 		__put_user_bad();				\
@@ -172,60 +146,36 @@ struct exception_table_entry
 
 extern int __put_user_bad(void) __attribute__((noreturn));
 
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
-#define __get_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile (						\
-		"0: mvcp  %O1(%2,%R1),0(%3),%0\n"		\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err), "=Q" (x)				\
-		: "d" (sizeof(*(ptr))), "a" (ptr),		\
-		  "K" (-EFAULT)					\
-		: __uaccess_clobber );				\
-})
-#else
-#define __get_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile (						\
-		"0: mvcp  0(%2,%5),0(%3),%0\n"			\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err), "=m" (x)				\
-		: "d" (sizeof(*(ptr))), "a" (ptr),		\
-		  "K" (-EFAULT), "a" (&(x))			\
-		: __uaccess_clobber );				\
-})
-#endif
-
 #define __get_user(x, ptr)					\
 ({								\
-	int __gu_err;						\
-        __chk_user_ptr(ptr);                                    \
+	int __gu_err = -EFAULT;					\
+	__chk_user_ptr(ptr);					\
 	switch (sizeof(*(ptr))) {				\
 	case 1: {						\
 		unsigned char __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 2: {						\
 		unsigned short __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 4: {						\
 		unsigned int __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 8: {						\
 		unsigned long long __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
@@ -247,8 +197,6 @@ extern int __get_user_bad(void) __attribute__((noreturn));
 #define __put_user_unaligned __put_user
 #define __get_user_unaligned __get_user
 
-extern long __copy_to_user_asm(const void *from, long n, void __user *to);
-
 /**
  * __copy_to_user: - Copy a block of data into user space, with less checking.
  * @to:   Destination address, in user space.
@@ -266,7 +214,10 @@ extern long __copy_to_user_asm(const void *from, long n, void __user *to);
 static inline unsigned long
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	return __copy_to_user_asm(from, n, to);
+	if (__builtin_constant_p(n) && (n <= 256))
+		return uaccess.copy_to_user_small(n, to, from);
+	else
+		return uaccess.copy_to_user(n, to, from);
 }
 
 #define __copy_to_user_inatomic __copy_to_user
@@ -294,8 +245,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 	return n;
 }
 
-extern long __copy_from_user_asm(void *to, long n, const void __user *from);
-
 /**
  * __copy_from_user: - Copy a block of data from user space, with less checking.
  * @to:   Destination address, in kernel space.
@@ -316,7 +265,10 @@ extern long __copy_from_user_asm(void *to, long n, const void __user *from);
 static inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	return __copy_from_user_asm(to, n, from);
+	if (__builtin_constant_p(n) && (n <= 256))
+		return uaccess.copy_from_user_small(n, from, to);
+	else
+		return uaccess.copy_from_user(n, from, to);
 }
 
 /**
@@ -346,13 +298,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 	return n;
 }
 
-extern unsigned long __copy_in_user_asm(const void __user *from, long n,
-							void __user *to);
-
 static inline unsigned long
 __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
-	return __copy_in_user_asm(from, n, to);
+	return uaccess.copy_in_user(n, to, from);
 }
 
 static inline unsigned long
@@ -360,34 +309,28 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
 	if (__access_ok(from,n) && __access_ok(to,n))
-		n = __copy_in_user_asm(from, n, to);
+		n = __copy_in_user(to, from, n);
 	return n;
 }
 
 /*
  * Copy a null terminated string from userspace.
  */
-extern long __strncpy_from_user_asm(long count, char *dst,
-					const char __user *src);
-
 static inline long
 strncpy_from_user(char *dst, const char __user *src, long count)
 {
         long res = -EFAULT;
         might_sleep();
         if (access_ok(VERIFY_READ, src, 1))
-                res = __strncpy_from_user_asm(count, dst, src);
+		res = uaccess.strncpy_from_user(count, src, dst);
         return res;
 }
 
-
-extern long __strnlen_user_asm(long count, const char __user *src);
-
 static inline unsigned long
 strnlen_user(const char __user * src, unsigned long n)
 {
 	might_sleep();
-	return __strnlen_user_asm(n, src);
+	return uaccess.strnlen_user(n, src);
 }
 
 /**
@@ -410,12 +353,10 @@ strnlen_user(const char __user * src, unsigned long n)
  * Zero Userspace
  */
 
-extern long __clear_user_asm(void __user *to, long n);
-
 static inline unsigned long
 __clear_user(void __user *to, unsigned long n)
 {
-	return __clear_user_asm(to, n);
+	return uaccess.clear_user(n, to);
 }
 
 static inline unsigned long
@@ -423,7 +364,7 @@ clear_user(void __user *to, unsigned long n)
 {
 	might_sleep();
 	if (access_ok(VERIFY_WRITE, to, n))
-		n = __clear_user_asm(to, n);
+		n = uaccess.clear_user(n, to);
 	return n;
 }
 
-- 
GitLab


From 6c2a9e6df60478e712f3c3d98b5047778a82a3d7 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <geraldsc@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:44 +0200
Subject: [PATCH 0255/1063] [S390] Use alternative user-copy operations for new
 hardware.

This introduces new user-copy operations which are optimized for
copying more than 256 Bytes on new hardware.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/head64.S     |  13 +++
 arch/s390/kernel/setup.c      |   6 +-
 arch/s390/lib/Makefile        |   1 +
 arch/s390/lib/uaccess_mvcos.c | 156 ++++++++++++++++++++++++++++++++++
 include/asm-s390/setup.h      |   2 +
 include/asm-s390/uaccess.h    |   1 +
 6 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 arch/s390/lib/uaccess_mvcos.c

diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 1ebaa338aa7e1..a8bdd96494c77 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -250,6 +250,19 @@ startup_continue:
 	oi	7(%r12),0x80		# set IDTE flag
 0:
 
+#
+# find out if we have the MVCOS instruction
+#
+	la	%r1,0f-.LPG1(%r13)	# set program check address
+	stg	%r1,__LC_PGM_NEW_PSW+8
+	.short	0xc800			# mvcos 0(%r0),0(%r0),%r0
+	.short	0x0000
+	.short	0x0000
+0:	tm	0x8f,0x13		# special-operation exception?
+	bno	1f-.LPG1(%r13)		# if yes, MVCOS is present
+	oi	6(%r12),2		# set MVCOS flag
+1:
+
         lpswe .Lentry-.LPG1(13)         # jump to _stext in primary-space,
                                         # virtual and never return ...
         .align 16
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e229af59976c5..e3d9325f6022b 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -647,7 +647,11 @@ setup_arch(char **cmdline_p)
 
 	memory_end = memory_size;
 
-	memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
+	if (MACHINE_HAS_MVCOS)
+		memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
+	else
+		memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
+
 	parse_early_param();
 
 #ifndef CONFIG_64BIT
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 96c82424d88be..c42ffedfdb494 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -5,4 +5,5 @@
 EXTRA_AFLAGS := -traditional
 
 lib-y += delay.o string.o uaccess_std.o
+lib-$(CONFIG_64BIT) += uaccess_mvcos.o
 lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
new file mode 100644
index 0000000000000..86c96d6c191a3
--- /dev/null
+++ b/arch/s390/lib/uaccess_mvcos.c
@@ -0,0 +1,156 @@
+/*
+ *  arch/s390/lib/uaccess_mvcos.c
+ *
+ *  Optimized user space access functions based on mvcos.
+ *
+ *    Copyright (C) IBM Corp. 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+#include <asm/futex.h>
+
+#ifndef __s390x__
+#define AHI	"ahi"
+#define ALR	"alr"
+#define CLR	"clr"
+#define LHI	"lhi"
+#define SLR	"slr"
+#else
+#define AHI	"aghi"
+#define ALR	"algr"
+#define CLR	"clgr"
+#define LHI	"lghi"
+#define SLR	"slgr"
+#endif
+
+size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x)
+{
+	register unsigned long reg0 asm("0") = 0x81UL;
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
+		"   jz    4f\n"
+		"1:"ALR"  %0,%3\n"
+		"  "SLR"  %1,%3\n"
+		"  "SLR"  %2,%3\n"
+		"   j     0b\n"
+		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
+		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
+		"  "SLR"  %4,%1\n"
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   5f\n"
+		"3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
+		"  "SLR"  %0,%4\n"
+		"   j     5f\n"
+		"4:"SLR"  %0,%0\n"
+		"5: \n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x)
+{
+	register unsigned long reg0 asm("0") = 0x810000UL;
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+		"   jz    4f\n"
+		"1:"ALR"  %0,%3\n"
+		"  "SLR"  %1,%3\n"
+		"  "SLR"  %2,%3\n"
+		"   j     0b\n"
+		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
+		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
+		"  "SLR"  %4,%1\n"
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   5f\n"
+		"3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
+		"  "SLR"  %0,%4\n"
+		"   j     5f\n"
+		"4:"SLR"  %0,%0\n"
+		"5: \n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t copy_in_user_mvcos(size_t size, void __user *to, const void __user *from)
+{
+	register unsigned long reg0 asm("0") = 0x810081UL;
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;
+	/* FIXME: copy with reduced length. */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+		"   jz    2f\n"
+		"1:"ALR"  %0,%3\n"
+		"  "SLR"  %1,%3\n"
+		"  "SLR"  %2,%3\n"
+		"   j     0b\n"
+		"2:"SLR"  %0,%0\n"
+		"3: \n"
+		EX_TABLE(0b,3b)
+		: "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t clear_user_mvcos(size_t size, void __user *to)
+{
+	register unsigned long reg0 asm("0") = 0x810000UL;
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+		"   jz    4f\n"
+		"1:"ALR"  %0,%2\n"
+		"  "SLR"  %1,%2\n"
+		"   j     0b\n"
+		"2: la    %3,4095(%1)\n"/* %3 = to + 4095 */
+		"   nr    %3,%2\n"	/* %3 = (to + 4095) & -4096 */
+		"  "SLR"  %3,%1\n"
+		"  "CLR"  %0,%3\n"	/* clear crosses next page boundary? */
+		"   jnh   5f\n"
+		"3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
+		"  "SLR"  %0,%3\n"
+		"   j     5f\n"
+		"4:"SLR"  %0,%0\n"
+		"5: \n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+		: "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+extern size_t copy_from_user_std_small(size_t, const void __user *, void *);
+extern size_t copy_to_user_std_small(size_t, void __user *, const void *);
+extern size_t strnlen_user_std(size_t, const char __user *);
+extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
+extern int futex_atomic_op(int, int __user *, int, int *);
+extern int futex_atomic_cmpxchg(int __user *, int, int);
+
+struct uaccess_ops uaccess_mvcos = {
+	.copy_from_user = copy_from_user_mvcos,
+	.copy_from_user_small = copy_from_user_std_small,
+	.copy_to_user = copy_to_user_mvcos,
+	.copy_to_user_small = copy_to_user_std_small,
+	.copy_in_user = copy_in_user_mvcos,
+	.clear_user = clear_user_mvcos,
+	.strnlen_user = strnlen_user_std,
+	.strncpy_from_user = strncpy_from_user_std,
+	.futex_atomic_op = futex_atomic_op,
+	.futex_atomic_cmpxchg = futex_atomic_cmpxchg,
+};
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index 00c03e46689b1..f1959732b6fde 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -44,10 +44,12 @@ extern unsigned long machine_flags;
 #define MACHINE_HAS_IEEE	(machine_flags & 2)
 #define MACHINE_HAS_CSP		(machine_flags & 8)
 #define MACHINE_HAS_DIAG44	(1)
+#define MACHINE_HAS_MVCOS	(0)
 #else /* __s390x__ */
 #define MACHINE_HAS_IEEE	(1)
 #define MACHINE_HAS_CSP		(1)
 #define MACHINE_HAS_DIAG44	(machine_flags & 32)
+#define MACHINE_HAS_MVCOS	(machine_flags & 512)
 #endif /* __s390x__ */
 
 
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h
index 39a2716ae1884..e2047b0c90921 100644
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -100,6 +100,7 @@ struct uaccess_ops {
 
 extern struct uaccess_ops uaccess;
 extern struct uaccess_ops uaccess_std;
+extern struct uaccess_ops uaccess_mvcos;
 
 static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
 {
-- 
GitLab


From 250b2dc83347feb73eb6bdf7511685e72b587e68 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:47 +0200
Subject: [PATCH 0256/1063] [S390] Get rid of DBG macro.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/s390mach.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 5399c5d99b81e..a914129a4da98 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -19,9 +19,6 @@
 
 #include "s390mach.h"
 
-#define DBG printk
-// #define DBG(args,...) do {} while (0);
-
 static struct semaphore m_sem;
 
 extern int css_process_crw(int, int);
@@ -83,11 +80,11 @@ s390_collect_crw_info(void *param)
 		ccode = stcrw(&crw[chain]);
 		if (ccode != 0)
 			break;
-		DBG(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, "
-		    "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
-		    crw[chain].slct, crw[chain].oflw, crw[chain].chn,
-		    crw[chain].rsc, crw[chain].anc, crw[chain].erc,
-		    crw[chain].rsid);
+		printk(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, "
+		       "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
+		       crw[chain].slct, crw[chain].oflw, crw[chain].chn,
+		       crw[chain].rsc, crw[chain].anc, crw[chain].erc,
+		       crw[chain].rsid);
 		/* Check for overflows. */
 		if (crw[chain].oflw) {
 			pr_debug("%s: crw overflow detected!\n", __FUNCTION__);
@@ -117,8 +114,8 @@ s390_collect_crw_info(void *param)
 			 * reported to the common I/O layer.
 			 */
 			if (crw[chain].slct) {
-				DBG(KERN_INFO"solicited machine check for "
-				    "channel path %02X\n", crw[0].rsid);
+				pr_debug("solicited machine check for "
+					 "channel path %02X\n", crw[0].rsid);
 				break;
 			}
 			switch (crw[0].erc) {
-- 
GitLab


From db0c2d59087296b3567ec408abe17108db88b385 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:49 +0200
Subject: [PATCH 0257/1063] [S390] set modalias for ccw bus uevents.

Add the MODALIAS environment variable for ccw bus uevents.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device.c | 109 +++++++++++++++++++++++---------------
 1 file changed, 66 insertions(+), 43 deletions(-)

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 646da56404015..688945662c151 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -52,53 +52,81 @@ ccw_bus_match (struct device * dev, struct device_driver * drv)
 	return 1;
 }
 
-/*
- * Hotplugging interface for ccw devices.
- * Heavily modeled on pci and usb hotplug.
- */
-static int
-ccw_uevent (struct device *dev, char **envp, int num_envp,
-	     char *buffer, int buffer_size)
+/* Store modalias string delimited by prefix/suffix string into buffer with
+ * specified size. Return length of resulting string (excluding trailing '\0')
+ * even if string doesn't fit buffer (snprintf semantics). */
+static int snprint_alias(char *buf, size_t size, const char *prefix,
+			 struct ccw_device_id *id, const char *suffix)
 {
-	struct ccw_device *cdev = to_ccwdev(dev);
-	int i = 0;
-	int length = 0;
+	int len;
 
-	if (!cdev)
-		return -ENODEV;
+	len = snprintf(buf, size, "%sccw:t%04Xm%02X", prefix, id->cu_type,
+		       id->cu_model);
+	if (len > size)
+		return len;
+	buf += len;
+	size -= len;
 
-	/* what we want to pass to /sbin/hotplug */
+	if (id->dev_type != 0)
+		len += snprintf(buf, size, "dt%04Xdm%02X%s", id->dev_type,
+				id->dev_model, suffix);
+	else
+		len += snprintf(buf, size, "dtdm%s", suffix);
 
-	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "CU_TYPE=%04X",
-			   cdev->id.cu_type);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
-		return -ENOMEM;
-	++length;
-	buffer += length;
+	return len;
+}
 
+/* Set up environment variables for ccw device uevent. Return 0 on success,
+ * non-zero otherwise. */
+static int ccw_uevent(struct device *dev, char **envp, int num_envp,
+		      char *buffer, int buffer_size)
+{
+	struct ccw_device *cdev = to_ccwdev(dev);
+	struct ccw_device_id *id = &(cdev->id);
+	int i = 0;
+	int len;
+
+	/* CU_TYPE= */
+	len = snprintf(buffer, buffer_size, "CU_TYPE=%04X", id->cu_type) + 1;
+	if (len > buffer_size || i >= num_envp)
+		return -ENOMEM;
 	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "CU_MODEL=%02X",
-			   cdev->id.cu_model);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	buffer += len;
+	buffer_size -= len;
+
+	/* CU_MODEL= */
+	len = snprintf(buffer, buffer_size, "CU_MODEL=%02X", id->cu_model) + 1;
+	if (len > buffer_size || i >= num_envp)
 		return -ENOMEM;
-	++length;
-	buffer += length;
+	envp[i++] = buffer;
+	buffer += len;
+	buffer_size -= len;
 
 	/* The next two can be zero, that's ok for us */
-	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "DEV_TYPE=%04X",
-			   cdev->id.dev_type);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	/* DEV_TYPE= */
+	len = snprintf(buffer, buffer_size, "DEV_TYPE=%04X", id->dev_type) + 1;
+	if (len > buffer_size || i >= num_envp)
 		return -ENOMEM;
-	++length;
-	buffer += length;
+	envp[i++] = buffer;
+	buffer += len;
+	buffer_size -= len;
 
+	/* DEV_MODEL= */
+	len = snprintf(buffer, buffer_size, "DEV_MODEL=%02X",
+			(unsigned char) id->dev_model) + 1;
+	if (len > buffer_size || i >= num_envp)
+		return -ENOMEM;
 	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "DEV_MODEL=%02X",
-			   cdev->id.dev_model);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	buffer += len;
+	buffer_size -= len;
+
+	/* MODALIAS=  */
+	len = snprint_alias(buffer, buffer_size, "MODALIAS=", id, "") + 1;
+	if (len > buffer_size || i >= num_envp)
 		return -ENOMEM;
+	envp[i++] = buffer;
+	buffer += len;
+	buffer_size -= len;
 
 	envp[i] = NULL;
 
@@ -251,16 +279,11 @@ modalias_show (struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct ccw_device *cdev = to_ccwdev(dev);
 	struct ccw_device_id *id = &(cdev->id);
-	int ret;
+	int len;
 
-	ret = sprintf(buf, "ccw:t%04Xm%02X",
-			id->cu_type, id->cu_model);
-	if (id->dev_type != 0)
-		ret += sprintf(buf + ret, "dt%04Xdm%02X\n",
-				id->dev_type, id->dev_model);
-	else
-		ret += sprintf(buf + ret, "dtdm\n");
-	return ret;
+	len = snprint_alias(buf, PAGE_SIZE, "", id, "\n") + 1;
+
+	return len > PAGE_SIZE ? PAGE_SIZE : len;
 }
 
 static ssize_t
-- 
GitLab


From dcd707b4bdc10b4fa20efa116dbaeded21513115 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:52 +0200
Subject: [PATCH 0258/1063] [S390] Replace nopav-message on VM.

Specifying the kernel parameter "dasd=nopav" on systems running under VM
has no effect but results in the message "disable PAV mode". The correct
message is "'nopav' not supported on VM".

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd_devmap.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 80cf0999465a4..91cf971f0652e 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -258,8 +258,12 @@ dasd_parse_keyword( char *parsestring ) {
                 return residual_str;
         }
 	if (strncmp("nopav", parsestring, length) == 0) {
-		dasd_nopav = 1;
-		MESSAGE(KERN_INFO, "%s", "disable PAV mode");
+		if (MACHINE_IS_VM)
+			MESSAGE(KERN_INFO, "%s", "'nopav' not supported on VM");
+		else {
+			dasd_nopav = 1;
+			MESSAGE(KERN_INFO, "%s", "disable PAV mode");
+		}
 		return residual_str;
 	}
 	if (strncmp("fixedbuffers", parsestring, length) == 0) {
-- 
GitLab


From dd9963f9dd0985e16e878fd3632ecadfc54d3fbb Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:54 +0200
Subject: [PATCH 0259/1063] [S390] cio: subchannels in no-path state.

Subchannel may incorrectly remain in state no-path after channel paths
have reappeared. Currently the scan for subchannels which are using a
channel path ends at the first occurrence if a full link address was
provided by the channel subsystem. The scan needs to continue over
all subchannels.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chsc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index c28444af0919f..9f9134b67e40c 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -378,6 +378,7 @@ __s390_process_res_acc(struct subchannel_id schid, void *data)
 
 	if (chp_mask == 0) {
 		spin_unlock_irq(&sch->lock);
+		put_device(&sch->dev);
 		return 0;
 	}
 	old_lpm = sch->lpm;
@@ -392,7 +393,7 @@ __s390_process_res_acc(struct subchannel_id schid, void *data)
 
 	spin_unlock_irq(&sch->lock);
 	put_device(&sch->dev);
-	return (res_data->fla_mask == 0xffff) ? -ENODEV : 0;
+	return 0;
 }
 
 
-- 
GitLab


From e0e32c8eba86fd5ea79eefad6f2c0b4988dfd02a Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:57 +0200
Subject: [PATCH 0260/1063] [S390] cio: update path groups on logical CHPID
 changes.

CHPIDs that are logically varied off are not removed from a CCW
device's path group. The resign-from-pathgroup command is issued with
an invalid path mask of 0, because internal CCW operations are masked
by the logical path mask after the vary operation has already cleared
the relevant bits.
Do not apply the logical path mask to internal operations.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/cio.c        |  2 +-
 drivers/s390/cio/device_ops.c | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 61eb7caa1567b..54cce542a1ee6 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -191,7 +191,7 @@ cio_start_key (struct subchannel *sch,	/* subchannel structure */
 	sch->orb.pfch = sch->options.prefetch == 0;
 	sch->orb.spnd = sch->options.suspend;
 	sch->orb.ssic = sch->options.suspend && sch->options.inter;
-	sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm;
+	sch->orb.lpm = (lpm != 0) ? lpm : sch->lpm;
 #ifdef CONFIG_64BIT
 	/*
 	 * for 64 bit we always support 64 bit IDAWs with 4k page size only
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 9e3de0bd59b5f..acad8f852eda5 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -96,6 +96,12 @@ ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
 	ret = cio_set_options (sch, flags);
 	if (ret)
 		return ret;
+	/* Adjust requested path mask to exclude varied-off paths. */
+	if (lpm) {
+		lpm &= sch->opm;
+		if (lpm == 0)
+			return -EACCES;
+	}
 	ret = cio_start_key (sch, cpa, lpm, key);
 	if (ret == 0)
 		cdev->private->intparm = intparm;
@@ -304,7 +310,7 @@ __ccw_device_retry_loop(struct ccw_device *cdev, struct ccw1 *ccw, long magic, _
 	sch = to_subchannel(cdev->dev.parent);
 	do {
 		ret = cio_start (sch, ccw, lpm);
-		if ((ret == -EBUSY) || (ret == -EACCES)) {
+		if (ret == -EBUSY) {
 			/* Try again later. */
 			spin_unlock_irq(&sch->lock);
 			msleep(10);
@@ -433,6 +439,13 @@ read_conf_data_lpm (struct ccw_device *cdev, void **buffer, int *length, __u8 lp
 	if (!ciw || ciw->cmd == 0)
 		return -EOPNOTSUPP;
 
+	/* Adjust requested path mask to exclude varied-off paths. */
+	if (lpm) {
+		lpm &= sch->opm;
+		if (lpm == 0)
+			return -EACCES;
+	}
+
 	rcd_ccw = kzalloc(sizeof(struct ccw1), GFP_KERNEL | GFP_DMA);
 	if (!rcd_ccw)
 		return -ENOMEM;
-- 
GitLab


From 28bdc6f6233f380ddc0b430cabd88ffeafea34c7 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Wed, 20 Sep 2006 15:59:59 +0200
Subject: [PATCH 0261/1063] [S390] cio: always query all paths on path
 verification.

Reappearing channel paths are sometimes not utilized by CCW devices
because path verification incorrectly relies on path-operational-mask
information which is not updated until a channel path has been used
again.
Modify path verification procedure to always query all available paths
to a device.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chsc.c        |  2 +-
 drivers/s390/cio/cio.c         |  5 +--
 drivers/s390/cio/device_fsm.c  | 39 +++++++++-------
 drivers/s390/cio/device_ops.c  |  2 +-
 drivers/s390/cio/device_pgid.c | 81 ++++++++++++++++++----------------
 5 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 9f9134b67e40c..3bb4e472d73de 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -256,7 +256,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 	/* trigger path verification. */
 	if (sch->driver && sch->driver->verify)
 		sch->driver->verify(&sch->dev);
-	else if (sch->vpm == mask)
+	else if (sch->lpm == mask)
 		goto out_unreg;
 out_unlock:
 	spin_unlock_irq(&sch->lock);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 54cce542a1ee6..2e2882daefbbd 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -569,10 +569,7 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid)
 	sch->opm = 0xff;
 	if (!cio_is_console(sch->schid))
 		chsc_validate_chpids(sch);
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 
 	CIO_DEBUG(KERN_INFO, 0,
 		  "Detected device %04x on subchannel 0.%x.%04X"
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 7756f324fb6f4..dace46fc32e8d 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -232,10 +232,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
 	 */
 	old_lpm = sch->lpm;
 	stsch(sch->schid, &sch->schib);
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Check since device may again have become not operational. */
 	if (!sch->schib.pmcw.dnv)
 		state = DEV_STATE_NOT_OPER;
@@ -455,8 +452,8 @@ ccw_device_sense_pgid_done(struct ccw_device *cdev, int err)
 		return;
 	}
 	/* Start Path Group verification. */
-	sch->vpm = 0;	/* Start with no path groups set. */
 	cdev->private->state = DEV_STATE_VERIFY;
+	cdev->private->flags.doverify = 0;
 	ccw_device_verify_start(cdev);
 }
 
@@ -556,7 +553,19 @@ ccw_device_nopath_notify(void *data)
 void
 ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
-	cdev->private->flags.doverify = 0;
+	struct subchannel *sch;
+
+	sch = to_subchannel(cdev->dev.parent);
+	/* Update schib - pom may have changed. */
+	stsch(sch->schid, &sch->schib);
+	/* Update lpm with verified path mask. */
+	sch->lpm = sch->vpm;
+	/* Repeat path verification? */
+	if (cdev->private->flags.doverify) {
+		cdev->private->flags.doverify = 0;
+		ccw_device_verify_start(cdev);
+		return;
+	}
 	switch (err) {
 	case -EOPNOTSUPP: /* path grouping not supported, just set online. */
 		cdev->private->options.pgroup = 0;
@@ -614,6 +623,7 @@ ccw_device_online(struct ccw_device *cdev)
 	if (!cdev->private->options.pgroup) {
 		/* Start initial path verification. */
 		cdev->private->state = DEV_STATE_VERIFY;
+		cdev->private->flags.doverify = 0;
 		ccw_device_verify_start(cdev);
 		return 0;
 	}
@@ -660,7 +670,6 @@ ccw_device_offline(struct ccw_device *cdev)
 	/* Are we doing path grouping? */
 	if (!cdev->private->options.pgroup) {
 		/* No, set state offline immediately. */
-		sch->vpm = 0;
 		ccw_device_done(cdev, DEV_STATE_OFFLINE);
 		return 0;
 	}
@@ -781,6 +790,7 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event)
 	}
 	/* Device is idle, we can do the path verification. */
 	cdev->private->state = DEV_STATE_VERIFY;
+	cdev->private->flags.doverify = 0;
 	ccw_device_verify_start(cdev);
 }
 
@@ -1043,9 +1053,9 @@ ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 }
 
 static void
-ccw_device_wait4io_verify(struct ccw_device *cdev, enum dev_event dev_event)
+ccw_device_delay_verify(struct ccw_device *cdev, enum dev_event dev_event)
 {
-	/* When the I/O has terminated, we have to start verification. */
+	/* Start verification after current task finished. */
 	cdev->private->flags.doverify = 1;
 }
 
@@ -1111,10 +1121,7 @@ device_trigger_reprobe(struct subchannel *sch)
 	 * The pim, pam, pom values may not be accurate, but they are the best
 	 * we have before performing device selection :/
 	 */
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Re-set some bits in the pmcw that were lost. */
 	sch->schib.pmcw.isc = 3;
 	sch->schib.pmcw.csense = 1;
@@ -1238,7 +1245,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_verify_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_onoff_timeout,
-		[DEV_EVENT_VERIFY]	= ccw_device_nop,
+		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,
 	},
 	[DEV_STATE_ONLINE] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
@@ -1281,7 +1288,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_wait4io_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_wait4io_timeout,
-		[DEV_EVENT_VERIFY]	= ccw_device_wait4io_verify,
+		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,
 	},
 	[DEV_STATE_QUIESCE] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_quiesce_done,
@@ -1294,7 +1301,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_nop,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_start_id,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_bug,
-		[DEV_EVENT_VERIFY]	= ccw_device_nop,
+		[DEV_EVENT_VERIFY]	= ccw_device_start_id,
 	},
 	[DEV_STATE_DISCONNECTED_SENSE_ID] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_recog_notoper,
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index acad8f852eda5..93a897eebfff7 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -256,7 +256,7 @@ ccw_device_get_path_mask(struct ccw_device *cdev)
 	if (!sch)
 		return 0;
 	else
-		return sch->vpm;
+		return sch->lpm;
 }
 
 static void
diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index 1693a102dcfe3..8ca2d078848c9 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -245,18 +245,17 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func)
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
 
 	/* Try multiple times. */
-	ret = -ENODEV;
+	ret = -EACCES;
 	if (cdev->private->iretry > 0) {
 		cdev->private->iretry--;
 		ret = cio_start (sch, cdev->private->iccws,
 				 cdev->private->imask);
-		/* ret is 0, -EBUSY, -EACCES or -ENODEV */
-		if ((ret != -EACCES) && (ret != -ENODEV))
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
+		if ((ret == 0) || (ret == -EBUSY))
 			return ret;
 	}
-	/* PGID command failed on this path. Switch it off. */
-	sch->lpm &= ~cdev->private->imask;
-	sch->vpm &= ~cdev->private->imask;
+	/* PGID command failed on this path. */
 	CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel "
 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
 		      cdev->private->devno, sch->schid.ssid,
@@ -286,18 +285,17 @@ static int __ccw_device_do_nop(struct ccw_device *cdev)
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
 
 	/* Try multiple times. */
-	ret = -ENODEV;
+	ret = -EACCES;
 	if (cdev->private->iretry > 0) {
 		cdev->private->iretry--;
 		ret = cio_start (sch, cdev->private->iccws,
 				 cdev->private->imask);
-		/* ret is 0, -EBUSY, -EACCES or -ENODEV */
-		if ((ret != -EACCES) && (ret != -ENODEV))
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
+		if ((ret == 0) || (ret == -EBUSY))
 			return ret;
 	}
-	/* nop command failed on this path. Switch it off. */
-	sch->lpm &= ~cdev->private->imask;
-	sch->vpm &= ~cdev->private->imask;
+	/* nop command failed on this path. */
 	CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel "
 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
 		      cdev->private->devno, sch->schid.ssid,
@@ -372,27 +370,32 @@ static void
 __ccw_device_verify_start(struct ccw_device *cdev)
 {
 	struct subchannel *sch;
-	__u8 imask, func;
+	__u8 func;
 	int ret;
 
 	sch = to_subchannel(cdev->dev.parent);
-	while (sch->vpm != sch->lpm) {
-		/* Find first unequal bit in vpm vs. lpm */
-		for (imask = 0x80; imask != 0; imask >>= 1)
-			if ((sch->vpm & imask) != (sch->lpm & imask))
-				break;
-		cdev->private->imask = imask;
+	/* Repeat for all paths. */
+	for (; cdev->private->imask; cdev->private->imask >>= 1,
+				     cdev->private->iretry = 5) {
+		if ((cdev->private->imask & sch->schib.pmcw.pam) == 0)
+			/* Path not available, try next. */
+			continue;
 		if (cdev->private->options.pgroup) {
-			func = (sch->vpm & imask) ?
-				SPID_FUNC_RESIGN : SPID_FUNC_ESTABLISH;
+			if (sch->opm & cdev->private->imask)
+				func = SPID_FUNC_ESTABLISH;
+			else
+				func = SPID_FUNC_RESIGN;
 			ret = __ccw_device_do_pgid(cdev, func);
 		} else
 			ret = __ccw_device_do_nop(cdev);
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
 		if (ret == 0 || ret == -EBUSY)
 			return;
-		cdev->private->iretry = 5;
+		/* Permanent path failure, try next. */
 	}
-	ccw_device_verify_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV);
+	/* Done with all paths. */
+	ccw_device_verify_done(cdev, (sch->vpm != 0) ? 0 : -ENODEV);
 }
 		
 /*
@@ -421,14 +424,14 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event)
 	else
 		ret = __ccw_device_check_nop(cdev);
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
+
 	switch (ret) {
 	/* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */
 	case 0:
-		/* Establish or Resign Path Group done. Update vpm. */
-		if ((sch->lpm & cdev->private->imask) != 0)
-			sch->vpm |= cdev->private->imask;
-		else
-			sch->vpm &= ~cdev->private->imask;
+		/* Path verification ccw finished successfully, update vpm. */
+		sch->vpm |= sch->opm & cdev->private->imask;
+		/* Go on with next path. */
+		cdev->private->imask >>= 1;
 		cdev->private->iretry = 5;
 		__ccw_device_verify_start(cdev);
 		break;
@@ -441,6 +444,10 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event)
 			cdev->private->options.pgroup = 0;
 		else
 			cdev->private->flags.pgid_single = 1;
+		/* Retry */
+		sch->vpm = 0;
+		cdev->private->imask = 0x80;
+		cdev->private->iretry = 5;
 		/* fall through. */
 	case -EAGAIN:		/* Try again. */
 		__ccw_device_verify_start(cdev);
@@ -449,8 +456,7 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event)
 		ccw_device_verify_done(cdev, -ETIME);
 		break;
 	case -EACCES:		/* channel is not operational. */
-		sch->lpm &= ~cdev->private->imask;
-		sch->vpm &= ~cdev->private->imask;
+		cdev->private->imask >>= 1;
 		cdev->private->iretry = 5;
 		__ccw_device_verify_start(cdev);
 		break;
@@ -463,19 +469,17 @@ ccw_device_verify_start(struct ccw_device *cdev)
 	struct subchannel *sch = to_subchannel(cdev->dev.parent);
 
 	cdev->private->flags.pgid_single = 0;
+	cdev->private->imask = 0x80;
 	cdev->private->iretry = 5;
-	/*
-	 * Update sch->lpm with current values to catch paths becoming
-	 * available again.
-	 */
+
+	/* Start with empty vpm. */
+	sch->vpm = 0;
+
+	/* Get current pam. */
 	if (stsch(sch->schid, &sch->schib)) {
 		ccw_device_verify_done(cdev, -ENODEV);
 		return;
 	}
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
 	__ccw_device_verify_start(cdev);
 }
 
@@ -524,7 +528,6 @@ ccw_device_disband_irq(struct ccw_device *cdev, enum dev_event dev_event)
 	switch (ret) {
 	/* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */
 	case 0:			/* disband successful. */
-		sch->vpm = 0;
 		ccw_device_disband_done(cdev, ret);
 		break;
 	case -EOPNOTSUPP:
-- 
GitLab


From 564337f34cc10fd8f30329e4e5f14f8995db5711 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Wed, 20 Sep 2006 16:00:01 +0200
Subject: [PATCH 0262/1063] [S390] cio: subchannel evaluation function operates
 without lock

css_evaluate_subchannel() operates on the subchannel without holding
its lock, which can lead to erratic behavior caused by concurrent device
access. Also split the evaluation function to make it more readable.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/css.c | 203 +++++++++++++++++++++--------------------
 1 file changed, 104 insertions(+), 99 deletions(-)

diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 13eeea3d547f2..7086a74e9871d 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -182,136 +182,141 @@ get_subchannel_by_schid(struct subchannel_id schid)
 	return dev ? to_subchannel(dev) : NULL;
 }
 
-
-static inline int
-css_get_subchannel_status(struct subchannel *sch, struct subchannel_id schid)
+static inline int css_get_subchannel_status(struct subchannel *sch)
 {
 	struct schib schib;
-	int cc;
 
-	cc = stsch(schid, &schib);
-	if (cc)
-		return CIO_GONE;
-	if (!schib.pmcw.dnv)
+	if (stsch(sch->schid, &schib) || !schib.pmcw.dnv)
 		return CIO_GONE;
-	if (sch && sch->schib.pmcw.dnv &&
-	    (schib.pmcw.dev != sch->schib.pmcw.dev))
+	if (sch->schib.pmcw.dnv && (schib.pmcw.dev != sch->schib.pmcw.dev))
 		return CIO_REVALIDATE;
-	if (sch && !sch->lpm)
+	if (!sch->lpm)
 		return CIO_NO_PATH;
 	return CIO_OPER;
 }
-	
-static int
-css_evaluate_subchannel(struct subchannel_id schid, int slow)
+
+static int css_evaluate_known_subchannel(struct subchannel *sch, int slow)
 {
 	int event, ret, disc;
-	struct subchannel *sch;
 	unsigned long flags;
+	enum { NONE, UNREGISTER, UNREGISTER_PROBE, REPROBE } action;
 
-	sch = get_subchannel_by_schid(schid);
-	disc = sch ? device_is_disconnected(sch) : 0;
+	spin_lock_irqsave(&sch->lock, flags);
+	disc = device_is_disconnected(sch);
 	if (disc && slow) {
-		if (sch)
-			put_device(&sch->dev);
-		return 0; /* Already processed. */
+		/* Disconnected devices are evaluated directly only.*/
+		spin_unlock_irqrestore(&sch->lock, flags);
+		return 0;
 	}
-	/*
-	 * We've got a machine check, so running I/O won't get an interrupt.
-	 * Kill any pending timers.
-	 */
-	if (sch)
-		device_kill_pending_timer(sch);
+	/* No interrupt after machine check - kill pending timers. */
+	device_kill_pending_timer(sch);
 	if (!disc && !slow) {
-		if (sch)
-			put_device(&sch->dev);
-		return -EAGAIN; /* Will be done on the slow path. */
+		/* Non-disconnected devices are evaluated on the slow path. */
+		spin_unlock_irqrestore(&sch->lock, flags);
+		return -EAGAIN;
 	}
-	event = css_get_subchannel_status(sch, schid);
+	event = css_get_subchannel_status(sch);
 	CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, %s, %s path.\n",
-		      schid.ssid, schid.sch_no, event,
-		      sch?(disc?"disconnected":"normal"):"unknown",
-		      slow?"slow":"fast");
+		      sch->schid.ssid, sch->schid.sch_no, event,
+		      disc ? "disconnected" : "normal",
+		      slow ? "slow" : "fast");
+	/* Analyze subchannel status. */
+	action = NONE;
 	switch (event) {
 	case CIO_NO_PATH:
-	case CIO_GONE:
-		if (!sch) {
-			/* Never used this subchannel. Ignore. */
-			ret = 0;
+		if (disc) {
+			/* Check if paths have become available. */
+			action = REPROBE;
 			break;
 		}
-		if (disc && (event == CIO_NO_PATH)) {
-			/*
-			 * Uargh, hack again. Because we don't get a machine
-			 * check on configure on, our path bookkeeping can
-			 * be out of date here (it's fine while we only do
-			 * logical varying or get chsc machine checks). We
-			 * need to force reprobing or we might miss devices
-			 * coming operational again. It won't do harm in real
-			 * no path situations.
-			 */
-			spin_lock_irqsave(&sch->lock, flags);
-			device_trigger_reprobe(sch);
+		/* fall through */
+	case CIO_GONE:
+		/* Prevent unwanted effects when opening lock. */
+		cio_disable_subchannel(sch);
+		device_set_disconnected(sch);
+		/* Ask driver what to do with device. */
+		action = UNREGISTER;
+		if (sch->driver && sch->driver->notify) {
 			spin_unlock_irqrestore(&sch->lock, flags);
-			ret = 0;
-			break;
-		}
-		if (sch->driver && sch->driver->notify &&
-		    sch->driver->notify(&sch->dev, event)) {
-			cio_disable_subchannel(sch);
-			device_set_disconnected(sch);
-			ret = 0;
-			break;
+			ret = sch->driver->notify(&sch->dev, event);
+			spin_lock_irqsave(&sch->lock, flags);
+			if (ret)
+				action = NONE;
 		}
-		/*
-		 * Unregister subchannel.
-		 * The device will be killed automatically.
-		 */
-		cio_disable_subchannel(sch);
-		css_sch_device_unregister(sch);
-		/* Reset intparm to zeroes. */
-		sch->schib.pmcw.intparm = 0;
-		cio_modify(sch);
-		put_device(&sch->dev);
-		ret = 0;
 		break;
 	case CIO_REVALIDATE:
-		/* 
-		 * Revalidation machine check. Sick.
-		 * We don't notify the driver since we have to throw the device
-		 * away in any case.
-		 */
-		if (!disc) {
-			css_sch_device_unregister(sch);
-			/* Reset intparm to zeroes. */
-			sch->schib.pmcw.intparm = 0;
-			cio_modify(sch);
-			put_device(&sch->dev);
-			ret = css_probe_device(schid);
-		} else {
-			/*
-			 * We can't immediately deregister the disconnected
-			 * device since it might block.
-			 */
-			spin_lock_irqsave(&sch->lock, flags);
-			device_trigger_reprobe(sch);
-			spin_unlock_irqrestore(&sch->lock, flags);
-			ret = 0;
-		}
+		/* Device will be removed, so no notify necessary. */
+		if (disc)
+			/* Reprobe because immediate unregister might block. */
+			action = REPROBE;
+		else
+			action = UNREGISTER_PROBE;
 		break;
 	case CIO_OPER:
-		if (disc) {
-			spin_lock_irqsave(&sch->lock, flags);
+		if (disc)
 			/* Get device operational again. */
-			device_trigger_reprobe(sch);
-			spin_unlock_irqrestore(&sch->lock, flags);
-		}
-		ret = sch ? 0 : css_probe_device(schid);
+			action = REPROBE;
+		break;
+	}
+	/* Perform action. */
+	ret = 0;
+	switch (action) {
+	case UNREGISTER:
+	case UNREGISTER_PROBE:
+		/* Unregister device (will use subchannel lock). */
+		spin_unlock_irqrestore(&sch->lock, flags);
+		css_sch_device_unregister(sch);
+		spin_lock_irqsave(&sch->lock, flags);
+
+		/* Reset intparm to zeroes. */
+		sch->schib.pmcw.intparm = 0;
+		cio_modify(sch);
+
+		/* Probe if necessary. */
+		if (action == UNREGISTER_PROBE)
+			ret = css_probe_device(sch->schid);
+		break;
+	case REPROBE:
+		device_trigger_reprobe(sch);
 		break;
 	default:
-		BUG();
-		ret = 0;
+		break;
+	}
+	spin_unlock_irqrestore(&sch->lock, flags);
+
+	return ret;
+}
+
+static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow)
+{
+	struct schib schib;
+
+	if (!slow) {
+		/* Will be done on the slow path. */
+		return -EAGAIN;
 	}
+	if (stsch(schid, &schib) || !schib.pmcw.dnv) {
+		/* Unusable - ignore. */
+		return 0;
+	}
+	CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, unknown, "
+			 "slow path.\n", schid.ssid, schid.sch_no, CIO_OPER);
+
+	return css_probe_device(schid);
+}
+
+static int css_evaluate_subchannel(struct subchannel_id schid, int slow)
+{
+	struct subchannel *sch;
+	int ret;
+
+	sch = get_subchannel_by_schid(schid);
+	if (sch) {
+		ret = css_evaluate_known_subchannel(sch, slow);
+		put_device(&sch->dev);
+	} else
+		ret = css_evaluate_new_subchannel(schid, slow);
+
 	return ret;
 }
 
-- 
GitLab


From 388c571cffc4ae4e64f0786333e811308acbbc10 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@de.ibm.com>
Date: Wed, 20 Sep 2006 16:00:04 +0200
Subject: [PATCH 0263/1063] [S390] hypfs crashes with invalid mount option.

When an invalid mount option is specified, no root inode is created
for hypfs, hypfs_fill_super() returns with -EINVAL and then
hypfs_kill_super() is called. hypfs_kill_super() does not check if
the root inode has been initialized. This patch adds this check.

Signed-off-by: Michael Holzheu <holzheu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/hypfs/inode.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index bdcad2ea1ff4b..bdade5f2e325e 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -312,10 +312,12 @@ static void hypfs_kill_super(struct super_block *sb)
 {
 	struct hypfs_sb_info *sb_info = sb->s_fs_info;
 
-	hypfs_delete_tree(sb->s_root);
-	hypfs_remove(sb_info->update_file);
-	kfree(sb->s_fs_info);
-	sb->s_fs_info = NULL;
+	if (sb->s_root) {
+		hypfs_delete_tree(sb->s_root);
+		hypfs_remove(sb_info->update_file);
+		kfree(sb->s_fs_info);
+		sb->s_fs_info = NULL;
+	}
 	kill_litter_super(sb);
 }
 
-- 
GitLab


From b4c98f625fffee3a6f633082e9e4be3e952ca2ab Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 13 Sep 2006 11:01:19 -0700
Subject: [PATCH 0264/1063] configfs: Prevent duplicate subsystem names.

For all child objects, creation comes through mkdir(2), so duplicate names
are prevented.

Subsystems, though, are registered by client drivers at init_module()/__init
time.  This patch prevents duplicate subsystem names.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index df025453dd976..816e8ef645605 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 	return sd;
 }
 
+/*
+ *
+ * Return -EEXIST if there is already a configfs element with the same
+ * name for the same parent.
+ *
+ * called with parent inode's i_mutex held
+ */
+int configfs_dirent_exists(struct configfs_dirent *parent_sd,
+			   const unsigned char *new)
+{
+	struct configfs_dirent * sd;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_element) {
+			const unsigned char *existing = configfs_get_name(sd);
+			if (strcmp(existing, new))
+				continue;
+			else
+				return -EEXIST;
+		}
+	}
+
+	return 0;
+}
+
+
 int configfs_make_dirent(struct configfs_dirent * parent_sd,
 			 struct dentry * dentry, void * element,
 			 umode_t mode, int type)
@@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p,
 	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
-	error = configfs_make_dirent(p->d_fsdata, d, k, mode,
-				     CONFIGFS_DIR);
+	error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
+	if (!error)
+		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
+					     CONFIGFS_DIR);
 	if (!error) {
 		error = configfs_create(d, mode, init_dir);
 		if (!error) {
-- 
GitLab


From ca4d147e62df370c334898464023aa7f9126abe1 Mon Sep 17 00:00:00 2001
From: Herbert Poetzl <herbert@13thfloor.at>
Date: Mon, 3 Jul 2006 17:27:12 -0700
Subject: [PATCH 0265/1063] ocfs2: add ext2 attributes

Support immutable, and other attributes.

Some renaming and other minor fixes done by myself.

Signed-off-by: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/Makefile   |   1 +
 fs/ocfs2/dlmglue.c  |   9 ++-
 fs/ocfs2/dlmglue.h  |   5 +-
 fs/ocfs2/file.c     |   3 +
 fs/ocfs2/inode.c    |  28 ++++++++-
 fs/ocfs2/inode.h    |   3 +
 fs/ocfs2/ioctl.c    | 134 ++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/ioctl.h    |  16 ++++++
 fs/ocfs2/ocfs2_fs.h |  24 +++++++-
 9 files changed, 217 insertions(+), 6 deletions(-)
 create mode 100644 fs/ocfs2/ioctl.c
 create mode 100644 fs/ocfs2/ioctl.h

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7d3be845a6142..9fb8132f19b0f 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -16,6 +16,7 @@ ocfs2-objs := \
 	file.o 			\
 	heartbeat.o 		\
 	inode.o 		\
+	ioctl.o 		\
 	journal.o 		\
 	localalloc.o 		\
 	mmap.o 			\
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1fbb34d8..151b41781eabc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1330,6 +1330,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
 	lvb->lvb_imtime_packed =
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
+	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
 
 	mlog_meta_lvb(0, lockres);
 
@@ -1360,6 +1361,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
 
+	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
+	ocfs2_set_inode_flags(inode);
+
 	/* fast-symlinks are a special case */
 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
 		inode->i_blocks = 0;
@@ -2899,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
 	     be16_to_cpu(lvb->lvb_imode));
 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
-	     "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink),
+	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
-	     (long long)be64_to_cpu(lvb->lvb_imtime_packed));
+	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
+	     be32_to_cpu(lvb->lvb_iattr));
 }
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8f2d1db2d9ead..243ae862ece53 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,7 +27,7 @@
 #ifndef DLMGLUE_H
 #define DLMGLUE_H
 
-#define OCFS2_LVB_VERSION 2
+#define OCFS2_LVB_VERSION 3
 
 struct ocfs2_meta_lvb {
 	__be32       lvb_version;
@@ -40,7 +40,8 @@ struct ocfs2_meta_lvb {
 	__be64       lvb_isize;
 	__be16       lvb_imode;
 	__be16       lvb_inlink;
-	__be32       lvb_reserved[3];
+	__be32       lvb_iattr;
+	__be32       lvb_reserved[2];
 };
 
 /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a9559c874530e..2bbfa17090cfd 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -44,6 +44,7 @@
 #include "file.h"
 #include "sysfile.h"
 #include "inode.h"
+#include "ioctl.h"
 #include "journal.h"
 #include "mmap.h"
 #include "suballoc.h"
@@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = {
 	.open		= ocfs2_file_open,
 	.aio_read	= ocfs2_file_aio_read,
 	.aio_write	= ocfs2_file_aio_write,
+	.ioctl		= ocfs2_ioctl,
 };
 
 const struct file_operations ocfs2_dops = {
 	.read		= generic_read_dir,
 	.readdir	= ocfs2_readdir,
 	.fsync		= ocfs2_sync_file,
+	.ioctl		= ocfs2_ioctl,
 };
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7b86ed0..3f496c41fea8b 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -71,6 +71,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 				    struct inode *inode,
 				    struct buffer_head *fe_bh);
 
+void ocfs2_set_inode_flags(struct inode *inode)
+{
+	unsigned int flags = OCFS2_I(inode)->ip_attr;
+
+	inode->i_flags &= ~(S_IMMUTABLE |
+		S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
+
+	if (flags & OCFS2_IMMUTABLE_FL)
+		inode->i_flags |= S_IMMUTABLE;
+
+	if (flags & OCFS2_SYNC_FL)
+		inode->i_flags |= S_SYNC;
+	if (flags & OCFS2_APPEND_FL)
+		inode->i_flags |= S_APPEND;
+	if (flags & OCFS2_NOATIME_FL)
+		inode->i_flags |= S_NOATIME;
+	if (flags & OCFS2_DIRSYNC_FL)
+		inode->i_flags |= S_DIRSYNC;
+}
+
 struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
 				     u64 blkno,
 				     int delete_vote)
@@ -260,7 +280,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		inode->i_blocks =
 			ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
 	inode->i_mapping->a_ops = &ocfs2_aops;
-	inode->i_flags |= S_NOATIME;
 	inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
 	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,6 +295,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
 
 	if (create_ino)
 		inode->i_ino = ino_from_blkno(inode->i_sb,
@@ -330,6 +350,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
 				  OCFS2_LOCK_TYPE_DATA, inode);
 
+	ocfs2_set_inode_flags(inode);
+	inode->i_flags |= S_NOATIME;
+
 	status = 0;
 bail:
 	mlog_exit(status);
@@ -1131,6 +1154,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
+	fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 	fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1193,8 @@ void ocfs2_refresh_inode(struct inode *inode,
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
+	ocfs2_set_inode_flags(inode);
 	i_size_write(inode, le64_to_cpu(fe->i_size));
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 	inode->i_uid = le32_to_cpu(fe->i_uid);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 35140f6cf840e..4d1e53992566b 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -56,6 +56,7 @@ struct ocfs2_inode_info
 	struct ocfs2_journal_handle	*ip_handle;
 
 	u32				ip_flags; /* see below */
+	u32				ip_attr; /* inode attributes */
 
 	/* protected by recovery_lock. */
 	struct inode			*ip_next_orphan;
@@ -142,4 +143,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
 int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
 
+void ocfs2_set_inode_flags(struct inode *inode);
+
 #endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
new file mode 100644
index 0000000000000..68f4806d3a354
--- /dev/null
+++ b/fs/ocfs2/ioctl.c
@@ -0,0 +1,134 @@
+/*
+ * linux/fs/ocfs2/ioctl.c
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ * adapted from Remy Card's ext2/ioctl.c
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "inode.h"
+#include "journal.h"
+
+#include "ocfs2_fs.h"
+#include <linux/ext2_fs.h>
+
+static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
+{
+	int status;
+
+	status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	if (status < 0) {
+		mlog_errno(status);
+		return status;
+	}
+	*flags = OCFS2_I(inode)->ip_attr;
+	ocfs2_meta_unlock(inode, 0);
+
+	mlog_exit(status);
+	return status;
+}
+
+static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
+				unsigned mask)
+{
+	struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_journal_handle *handle = NULL;
+	struct buffer_head *bh = NULL;
+	unsigned oldflags;
+	int status;
+
+	mutex_lock(&inode->i_mutex);
+
+	status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	status = -EROFS;
+	if (IS_RDONLY(inode))
+		goto bail_unlock;
+
+	status = -EACCES;
+	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+		goto bail_unlock;
+
+	if (!S_ISDIR(inode->i_mode))
+		flags &= ~OCFS2_DIRSYNC_FL;
+
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto bail_unlock;
+	}
+
+	oldflags = ocfs2_inode->ip_attr;
+	flags = flags & mask;
+	flags |= oldflags & ~mask;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 */
+	status = -EPERM;
+	if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
+		(OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto bail_unlock;
+	}
+
+	ocfs2_inode->ip_attr = flags;
+	ocfs2_set_inode_flags(inode);
+
+	status = ocfs2_mark_inode_dirty(handle, inode, bh);
+	if (status < 0)
+		mlog_errno(status);
+
+	ocfs2_commit_trans(handle);
+bail_unlock:
+	ocfs2_meta_unlock(inode, 1);
+bail:
+	mutex_unlock(&inode->i_mutex);
+
+	if (bh)
+		brelse(bh);
+
+	mlog_exit(status);
+	return status;
+}
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg)
+{
+	unsigned int flags;
+	int status;
+
+	switch (cmd) {
+	case OCFS2_IOC_GETFLAGS:
+		status = ocfs2_get_inode_attr(inode, &flags);
+		if (status < 0)
+			return status;
+
+		flags &= OCFS2_FL_VISIBLE;
+		return put_user(flags, (int __user *) arg);
+	case OCFS2_IOC_SETFLAGS:
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		return ocfs2_set_inode_attr(inode, flags,
+			OCFS2_FL_MODIFIABLE);
+	default:
+		return -ENOTTY;
+	}
+}
+
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
new file mode 100644
index 0000000000000..4a7c82931dbae
--- /dev/null
+++ b/fs/ocfs2/ioctl.h
@@ -0,0 +1,16 @@
+/*
+ * ioctl.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ *
+ */
+
+#ifndef OCFS2_IOCTL_H
+#define OCFS2_IOCTL_H
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg);
+
+#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c5b1ac547c158..3330a5dc6be2f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -114,6 +114,26 @@
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
 #define OCFS2_DEALLOC_FL	(0x00000800)	/* Truncate log */
 
+/* Inode attributes, keep in sync with EXT2 */
+#define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
+#define OCFS2_UNRM_FL		(0x00000002)	/* Undelete */
+#define OCFS2_COMPR_FL		(0x00000004)	/* Compress file */
+#define OCFS2_SYNC_FL		(0x00000008)	/* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL	(0x00000010)	/* Immutable file */
+#define OCFS2_APPEND_FL		(0x00000020)	/* writes to file may only append */
+#define OCFS2_NODUMP_FL		(0x00000040)	/* do not dump file */
+#define OCFS2_NOATIME_FL	(0x00000080)	/* do not update atime */
+#define OCFS2_DIRSYNC_FL	(0x00010000)	/* dirsync behaviour (directories only) */
+
+#define OCFS2_FL_VISIBLE	(0x000100FF)	/* User visible flags */
+#define OCFS2_FL_MODIFIABLE	(0x000100FF)	/* User modifiable flags */
+
+/*
+ * ioctl commands
+ */
+#define OCFS2_IOC_GETFLAGS	_IOR('f', 1, long)
+#define OCFS2_IOC_SETFLAGS	_IOW('f', 2, long)
+
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
@@ -399,7 +419,9 @@ struct ocfs2_dinode {
 	__le32 i_atime_nsec;
 	__le32 i_ctime_nsec;
 	__le32 i_mtime_nsec;
-/*70*/	__le64 i_reserved1[9];
+	__le32 i_attr;
+	__le32 i_reserved1;
+/*70*/	__le64 i_reserved2[8];
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
-- 
GitLab


From 2d5625181fac18f572cbbd18878d28f5eebf4733 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 10 Jul 2006 01:32:51 +0200
Subject: [PATCH 0266/1063] [PATCH] fs/ocfs2/ioctl.c should #include "ioctl.h"

Every file should #include the headers containing the prototypes for its
global functions.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 68f4806d3a354..3663cef806897 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -18,6 +18,8 @@
 #include "journal.h"
 
 #include "ocfs2_fs.h"
+#include "ioctl.h"
+
 #include <linux/ext2_fs.h>
 
 static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
-- 
GitLab


From 471e3f57286da7ce8820ad42c77d5f5f49d56a41 Mon Sep 17 00:00:00 2001
From: Mathieu Avila <mathieu.avila@seanodes.com>
Date: Wed, 13 Sep 2006 11:11:27 -0700
Subject: [PATCH 0267/1063] ocfs2: Fix heartbeat sector calculation

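This fixes things for devices which set max_sectors to 8. When max_pages
ends up as zero after the decrement that accounts for I/Os straddling a
page, the old code computed max_sectors as zero; use q->max_sectors in
that case instead.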

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 504595d6cf65c..305cba3681fe0 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev)
 		max_pages = q->max_hw_segments;
 	max_pages--; /* Handle I/Os that straddle a page */
 
-	max_sectors = max_pages << (PAGE_SHIFT - 9);
-
+	if (max_pages) {
+		max_sectors = max_pages << (PAGE_SHIFT - 9);
+	} else {
+		/* If BIO contains 1 or less than 1 page. */
+		max_sectors = q->max_sectors;
+	}
 	/* Why is fls() 1-based???? */
 	pow_two_sectors = 1 << (fls(max_sectors) - 1);
 
-- 
GitLab


From a663e30513d7ecc77dd71d474e7646bf78c0ba62 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 9 Aug 2006 11:45:07 -0700
Subject: [PATCH 0268/1063] ocfs2: move nlink check in ocfs2_mknod()

The dir nlink check in ocfs2_mknod() was being done outside of the cluster
lock, which means we could have been checking against a stale version of the
inode. Fix this by doing the check after the cluster lock instead.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0673862c8bdd2..d8161a77c370e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -310,13 +310,6 @@ static int ocfs2_mknod(struct inode *dir,
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
-	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
-		mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
-		     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
-		status = -EMLINK;
-		goto leave;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		status = -ENOMEM;
@@ -331,6 +324,11 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
+	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
+		status = -EMLINK;
+		goto leave;
+	}
+
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
 	if (!dirfe->i_links_count) {
 		/* can't make a file in a deleted directory. */
-- 
GitLab


From 0f62de2c9ca60a35f63122e7ea992cee8aae4bef Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Thu, 31 Aug 2006 20:39:47 -0700
Subject: [PATCH 0269/1063] ocfs2: Fix directory link count checks in
 ocfs2_link()

Remove the redundant "i_nlink >= OCFS2_LINK_MAX" check and add an unlinked
directory check in ocfs2_link().

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d8161a77c370e..24126476a8ccd 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -641,11 +641,6 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
-	if (inode->i_nlink >= OCFS2_LINK_MAX) {
-		err = -EMLINK;
-		goto bail;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		err = -ENOMEM;
@@ -659,6 +654,11 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
+	if (!dir->i_nlink) {
+		err = -ENOENT;
+		goto bail;
+	}
+
 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
 					dentry->d_name.len);
 	if (err)
-- 
GitLab


From e0b4096d34fbd6b30838c417100c9d0ef73c71f2 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 11 Jul 2006 14:38:54 -0700
Subject: [PATCH 0270/1063] ocfs2: properly update i_mtime on buffered write

We weren't always updating i_mtime on writes, so fix ocfs2_commit_write() to
handle this.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
---
 fs/ocfs2/aops.c | 83 ++++++++++++++++++++-----------------------------
 1 file changed, 34 insertions(+), 49 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c342ce010..3d7c082a8f582 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -391,31 +391,28 @@ struct ocfs2_journal_handle *ocfs2_start_walk_page_trans(struct inode *inode,
 static int ocfs2_commit_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
-	int ret, extending = 0, locklevel = 0;
-	loff_t new_i_size;
+	int ret;
 	struct buffer_head *di_bh = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_dinode *di;
 
 	mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
 
 	/* NOTE: ocfs2_file_aio_write has ensured that it's safe for
-	 * us to sample inode->i_size here without the metadata lock:
+	 * us to continue here without rechecking the I/O against
+	 * changed inode values.
 	 *
 	 * 1) We're currently holding the inode alloc lock, so no
 	 *    nodes can change it underneath us.
 	 *
 	 * 2) We've had to take the metadata lock at least once
-	 *    already to check for extending writes, hence insuring
-	 *    that our current copy is also up to date.
+	 *    already to check for extending writes, suid removal, etc.
+	 *    The meta data update code then ensures that we don't get a
+	 *    stale inode allocation image (i_size, i_clusters, etc).
 	 */
-	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-	if (new_i_size > i_size_read(inode)) {
-		extending = 1;
-		locklevel = 1;
-	}
 
-	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page);
+	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
 	if (ret != 0) {
 		mlog_errno(ret);
 		goto out;
@@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 		goto out_unlock_meta;
 	}
 
-	if (extending) {
-		handle = ocfs2_start_walk_page_trans(inode, page, from, to);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			handle = NULL;
-			goto out_unlock_data;
-		}
+	handle = ocfs2_start_walk_page_trans(inode, page, from, to);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_unlock_data;
+	}
 
-		/* Mark our buffer early. We'd rather catch this error up here
-		 * as opposed to after a successful commit_write which would
-		 * require us to set back inode->i_size. */
-		ret = ocfs2_journal_access(handle, inode, di_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	/* Mark our buffer early. We'd rather catch this error up here
+	 * as opposed to after a successful commit_write which would
+	 * require us to set back inode->i_size. */
+	ret = ocfs2_journal_access(handle, inode, di_bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
 	/* might update i_size */
@@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 		goto out_commit;
 	}
 
-	if (extending) {
-		loff_t size = (u64) i_size_read(inode);
-		struct ocfs2_dinode *di =
-			(struct ocfs2_dinode *)di_bh->b_data;
+	di = (struct ocfs2_dinode *)di_bh->b_data;
 
-		/* ocfs2_mark_inode_dirty is too heavy to use here. */
-		inode->i_blocks = ocfs2_align_bytes_to_sectors(size);
-		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	/* ocfs2_mark_inode_dirty() is too heavy to use here. */
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 
-		di->i_size = cpu_to_le64(size);
-		di->i_ctime = di->i_mtime = 
-				cpu_to_le64(inode->i_mtime.tv_sec);
-		di->i_ctime_nsec = di->i_mtime_nsec = 
-				cpu_to_le32(inode->i_mtime.tv_nsec);
+	inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+	di->i_size = cpu_to_le64((u64)i_size_read(inode));
 
-		ret = ocfs2_journal_dirty(handle, di_bh);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	ret = ocfs2_journal_dirty(handle, di_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
-	BUG_ON(extending && (i_size_read(inode) != new_i_size));
-
 out_commit:
-	if (handle)
-		ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(handle);
 out_unlock_data:
 	ocfs2_data_unlock(inode, 1);
 out_unlock_meta:
-	ocfs2_meta_unlock(inode, locklevel);
+	ocfs2_meta_unlock(inode, 1);
 out:
 	if (di_bh)
 		brelse(di_bh);
-- 
GitLab


From aa9588741db907785e4d92c8b768dd6c9077e6f0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 21 Apr 2006 13:49:02 -0700
Subject: [PATCH 0271/1063] ocfs2: implement directory read-ahead

Uptodate.c now knows about read-ahead buffers. Use some more aggressive
logic in ocfs2_readdir().

The two functions which currently use directory read-ahead are
ocfs2_find_entry() and ocfs2_readdir().

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/buffer_head_io.c | 95 +++++++++++++++++++++++++++++----------
 fs/ocfs2/buffer_head_io.h |  2 +-
 fs/ocfs2/dir.c            | 28 +++++++-----
 fs/ocfs2/inode.c          |  4 --
 fs/ocfs2/namei.c          | 10 ++---
 fs/ocfs2/uptodate.c       | 21 ++++++++-
 fs/ocfs2/uptodate.h       |  2 +
 7 files changed, 115 insertions(+), 47 deletions(-)

diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf9be6e8..c9037414f4f65 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
 		   (unsigned long long)block, nr, flags, inode);
 
+	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
+	       (!inode || !(flags & OCFS2_BH_CACHED)));
+
 	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
 		status = -EINVAL;
 		mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 		bh = bhs[i];
 		ignore_cache = 0;
 
+		/* There are three read-ahead cases here which we need to
+		 * be concerned with. All three assume a buffer has
+		 * previously been submitted with OCFS2_BH_READAHEAD
+		 * and it hasn't yet completed I/O.
+		 *
+		 * 1) The current request is sync to disk. This rarely
+		 *    happens these days, and never when performance
+		 *    matters - the code can just wait on the buffer
+		 *    lock and re-submit.
+		 *
+		 * 2) The current request is cached, but not
+		 *    readahead. ocfs2_buffer_uptodate() will return
+		 *    false anyway, so we'll wind up waiting on the
+		 *    buffer lock to do I/O. We re-check the request
+		 *    with after getting the lock to avoid a re-submit.
+		 *
+		 * 3) The current request is readahead (and so must
+		 *    also be a caching one). We short circuit if the
+		 *    buffer is locked (under I/O) and if it's in the
+		 *    uptodate cache. The re-check from #2 catches the
+		 *    case that the previous read-ahead completes just
+		 *    before our is-it-in-flight check.
+		 */
+
 		if (flags & OCFS2_BH_CACHED &&
 		    !ocfs2_buffer_uptodate(inode, bh)) {
 			mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 				continue;
 			}
 
+			/* A read-ahead request was made - if the
+			 * buffer is already under read-ahead from a
+			 * previously submitted request than we are
+			 * done here. */
+			if ((flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_read_ahead(inode, bh))
+				continue;
+
 			lock_buffer(bh);
 			if (buffer_jbd(bh)) {
 #ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 				continue;
 #endif
 			}
+
+			/* Re-check ocfs2_buffer_uptodate() as a
+			 * previously read-ahead buffer may have
+			 * completed I/O while we were waiting for the
+			 * buffer lock. */
+			if ((flags & OCFS2_BH_CACHED)
+			    && !(flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_uptodate(inode, bh)) {
+				unlock_buffer(bh);
+				continue;
+			}
+
 			clear_buffer_uptodate(bh);
 			get_bh(bh); /* for end_buffer_read_sync() */
 			bh->b_end_io = end_buffer_read_sync;
-			if (flags & OCFS2_BH_READAHEAD)
-				submit_bh(READA, bh);
-			else
-				submit_bh(READ, bh);
+			submit_bh(READ, bh);
 			continue;
 		}
 	}
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	for (i = (nr - 1); i >= 0; i--) {
 		bh = bhs[i];
 
-		/* We know this can't have changed as we hold the
-		 * inode sem. Avoid doing any work on the bh if the
-		 * journal has it. */
-		if (!buffer_jbd(bh))
-			wait_on_buffer(bh);
-
-		if (!buffer_uptodate(bh)) {
-			/* Status won't be cleared from here on out,
-			 * so we can safely record this and loop back
-			 * to cleanup the other buffers. Don't need to
-			 * remove the clustered uptodate information
-			 * for this bh as it's not marked locally
-			 * uptodate. */
-			status = -EIO;
-			brelse(bh);
-			bhs[i] = NULL;
-			continue;
+		if (!(flags & OCFS2_BH_READAHEAD)) {
+			/* We know this can't have changed as we hold the
+			 * inode sem. Avoid doing any work on the bh if the
+			 * journal has it. */
+			if (!buffer_jbd(bh))
+				wait_on_buffer(bh);
+
+			if (!buffer_uptodate(bh)) {
+				/* Status won't be cleared from here on out,
+				 * so we can safely record this and loop back
+				 * to cleanup the other buffers. Don't need to
+				 * remove the clustered uptodate information
+				 * for this bh as it's not marked locally
+				 * uptodate. */
+				status = -EIO;
+				brelse(bh);
+				bhs[i] = NULL;
+				continue;
+			}
 		}
 
+		/* Always set the buffer in the cache, even if it was
+		 * a forced read, or read-ahead which hasn't yet
+		 * completed. */
 		if (inode)
 			ocfs2_set_buffer_uptodate(inode, bh);
 	}
 	if (inode)
 		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
-	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
 	     (unsigned long long)block, nr,
-	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
+	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
 
 bail:
 
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 6ecb90937b685..6cc20930fac31 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super          *osb,
 
 
 #define OCFS2_BH_CACHED            1
-#define OCFS2_BH_READAHEAD         8	/* use this to pass READA down to submit_bh */
+#define OCFS2_BH_READAHEAD         8
 
 static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
 				   struct buffer_head **bh, int flags,
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3d494d1a5f363..04e01915b86e4 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	int error = 0;
-	unsigned long offset, blk;
-	int i, num, stored;
+	unsigned long offset, blk, last_ra_blk = 0;
+	int i, stored;
 	struct buffer_head * bh, * tmp;
 	struct ocfs2_dir_entry * de;
 	int err;
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct super_block * sb = inode->i_sb;
-	int have_disk_lock = 0;
+	unsigned int ra_sectors = 16;
 
 	mlog_entry("dirino=%llu\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			mlog_errno(error);
 		/* we haven't got any yet, so propagate the error. */
 		stored = error;
-		goto bail;
+		goto bail_nolock;
 	}
-	have_disk_lock = 1;
 
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			continue;
 		}
 
-		/*
-		 * Do the readahead (8k)
-		 */
-		if (!offset) {
-			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
+		/* The idea here is to begin with 8k read-ahead and to stay
+		 * 4k ahead of our current position.
+		 *
+		 * TODO: Use the pagecache for this. We just need to
+		 * make sure it's cluster-safe... */
+		if (!last_ra_blk
+		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
+			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
 			     i > 0; i--) {
 				tmp = ocfs2_bread(inode, ++blk, &err, 1);
 				if (tmp)
 					brelse(tmp);
 			}
+			last_ra_blk = blk;
+			ra_sectors = 8;
 		}
 
 revalidate:
@@ -194,9 +198,9 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
 	stored = 0;
 bail:
-	if (have_disk_lock)
-		ocfs2_meta_unlock(inode, 0);
+	ocfs2_meta_unlock(inode, 0);
 
+bail_nolock:
 	mlog_exit(stored);
 
 	return stored;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3f496c41fea8b..7bcf69154592e 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
 	u64 p_blkno;
 	int readflags = OCFS2_BH_CACHED;
 
-#if 0
-	/* only turn this on if we know we can deal with read_block
-	 * returning nothing */
 	if (reada)
 		readflags |= OCFS2_BH_READAHEAD;
-#endif
 
 	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
 	    i_size_read(inode)) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 24126476a8ccd..0d3e939b1f561 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
 #include "journal.h"
 #include "namei.h"
 #include "suballoc.h"
+#include "super.h"
 #include "symlink.h"
 #include "sysfile.h"
 #include "uptodate.h"
@@ -1962,13 +1963,8 @@ struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
 				}
 				num++;
 
-				/* XXX: questionable readahead stuff here */
 				bh = ocfs2_bread(dir, b++, &err, 1);
 				bh_use[ra_max] = bh;
-#if 0		// ???
-				if (bh)
-					ll_rw_block(READ, 1, &bh);
-#endif
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1976,6 +1972,10 @@ struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
+			ocfs2_error(dir->i_sb, "reading directory %llu, "
+				    "offset %lu\n",
+				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
+				    block);
 			brelse(bh);
 			goto next;
 		}
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index b8a00a7933267..9707ed7a3206d 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 }
 
 /* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache. */
+ * the block is stored in our inode metadata cache. 
+ * 
+ * This can be called under lock_buffer()
+ */
 int ocfs2_buffer_uptodate(struct inode *inode,
 			  struct buffer_head *bh)
 {
@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 	return ocfs2_buffer_cached(OCFS2_I(inode), bh);
 }
 
+/* 
+ * Determine whether a buffer is currently out on a read-ahead request.
+ * ip_io_sem should be held to serialize submitters with the logic here.
+ */
+int ocfs2_buffer_read_ahead(struct inode *inode,
+			    struct buffer_head *bh)
+{
+	return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
+}
+
 /* Requires ip_lock */
 static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
 				     sector_t block)
@@ -403,7 +416,11 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
  *
  * Note that this function may actually fail to insert the block if
  * memory cannot be allocated. This is not fatal however (but may
- * result in a performance penalty) */
+ * result in a performance penalty)
+ *
+ * Readahead buffers can be passed in here before the I/O request is
+ * completed.
+ */
 void ocfs2_set_buffer_uptodate(struct inode *inode,
 			       struct buffer_head *bh)
 {
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 01cd32d26b068..2e73206059a85 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 				   struct buffer_head *bh);
 void ocfs2_remove_from_cache(struct inode *inode,
 			     struct buffer_head *bh);
+int ocfs2_buffer_read_ahead(struct inode *inode,
+			    struct buffer_head *bh);
 
 #endif /* OCFS2_UPTODATE_H */
-- 
GitLab


From 02ed8416fe5b7e33b5bbf2d73f9af1d316806822 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 14 Sep 2006 10:28:06 -0700
Subject: [PATCH 0272/1063] ocfs2: Remove EXPERIMENTAL dependency

Things have been working pretty well for a while now.

We should've probably done this at least one kernel
revision ago, but it doesn't hurt to be paranoid.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 3f00a9faabcb6..5305816283113 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -325,8 +325,8 @@ config FS_POSIX_ACL
 source "fs/xfs/Kconfig"
 
 config OCFS2_FS
-	tristate "OCFS2 file system support (EXPERIMENTAL)"
-	depends on NET && SYSFS && EXPERIMENTAL
+	tristate "OCFS2 file system support"
+	depends on NET && SYSFS
 	select CONFIGFS_FS
 	select JBD
 	select CRC32
-- 
GitLab


From f12033d206ea48928d8124cdd5d35d8008c18935 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 18:57:57 -0700
Subject: [PATCH 0273/1063] ocfs2: Don't print on unknown remote blocking call

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmast.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 42775e2bbe2c7..f13a4bac41f0c 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 			goto do_ast;
 	}
 
-	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%u:%llu, "
-		       "name=%.*s, namelen=%u\n", 
-		       past->type == DLM_AST ? "" : "b", 
-		       dlm_get_lock_cookie_node(cookie),
-		       dlm_get_lock_cookie_seq(cookie),
-		       locklen, name, locklen);
+	mlog(0, "got %sast for unknown lock!  cookie=%u:%llu, "
+	     "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 
+	     dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
+	     locklen, name, locklen);
 
 	ret = DLM_NORMAL;
 unlock_out:
-- 
GitLab


From eb35746ca5e2211569b91ebb44d55b88ec91f3b0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 9 Aug 2006 13:23:08 -0700
Subject: [PATCH 0274/1063] ocfs2: Remove overzealous BUG_ON()

The truncate code was never supposed to BUG() on an allocator it doesn't
know about, but rather to ignore it. Right now, this does nothing, but when
we change our allocation paths to use all suballocator files, this will
allow current versions of the fs module to work fine.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edaab05a93e02..f43bc5f18a352 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 
 			ocfs2_remove_from_cache(inode, eb_bh);
 
-			BUG_ON(eb->h_suballoc_slot);
 			BUG_ON(el->l_recs[0].e_clusters);
 			BUG_ON(el->l_recs[0].e_cpos);
 			BUG_ON(el->l_recs[0].e_blkno);
-			status = ocfs2_free_extent_block(handle,
-							 tc->tc_ext_alloc_inode,
-							 tc->tc_ext_alloc_bh,
-							 eb);
-			if (status < 0) {
-				mlog_errno(status);
-				goto bail;
+			if (eb->h_suballoc_slot == 0) {
+				/*
+				 * This code only understands how to
+				 * lock the suballocator in slot 0,
+				 * which is fine because allocation is
+				 * only ever done out of that
+				 * suballocator too. A future version
+				 * might change that however, so avoid
+				 * a free if we don't know how to
+				 * handle it. This way an fs incompat
+				 * bit will not be necessary.
+				 */
+				status = ocfs2_free_extent_block(handle,
+								 tc->tc_ext_alloc_inode,
+								 tc->tc_ext_alloc_bh,
+								 eb);
+				if (status < 0) {
+					mlog_errno(status);
+					goto bail;
+				}
 			}
 		}
 		brelse(eb_bh);
-- 
GitLab


From 799111020c66c41aef621a3b53ad112543754124 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 21:03:52 +1000
Subject: [PATCH 0275/1063] [CRYPTO] api: Fixed crypto_tfm context alignment

Previously the __aligned__ attribute was added to the crypto_tfm context
member to ensure it is aligned correctly on architectures such as arm.
Unfortunately kmalloc does not use the same minimum alignment rules as
gcc so this is useless.

This patch changes it to use kmalloc's minimum alignment.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7f946241b8792..cb1e6631b1325 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -21,8 +21,9 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/slab.h>
 #include <linux/string.h>
-#include <asm/page.h>
+#include <linux/uaccess.h>
 
 /*
  * Algorithm masks and types.
@@ -61,6 +62,26 @@
 #define CRYPTO_DIR_ENCRYPT		1
 #define CRYPTO_DIR_DECRYPT		0
 
+/*
+ * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual
+ * declaration) is used to ensure that the crypto_tfm context structure is
+ * aligned correctly for the given architecture so that there are no alignment
+ * faults for C data types.  In particular, this is required on platforms such
+ * as arm where pointers are 32-bit aligned but there are data types such as
+ * u64 which require 64-bit alignment.
+ */
+#if defined(ARCH_KMALLOC_MINALIGN)
+#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
+#elif defined(ARCH_SLAB_MINALIGN)
+#define CRYPTO_MINALIGN ARCH_SLAB_MINALIGN
+#endif
+
+#ifdef CRYPTO_MINALIGN
+#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
+#else
+#define CRYPTO_MINALIGN_ATTR
+#endif
+
 struct scatterlist;
 struct crypto_tfm;
 
@@ -231,7 +252,7 @@ struct crypto_tfm {
 	
 	struct crypto_alg *__crt_alg;
 
-	char __crt_ctx[] __attribute__ ((__aligned__));
+	void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
 /* 
-- 
GitLab


From 2729bb427f686e47970406d6bde6b11892885f29 Mon Sep 17 00:00:00 2001
From: Joachim Fritschi <jfritschi@freenet.de>
Date: Tue, 20 Jun 2006 20:37:23 +1000
Subject: [PATCH 0276/1063] [CRYPTO] twofish: Split out common c code

This patch splits up the twofish crypto routine into a common part (key
setup), which will be used by all twofish crypto modules (generic-c, i586
assembler and x86_64 assembler), and a generic-c part. It also creates a new
header file which will be used by all 3 modules.

This eliminates all code duplication.

Correctness was verified with the tcrypt module and automated test scripts.

Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig           |   8 +
 crypto/Makefile          |   1 +
 crypto/twofish.c         | 698 +-----------------------------------
 crypto/twofish_common.c  | 744 +++++++++++++++++++++++++++++++++++++++
 include/crypto/twofish.h |  23 ++
 5 files changed, 777 insertions(+), 697 deletions(-)
 create mode 100644 crypto/twofish_common.c
 create mode 100644 include/crypto/twofish.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index ba133d557045d..5472f693e6ec4 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -131,6 +131,7 @@ config CRYPTO_BLOWFISH
 config CRYPTO_TWOFISH
 	tristate "Twofish cipher algorithm"
 	depends on CRYPTO
+	select CRYPTO_TWOFISH_COMMON
 	help
 	  Twofish cipher algorithm.
 	  
@@ -142,6 +143,13 @@ config CRYPTO_TWOFISH
 	  See also:
 	  <http://www.schneier.com/twofish.html>
 
+config CRYPTO_TWOFISH_COMMON
+	tristate
+	depends on CRYPTO
+	help
+	  Common parts of the Twofish cipher algorithm shared by the
+	  generic c and the assembler implementations.
+
 config CRYPTO_SERPENT
 	tristate "Serpent cipher algorithm"
 	depends on CRYPTO
diff --git a/crypto/Makefile b/crypto/Makefile
index d287b9e60c472..fe934f1001c64 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
 obj-$(CONFIG_CRYPTO_DES) += des.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
 obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
+obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o
 obj-$(CONFIG_CRYPTO_AES) += aes.o
 obj-$(CONFIG_CRYPTO_CAST5) += cast5.o
diff --git a/crypto/twofish.c b/crypto/twofish.c
index ec2488242e2d7..e3b3a0a6cb4da 100644
--- a/crypto/twofish.c
+++ b/crypto/twofish.c
@@ -39,6 +39,7 @@
  */
 
 #include <asm/byteorder.h>
+#include <crypto/twofish.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -46,534 +47,6 @@
 #include <linux/crypto.h>
 #include <linux/bitops.h>
 
-
-/* The large precomputed tables for the Twofish cipher (twofish.c)
- * Taken from the same source as twofish.c
- * Marc Mutz <Marc@Mutz.com>
- */
-
-/* These two tables are the q0 and q1 permutations, exactly as described in
- * the Twofish paper. */
-
-static const u8 q0[256] = {
-   0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
-   0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
-   0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
-   0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
-   0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
-   0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
-   0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
-   0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
-   0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
-   0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
-   0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
-   0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
-   0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
-   0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
-   0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
-   0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
-   0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
-   0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
-   0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
-   0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
-   0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
-   0x4A, 0x5E, 0xC1, 0xE0
-};
-
-static const u8 q1[256] = {
-   0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
-   0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
-   0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
-   0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
-   0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
-   0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
-   0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
-   0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
-   0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
-   0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
-   0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
-   0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
-   0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
-   0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
-   0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
-   0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
-   0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
-   0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
-   0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
-   0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
-   0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
-   0x55, 0x09, 0xBE, 0x91
-};
-
-/* These MDS tables are actually tables of MDS composed with q0 and q1,
- * because it is only ever used that way and we can save some time by
- * precomputing.  Of course the main saving comes from precomputing the
- * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
- * things up in these tables we reduce the matrix multiply to four lookups
- * and three XORs.  Semi-formally, the definition of these tables is:
- * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
- * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
- * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
- * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
- * by Schneier et al, and I'm casually glossing over the byte/word
- * conversion issues. */
-
-static const u32 mds[4][256] = {
-   {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
-    0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
-    0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
-    0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
-    0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
-    0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
-    0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
-    0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
-    0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
-    0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
-    0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
-    0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
-    0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
-    0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
-    0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
-    0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
-    0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
-    0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
-    0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
-    0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
-    0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
-    0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
-    0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
-    0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
-    0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
-    0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
-    0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
-    0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
-    0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
-    0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
-    0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
-    0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
-    0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
-    0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
-    0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
-    0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
-    0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
-    0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
-    0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
-    0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
-    0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
-    0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
-    0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
-
-   {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
-    0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
-    0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
-    0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
-    0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
-    0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
-    0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
-    0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
-    0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
-    0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
-    0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
-    0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
-    0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
-    0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
-    0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
-    0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
-    0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
-    0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
-    0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
-    0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
-    0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
-    0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
-    0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
-    0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
-    0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
-    0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
-    0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
-    0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
-    0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
-    0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
-    0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
-    0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
-    0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
-    0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
-    0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
-    0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
-    0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
-    0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
-    0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
-    0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
-    0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
-    0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
-    0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
-
-   {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
-    0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
-    0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
-    0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
-    0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
-    0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
-    0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
-    0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
-    0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
-    0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
-    0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
-    0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
-    0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
-    0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
-    0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
-    0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
-    0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
-    0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
-    0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
-    0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
-    0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
-    0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
-    0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
-    0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
-    0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
-    0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
-    0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
-    0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
-    0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
-    0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
-    0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
-    0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
-    0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
-    0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
-    0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
-    0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
-    0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
-    0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
-    0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
-    0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
-    0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
-    0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
-    0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
-
-   {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
-    0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
-    0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
-    0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
-    0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
-    0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
-    0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
-    0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
-    0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
-    0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
-    0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
-    0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
-    0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
-    0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
-    0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
-    0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
-    0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
-    0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
-    0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
-    0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
-    0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
-    0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
-    0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
-    0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
-    0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
-    0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
-    0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
-    0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
-    0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
-    0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
-    0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
-    0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
-    0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
-    0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
-    0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
-    0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
-    0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
-    0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
-    0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
-    0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
-    0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
-    0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
-    0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
-};
-
-/* The exp_to_poly and poly_to_exp tables are used to perform efficient
- * operations in GF(2^8) represented as GF(2)[x]/w(x) where
- * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
- * definition of the RS matrix in the key schedule.  Elements of that field
- * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
- * which can be represented naturally by bytes (just substitute x=2).  In that
- * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
- * multiplication is inefficient without hardware support.  To multiply
- * faster, I make use of the fact x is a generator for the nonzero elements,
- * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
- * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
- * *not* polynomial notation.  So if I want to compute pq where p and q are
- * in GF(2^8), I can just say:
- *    1. if p=0 or q=0 then pq=0
- *    2. otherwise, find m and n such that p=x^m and q=x^n
- *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
- * The translations in steps 2 and 3 are looked up in the tables
- * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
- * in action, look at the CALC_S macro.  As additional wrinkles, note that
- * one of my operands is always a constant, so the poly_to_exp lookup on it
- * is done in advance; I included the original values in the comments so
- * readers can have some chance of recognizing that this *is* the RS matrix
- * from the Twofish paper.  I've only included the table entries I actually
- * need; I never do a lookup on a variable input of zero and the biggest
- * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
- * never sum to more than 491.	I'm repeating part of the exp_to_poly table
- * so that I don't have to do mod-255 reduction in the exponent arithmetic.
- * Since I know my constant operands are never zero, I only have to worry
- * about zero values in the variable operand, and I do it with a simple
- * conditional branch.	I know conditionals are expensive, but I couldn't
- * see a non-horrible way of avoiding them, and I did manage to group the
- * statements so that each if covers four group multiplications. */
-
-static const u8 poly_to_exp[255] = {
-   0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
-   0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
-   0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
-   0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
-   0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
-   0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
-   0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
-   0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
-   0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
-   0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
-   0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
-   0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
-   0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
-   0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
-   0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
-   0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
-   0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
-   0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
-   0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
-   0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
-   0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
-   0x85, 0xC8, 0xA1
-};
-
-static const u8 exp_to_poly[492] = {
-   0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
-   0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
-   0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
-   0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
-   0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
-   0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
-   0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
-   0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
-   0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
-   0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
-   0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
-   0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
-   0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
-   0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
-   0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
-   0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
-   0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
-   0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
-   0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
-   0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
-   0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
-   0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
-   0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
-   0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
-   0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
-   0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
-   0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
-   0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
-   0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
-   0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
-   0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
-   0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
-   0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
-   0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
-   0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
-   0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
-   0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
-   0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
-   0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
-   0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
-   0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB
-};
-
-
-/* The table constants are indices of
- * S-box entries, preprocessed through q0 and q1. */
-static const u8 calc_sb_tbl[512] = {
-    0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
-    0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
-    0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
-    0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
-    0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
-    0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
-    0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
-    0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
-    0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
-    0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
-    0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
-    0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
-    0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
-    0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
-    0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
-    0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
-    0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
-    0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
-    0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
-    0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
-    0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
-    0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
-    0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
-    0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
-    0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
-    0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
-    0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
-    0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
-    0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
-    0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
-    0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
-    0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
-    0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
-    0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
-    0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
-    0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
-    0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
-    0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
-    0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
-    0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
-    0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
-    0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
-    0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
-    0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
-    0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
-    0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
-    0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
-    0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
-    0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
-    0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
-    0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
-    0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
-    0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
-    0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
-    0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
-    0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
-    0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
-    0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
-    0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
-    0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
-    0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
-    0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
-    0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
-    0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
-};
-
-/* Macro to perform one column of the RS matrix multiplication.  The
- * parameters a, b, c, and d are the four bytes of output; i is the index
- * of the key bytes, and w, x, y, and z, are the column of constants from
- * the RS matrix, preprocessed through the poly_to_exp table. */
-
-#define CALC_S(a, b, c, d, i, w, x, y, z) \
-   if (key[i]) { \
-      tmp = poly_to_exp[key[i] - 1]; \
-      (a) ^= exp_to_poly[tmp + (w)]; \
-      (b) ^= exp_to_poly[tmp + (x)]; \
-      (c) ^= exp_to_poly[tmp + (y)]; \
-      (d) ^= exp_to_poly[tmp + (z)]; \
-   }
-
-/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
- * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
- * four S-boxes, where i is the index of the entry to compute, and a and b
- * are the index numbers preprocessed through the q0 and q1 tables
- * respectively. */
-
-#define CALC_SB_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
-   ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
-   ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
-   ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
-
-/* Macro exactly like CALC_SB_2, but for 192-bit keys. */
-
-#define CALC_SB192_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \
-   ctx->s[1][i] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \
-   ctx->s[2][i] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \
-   ctx->s[3][i] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl];
-
-/* Macro exactly like CALC_SB_2, but for 256-bit keys. */
-
-#define CALC_SB256_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
-   ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
-   ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
-   ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp];
-
-/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
- * last two stages of the h() function for a given index (either 2i or 2i+1).
- * a, b, c, and d are the four bytes going into the last two stages.  For
- * 128-bit keys, this is the entire h() function and a and c are the index
- * preprocessed through q0 and q1 respectively; for longer keys they are the
- * output of previous stages.  j is the index of the first key byte to use.
- * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
- * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
- * rotations.  Its parameters are: a, the array to write the results into,
- * j, the index of the first output entry, k and l, the preprocessed indices
- * for index 2i, and m and n, the preprocessed indices for index 2i+1.
- * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an
- * additional lookup-and-XOR stage.  The parameters a, b, c and d are the
- * four bytes going into the last three stages.  For 192-bit keys, c = d
- * are the index preprocessed through q0, and a = b are the index
- * preprocessed through q1; j is the index of the first key byte to use.
- * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro
- * instead of CALC_K_2.
- * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an
- * additional lookup-and-XOR stage.  The parameters a and b are the index
- * preprocessed through q0 and q1 respectively; j is the index of the first
- * key byte to use.  CALC_K256 is identical to CALC_K but for using the
- * CALC_K256_2 macro instead of CALC_K_2. */
-
-#define CALC_K_2(a, b, c, d, j) \
-     mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
-   ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
-   ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
-   ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
-
-#define CALC_K(a, j, k, l, m, n) \
-   x = CALC_K_2 (k, l, k, l, 0); \
-   y = CALC_K_2 (m, n, m, n, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-#define CALC_K192_2(a, b, c, d, j) \
-   CALC_K_2 (q0[a ^ key[(j) + 16]], \
-	     q1[b ^ key[(j) + 17]], \
-	     q0[c ^ key[(j) + 18]], \
-	     q1[d ^ key[(j) + 19]], j)
-
-#define CALC_K192(a, j, k, l, m, n) \
-   x = CALC_K192_2 (l, l, k, k, 0); \
-   y = CALC_K192_2 (n, n, m, m, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-#define CALC_K256_2(a, b, j) \
-   CALC_K192_2 (q1[b ^ key[(j) + 24]], \
-	        q1[a ^ key[(j) + 25]], \
-	        q0[a ^ key[(j) + 26]], \
-	        q0[b ^ key[(j) + 27]], j)
-
-#define CALC_K256(a, j, k, l, m, n) \
-   x = CALC_K256_2 (k, l, 0); \
-   y = CALC_K256_2 (m, n, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-
 /* Macros to compute the g() function in the encryption and decryption
  * rounds.  G1 is the straight g() function; G2 includes the 8-bit
  * rotation for the high 32-bit word. */
@@ -630,176 +103,7 @@ static const u8 calc_sb_tbl[512] = {
    x ^= ctx->w[m]; \
    dst[n] = cpu_to_le32(x)
 
-#define TF_MIN_KEY_SIZE 16
-#define TF_MAX_KEY_SIZE 32
-#define TF_BLOCK_SIZE 16
-
-/* Structure for an expanded Twofish key.  s contains the key-dependent
- * S-boxes composed with the MDS matrix; w contains the eight "whitening"
- * subkeys, K[0] through K[7].	k holds the remaining, "round" subkeys.  Note
- * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
-struct twofish_ctx {
-   u32 s[4][256], w[8], k[32];
-};
-
-/* Perform the key setup. */
-static int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int key_len, u32 *flags)
-{
-	
-	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	int i, j, k;
-
-	/* Temporaries for CALC_K. */
-	u32 x, y;
-
-	/* The S vector used to key the S-boxes, split up into individual bytes.
-	 * 128-bit keys use only sa through sh; 256-bit use all of them. */
-	u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
-	u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
-
-	/* Temporary for CALC_S. */
-	u8 tmp;
-
-	/* Check key length. */
-	if (key_len != 16 && key_len != 24 && key_len != 32)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL; /* unsupported key length */
-	}
-
-	/* Compute the first two words of the S vector.  The magic numbers are
-	 * the entries of the RS matrix, preprocessed through poly_to_exp. The
-	 * numbers in the comments are the original (polynomial form) matrix
-	 * entries. */
-	CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-	CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-	CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-	CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-	CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-	CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-	CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-	CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-	CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-	CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-	CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-	CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-	CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-	CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-	CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-	CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-
-	if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */
-		/* Calculate the third word of the S vector */
-		CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-		CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-		CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-		CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-		CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-		CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-		CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-		CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-	}
-
-	if (key_len == 32) { /* 256-bit key */
-		/* Calculate the fourth word of the S vector */
-		CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-		CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-		CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-		CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-		CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-		CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-		CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-		CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-			CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	} else if (key_len == 24) { /* 192-bit key */
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-		        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	} else { /* 128-bit key */
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-			CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	}
-
-	return 0;
-}
 
 /* Encrypt one block.  in and out may be the same. */
 static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
new file mode 100644
index 0000000000000..1ae0280c25135
--- /dev/null
+++ b/crypto/twofish_common.c
@@ -0,0 +1,744 @@
+/*
+ * Common Twofish algorithm parts shared between the c and assembler
+ * implementations
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+
+/* The large precomputed tables for the Twofish cipher (twofish.c)
+ * Taken from the same source as twofish.c
+ * Marc Mutz <Marc@Mutz.com>
+ */
+
+/* These two tables are the q0 and q1 permutations, exactly as described in
+ * the Twofish paper. */
+
+static const u8 q0[256] = {
+	0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
+	0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
+	0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
+	0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
+	0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
+	0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
+	0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
+	0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
+	0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
+	0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
+	0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
+	0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
+	0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
+	0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
+	0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
+	0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
+	0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
+	0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
+	0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
+	0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
+	0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
+	0x4A, 0x5E, 0xC1, 0xE0
+};
+
+static const u8 q1[256] = {
+	0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
+	0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
+	0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
+	0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
+	0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
+	0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
+	0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
+	0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
+	0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
+	0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
+	0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
+	0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
+	0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
+	0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
+	0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
+	0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
+	0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
+	0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
+	0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
+	0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
+	0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
+	0x55, 0x09, 0xBE, 0x91
+};
+
+/* These MDS tables are actually tables of MDS composed with q0 and q1,
+ * because it is only ever used that way and we can save some time by
+ * precomputing.  Of course the main saving comes from precomputing the
+ * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
+ * things up in these tables we reduce the matrix multiply to four lookups
+ * and three XORs.  Semi-formally, the definition of these tables is:
+ * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
+ * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
+ * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
+ * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
+ * by Schneier et al, and I'm casually glossing over the byte/word
+ * conversion issues. */
+
+static const u32 mds[4][256] = {
+	{
+	0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
+	0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
+	0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
+	0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
+	0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
+	0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
+	0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
+	0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
+	0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
+	0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
+	0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
+	0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
+	0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
+	0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
+	0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
+	0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
+	0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
+	0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
+	0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
+	0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
+	0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
+	0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
+	0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
+	0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
+	0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
+	0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
+	0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
+	0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
+	0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
+	0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
+	0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
+	0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
+	0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
+	0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
+	0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
+	0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
+	0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
+	0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
+	0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
+	0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
+	0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
+	0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
+	0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
+
+	{
+	0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
+	0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
+	0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
+	0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
+	0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
+	0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
+	0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
+	0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
+	0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
+	0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
+	0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
+	0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
+	0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
+	0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
+	0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
+	0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
+	0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
+	0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
+	0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
+	0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
+	0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
+	0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
+	0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
+	0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
+	0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
+	0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
+	0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
+	0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
+	0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
+	0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
+	0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
+	0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
+	0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
+	0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
+	0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
+	0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
+	0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
+	0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
+	0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
+	0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
+	0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
+	0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
+	0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
+
+	{
+	0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
+	0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
+	0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
+	0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
+	0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
+	0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
+	0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
+	0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
+	0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
+	0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
+	0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
+	0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
+	0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
+	0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
+	0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
+	0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
+	0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
+	0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
+	0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
+	0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
+	0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
+	0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
+	0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
+	0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
+	0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
+	0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
+	0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
+	0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
+	0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
+	0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
+	0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
+	0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
+	0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
+	0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
+	0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
+	0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
+	0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
+	0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
+	0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
+	0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
+	0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
+	0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
+	0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
+
+	{
+	0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
+	0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
+	0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
+	0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
+	0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
+	0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
+	0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
+	0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
+	0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
+	0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
+	0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
+	0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
+	0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
+	0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
+	0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
+	0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
+	0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
+	0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
+	0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
+	0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
+	0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
+	0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
+	0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
+	0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
+	0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
+	0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
+	0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
+	0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
+	0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
+	0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
+	0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
+	0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
+	0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
+	0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
+	0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
+	0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
+	0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
+	0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
+	0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
+	0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
+	0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
+	0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
+	0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
+};
+
+/* The exp_to_poly and poly_to_exp tables are used to perform efficient
+ * operations in GF(2^8) represented as GF(2)[x]/w(x) where
+ * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
+ * definition of the RS matrix in the key schedule.  Elements of that field
+ * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
+ * which can be represented naturally by bytes (just substitute x=2).  In that
+ * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
+ * multiplication is inefficient without hardware support.  To multiply
+ * faster, I make use of the fact x is a generator for the nonzero elements,
+ * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
+ * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
+ * *not* polynomial notation.  So if I want to compute pq where p and q are
+ * in GF(2^8), I can just say:
+ *    1. if p=0 or q=0 then pq=0
+ *    2. otherwise, find m and n such that p=x^m and q=x^n
+ *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
+ * The translations in steps 2 and 3 are looked up in the tables
+ * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
+ * in action, look at the CALC_S macro.  As additional wrinkles, note that
+ * one of my operands is always a constant, so the poly_to_exp lookup on it
+ * is done in advance; I included the original values in the comments so
+ * readers can have some chance of recognizing that this *is* the RS matrix
+ * from the Twofish paper.  I've only included the table entries I actually
+ * need; I never do a lookup on a variable input of zero and the biggest
+ * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
+ * never sum to more than 491.  I'm repeating part of the exp_to_poly table
+ * so that I don't have to do mod-255 reduction in the exponent arithmetic.
+ * Since I know my constant operands are never zero, I only have to worry
+ * about zero values in the variable operand, and I do it with a simple
+ * conditional branch.  I know conditionals are expensive, but I couldn't
+ * see a non-horrible way of avoiding them, and I did manage to group the
+ * statements so that each if covers four group multiplications. */
+
+static const u8 poly_to_exp[255] = {
+	0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
+	0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
+	0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
+	0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
+	0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
+	0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
+	0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
+	0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
+	0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
+	0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
+	0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
+	0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
+	0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
+	0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
+	0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
+	0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
+	0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
+	0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
+	0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
+	0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
+	0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
+	0x85, 0xC8, 0xA1
+};
+
+static const u8 exp_to_poly[492] = {
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
+	0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
+	0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
+	0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
+	0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
+	0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
+	0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
+	0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
+	0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
+	0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
+	0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
+	0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
+	0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
+	0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
+	0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
+	0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
+	0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
+	0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
+	0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
+	0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
+	0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
+	0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
+	0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
+	0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
+	0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
+	0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
+	0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
+	0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
+	0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
+	0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
+	0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
+	0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
+	0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
+	0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
+	0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
+	0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
+	0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
+	0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
+	0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
+	0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
+	0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB
+};
+
+
+/* calc_sb_tbl[2*i] is q0[i] and calc_sb_tbl[2*i + 1] is q1[i]: the indices
+ * of S-box entries, preprocessed through q0 and q1. */
+static const u8 calc_sb_tbl[512] = {
+	0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
+	0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
+	0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
+	0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
+	0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
+	0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
+	0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
+	0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
+	0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
+	0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
+	0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
+	0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
+	0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
+	0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
+	0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
+	0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
+	0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
+	0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
+	0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
+	0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
+	0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
+	0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
+	0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
+	0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
+	0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
+	0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
+	0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
+	0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
+	0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
+	0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
+	0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
+	0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
+	0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
+	0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
+	0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
+	0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
+	0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
+	0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
+	0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
+	0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
+	0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
+	0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
+	0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
+	0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
+	0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
+	0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
+	0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
+	0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
+	0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
+	0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
+	0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
+	0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
+	0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
+	0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
+	0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
+	0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
+	0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
+	0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
+	0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
+	0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
+	0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
+	0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
+	0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
+	0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
+};
+
+/* Perform one column of the RS matrix multiplication.  a, b, c and d are the
+ * four output bytes (XORed in place), i is the index of the key byte, and w,
+ * x, y and z are the column of RS constants, preprocessed via poly_to_exp.
+ * Zero key bytes are skipped.  Uses key[] and tmp; end the call with ';'. */
+
+#define CALC_S(a, b, c, d, i, w, x, y, z) do { \
+   if (key[(i)]) { \
+      tmp = poly_to_exp[key[(i)] - 1]; \
+      (a) ^= exp_to_poly[tmp + (w)]; \
+      (b) ^= exp_to_poly[tmp + (x)]; \
+      (c) ^= exp_to_poly[tmp + (y)]; \
+      (d) ^= exp_to_poly[tmp + (z)]; \
+   } } while (0)
+
+/* Macro to calculate the key-dependent S-boxes for a 128-bit key using the
+ * S vector (sa..sh) from CALC_S.  CALC_SB_2 fills entry i of all four
+ * S-boxes in ctx->s; a and b are the index preprocessed through the q0 and
+ * q1 tables respectively.  Expands to a single statement, so terminate the
+ * invocation with a semicolon. */
+
+#define CALC_SB_2(i, a, b) do { \
+   ctx->s[0][(i)] = mds[0][q0[(a) ^ sa] ^ se]; \
+   ctx->s[1][(i)] = mds[1][q0[(b) ^ sb] ^ sf]; \
+   ctx->s[2][(i)] = mds[2][q1[(a) ^ sc] ^ sg]; \
+   ctx->s[3][(i)] = mds[3][q1[(b) ^ sd] ^ sh]; } while (0)
+
+/* CALC_SB_2 for 192-bit keys: one more q0/q1 stage; also uses si..sl. */
+
+#define CALC_SB192_2(i, a, b) do { \
+   ctx->s[0][(i)] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \
+   ctx->s[1][(i)] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \
+   ctx->s[2][(i)] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \
+   ctx->s[3][(i)] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl]; } while (0)
+
+/* CALC_SB_2 for 256-bit keys: two more q0/q1 stages; also uses si..sp. */
+
+#define CALC_SB256_2(i, a, b) do { \
+   ctx->s[0][(i)] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
+   ctx->s[1][(i)] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
+   ctx->s[2][(i)] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
+   ctx->s[3][(i)] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]; } while (0)
+
+/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
+ * last two stages of the h() function for a given index (either 2i or 2i+1).
+ * a, b, c, and d are the four bytes going into the last two stages.  For
+ * 128-bit keys, this is the entire h() function and a and c are the index
+ * preprocessed through q0 and q1 respectively; for longer keys they are the
+ * output of previous stages.  j is the index of the first key byte to use.
+ * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
+ * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
+ * rotations.  Its parameters are: a, the array to write the results into,
+ * j, the index of the first output entry, k and l, the preprocessed indices
+ * for index 2i, and m and n, the preprocessed indices for index 2i+1.
+ * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an
+ * additional lookup-and-XOR stage.  The parameters a, b, c and d are the
+ * four bytes going into the last three stages.  For 192-bit keys, c = d
+ * are the index preprocessed through q0, and a = b are the index
+ * preprocessed through q1; j is the index of the first key byte to use.
+ * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro
+ * instead of CALC_K_2.
+ * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an
+ * additional lookup-and-XOR stage.  The parameters a and b are the index
+ * preprocessed through q0 and q1 respectively; j is the index of the first
+ * key byte to use.  CALC_K256 is identical to CALC_K but for using the
+ * CALC_K256_2 macro instead of CALC_K_2. */
+
+#define CALC_K_2(a, b, c, d, j) /* XOR of four mds lookups; an expression */ \
+    (mds[0][q0[(a) ^ key[(j) + 8]] ^ key[j]] \
+   ^ mds[1][q0[(b) ^ key[(j) + 9]] ^ key[(j) + 1]] \
+   ^ mds[2][q1[(c) ^ key[(j) + 10]] ^ key[(j) + 2]] \
+   ^ mds[3][q1[(d) ^ key[(j) + 11]] ^ key[(j) + 3]])
+
+#define CALC_K(a, j, k, l, m, n) \
+   x = CALC_K_2 (k, l, k, l, 0); \
+   y = CALC_K_2 (m, n, m, n, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+#define CALC_K192_2(a, b, c, d, j) /* extra stage on key[(j)+16..19] */ \
+   CALC_K_2 (q0[(a) ^ key[(j) + 16]], \
+	     q1[(b) ^ key[(j) + 17]], \
+	     q0[(c) ^ key[(j) + 18]], \
+	     q1[(d) ^ key[(j) + 19]], j)
+
+#define CALC_K192(a, j, k, l, m, n) \
+   x = CALC_K192_2 (l, l, k, k, 0); \
+   y = CALC_K192_2 (n, n, m, m, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+#define CALC_K256_2(a, b, j) /* extra stage on key[(j)+24..27] */ \
+   CALC_K192_2 (q1[(b) ^ key[(j) + 24]], \
+	        q1[(a) ^ key[(j) + 25]], \
+	        q0[(a) ^ key[(j) + 26]], \
+	        q0[(b) ^ key[(j) + 27]], j)
+
+#define CALC_K256(a, j, k, l, m, n) \
+   x = CALC_K256_2 (k, l, 0); \
+   y = CALC_K256_2 (m, n, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+/* Perform the key setup: expand key into ctx->s, ctx->w and ctx->k. */
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+		   unsigned int key_len, u32 *flags)
+{
+
+	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	int i, j, k; /* S-box entry index; even/odd offsets into calc_sb_tbl */
+
+	/* Temporaries for CALC_K. */
+	u32 x, y;
+
+	/* The S vector used to key the S-boxes, split up into individual bytes.
+	 * 128-bit keys use sa-sh, 192-bit keys sa-sl, 256-bit keys all of them. */
+	u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
+	u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
+
+	/* Temporary for CALC_S. */
+	u8 tmp;
+
+	/* Check key length: only 16, 24 or 32 bytes are accepted. */
+	if (key_len != 16 && key_len != 24 && key_len != 32)
+	{
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL; /* unsupported key length */
+	}
+
+	/* Compute the first two words of the S vector.  The magic numbers are
+	 * the entries of the RS matrix, preprocessed through poly_to_exp. The
+	 * numbers in the comments are the original (polynomial form) matrix
+	 * entries. */
+	CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+	CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+	CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+	CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+	CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+	CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+	CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+	CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+	CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+	CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+	CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+	CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+	CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+	CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+	CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+	CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+	if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */
+		/* Calculate the third word of the S vector */
+		CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+		CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+		CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+		CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+		CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+		CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+		CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+		CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+	}
+
+	if (key_len == 32) { /* 256-bit key */
+		/* Calculate the fourth word of the S vector */
+		CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+		CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+		CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+		CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+		CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+		CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+		CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+		CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+			CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening (ctx->w) and round (ctx->k) subkeys.  The
+		 * constants are subkey indices preprocessed through q0 and q1. */
+		CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	} else if (key_len == 24) { /* 192-bit key */
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+		        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening (ctx->w) and round (ctx->k) subkeys.  The
+		 * constants are subkey indices preprocessed through q0 and q1. */
+		CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	} else { /* 128-bit key */
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+			CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening (ctx->w) and round (ctx->k) subkeys.  The
+		 * constants are subkey indices preprocessed through q0 and q1. */
+		CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(twofish_setkey);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Twofish cipher common functions");
diff --git a/include/crypto/twofish.h b/include/crypto/twofish.h
new file mode 100644
index 0000000000000..e4328cfaaf649
--- /dev/null
+++ b/include/crypto/twofish.h
@@ -0,0 +1,23 @@
+#ifndef _CRYPTO_TWOFISH_H
+#define _CRYPTO_TWOFISH_H
+
+#include <linux/types.h>
+
+#define TF_MIN_KEY_SIZE 16
+#define TF_MAX_KEY_SIZE 32
+#define TF_BLOCK_SIZE 16
+
+struct crypto_tfm;
+
+/* Structure for an expanded Twofish key.  s contains the key-dependent
+ * S-boxes composed with the MDS matrix; w contains the eight "whitening"
+ * subkeys, K[0] through K[7].  k holds the remaining, "round" subkeys.  Note
+ * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
+struct twofish_ctx {
+	u32 s[4][256], w[8], k[32];
+};
+
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+		   unsigned int key_len, u32 *flags);
+
+#endif
-- 
GitLab


From 758f570ea785a5fbcdca026dfab2e9e1a3f89726 Mon Sep 17 00:00:00 2001
From: Joachim Fritschi <jfritschi@freenet.de>
Date: Tue, 20 Jun 2006 20:39:29 +1000
Subject: [PATCH 0277/1063] [CRYPTO] twofish: Fix the priority

This patch adds a proper driver name and priority to the generic C
implementation to allow coexistence of C and assembler modules.

Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/twofish.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crypto/twofish.c b/crypto/twofish.c
index e3b3a0a6cb4da..4979a2be48a96 100644
--- a/crypto/twofish.c
+++ b/crypto/twofish.c
@@ -181,6 +181,8 @@ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 
 static struct crypto_alg alg = {
 	.cra_name           =   "twofish",
+	.cra_driver_name    =   "twofish-generic",
+	.cra_priority       =   100,
 	.cra_flags          =   CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize      =   TF_BLOCK_SIZE,
 	.cra_ctxsize        =   sizeof(struct twofish_ctx),
-- 
GitLab


From b9f535ffe38f7eb61ac2219d32d97c377b69f70d Mon Sep 17 00:00:00 2001
From: Joachim Fritschi <jfritschi@freenet.de>
Date: Tue, 20 Jun 2006 20:59:16 +1000
Subject: [PATCH 0278/1063] [CRYPTO] twofish: i586 assembly version

The patch passed the tcrypt tests and automated filesystem tests.
This rewrite resulted in some nice performance increase over my last patch.

Short summary of the tcrypt benchmarks:

Twofish Assembler vs. Twofish C (256bit 8kb block CBC)
encrypt: -33% Cycles
decrypt: -45% Cycles

Twofish Assembler vs. AES Assembler (128bit 8kb block CBC)
encrypt: +3%  Cycles
decrypt: -22% Cycles

Twofish Assembler vs. AES Assembler (256bit 8kb block CBC)
encrypt: -20% Cycles
decrypt: -36% Cycles

Full Output:
http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-asm-i586.txt
http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-c-i586.txt
http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-aes-asm-i586.txt


Here is another bonnie++ benchmark with encrypted filesystems. All runs with
the twofish assembler modules max out the drivespeed. It should give some
idea what the module can do for encrypted filesystem performance even though
you can't see the full numbers.

http://homepages.tu-darmstadt.de/~fritschi/twofish/output_20060611_205432_x86.html

Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/i386/crypto/Makefile           |   3 +
 arch/i386/crypto/twofish-i586-asm.S | 335 ++++++++++++++++++++++++++++
 arch/i386/crypto/twofish.c          |  97 ++++++++
 crypto/Kconfig                      |  15 ++
 4 files changed, 450 insertions(+)
 create mode 100644 arch/i386/crypto/twofish-i586-asm.S
 create mode 100644 arch/i386/crypto/twofish.c

diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile
index 103c353d0a637..3fd19af18e342 100644
--- a/arch/i386/crypto/Makefile
+++ b/arch/i386/crypto/Makefile
@@ -5,5 +5,8 @@
 # 
 
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
+obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 
 aes-i586-y := aes-i586-asm.o aes.o
+twofish-i586-y := twofish-i586-asm.o twofish.o
+
diff --git a/arch/i386/crypto/twofish-i586-asm.S b/arch/i386/crypto/twofish-i586-asm.S
new file mode 100644
index 0000000000000..39b98ed2c1b9f
--- /dev/null
+++ b/arch/i386/crypto/twofish-i586-asm.S
@@ -0,0 +1,335 @@
+/***************************************************************************
+*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+***************************************************************************/
+
+.file "twofish-i586-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+/* return address at 0 */
+
+#define in_blk    12  /* input byte array address parameter*/
+#define out_blk   8  /* output byte array address parameter*/
+#define tfm       4  /* Twofish context structure */
+
+#define a_offset	0
+#define b_offset	4
+#define c_offset	8
+#define d_offset	12
+
+/* Structure of the crypto context struct*/
+
+#define s0	0	/* S0 Array 256 Words each */
+#define s1	1024	/* S1 Array */
+#define s2	2048	/* S2 Array */
+#define s3	3072	/* S3 Array */
+#define w	4096	/* 8 whitening keys (word) */
+#define k	4128	/* key 1-32 ( word ) */
+
+/* define a few register aliases to allow macro substitution */
+
+#define R0D    %eax
+#define R0B    %al
+#define R0H    %ah
+
+#define R1D    %ebx
+#define R1B    %bl
+#define R1H    %bh
+
+#define R2D    %ecx
+#define R2B    %cl
+#define R2H    %ch
+
+#define R3D    %edx
+#define R3B    %dl
+#define R3H    %dh
+
+
+/* performs input whitening */
+#define input_whitening(src,context,offset)\
+	xor	w+offset(context),	src;
+
+/* performs output whitening */
+#define output_whitening(src,context,offset)\
+	xor	w+16+offset(context),	src;
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ */
+#define encrypt_round(a,b,c,d,round)\
+	push	d ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%ebp,%edi,4),%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%ebp,%edi,4),d ## D;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),%esi;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$15,		b ## D;\
+	xor	(%ebp,%edi,4),	d ## D;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	d ## D,		%esi;\
+	add	%esi,		d ## D;\
+	add	k+round(%ebp),	%esi;\
+	xor	%esi,		c ## D;\
+	rol	$15,		c ## D;\
+	add	k+4+round(%ebp),d ## D;\
+	xor	%edi,		d ## D;
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation
+ */
+#define encrypt_last_round(a,b,c,d,round)\
+	push	d ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%ebp,%edi,4),%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%ebp,%edi,4),d ## D;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),%esi;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	d ## D;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	d ## D,		%esi;\
+	add	%esi,		d ## D;\
+	add	k+round(%ebp),	%esi;\
+	xor	%esi,		c ## D;\
+	ror	$1,		c ## D;\
+	add	k+4+round(%ebp),d ## D;\
+	xor	%edi,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ */
+#define decrypt_round(a,b,c,d,round)\
+	push	c ## D;\
+	movzx	a ## B,		%edi;\
+	mov	(%ebp,%edi,4),	c ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%ebp,%edi,4),c ## D;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$15,		a ## D;\
+	xor	s3(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	%esi,		c ## D;\
+	add	c ## D,		%esi;\
+	add	k+round(%ebp),	c ## D;\
+	xor	%edi,		c ## D;\
+	add	k+4+round(%ebp),%esi;\
+	xor	%esi,		d ## D;\
+	rol	$15,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation
+ */
+#define decrypt_last_round(a,b,c,d,round)\
+	push	c ## D;\
+	movzx	a ## B,		%edi;\
+	mov	(%ebp,%edi,4),	c ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%ebp,%edi,4),c ## D;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	%esi,		c ## D;\
+	add	c ## D,		%esi;\
+	add	k+round(%ebp),	c ## D;\
+	xor	%edi,		c ## D;\
+	add	k+4+round(%ebp),%esi;\
+	xor	%esi,		d ## D;\
+	ror	$1,		d ## D;
+
+.align 4
+.global twofish_enc_blk
+.global twofish_dec_blk
+
+twofish_enc_blk:	/* encrypt one 16-byte block; stack args: tfm, out_blk, in_blk */
+	push	%ebp			/* save registers according to calling convention */
+	push    %ebx
+	push    %esi
+	push    %edi
+
+	mov	tfm + 16(%esp),	%ebp	/* +16 skips the four saved registers; ebp = crypto tfm */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ebp = ctx address (base for s0..s3, w, k) */
+	mov     in_blk+16(%esp),%edi	/* input address in edi */
+
+	mov	(%edi),		%eax
+	mov	b_offset(%edi),	%ebx
+	mov	c_offset(%edi),	%ecx
+	mov	d_offset(%edi),	%edx
+	input_whitening(%eax,%ebp,a_offset)
+	ror	$16,	%eax		/* encrypt_round expects a rotated by 16 */
+	input_whitening(%ebx,%ebp,b_offset)
+	input_whitening(%ecx,%ebp,c_offset)
+	input_whitening(%edx,%ebp,d_offset)
+	rol	$1,	%edx		/* encrypt_round expects d already rol $1 */
+
+	encrypt_round(R0,R1,R2,R3,0);
+	encrypt_round(R2,R3,R0,R1,8);
+	encrypt_round(R0,R1,R2,R3,2*8);
+	encrypt_round(R2,R3,R0,R1,3*8);
+	encrypt_round(R0,R1,R2,R3,4*8);
+	encrypt_round(R2,R3,R0,R1,5*8);
+	encrypt_round(R0,R1,R2,R3,6*8);
+	encrypt_round(R2,R3,R0,R1,7*8);
+	encrypt_round(R0,R1,R2,R3,8*8);
+	encrypt_round(R2,R3,R0,R1,9*8);
+	encrypt_round(R0,R1,R2,R3,10*8);
+	encrypt_round(R2,R3,R0,R1,11*8);
+	encrypt_round(R0,R1,R2,R3,12*8);
+	encrypt_round(R2,R3,R0,R1,13*8);
+	encrypt_round(R0,R1,R2,R3,14*8);
+	encrypt_last_round(R2,R3,R0,R1,15*8);
+
+	output_whitening(%eax,%ebp,c_offset)
+	output_whitening(%ebx,%ebp,d_offset)
+	output_whitening(%ecx,%ebp,a_offset)
+	output_whitening(%edx,%ebp,b_offset)
+	mov	out_blk+16(%esp),%edi;
+	mov	%eax,		c_offset(%edi)	/* halves are stored swapped: eax/ebx -> c/d */
+	mov	%ebx,		d_offset(%edi)
+	mov	%ecx,		(%edi)		/* ecx/edx -> a/b */
+	mov	%edx,		b_offset(%edi)
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+	pop	%ebp
+	mov	$1,	%eax		/* NOTE(review): C prototype is void; value looks unused */
+	ret
+
+twofish_dec_blk:	/* decrypt one 16-byte block; stack args: tfm, out_blk, in_blk */
+	push	%ebp			/* save registers according to calling convention */
+	push    %ebx
+	push    %esi
+	push    %edi
+
+
+	mov	tfm + 16(%esp),	%ebp	/* +16 skips the four saved registers; ebp = crypto tfm */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ebp = ctx address (base for s0..s3, w, k) */
+	mov     in_blk+16(%esp),%edi	/* input address in edi */
+
+	mov	(%edi),		%eax
+	mov	b_offset(%edi),	%ebx
+	mov	c_offset(%edi),	%ecx
+	mov	d_offset(%edi),	%edx
+	output_whitening(%eax,%ebp,a_offset)
+	output_whitening(%ebx,%ebp,b_offset)
+	ror	$16,	%ebx		/* decrypt_round expects b rotated by 16 */
+	output_whitening(%ecx,%ebp,c_offset)
+	output_whitening(%edx,%ebp,d_offset)
+	rol	$1,	%ecx		/* c enters the rounds pre-rotated left by 1 */
+
+	decrypt_round(R0,R1,R2,R3,15*8);
+	decrypt_round(R2,R3,R0,R1,14*8);
+	decrypt_round(R0,R1,R2,R3,13*8);
+	decrypt_round(R2,R3,R0,R1,12*8);
+	decrypt_round(R0,R1,R2,R3,11*8);
+	decrypt_round(R2,R3,R0,R1,10*8);
+	decrypt_round(R0,R1,R2,R3,9*8);
+	decrypt_round(R2,R3,R0,R1,8*8);
+	decrypt_round(R0,R1,R2,R3,7*8);
+	decrypt_round(R2,R3,R0,R1,6*8);
+	decrypt_round(R0,R1,R2,R3,5*8);
+	decrypt_round(R2,R3,R0,R1,4*8);
+	decrypt_round(R0,R1,R2,R3,3*8);
+	decrypt_round(R2,R3,R0,R1,2*8);
+	decrypt_round(R0,R1,R2,R3,1*8);
+	decrypt_last_round(R2,R3,R0,R1,0);
+
+	input_whitening(%eax,%ebp,c_offset)
+	input_whitening(%ebx,%ebp,d_offset)
+	input_whitening(%ecx,%ebp,a_offset)
+	input_whitening(%edx,%ebp,b_offset)
+	mov	out_blk+16(%esp),%edi;
+	mov	%eax,		c_offset(%edi)	/* halves are stored swapped: eax/ebx -> c/d */
+	mov	%ebx,		d_offset(%edi)
+	mov	%ecx,		(%edi)		/* ecx/edx -> a/b */
+	mov	%edx,		b_offset(%edi)
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+	pop	%ebp
+	mov	$1,	%eax		/* NOTE(review): C prototype is void; value looks unused */
+	ret
diff --git a/arch/i386/crypto/twofish.c b/arch/i386/crypto/twofish.c
new file mode 100644
index 0000000000000..e3004dfe9c7ab
--- /dev/null
+++ b/arch/i386/crypto/twofish.c
@@ -0,0 +1,97 @@
+/*
+ *  Glue Code for optimized 586 assembler version of TWOFISH
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+
+asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_enc_blk(tfm, dst, src);
+}
+
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		=	"twofish",
+	.cra_driver_name	=	"twofish-i586",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	TF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct twofish_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	TF_MAX_KEY_SIZE,
+			.cia_setkey		=	twofish_setkey,
+			.cia_encrypt		=	twofish_encrypt,
+			.cia_decrypt		=	twofish_decrypt
+		}
+	}
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
+MODULE_ALIAS("twofish");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 5472f693e6ec4..306738ceecb40 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -150,6 +150,21 @@ config CRYPTO_TWOFISH_COMMON
 	  Common parts of the Twofish cipher algorithm shared by the
 	  generic c and the assembler implementations.
 
+config CRYPTO_TWOFISH_586
+	tristate "Twofish cipher algorithms (i586)"
+	depends on CRYPTO && ((X86 || UML_X86) && !64BIT)
+	select CRYPTO_TWOFISH_COMMON
+	help
+	  Twofish cipher algorithm.
+
+	  Twofish was submitted as an AES (Advanced Encryption Standard)
+	  candidate cipher by researchers at CounterPane Systems.  It is a
+	  16 round block cipher supporting key sizes of 128, 192, and 256
+	  bits.
+
+	  See also:
+	  <http://www.schneier.com/twofish.html>
+
 config CRYPTO_SERPENT
 	tristate "Serpent cipher algorithm"
 	depends on CRYPTO
-- 
GitLab


From eaf44088ff467410dd15a033fef118888002ffe6 Mon Sep 17 00:00:00 2001
From: Joachim Fritschi <jfritschi@freenet.de>
Date: Tue, 20 Jun 2006 21:12:02 +1000
Subject: [PATCH 0279/1063] [CRYPTO] twofish: x86-64 assembly version

The patch passed the tcrypt tests and automated filesystem tests.
This rewrite resulted in some nice performance increase over my last patch.

Short summary of the tcrypt benchmarks:

Twofish Assembler vs. Twofish C (256bit 8kb block CBC)
encrypt: -27% Cycles
decrypt: -23% Cycles

Twofish Assembler vs. AES Assembler (128bit 8kb block CBC)
encrypt: +18%  Cycles
decrypt: +15% Cycles

Twofish Assembler vs. AES Assembler (256bit 8kb block CBC)
encrypt: -9% Cycles
decrypt: -8% Cycles

Full Output:
http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-c-x86_64.txt
http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-asm-x86_64.txt
http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-aes-asm-x86_64.txt


Here is another bonnie++ benchmark with encrypted filesystems. Most runs maxed
out the hd. It should give some idea what the module can do for encrypted filesystem
performance even though you can't see the full numbers.

http://homepages.tu-darmstadt.de/~fritschi/twofish/output_20060610_130806_x86_64.html

Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86_64/crypto/Makefile             |   3 +
 arch/x86_64/crypto/twofish-x86_64-asm.S | 324 ++++++++++++++++++++++++
 arch/x86_64/crypto/twofish.c            |  97 +++++++
 crypto/Kconfig                          |  15 ++
 4 files changed, 439 insertions(+)
 create mode 100644 arch/x86_64/crypto/twofish-x86_64-asm.S
 create mode 100644 arch/x86_64/crypto/twofish.c

diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile
index 426d20f4b72ec..15b538a8b7f7e 100644
--- a/arch/x86_64/crypto/Makefile
+++ b/arch/x86_64/crypto/Makefile
@@ -5,5 +5,8 @@
 # 
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 
 aes-x86_64-y := aes-x86_64-asm.o aes.o
+twofish-x86_64-y := twofish-x86_64-asm.o twofish.o
+
diff --git a/arch/x86_64/crypto/twofish-x86_64-asm.S b/arch/x86_64/crypto/twofish-x86_64-asm.S
new file mode 100644
index 0000000000000..35974a5866158
--- /dev/null
+++ b/arch/x86_64/crypto/twofish-x86_64-asm.S
@@ -0,0 +1,324 @@
+/***************************************************************************
+*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+***************************************************************************/
+
+.file "twofish-x86_64-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+#define a_offset	0
+#define b_offset	4
+#define c_offset	8
+#define d_offset	12
+
+/* Structure of the crypto context struct*/
+
+#define s0	0	/* S0 Array 256 Words each */
+#define s1	1024	/* S1 Array */
+#define s2	2048	/* S2 Array */
+#define s3	3072	/* S3 Array */
+#define w	4096	/* 8 whitening keys (word) */
+#define k	4128	/* key 1-32 ( word ) */
+
+/* define a few register aliases to allow macro substitution */
+
+#define R0     %rax
+#define R0D    %eax
+#define R0B    %al
+#define R0H    %ah
+
+#define R1     %rbx
+#define R1D    %ebx
+#define R1B    %bl
+#define R1H    %bh
+
+#define R2     %rcx
+#define R2D    %ecx
+#define R2B    %cl
+#define R2H    %ch
+
+#define R3     %rdx
+#define R3D    %edx
+#define R3B    %dl
+#define R3H    %dh
+
+
+/* performs input whitening */
+#define input_whitening(src,context,offset)\
+	xor	w+offset(context),	src;
+
+/* performs output whitening */
+#define output_whitening(src,context,offset)\
+	xor	w+16+offset(context),	src;
+
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ */
+#define encrypt_round(a,b,c,d,round)\
+	movzx	b ## B,		%edi;\
+	mov	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$15,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	rol	$15,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;
+
+/*
+ * a input register containing a(rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ * during the round a and b are prepared for the output whitening
+ */
+#define encrypt_last_round(a,b,c,d,round)\
+	mov	b ## D,		%r10d;\
+	shl	$32,		%r10;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	(%r11,%rdi,4),	%r9d;\
+	xor	a,		%r10;\
+	movzx	b ## H,		%edi;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	ror	$1,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ */
+#define decrypt_round(a,b,c,d,round)\
+	movzx	a ## B,		%edi;\
+	mov	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$15,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;\
+	rol	$15,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ * during the round a and b are prepared for the output whitening
+ */
+#define decrypt_last_round(a,b,c,d,round)\
+	movzx	a ## B,		%edi;\
+	mov	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%r11,%rdi,4),%r8d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	mov	b ## D,		%r10d;\
+	shl	$32,		%r10;\
+	xor	a,		%r10;\
+	ror	$16,		a ## D;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;\
+	ror	$1,		d ## D;
+
+.align 8
+.global twofish_enc_blk
+.global twofish_dec_blk
+
+twofish_enc_blk:
+	pushq    R1
+
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to free one non-rex register
+	as target for the 8bit high operations */
+	mov	%rdi,		%r11
+
+	movq	(R3),	R1
+	movq	8(R3),	R3
+	input_whitening(R1,%r11,a_offset)
+	input_whitening(R3,%r11,c_offset)
+	mov	R1D,	R0D
+	rol	$16,	R0D
+	shr	$32,	R1
+	mov	R3D,	R2D
+	shr	$32,	R3
+	rol	$1,	R3D
+
+	encrypt_round(R0,R1,R2,R3,0);
+	encrypt_round(R2,R3,R0,R1,8);
+	encrypt_round(R0,R1,R2,R3,2*8);
+	encrypt_round(R2,R3,R0,R1,3*8);
+	encrypt_round(R0,R1,R2,R3,4*8);
+	encrypt_round(R2,R3,R0,R1,5*8);
+	encrypt_round(R0,R1,R2,R3,6*8);
+	encrypt_round(R2,R3,R0,R1,7*8);
+	encrypt_round(R0,R1,R2,R3,8*8);
+	encrypt_round(R2,R3,R0,R1,9*8);
+	encrypt_round(R0,R1,R2,R3,10*8);
+	encrypt_round(R2,R3,R0,R1,11*8);
+	encrypt_round(R0,R1,R2,R3,12*8);
+	encrypt_round(R2,R3,R0,R1,13*8);
+	encrypt_round(R0,R1,R2,R3,14*8);
+	encrypt_last_round(R2,R3,R0,R1,15*8);
+
+
+	output_whitening(%r10,%r11,a_offset)
+	movq	%r10,	(%rsi)
+
+	shl	$32,	R1
+	xor	R0,	R1
+
+	output_whitening(R1,%r11,c_offset)
+	movq	R1,	8(%rsi)
+
+	popq	R1
+	movq	$1,%rax
+	ret
+
+twofish_dec_blk:
+	pushq    R1
+
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to free one non-rex register
+	as target for the 8bit high operations */
+	mov	%rdi,		%r11
+
+	movq	(R3),	R1
+	movq	8(R3),	R3
+	output_whitening(R1,%r11,a_offset)
+	output_whitening(R3,%r11,c_offset)
+	mov	R1D,	R0D
+	shr	$32,	R1
+	rol	$16,	R1D
+	mov	R3D,	R2D
+	shr	$32,	R3
+	rol	$1,	R2D
+
+	decrypt_round(R0,R1,R2,R3,15*8);
+	decrypt_round(R2,R3,R0,R1,14*8);
+	decrypt_round(R0,R1,R2,R3,13*8);
+	decrypt_round(R2,R3,R0,R1,12*8);
+	decrypt_round(R0,R1,R2,R3,11*8);
+	decrypt_round(R2,R3,R0,R1,10*8);
+	decrypt_round(R0,R1,R2,R3,9*8);
+	decrypt_round(R2,R3,R0,R1,8*8);
+	decrypt_round(R0,R1,R2,R3,7*8);
+	decrypt_round(R2,R3,R0,R1,6*8);
+	decrypt_round(R0,R1,R2,R3,5*8);
+	decrypt_round(R2,R3,R0,R1,4*8);
+	decrypt_round(R0,R1,R2,R3,3*8);
+	decrypt_round(R2,R3,R0,R1,2*8);
+	decrypt_round(R0,R1,R2,R3,1*8);
+	decrypt_last_round(R2,R3,R0,R1,0);
+
+	input_whitening(%r10,%r11,a_offset)
+	movq	%r10,	(%rsi)
+
+	shl	$32,	R1
+	xor	R0,	R1
+
+	input_whitening(R1,%r11,c_offset)
+	movq	R1,	8(%rsi)
+
+	popq	R1
+	movq	$1,%rax
+	ret
diff --git a/arch/x86_64/crypto/twofish.c b/arch/x86_64/crypto/twofish.c
new file mode 100644
index 0000000000000..182d91d5cfb90
--- /dev/null
+++ b/arch/x86_64/crypto/twofish.c
@@ -0,0 +1,97 @@
+/*
+ * Glue Code for optimized x86_64 assembler version of TWOFISH
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_enc_blk(tfm, dst, src);
+}
+
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		=	"twofish",
+	.cra_driver_name	=	"twofish-x86_64",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	TF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct twofish_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	TF_MAX_KEY_SIZE,
+			.cia_setkey		=	twofish_setkey,
+			.cia_encrypt		=	twofish_encrypt,
+			.cia_decrypt		=	twofish_decrypt
+		}
+	}
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
+MODULE_ALIAS("twofish");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 306738ceecb40..fa927a287a1d2 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -165,6 +165,21 @@ config CRYPTO_TWOFISH_586
 	  See also:
 	  <http://www.schneier.com/twofish.html>
 
+config CRYPTO_TWOFISH_X86_64
+	tristate "Twofish cipher algorithm (x86_64)"
+	depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
+	select CRYPTO_TWOFISH_COMMON
+	help
+	  Twofish cipher algorithm (x86_64).
+
+	  Twofish was submitted as an AES (Advanced Encryption Standard)
+	  candidate cipher by researchers at CounterPane Systems.  It is a
+	  16 round block cipher supporting key sizes of 128, 192, and 256
+	  bits.
+
+	  See also:
+	  <http://www.schneier.com/twofish.html>
+
 config CRYPTO_SERPENT
 	tristate "Serpent cipher algorithm"
 	depends on CRYPTO
-- 
GitLab


From 72fa491912689ca69dd15f4266945d2c2f2819f8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 28 May 2006 09:05:24 +1000
Subject: [PATCH 0280/1063] [CRYPTO] api: Rename crypto_alg_get to
 crypto_mod_get

The functions crypto_alg_get and crypto_alg_put operate on the crypto
modules rather than the algorithms.  Therefore it makes sense to call
them crypto_mod_get and crypto_mod_put respectively.

This is needed because we need to have real algorithm reference counters
for parameterised algorithms as they can be unregistered from below
when their parameter algorithms are themselves unregistered.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/api.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index c11ec1fd4f18d..8c2743a05f906 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -29,12 +29,12 @@
 LIST_HEAD(crypto_alg_list);
 DECLARE_RWSEM(crypto_alg_sem);
 
-static inline int crypto_alg_get(struct crypto_alg *alg)
+static inline int crypto_mod_get(struct crypto_alg *alg)
 {
 	return try_module_get(alg->cra_module);
 }
 
-static inline void crypto_alg_put(struct crypto_alg *alg)
+static inline void crypto_mod_put(struct crypto_alg *alg)
 {
 	module_put(alg->cra_module);
 }
@@ -57,12 +57,12 @@ static struct crypto_alg *crypto_alg_lookup(const char *name)
 		if (!exact && !(fuzzy && q->cra_priority > best))
 			continue;
 
-		if (unlikely(!crypto_alg_get(q)))
+		if (unlikely(!crypto_mod_get(q)))
 			continue;
 
 		best = q->cra_priority;
 		if (alg)
-			crypto_alg_put(alg);
+			crypto_mod_put(alg);
 		alg = q;
 
 		if (exact)
@@ -202,7 +202,7 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 	kfree(tfm);
 	tfm = NULL;
 out_put:
-	crypto_alg_put(alg);
+	crypto_mod_put(alg);
 out:
 	return tfm;
 }
@@ -221,7 +221,7 @@ void crypto_free_tfm(struct crypto_tfm *tfm)
 	if (alg->cra_exit)
 		alg->cra_exit(tfm);
 	crypto_exit_ops(tfm);
-	crypto_alg_put(alg);
+	crypto_mod_put(alg);
 	memset(tfm, 0, size);
 	kfree(tfm);
 }
@@ -305,7 +305,7 @@ int crypto_alg_available(const char *name, u32 flags)
 	struct crypto_alg *alg = crypto_alg_mod_lookup(name);
 	
 	if (alg) {
-		crypto_alg_put(alg);
+		crypto_mod_put(alg);
 		ret = 1;
 	}
 	
-- 
GitLab


From 6521f30273fbec65146a0f16de74b7b402b0f7b0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 6 Aug 2006 20:28:44 +1000
Subject: [PATCH 0281/1063] [CRYPTO] api: Add crypto_alg reference counting

Up until now we've relied on module reference counting to ensure that the
crypto_alg structures don't disappear from under us.  This was good enough
as long as each crypto_alg came from exactly one module.

However, with parameterised crypto algorithms a crypto_alg object may need
two or more modules to operate.  This means that we need to count the
references to the crypto_alg object directly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/api.c           | 32 ++++++++++++++++++++++++++------
 crypto/proc.c          |  3 +++
 include/linux/crypto.h |  3 +++
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 8c2743a05f906..5994a58ef954e 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -29,13 +29,26 @@
 LIST_HEAD(crypto_alg_list);
 DECLARE_RWSEM(crypto_alg_sem);
 
-static inline int crypto_mod_get(struct crypto_alg *alg)
+static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
-	return try_module_get(alg->cra_module);
+	atomic_inc(&alg->cra_refcnt);
+	return alg;
+}
+
+static inline void crypto_alg_put(struct crypto_alg *alg)
+{
+	if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
+		alg->cra_destroy(alg);
+}
+
+static struct crypto_alg *crypto_mod_get(struct crypto_alg *alg)
+{
+	return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL;
 }
 
-static inline void crypto_mod_put(struct crypto_alg *alg)
+static void crypto_mod_put(struct crypto_alg *alg)
 {
+	crypto_alg_put(alg);
 	module_put(alg->cra_module);
 }
 
@@ -274,6 +287,7 @@ int crypto_register_alg(struct crypto_alg *alg)
 	}
 	
 	list_add(&alg->cra_list, &crypto_alg_list);
+	atomic_set(&alg->cra_refcnt, 1);
 out:	
 	up_write(&crypto_alg_sem);
 	return ret;
@@ -284,8 +298,6 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 	int ret = -ENOENT;
 	struct crypto_alg *q;
 	
-	BUG_ON(!alg->cra_module);
-	
 	down_write(&crypto_alg_sem);
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		if (alg == q) {
@@ -296,7 +308,15 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 	}
 out:	
 	up_write(&crypto_alg_sem);
-	return ret;
+
+	if (ret)
+		return ret;
+
+	BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
+	if (alg->cra_destroy)
+		alg->cra_destroy(alg);
+
+	return 0;
 }
 
 int crypto_alg_available(const char *name, u32 flags)
diff --git a/crypto/proc.c b/crypto/proc.c
index c0a5dd7ce2ccb..8543b7a157d6a 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -12,6 +12,8 @@
  * any later version.
  *
  */
+
+#include <asm/atomic.h>
 #include <linux/init.h>
 #include <linux/crypto.h>
 #include <linux/rwsem.h>
@@ -54,6 +56,7 @@ static int c_show(struct seq_file *m, void *p)
 	seq_printf(m, "driver       : %s\n", alg->cra_driver_name);
 	seq_printf(m, "module       : %s\n", module_name(alg->cra_module));
 	seq_printf(m, "priority     : %d\n", alg->cra_priority);
+	seq_printf(m, "refcnt       : %d\n", atomic_read(&alg->cra_refcnt));
 	
 	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_CIPHER:
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index cb1e6631b1325..7f57ff8ec9752 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -17,6 +17,7 @@
 #ifndef _LINUX_CRYPTO_H
 #define _LINUX_CRYPTO_H
 
+#include <asm/atomic.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
@@ -148,6 +149,7 @@ struct crypto_alg {
 	unsigned int cra_alignmask;
 
 	int cra_priority;
+	atomic_t cra_refcnt;
 
 	char cra_name[CRYPTO_MAX_ALG_NAME];
 	char cra_driver_name[CRYPTO_MAX_ALG_NAME];
@@ -160,6 +162,7 @@ struct crypto_alg {
 
 	int (*cra_init)(struct crypto_tfm *tfm);
 	void (*cra_exit)(struct crypto_tfm *tfm);
+	void (*cra_destroy)(struct crypto_alg *alg);
 	
 	struct module *cra_module;
 };
-- 
GitLab


From 9409f38a0c8773c04bff8dda8c552d7ea013d956 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 6 Aug 2006 19:49:12 +1000
Subject: [PATCH 0282/1063] [IPSEC]: Move linux/crypto.h inclusion out of
 net/xfrm.h

The header file linux/crypto.h is only needed by a few files so including
it in net/xfrm.h (which is included by half of the networking stack) is a
waste.  This patch moves it out of net/xfrm.h and into the specific header
files that actually need it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/net/ah.h     | 1 +
 include/net/esp.h    | 1 +
 include/net/ipcomp.h | 4 ++++
 include/net/xfrm.h   | 2 +-
 net/xfrm/xfrm_user.c | 1 +
 5 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/net/ah.h b/include/net/ah.h
index ceff00afae09f..8e27c9ba8b84d 100644
--- a/include/net/ah.h
+++ b/include/net/ah.h
@@ -1,6 +1,7 @@
 #ifndef _NET_AH_H
 #define _NET_AH_H
 
+#include <linux/crypto.h>
 #include <net/xfrm.h>
 
 /* This is the maximum truncated ICV length that we know of. */
diff --git a/include/net/esp.h b/include/net/esp.h
index 90cd94fad7d9c..6eb837973c84f 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -1,6 +1,7 @@
 #ifndef _NET_ESP_H
 #define _NET_ESP_H
 
+#include <linux/crypto.h>
 #include <net/xfrm.h>
 #include <asm/scatterlist.h>
 
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index e651a57ecdd57..b94e3047b4d9b 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -1,8 +1,12 @@
 #ifndef _NET_IPCOMP_H
 #define _NET_IPCOMP_H
 
+#include <linux/types.h>
+
 #define IPCOMP_SCRATCH_SIZE     65400
 
+struct crypto_tfm;
+
 struct ipcomp_data {
 	u16 threshold;
 	struct crypto_tfm **tfms;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c5ee9f20b65b..10396b4bde147 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -8,7 +8,6 @@
 #include <linux/list.h>
 #include <linux/skbuff.h>
 #include <linux/socket.h>
-#include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
 #include <linux/in6.h>
 #include <linux/mutex.h>
@@ -985,6 +984,7 @@ extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe);
 extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe);
 
 struct crypto_tfm;
+struct scatterlist;
 typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int);
 
 extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3e6a722d072ed..7d18ca03c80d3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
-- 
GitLab


From cce9e06d100df19a327b19f23adad76e7bf63edd Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 21:08:13 +1000
Subject: [PATCH 0283/1063] [CRYPTO] api: Split out low-level API

The crypto API is made up of the part facing users such as IPsec and the
low-level part which is used by cryptographic entities such as algorithms.
This patch splits out the latter so that the two APIs are more clearly
delineated.  As a bonus the low-level API can now be modularised if all
algorithms are built as modules.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig          |  84 ++++++++++++++++------------
 crypto/Makefile         |   7 ++-
 crypto/algapi.c         | 118 ++++++++++++++++++++++++++++++++++++++++
 crypto/api.c            |  97 +--------------------------------
 crypto/internal.h       |   6 +-
 crypto/proc.c           |   5 ++
 drivers/crypto/Kconfig  |   3 +-
 include/crypto/algapi.h |  18 ++++++
 8 files changed, 204 insertions(+), 134 deletions(-)
 create mode 100644 crypto/algapi.c
 create mode 100644 include/crypto/algapi.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index fa927a287a1d2..aabc631952226 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -9,47 +9,54 @@ config CRYPTO
 	help
 	  This option provides the core Cryptographic API.
 
+if CRYPTO
+
+config CRYPTO_ALGAPI
+	tristate
+	help
+	  This option provides the API for cryptographic algorithms.
+
 config CRYPTO_HMAC
 	bool "HMAC support"
-	depends on CRYPTO
 	help
 	  HMAC: Keyed-Hashing for Message Authentication (RFC2104).
 	  This is required for IPSec.
 
 config CRYPTO_NULL
 	tristate "Null algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  These are 'Null' algorithms, used by IPsec, which do nothing.
 
 config CRYPTO_MD4
 	tristate "MD4 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  MD4 message digest algorithm (RFC1320).
 
 config CRYPTO_MD5
 	tristate "MD5 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  MD5 message digest algorithm (RFC1321).
 
 config CRYPTO_SHA1
 	tristate "SHA1 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA1_S390
 	tristate "SHA1 digest algorithm (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA256
 	tristate "SHA256 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  SHA256 secure hash standard (DFIPS 180-2).
 	  
@@ -58,7 +65,8 @@ config CRYPTO_SHA256
 
 config CRYPTO_SHA256_S390
 	tristate "SHA256 digest algorithm (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  SHA256 secure hash standard (DFIPS 180-2).
@@ -68,7 +76,7 @@ config CRYPTO_SHA256_S390
 
 config CRYPTO_SHA512
 	tristate "SHA384 and SHA512 digest algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  SHA512 secure hash standard (DFIPS 180-2).
 	  
@@ -80,7 +88,7 @@ config CRYPTO_SHA512
 
 config CRYPTO_WP512
 	tristate "Whirlpool digest algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Whirlpool hash algorithm 512, 384 and 256-bit hashes
 
@@ -92,7 +100,7 @@ config CRYPTO_WP512
 
 config CRYPTO_TGR192
 	tristate "Tiger digest algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Tiger hash algorithm 192, 160 and 128-bit hashes
 
@@ -105,19 +113,20 @@ config CRYPTO_TGR192
 
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
 config CRYPTO_DES_S390
 	tristate "DES and Triple DES cipher algorithms (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
 config CRYPTO_BLOWFISH
 	tristate "Blowfish cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Blowfish cipher algorithm, by Bruce Schneier.
 	  
@@ -130,7 +139,7 @@ config CRYPTO_BLOWFISH
 
 config CRYPTO_TWOFISH
 	tristate "Twofish cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	select CRYPTO_TWOFISH_COMMON
 	help
 	  Twofish cipher algorithm.
@@ -145,14 +154,14 @@ config CRYPTO_TWOFISH
 
 config CRYPTO_TWOFISH_COMMON
 	tristate
-	depends on CRYPTO
 	help
 	  Common parts of the Twofish cipher algorithm shared by the
 	  generic c and the assembler implementations.
 
 config CRYPTO_TWOFISH_586
 	tristate "Twofish cipher algorithms (i586)"
-	depends on CRYPTO && ((X86 || UML_X86) && !64BIT)
+	depends on (X86 || UML_X86) && !64BIT
+	select CRYPTO_ALGAPI
 	select CRYPTO_TWOFISH_COMMON
 	help
 	  Twofish cipher algorithm.
@@ -167,7 +176,8 @@ config CRYPTO_TWOFISH_586
 
 config CRYPTO_TWOFISH_X86_64
 	tristate "Twofish cipher algorithm (x86_64)"
-	depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_ALGAPI
 	select CRYPTO_TWOFISH_COMMON
 	help
 	  Twofish cipher algorithm (x86_64).
@@ -182,7 +192,7 @@ config CRYPTO_TWOFISH_X86_64
 
 config CRYPTO_SERPENT
 	tristate "Serpent cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -195,7 +205,7 @@ config CRYPTO_SERPENT
 
 config CRYPTO_AES
 	tristate "AES cipher algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -215,7 +225,8 @@ config CRYPTO_AES
 
 config CRYPTO_AES_586
 	tristate "AES cipher algorithms (i586)"
-	depends on CRYPTO && ((X86 || UML_X86) && !64BIT)
+	depends on (X86 || UML_X86) && !64BIT
+	select CRYPTO_ALGAPI
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -235,7 +246,8 @@ config CRYPTO_AES_586
 
 config CRYPTO_AES_X86_64
 	tristate "AES cipher algorithms (x86_64)"
-	depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_ALGAPI
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -255,7 +267,8 @@ config CRYPTO_AES_X86_64
 
 config CRYPTO_AES_S390
 	tristate "AES cipher algorithms (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
@@ -275,21 +288,21 @@ config CRYPTO_AES_S390
 
 config CRYPTO_CAST5
 	tristate "CAST5 (CAST-128) cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
 
 config CRYPTO_CAST6
 	tristate "CAST6 (CAST-256) cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
 	  described in RFC2612.
 
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  TEA cipher algorithm.
 
@@ -306,7 +319,7 @@ config CRYPTO_TEA
 
 config CRYPTO_ARC4
 	tristate "ARC4 cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  ARC4 cipher algorithm.
 
@@ -317,7 +330,7 @@ config CRYPTO_ARC4
 
 config CRYPTO_KHAZAD
 	tristate "Khazad cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Khazad cipher algorithm.
 
@@ -330,7 +343,7 @@ config CRYPTO_KHAZAD
 
 config CRYPTO_ANUBIS
 	tristate "Anubis cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Anubis cipher algorithm.
 
@@ -345,7 +358,7 @@ config CRYPTO_ANUBIS
 
 config CRYPTO_DEFLATE
 	tristate "Deflate compression algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	help
@@ -356,7 +369,7 @@ config CRYPTO_DEFLATE
 
 config CRYPTO_MICHAEL_MIC
 	tristate "Michael MIC keyed digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Michael MIC is used for message integrity protection in TKIP
 	  (IEEE 802.11i). This algorithm is required for TKIP, but it
@@ -365,7 +378,7 @@ config CRYPTO_MICHAEL_MIC
 
 config CRYPTO_CRC32C
 	tristate "CRC32c CRC algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	select LIBCRC32C
 	help
 	  Castagnoli, et al Cyclic Redundancy-Check Algorithm.  Used
@@ -375,10 +388,13 @@ config CRYPTO_CRC32C
 
 config CRYPTO_TEST
 	tristate "Testing module"
-	depends on CRYPTO && m
+	depends on m
+	select CRYPTO_ALGAPI
 	help
 	  Quick & dirty crypto test module.
 
 source "drivers/crypto/Kconfig"
-endmenu
 
+endif	# if CRYPTO
+
+endmenu
diff --git a/crypto/Makefile b/crypto/Makefile
index fe934f1001c64..6d51f80753a13 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -2,10 +2,11 @@
 # Cryptographic API
 #
 
-proc-crypto-$(CONFIG_PROC_FS) = proc.o
+obj-$(CONFIG_CRYPTO) += api.o scatterwalk.o cipher.o digest.o compress.o
 
-obj-$(CONFIG_CRYPTO) += api.o scatterwalk.o cipher.o digest.o compress.o \
-			$(proc-crypto-y)
+crypto_algapi-$(CONFIG_PROC_FS) += proc.o
+crypto_algapi-objs := algapi.o $(crypto_algapi-y)
+obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o
 
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o
diff --git a/crypto/algapi.c b/crypto/algapi.c
new file mode 100644
index 0000000000000..a65c6ccfbe17a
--- /dev/null
+++ b/crypto/algapi.c
@@ -0,0 +1,118 @@
+/*
+ * Cryptographic API for algorithms (i.e., low-level API).
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include "internal.h"
+
+static inline int crypto_set_driver_name(struct crypto_alg *alg)
+{
+	static const char suffix[] = "-generic";
+	char *driver_name = alg->cra_driver_name;
+	int len;
+
+	if (*driver_name)
+		return 0;
+
+	len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+	if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
+	memcpy(driver_name + len, suffix, sizeof(suffix));
+	return 0;
+}
+
+int crypto_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+	struct crypto_alg *q;
+
+	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
+		return -EINVAL;
+
+	if (alg->cra_alignmask & alg->cra_blocksize)
+		return -EINVAL;
+
+	if (alg->cra_blocksize > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	if (alg->cra_priority < 0)
+		return -EINVAL;
+	
+	ret = crypto_set_driver_name(alg);
+	if (unlikely(ret))
+		return ret;
+
+	down_write(&crypto_alg_sem);
+	
+	list_for_each_entry(q, &crypto_alg_list, cra_list) {
+		if (q == alg) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	
+	list_add(&alg->cra_list, &crypto_alg_list);
+	atomic_set(&alg->cra_refcnt, 1);
+out:	
+	up_write(&crypto_alg_sem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_alg);
+
+int crypto_unregister_alg(struct crypto_alg *alg)
+{
+	int ret = -ENOENT;
+	struct crypto_alg *q;
+	
+	down_write(&crypto_alg_sem);
+	list_for_each_entry(q, &crypto_alg_list, cra_list) {
+		if (alg == q) {
+			list_del(&alg->cra_list);
+			ret = 0;
+			goto out;
+		}
+	}
+out:	
+	up_write(&crypto_alg_sem);
+
+	if (ret)
+		return ret;
+
+	BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
+	if (alg->cra_destroy)
+		alg->cra_destroy(alg);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_alg);
+
+static int __init crypto_algapi_init(void)
+{
+	crypto_init_proc();
+	return 0;
+}
+
+static void __exit crypto_algapi_exit(void)
+{
+	crypto_exit_proc();
+}
+
+module_init(crypto_algapi_init);
+module_exit(crypto_algapi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Cryptographic algorithms API");
diff --git a/crypto/api.c b/crypto/api.c
index 5994a58ef954e..c922090b48428 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -15,19 +15,17 @@
  *
  */
 
-#include <linux/compiler.h>
-#include <linux/init.h>
-#include <linux/crypto.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
-#include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include "internal.h"
 
 LIST_HEAD(crypto_alg_list);
+EXPORT_SYMBOL_GPL(crypto_alg_list);
 DECLARE_RWSEM(crypto_alg_sem);
+EXPORT_SYMBOL_GPL(crypto_alg_sem);
 
 static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
@@ -239,86 +237,6 @@ void crypto_free_tfm(struct crypto_tfm *tfm)
 	kfree(tfm);
 }
 
-static inline int crypto_set_driver_name(struct crypto_alg *alg)
-{
-	static const char suffix[] = "-generic";
-	char *driver_name = alg->cra_driver_name;
-	int len;
-
-	if (*driver_name)
-		return 0;
-
-	len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-	if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME)
-		return -ENAMETOOLONG;
-
-	memcpy(driver_name + len, suffix, sizeof(suffix));
-	return 0;
-}
-
-int crypto_register_alg(struct crypto_alg *alg)
-{
-	int ret;
-	struct crypto_alg *q;
-
-	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
-		return -EINVAL;
-
-	if (alg->cra_alignmask & alg->cra_blocksize)
-		return -EINVAL;
-
-	if (alg->cra_blocksize > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	if (alg->cra_priority < 0)
-		return -EINVAL;
-	
-	ret = crypto_set_driver_name(alg);
-	if (unlikely(ret))
-		return ret;
-
-	down_write(&crypto_alg_sem);
-	
-	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (q == alg) {
-			ret = -EEXIST;
-			goto out;
-		}
-	}
-	
-	list_add(&alg->cra_list, &crypto_alg_list);
-	atomic_set(&alg->cra_refcnt, 1);
-out:	
-	up_write(&crypto_alg_sem);
-	return ret;
-}
-
-int crypto_unregister_alg(struct crypto_alg *alg)
-{
-	int ret = -ENOENT;
-	struct crypto_alg *q;
-	
-	down_write(&crypto_alg_sem);
-	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (alg == q) {
-			list_del(&alg->cra_list);
-			ret = 0;
-			goto out;
-		}
-	}
-out:	
-	up_write(&crypto_alg_sem);
-
-	if (ret)
-		return ret;
-
-	BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
-	if (alg->cra_destroy)
-		alg->cra_destroy(alg);
-
-	return 0;
-}
-
 int crypto_alg_available(const char *name, u32 flags)
 {
 	int ret = 0;
@@ -332,17 +250,6 @@ int crypto_alg_available(const char *name, u32 flags)
 	return ret;
 }
 
-static int __init init_crypto(void)
-{
-	printk(KERN_INFO "Initializing Cryptographic API\n");
-	crypto_init_proc();
-	return 0;
-}
-
-__initcall(init_crypto);
-
-EXPORT_SYMBOL_GPL(crypto_register_alg);
-EXPORT_SYMBOL_GPL(crypto_unregister_alg);
 EXPORT_SYMBOL_GPL(crypto_alloc_tfm);
 EXPORT_SYMBOL_GPL(crypto_free_tfm);
 EXPORT_SYMBOL_GPL(crypto_alg_available);
diff --git a/crypto/internal.h b/crypto/internal.h
index 959e602909a60..26f47d3315519 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -12,7 +12,8 @@
  */
 #ifndef _CRYPTO_INTERNAL_H
 #define _CRYPTO_INTERNAL_H
-#include <linux/crypto.h>
+
+#include <crypto/algapi.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/interrupt.h>
@@ -64,9 +65,12 @@ static inline void crypto_free_hmac_block(struct crypto_tfm *tfm)
 
 #ifdef CONFIG_PROC_FS
 void __init crypto_init_proc(void);
+void __exit crypto_exit_proc(void);
 #else
 static inline void crypto_init_proc(void)
 { }
+static inline void crypto_exit_proc(void)
+{ }
 #endif
 
 static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg,
diff --git a/crypto/proc.c b/crypto/proc.c
index 8543b7a157d6a..9e573b17e8879 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -113,3 +113,8 @@ void __init crypto_init_proc(void)
 	if (proc)
 		proc->proc_fops = &proc_crypto_ops;
 }
+
+void __exit crypto_exit_proc(void)
+{
+	remove_proc_entry("crypto", NULL);
+}
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4263935443cc6..ba23683ab8c48 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -2,7 +2,8 @@ menu "Hardware crypto devices"
 
 config CRYPTO_DEV_PADLOCK
 	tristate "Support for VIA PadLock ACE"
-	depends on CRYPTO && X86_32
+	depends on X86_32
+	select CRYPTO_ALGAPI
 	help
 	  Some VIA processors come with an integrated crypto engine
 	  (so called VIA PadLock ACE, Advanced Cryptography Engine)
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
new file mode 100644
index 0000000000000..ed68d494b3647
--- /dev/null
+++ b/include/crypto/algapi.h
@@ -0,0 +1,18 @@
+/*
+ * Cryptographic API for algorithms (i.e., low-level API).
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ALGAPI_H
+#define _CRYPTO_ALGAPI_H
+
+#include <linux/crypto.h>
+
+#endif	/* _CRYPTO_ALGAPI_H */
+
-- 
GitLab


From 4cc7720cd165273b08a72b4193146dffee58e34b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 6 Aug 2006 21:16:34 +1000
Subject: [PATCH 0284/1063] [CRYPTO] api: Add template registration

A crypto_template generates a crypto_alg object when given a set of
parameters.  This patch adds the basic data structure for templates
and code to handle their registration/deregistration.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/algapi.c         | 156 ++++++++++++++++++++++++++++++++++------
 crypto/internal.h       |  17 +++++
 include/crypto/algapi.h |  31 ++++++++
 3 files changed, 182 insertions(+), 22 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index a65c6ccfbe17a..232b37d816136 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -13,11 +13,14 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/string.h>
 
 #include "internal.h"
 
+static LIST_HEAD(crypto_template_list);
+
 static inline int crypto_set_driver_name(struct crypto_alg *alg)
 {
 	static const char suffix[] = "-generic";
@@ -35,11 +38,8 @@ static inline int crypto_set_driver_name(struct crypto_alg *alg)
 	return 0;
 }
 
-int crypto_register_alg(struct crypto_alg *alg)
+static int crypto_check_alg(struct crypto_alg *alg)
 {
-	int ret;
-	struct crypto_alg *q;
-
 	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
 		return -EINVAL;
 
@@ -51,42 +51,52 @@ int crypto_register_alg(struct crypto_alg *alg)
 
 	if (alg->cra_priority < 0)
 		return -EINVAL;
-	
-	ret = crypto_set_driver_name(alg);
-	if (unlikely(ret))
-		return ret;
 
-	down_write(&crypto_alg_sem);
-	
+	return crypto_set_driver_name(alg);
+}
+
+static int __crypto_register_alg(struct crypto_alg *alg)
+{
+	struct crypto_alg *q;
+	int ret = -EEXIST;
+
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (q == alg) {
-			ret = -EEXIST;
+		if (q == alg)
 			goto out;
-		}
 	}
 	
 	list_add(&alg->cra_list, &crypto_alg_list);
 	atomic_set(&alg->cra_refcnt, 1);
+	ret = 0;
 out:	
-	up_write(&crypto_alg_sem);
 	return ret;
 }
+
+int crypto_register_alg(struct crypto_alg *alg)
+{
+	int err;
+
+	err = crypto_check_alg(alg);
+	if (err)
+		return err;
+
+	down_write(&crypto_alg_sem);
+	err = __crypto_register_alg(alg);
+	up_write(&crypto_alg_sem);
+
+	return err;
+}
 EXPORT_SYMBOL_GPL(crypto_register_alg);
 
 int crypto_unregister_alg(struct crypto_alg *alg)
 {
 	int ret = -ENOENT;
-	struct crypto_alg *q;
 	
 	down_write(&crypto_alg_sem);
-	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (alg == q) {
-			list_del(&alg->cra_list);
-			ret = 0;
-			goto out;
-		}
+	if (likely(!list_empty(&alg->cra_list))) {
+		list_del_init(&alg->cra_list);
+		ret = 0;
 	}
-out:	
 	up_write(&crypto_alg_sem);
 
 	if (ret)
@@ -100,6 +110,108 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_alg);
 
+int crypto_register_template(struct crypto_template *tmpl)
+{
+	struct crypto_template *q;
+	int err = -EEXIST;
+
+	down_write(&crypto_alg_sem);
+
+	list_for_each_entry(q, &crypto_template_list, list) {
+		if (q == tmpl)
+			goto out;
+	}
+
+	list_add(&tmpl->list, &crypto_template_list);
+	err = 0;
+out:
+	up_write(&crypto_alg_sem);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_register_template);
+
+void crypto_unregister_template(struct crypto_template *tmpl)
+{
+	struct crypto_instance *inst;
+	struct hlist_node *p, *n;
+	struct hlist_head *list;
+
+	down_write(&crypto_alg_sem);
+
+	BUG_ON(list_empty(&tmpl->list));
+	list_del_init(&tmpl->list);
+
+	list = &tmpl->instances;
+	hlist_for_each_entry(inst, p, list, list) {
+		BUG_ON(list_empty(&inst->alg.cra_list));
+		list_del_init(&inst->alg.cra_list);
+	}
+
+	up_write(&crypto_alg_sem);
+
+	hlist_for_each_entry_safe(inst, p, n, list, list) {
+		BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1);
+		tmpl->free(inst);
+	}
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_template);
+
+static struct crypto_template *__crypto_lookup_template(const char *name)
+{
+	struct crypto_template *q, *tmpl = NULL;
+
+	down_read(&crypto_alg_sem);
+	list_for_each_entry(q, &crypto_template_list, list) {
+		if (strcmp(q->name, name))
+			continue;
+		if (unlikely(!crypto_tmpl_get(q)))
+			continue;
+
+		tmpl = q;
+		break;
+	}
+	up_read(&crypto_alg_sem);
+
+	return tmpl;
+}
+
+struct crypto_template *crypto_lookup_template(const char *name)
+{
+	return try_then_request_module(__crypto_lookup_template(name), name);
+}
+EXPORT_SYMBOL_GPL(crypto_lookup_template);
+
+int crypto_register_instance(struct crypto_template *tmpl,
+			     struct crypto_instance *inst)
+{
+	int err = -EINVAL;
+
+	if (inst->alg.cra_destroy)
+		goto err;
+
+	err = crypto_check_alg(&inst->alg);
+	if (err)
+		goto err;
+
+	inst->alg.cra_module = tmpl->module;
+
+	down_write(&crypto_alg_sem);
+
+	err = __crypto_register_alg(&inst->alg);
+	if (err)
+		goto unlock;
+
+	hlist_add_head(&inst->list, &tmpl->instances);
+	inst->tmpl = tmpl;
+
+unlock:
+	up_write(&crypto_alg_sem);
+
+err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_register_instance);
+
 static int __init crypto_algapi_init(void)
 {
 	crypto_init_proc();
diff --git a/crypto/internal.h b/crypto/internal.h
index 26f47d3315519..c3ab4a950f304 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -19,11 +19,15 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <asm/kmap_types.h>
 
+struct crypto_instance;
+struct crypto_template;
+
 extern struct list_head crypto_alg_list;
 extern struct rw_semaphore crypto_alg_sem;
 
@@ -112,5 +116,18 @@ void crypto_exit_digest_ops(struct crypto_tfm *tfm);
 void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
 void crypto_exit_compress_ops(struct crypto_tfm *tfm);
 
+int crypto_register_instance(struct crypto_template *tmpl,
+			     struct crypto_instance *inst);
+
+static inline int crypto_tmpl_get(struct crypto_template *tmpl)
+{
+	return try_module_get(tmpl->module);
+}
+
+static inline void crypto_tmpl_put(struct crypto_template *tmpl)
+{
+	module_put(tmpl->module);
+}
+
 #endif	/* _CRYPTO_INTERNAL_H */
 
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index ed68d494b3647..ffec530d52fb5 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -14,5 +14,36 @@
 
 #include <linux/crypto.h>
 
+struct module;
+
+struct crypto_instance {
+	struct crypto_alg alg;
+
+	struct crypto_template *tmpl;
+	struct hlist_node list;
+
+	void *__ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+struct crypto_template {
+	struct list_head list;
+	struct hlist_head instances;
+	struct module *module;
+
+	struct crypto_instance *(*alloc)(void *param, unsigned int len);
+	void (*free)(struct crypto_instance *inst);
+
+	char name[CRYPTO_MAX_ALG_NAME];
+};
+
+int crypto_register_template(struct crypto_template *tmpl);
+void crypto_unregister_template(struct crypto_template *tmpl);
+struct crypto_template *crypto_lookup_template(const char *name);
+
+static inline void *crypto_instance_ctx(struct crypto_instance *inst)
+{
+	return inst->__ctx;
+}
+
 #endif	/* _CRYPTO_ALGAPI_H */
 
-- 
GitLab


From 2825982d9d66ebba4b532a07391dfbb357f71c5f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 6 Aug 2006 21:23:26 +1000
Subject: [PATCH 0285/1063] [CRYPTO] api: Added event notification

This patch adds a notifier chain for algorithm/template registration events.
This will be used to register compound algorithms such as cbc(aes).  In
future this will also be passed on to user-space through netlink.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/algapi.c        |  50 ++++++++++++++++-
 crypto/api.c           | 122 +++++++++++++++++++++++++++++++++++++----
 crypto/internal.h      |  37 +++++++++++++
 include/linux/crypto.h |   4 +-
 4 files changed, 199 insertions(+), 14 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 232b37d816136..f0df85fc1f50b 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -21,6 +21,24 @@
 
 static LIST_HEAD(crypto_template_list);
 
+void crypto_larval_error(const char *name)
+{
+	struct crypto_alg *alg;
+
+	down_read(&crypto_alg_sem);
+	alg = __crypto_alg_lookup(name);
+	up_read(&crypto_alg_sem);
+
+	if (alg) {
+		if (crypto_is_larval(alg)) {
+			struct crypto_larval *larval = (void *)alg;
+			complete(&larval->completion);
+		}
+		crypto_mod_put(alg);
+	}
+}
+EXPORT_SYMBOL_GPL(crypto_larval_error);
+
 static inline int crypto_set_driver_name(struct crypto_alg *alg)
 {
 	static const char suffix[] = "-generic";
@@ -60,14 +78,27 @@ static int __crypto_register_alg(struct crypto_alg *alg)
 	struct crypto_alg *q;
 	int ret = -EEXIST;
 
+	atomic_set(&alg->cra_refcnt, 1);
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		if (q == alg)
 			goto out;
+		if (crypto_is_larval(q) &&
+		    (!strcmp(alg->cra_name, q->cra_name) ||
+		     !strcmp(alg->cra_driver_name, q->cra_name))) {
+			struct crypto_larval *larval = (void *)q;
+
+			if (!crypto_mod_get(alg))
+				continue;
+			larval->adult = alg;
+			complete(&larval->completion);
+		}
 	}
 	
 	list_add(&alg->cra_list, &crypto_alg_list);
-	atomic_set(&alg->cra_refcnt, 1);
+
+	crypto_notify(CRYPTO_MSG_ALG_REGISTER, alg);
 	ret = 0;
+
 out:	
 	return ret;
 }
@@ -97,6 +128,7 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 		list_del_init(&alg->cra_list);
 		ret = 0;
 	}
+	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg);
 	up_write(&crypto_alg_sem);
 
 	if (ret)
@@ -123,6 +155,7 @@ int crypto_register_template(struct crypto_template *tmpl)
 	}
 
 	list_add(&tmpl->list, &crypto_template_list);
+	crypto_notify(CRYPTO_MSG_TMPL_REGISTER, tmpl);
 	err = 0;
 out:
 	up_write(&crypto_alg_sem);
@@ -145,8 +178,11 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 	hlist_for_each_entry(inst, p, list, list) {
 		BUG_ON(list_empty(&inst->alg.cra_list));
 		list_del_init(&inst->alg.cra_list);
+		crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg);
 	}
 
+	crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl);
+
 	up_write(&crypto_alg_sem);
 
 	hlist_for_each_entry_safe(inst, p, n, list, list) {
@@ -212,6 +248,18 @@ int crypto_register_instance(struct crypto_template *tmpl,
 }
 EXPORT_SYMBOL_GPL(crypto_register_instance);
 
+int crypto_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&crypto_chain, nb);
+}
+EXPORT_SYMBOL_GPL(crypto_register_notifier);
+
+int crypto_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&crypto_chain, nb);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
+
 static int __init crypto_algapi_init(void)
 {
 	crypto_init_proc();
diff --git a/crypto/api.c b/crypto/api.c
index c922090b48428..5a0d6a17cfd70 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -18,6 +18,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
+#include <linux/param.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include "internal.h"
@@ -27,6 +28,9 @@ EXPORT_SYMBOL_GPL(crypto_alg_list);
 DECLARE_RWSEM(crypto_alg_sem);
 EXPORT_SYMBOL_GPL(crypto_alg_sem);
 
+BLOCKING_NOTIFIER_HEAD(crypto_chain);
+EXPORT_SYMBOL_GPL(crypto_chain);
+
 static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
 	atomic_inc(&alg->cra_refcnt);
@@ -39,27 +43,24 @@ static inline void crypto_alg_put(struct crypto_alg *alg)
 		alg->cra_destroy(alg);
 }
 
-static struct crypto_alg *crypto_mod_get(struct crypto_alg *alg)
+struct crypto_alg *crypto_mod_get(struct crypto_alg *alg)
 {
 	return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL;
 }
+EXPORT_SYMBOL_GPL(crypto_mod_get);
 
-static void crypto_mod_put(struct crypto_alg *alg)
+void crypto_mod_put(struct crypto_alg *alg)
 {
 	crypto_alg_put(alg);
 	module_put(alg->cra_module);
 }
+EXPORT_SYMBOL_GPL(crypto_mod_put);
 
-static struct crypto_alg *crypto_alg_lookup(const char *name)
+struct crypto_alg *__crypto_alg_lookup(const char *name)
 {
 	struct crypto_alg *q, *alg = NULL;
-	int best = -1;
+	int best = -2;
 
-	if (!name)
-		return NULL;
-	
-	down_read(&crypto_alg_sem);
-	
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		int exact, fuzzy;
 
@@ -79,16 +80,113 @@ static struct crypto_alg *crypto_alg_lookup(const char *name)
 		if (exact)
 			break;
 	}
-	
+
+	return alg;
+}
+EXPORT_SYMBOL_GPL(__crypto_alg_lookup);
+
+static void crypto_larval_destroy(struct crypto_alg *alg)
+{
+	struct crypto_larval *larval = (void *)alg;
+
+	BUG_ON(!crypto_is_larval(alg));
+	if (larval->adult)
+		crypto_mod_put(larval->adult);
+	kfree(larval);
+}
+
+static struct crypto_alg *crypto_larval_alloc(const char *name)
+{
+	struct crypto_alg *alg;
+	struct crypto_larval *larval;
+
+	larval = kzalloc(sizeof(*larval), GFP_KERNEL);
+	if (!larval)
+		return NULL;
+
+	larval->alg.cra_flags = CRYPTO_ALG_LARVAL;
+	larval->alg.cra_priority = -1;
+	larval->alg.cra_destroy = crypto_larval_destroy;
+
+	atomic_set(&larval->alg.cra_refcnt, 2);
+	strlcpy(larval->alg.cra_name, name, CRYPTO_MAX_ALG_NAME);
+	init_completion(&larval->completion);
+
+	down_write(&crypto_alg_sem);
+	alg = __crypto_alg_lookup(name);
+	if (!alg) {
+		alg = &larval->alg;
+		list_add(&alg->cra_list, &crypto_alg_list);
+	}
+	up_write(&crypto_alg_sem);
+
+	if (alg != &larval->alg)
+		kfree(larval);
+
+	return alg;
+}
+
+static void crypto_larval_kill(struct crypto_alg *alg)
+{
+	struct crypto_larval *larval = (void *)alg;
+
+	down_write(&crypto_alg_sem);
+	list_del(&alg->cra_list);
+	up_write(&crypto_alg_sem);
+	complete(&larval->completion);
+	crypto_alg_put(alg);
+}
+
+static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
+{
+	struct crypto_larval *larval = (void *)alg;
+
+	wait_for_completion_interruptible_timeout(&larval->completion, 60 * HZ);
+	alg = larval->adult;
+	if (alg && !crypto_mod_get(alg))
+		alg = NULL;
+	crypto_mod_put(&larval->alg);
+
+	return alg;
+}
+
+static struct crypto_alg *crypto_alg_lookup(const char *name)
+{
+	struct crypto_alg *alg;
+
+	if (!name)
+		return NULL;
+
+	down_read(&crypto_alg_sem);
+	alg = __crypto_alg_lookup(name);
 	up_read(&crypto_alg_sem);
+
 	return alg;
 }
 
 /* A far more intelligent version of this is planned.  For now, just
  * try an exact match on the name of the algorithm. */
-static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name)
+static struct crypto_alg *crypto_alg_mod_lookup(const char *name)
 {
-	return try_then_request_module(crypto_alg_lookup(name), name);
+	struct crypto_alg *alg;
+	struct crypto_alg *larval;
+
+	alg = try_then_request_module(crypto_alg_lookup(name), name);
+	if (alg)
+		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
+
+	larval = crypto_larval_alloc(name);
+	if (!larval || !crypto_is_larval(larval))
+		return larval;
+
+	if (crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval) == NOTIFY_STOP)
+		alg = crypto_larval_wait(larval);
+	else {
+		crypto_mod_put(larval);
+		alg = NULL;
+	}
+	crypto_larval_kill(larval);
+	return alg;
 }
 
 static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags)
diff --git a/crypto/internal.h b/crypto/internal.h
index c3ab4a950f304..3a08d25fba452 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -14,6 +14,7 @@
 #define _CRYPTO_INTERNAL_H
 
 #include <crypto/algapi.h>
+#include <linux/completion.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/interrupt.h>
@@ -21,15 +22,32 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <asm/kmap_types.h>
 
+/* Crypto notification events. */
+enum {
+	CRYPTO_MSG_ALG_REQUEST,
+	CRYPTO_MSG_ALG_REGISTER,
+	CRYPTO_MSG_ALG_UNREGISTER,
+	CRYPTO_MSG_TMPL_REGISTER,
+	CRYPTO_MSG_TMPL_UNREGISTER,
+};
+
 struct crypto_instance;
 struct crypto_template;
 
+struct crypto_larval {
+	struct crypto_alg alg;
+	struct crypto_alg *adult;
+	struct completion completion;
+};
+
 extern struct list_head crypto_alg_list;
 extern struct rw_semaphore crypto_alg_sem;
+extern struct blocking_notifier_head crypto_chain;
 
 extern enum km_type crypto_km_types[];
 
@@ -104,6 +122,10 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg,
 	return alg->cra_ctxsize;
 }
 
+struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
+void crypto_mod_put(struct crypto_alg *alg);
+struct crypto_alg *__crypto_alg_lookup(const char *name);
+
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags);
@@ -116,9 +138,14 @@ void crypto_exit_digest_ops(struct crypto_tfm *tfm);
 void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
 void crypto_exit_compress_ops(struct crypto_tfm *tfm);
 
+void crypto_larval_error(const char *name);
+
 int crypto_register_instance(struct crypto_template *tmpl,
 			     struct crypto_instance *inst);
 
+int crypto_register_notifier(struct notifier_block *nb);
+int crypto_unregister_notifier(struct notifier_block *nb);
+
 static inline int crypto_tmpl_get(struct crypto_template *tmpl)
 {
 	return try_module_get(tmpl->module);
@@ -129,5 +156,15 @@ static inline void crypto_tmpl_put(struct crypto_template *tmpl)
 	module_put(tmpl->module);
 }
 
+static inline int crypto_is_larval(struct crypto_alg *alg)
+{
+	return alg->cra_flags & CRYPTO_ALG_LARVAL;
+}
+
+static inline int crypto_notify(unsigned long val, void *v)
+{
+	return blocking_notifier_call_chain(&crypto_chain, val, v);
+}
+
 #endif	/* _CRYPTO_INTERNAL_H */
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7f57ff8ec9752..3e3e95aff133d 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -29,11 +29,13 @@
 /*
  * Algorithm masks and types.
  */
-#define CRYPTO_ALG_TYPE_MASK		0x000000ff
+#define CRYPTO_ALG_TYPE_MASK		0x0000000f
 #define CRYPTO_ALG_TYPE_CIPHER		0x00000001
 #define CRYPTO_ALG_TYPE_DIGEST		0x00000002
 #define CRYPTO_ALG_TYPE_COMPRESS	0x00000004
 
+#define CRYPTO_ALG_LARVAL		0x00000010
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
GitLab


From 2b8c19dbdc692e81243a328725a02efb77b144a5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 21 Sep 2006 11:31:44 +1000
Subject: [PATCH 0286/1063] [CRYPTO] api: Add cryptomgr

The cryptomgr module is a simple manager of crypto algorithm instances.
It ensures that parameterised algorithms of the type tmpl(alg) (e.g.,
cbc(aes)) are always created.

This is meant to satisfy the needs for most users.  For more complex
cases such as deeper combinations or multiple parameters, a netlink
module will be created which allows arbitrary expressions to be parsed
in user-space.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/Kconfig         |   8 +++
 crypto/Makefile        |   1 +
 crypto/api.c           |  10 ++-
 crypto/cryptomgr.c     | 146 +++++++++++++++++++++++++++++++++++++++++
 include/linux/crypto.h |   9 +++
 5 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 crypto/cryptomgr.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index aabc631952226..4ce509dba329f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -16,6 +16,14 @@ config CRYPTO_ALGAPI
 	help
 	  This option provides the API for cryptographic algorithms.
 
+config CRYPTO_MANAGER
+	tristate "Cryptographic algorithm manager"
+	select CRYPTO_ALGAPI
+	default m
+	help
+	  Create default cryptographic template instantiations such as
+	  cbc(aes).
+
 config CRYPTO_HMAC
 	bool "HMAC support"
 	help
diff --git a/crypto/Makefile b/crypto/Makefile
index 6d51f80753a13..b8745f3d35950 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -8,6 +8,7 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o
 crypto_algapi-objs := algapi.o $(crypto_algapi-y)
 obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o
 
+obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o
 obj-$(CONFIG_CRYPTO_MD4) += md4.o
diff --git a/crypto/api.c b/crypto/api.c
index 5a0d6a17cfd70..67cd6f87b74a5 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -18,6 +18,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
+#include <linux/module.h>
 #include <linux/param.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -170,6 +171,7 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name)
 {
 	struct crypto_alg *alg;
 	struct crypto_alg *larval;
+	int ok;
 
 	alg = try_then_request_module(crypto_alg_lookup(name), name);
 	if (alg)
@@ -179,7 +181,13 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name)
 	if (!larval || !crypto_is_larval(larval))
 		return larval;
 
-	if (crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval) == NOTIFY_STOP)
+	ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval);
+	if (ok == NOTIFY_DONE) {
+		request_module("cryptomgr");
+		ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval);
+	}
+
+	if (ok == NOTIFY_STOP)
 		alg = crypto_larval_wait(larval);
 	else {
 		crypto_mod_put(larval);
diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c
new file mode 100644
index 0000000000000..e0ebe1b44b994
--- /dev/null
+++ b/crypto/cryptomgr.c
@@ -0,0 +1,146 @@
+/*
+ * Create default crypto algorithm instances.
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/ctype.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+
+#include "internal.h"
+
+struct cryptomgr_param {
+	struct work_struct work;
+
+	struct {
+		struct rtattr attr;
+		struct crypto_attr_alg data;
+	} alg;
+
+	struct {
+		char name[CRYPTO_MAX_ALG_NAME];
+	} larval;
+
+	char template[CRYPTO_MAX_ALG_NAME];
+};
+
+static void cryptomgr_probe(void *data)
+{
+	struct cryptomgr_param *param = data;
+	struct crypto_template *tmpl;
+	struct crypto_instance *inst;
+	int err;
+
+	tmpl = crypto_lookup_template(param->template);
+	if (!tmpl)
+		goto err;
+
+	inst = tmpl->alloc(&param->alg, sizeof(param->alg));
+	if (IS_ERR(inst))
+		err = PTR_ERR(inst);
+	else if ((err = crypto_register_instance(tmpl, inst)))
+		tmpl->free(inst);
+
+	crypto_tmpl_put(tmpl);	/* drop the lookup reference on every path */
+	if (err)
+		goto err;
+out:
+	kfree(param);
+	return;
+
+err:
+	crypto_larval_error(param->larval.name);
+	goto out;
+}
+
+static int cryptomgr_schedule_probe(struct crypto_larval *larval)
+{
+	struct cryptomgr_param *param;
+	const char *name = larval->alg.cra_name;
+	const char *p;
+	unsigned int len;
+
+	param = kmalloc(sizeof(*param), GFP_KERNEL);
+	if (!param)
+		goto err;
+
+	for (p = name; isalnum(*p) || *p == '-' || *p == '_'; p++)
+		;
+
+	len = p - name;
+	if (!len || *p != '(')
+		goto err_free_param;
+
+	memcpy(param->template, name, len);
+	param->template[len] = 0;
+
+	name = p + 1;
+	for (p = name; isalnum(*p) || *p == '-' || *p == '_'; p++)
+		;
+
+	len = p - name;
+	if (!len || *p != ')' || p[1])
+		goto err_free_param;
+
+	param->alg.attr.rta_len = sizeof(param->alg);
+	param->alg.attr.rta_type = CRYPTOA_ALG;
+	memcpy(param->alg.data.name, name, len);
+	param->alg.data.name[len] = 0;
+
+	memcpy(param->larval.name, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME);
+
+	INIT_WORK(&param->work, cryptomgr_probe, param);
+	schedule_work(&param->work);
+
+	return NOTIFY_STOP;
+
+err_free_param:
+	kfree(param);
+err:
+	return NOTIFY_OK;
+}
+
+static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
+			    void *data)
+{
+	switch (msg) {
+	case CRYPTO_MSG_ALG_REQUEST:
+		return cryptomgr_schedule_probe(data);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cryptomgr_notifier = {
+	.notifier_call = cryptomgr_notify,
+};
+
+static int __init cryptomgr_init(void)
+{
+	return crypto_register_notifier(&cryptomgr_notifier);
+}
+
+static void __exit cryptomgr_exit(void)
+{
+	int err = crypto_unregister_notifier(&cryptomgr_notifier);
+	BUG_ON(err);
+}
+
+module_init(cryptomgr_init);
+module_exit(cryptomgr_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Crypto Algorithm Manager");
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 3e3e95aff133d..85f73c3819139 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -260,6 +260,15 @@ struct crypto_tfm {
 	void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
+enum {
+	CRYPTOA_UNSPEC,
+	CRYPTOA_ALG,
+};
+
+struct crypto_attr_alg {
+	char name[CRYPTO_MAX_ALG_NAME];
+};
+
 /* 
  * Transform user interface.
  */
-- 
GitLab


From 492e2b63eb10c28f4f0b694264d74a8755cd1be0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 21 Sep 2006 11:35:17 +1000
Subject: [PATCH 0287/1063] [CRYPTO] api: Allow algorithm lookup by type

This patch also adds the infrastructure to pick an algorithm based on
its type.  For example, this allows you to select the encryption
algorithm "aes", instead of any algorithm registered under the name
"aes".  For now this is only accessible internally.  Eventually it
will be made available through crypto_alloc_tfm.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/algapi.c    |  6 ++++--
 crypto/api.c       | 39 ++++++++++++++++++++++++++-------------
 crypto/cryptomgr.c |  7 ++++++-
 crypto/internal.h  |  6 ++++--
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index f0df85fc1f50b..acea250677c02 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -21,12 +21,12 @@
 
 static LIST_HEAD(crypto_template_list);
 
-void crypto_larval_error(const char *name)
+void crypto_larval_error(const char *name, u32 type, u32 mask)
 {
 	struct crypto_alg *alg;
 
 	down_read(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name);
+	alg = __crypto_alg_lookup(name, type, mask);
 	up_read(&crypto_alg_sem);
 
 	if (alg) {
@@ -87,6 +87,8 @@ static int __crypto_register_alg(struct crypto_alg *alg)
 		     !strcmp(alg->cra_driver_name, q->cra_name))) {
 			struct crypto_larval *larval = (void *)q;
 
+			if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
+				continue;
 			if (!crypto_mod_get(alg))
 				continue;
 			larval->adult = alg;
diff --git a/crypto/api.c b/crypto/api.c
index 67cd6f87b74a5..ddf6a767acdd7 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -57,7 +57,7 @@ void crypto_mod_put(struct crypto_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_mod_put);
 
-struct crypto_alg *__crypto_alg_lookup(const char *name)
+struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask)
 {
 	struct crypto_alg *q, *alg = NULL;
 	int best = -2;
@@ -65,6 +65,13 @@ struct crypto_alg *__crypto_alg_lookup(const char *name)
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		int exact, fuzzy;
 
+		if ((q->cra_flags ^ type) & mask)
+			continue;
+
+		if (crypto_is_larval(q) &&
+		    ((struct crypto_larval *)q)->mask != mask)
+			continue;
+
 		exact = !strcmp(q->cra_driver_name, name);
 		fuzzy = !strcmp(q->cra_name, name);
 		if (!exact && !(fuzzy && q->cra_priority > best))
@@ -96,7 +103,8 @@ static void crypto_larval_destroy(struct crypto_alg *alg)
 	kfree(larval);
 }
 
-static struct crypto_alg *crypto_larval_alloc(const char *name)
+static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type,
+					      u32 mask)
 {
 	struct crypto_alg *alg;
 	struct crypto_larval *larval;
@@ -105,7 +113,8 @@ static struct crypto_alg *crypto_larval_alloc(const char *name)
 	if (!larval)
 		return NULL;
 
-	larval->alg.cra_flags = CRYPTO_ALG_LARVAL;
+	larval->mask = mask;
+	larval->alg.cra_flags = CRYPTO_ALG_LARVAL | type;
 	larval->alg.cra_priority = -1;
 	larval->alg.cra_destroy = crypto_larval_destroy;
 
@@ -114,7 +123,7 @@ static struct crypto_alg *crypto_larval_alloc(const char *name)
 	init_completion(&larval->completion);
 
 	down_write(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name);
+	alg = __crypto_alg_lookup(name, type, mask);
 	if (!alg) {
 		alg = &larval->alg;
 		list_add(&alg->cra_list, &crypto_alg_list);
@@ -151,7 +160,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 	return alg;
 }
 
-static struct crypto_alg *crypto_alg_lookup(const char *name)
+static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
+					    u32 mask)
 {
 	struct crypto_alg *alg;
 
@@ -159,25 +169,27 @@ static struct crypto_alg *crypto_alg_lookup(const char *name)
 		return NULL;
 
 	down_read(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name);
+	alg = __crypto_alg_lookup(name, type, mask);
 	up_read(&crypto_alg_sem);
 
 	return alg;
 }
 
-/* A far more intelligent version of this is planned.  For now, just
- * try an exact match on the name of the algorithm. */
-static struct crypto_alg *crypto_alg_mod_lookup(const char *name)
+struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 {
 	struct crypto_alg *alg;
 	struct crypto_alg *larval;
 	int ok;
 
-	alg = try_then_request_module(crypto_alg_lookup(name), name);
+	mask &= ~CRYPTO_ALG_LARVAL;
+	type &= mask;
+
+	alg = try_then_request_module(crypto_alg_lookup(name, type, mask),
+				      name);
 	if (alg)
 		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
 
-	larval = crypto_larval_alloc(name);
+	larval = crypto_larval_alloc(name, type, mask);
 	if (!larval || !crypto_is_larval(larval))
 		return larval;
 
@@ -196,6 +208,7 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name)
 	crypto_larval_kill(larval);
 	return alg;
 }
+EXPORT_SYMBOL_GPL(crypto_alg_mod_lookup);
 
 static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags)
 {
@@ -291,7 +304,7 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 	struct crypto_alg *alg;
 	unsigned int tfm_size;
 
-	alg = crypto_alg_mod_lookup(name);
+	alg = crypto_alg_mod_lookup(name, 0, 0);
 	if (alg == NULL)
 		goto out;
 
@@ -346,7 +359,7 @@ void crypto_free_tfm(struct crypto_tfm *tfm)
 int crypto_alg_available(const char *name, u32 flags)
 {
 	int ret = 0;
-	struct crypto_alg *alg = crypto_alg_mod_lookup(name);
+	struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0);
 	
 	if (alg) {
 		crypto_mod_put(alg);
diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c
index e0ebe1b44b994..ae54942e3b310 100644
--- a/crypto/cryptomgr.c
+++ b/crypto/cryptomgr.c
@@ -31,6 +31,8 @@ struct cryptomgr_param {
 	} alg;
 
 	struct {
+		u32 type;
+		u32 mask;
 		char name[CRYPTO_MAX_ALG_NAME];
 	} larval;
 
@@ -62,7 +64,8 @@ static void cryptomgr_probe(void *data)
 	return;
 
 err:
-	crypto_larval_error(param->larval.name);
+	crypto_larval_error(param->larval.name, param->larval.type,
+			    param->larval.mask);
 	goto out;
 }
 
@@ -101,6 +104,8 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
 	param->alg.data.name[len] = 0;
 
 	memcpy(param->larval.name, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME);
+	param->larval.type = larval->alg.cra_flags;
+	param->larval.mask = larval->mask;
 
 	INIT_WORK(&param->work, cryptomgr_probe, param);
 	schedule_work(&param->work);
diff --git a/crypto/internal.h b/crypto/internal.h
index 3a08d25fba452..c08d93bdadc45 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -43,6 +43,7 @@ struct crypto_larval {
 	struct crypto_alg alg;
 	struct crypto_alg *adult;
 	struct completion completion;
+	u32 mask;
 };
 
 extern struct list_head crypto_alg_list;
@@ -124,7 +125,8 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg,
 
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
 void crypto_mod_put(struct crypto_alg *alg);
-struct crypto_alg *__crypto_alg_lookup(const char *name);
+struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask);
+struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags);
@@ -138,7 +140,7 @@ void crypto_exit_digest_ops(struct crypto_tfm *tfm);
 void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
 void crypto_exit_compress_ops(struct crypto_tfm *tfm);
 
-void crypto_larval_error(const char *name);
+void crypto_larval_error(const char *name, u32 type, u32 mask);
 
 int crypto_register_instance(struct crypto_template *tmpl,
 			     struct crypto_instance *inst);
-- 
GitLab


From 6bfd48096ff8ecabf955958b51ddfa7988eb0a14 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 21 Sep 2006 11:39:29 +1000
Subject: [PATCH 0288/1063] [CRYPTO] api: Added spawns

Spawns lock a specific crypto algorithm in place.  They can then be used
with crypto_spawn_tfm to allocate a tfm for that algorithm.  When the base
algorithm of a spawn is deregistered, all its spawns will be automatically
removed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/algapi.c         | 185 ++++++++++++++++++++++++++++++++++++----
 crypto/api.c            |  95 ++++++++++++++-------
 crypto/cryptomgr.c      |  19 +++--
 crypto/internal.h       |  19 +++++
 include/crypto/algapi.h |  11 +++
 include/linux/crypto.h  |   4 +
 6 files changed, 280 insertions(+), 53 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index acea250677c02..36c4f1bdb5214 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -73,27 +74,96 @@ static int crypto_check_alg(struct crypto_alg *alg)
 	return crypto_set_driver_name(alg);
 }
 
-static int __crypto_register_alg(struct crypto_alg *alg)
+static void crypto_destroy_instance(struct crypto_alg *alg)
+{
+	struct crypto_instance *inst = (void *)alg;
+	struct crypto_template *tmpl = inst->tmpl;
+
+	tmpl->free(inst);
+	crypto_tmpl_put(tmpl);
+}
+
+static void crypto_remove_spawns(struct list_head *spawns,
+				 struct list_head *list)
+{
+	struct crypto_spawn *spawn, *n;
+
+	list_for_each_entry_safe(spawn, n, spawns, list) {
+		struct crypto_instance *inst = spawn->inst;
+		struct crypto_template *tmpl = inst->tmpl;
+
+		list_del_init(&spawn->list);
+		spawn->alg = NULL;
+
+		if (crypto_is_dead(&inst->alg))
+			continue;
+
+		inst->alg.cra_flags |= CRYPTO_ALG_DEAD;
+		if (!tmpl || !crypto_tmpl_get(tmpl))
+			continue;
+
+		crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg);
+		list_move(&inst->alg.cra_list, list);
+		hlist_del(&inst->list);
+		inst->alg.cra_destroy = crypto_destroy_instance;
+
+		if (!list_empty(&inst->alg.cra_users)) {
+			if (&n->list == spawns)
+				n = list_entry(inst->alg.cra_users.next,
+					       typeof(*n), list);
+			__list_splice(&inst->alg.cra_users, spawns->prev);
+		}
+	}
+}
+
+static int __crypto_register_alg(struct crypto_alg *alg,
+				 struct list_head *list)
 {
 	struct crypto_alg *q;
-	int ret = -EEXIST;
+	int ret = -EAGAIN;
+
+	if (crypto_is_dead(alg))
+		goto out;
+
+	INIT_LIST_HEAD(&alg->cra_users);
+
+	ret = -EEXIST;
 
 	atomic_set(&alg->cra_refcnt, 1);
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		if (q == alg)
 			goto out;
-		if (crypto_is_larval(q) &&
-		    (!strcmp(alg->cra_name, q->cra_name) ||
-		     !strcmp(alg->cra_driver_name, q->cra_name))) {
+
+		if (crypto_is_moribund(q))
+			continue;
+
+		if (crypto_is_larval(q)) {
 			struct crypto_larval *larval = (void *)q;
 
+			if (strcmp(alg->cra_name, q->cra_name) &&
+			    strcmp(alg->cra_driver_name, q->cra_name))
+				continue;
+
+			if (larval->adult)
+				continue;
 			if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
 				continue;
 			if (!crypto_mod_get(alg))
 				continue;
+
 			larval->adult = alg;
 			complete(&larval->completion);
+			continue;
 		}
+
+		if (strcmp(alg->cra_name, q->cra_name))
+			continue;
+
+		if (strcmp(alg->cra_driver_name, q->cra_driver_name) &&
+		    q->cra_priority > alg->cra_priority)
+			continue;
+
+		crypto_remove_spawns(&q->cra_users, list);
 	}
 	
 	list_add(&alg->cra_list, &crypto_alg_list);
@@ -105,8 +175,20 @@ static int __crypto_register_alg(struct crypto_alg *alg)
 	return ret;
 }
 
+static void crypto_remove_final(struct list_head *list)
+{
+	struct crypto_alg *alg;
+	struct crypto_alg *n;
+
+	list_for_each_entry_safe(alg, n, list, cra_list) {
+		list_del_init(&alg->cra_list);
+		crypto_alg_put(alg);
+	}
+}
+
 int crypto_register_alg(struct crypto_alg *alg)
 {
+	LIST_HEAD(list);
 	int err;
 
 	err = crypto_check_alg(alg);
@@ -114,23 +196,35 @@ int crypto_register_alg(struct crypto_alg *alg)
 		return err;
 
 	down_write(&crypto_alg_sem);
-	err = __crypto_register_alg(alg);
+	err = __crypto_register_alg(alg, &list);
 	up_write(&crypto_alg_sem);
 
+	crypto_remove_final(&list);
 	return err;
 }
 EXPORT_SYMBOL_GPL(crypto_register_alg);
 
+static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list)
+{
+	if (unlikely(list_empty(&alg->cra_list)))
+		return -ENOENT;
+
+	alg->cra_flags |= CRYPTO_ALG_DEAD;
+
+	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg);
+	list_del_init(&alg->cra_list);
+	crypto_remove_spawns(&alg->cra_users, list);
+
+	return 0;
+}
+
 int crypto_unregister_alg(struct crypto_alg *alg)
 {
-	int ret = -ENOENT;
+	int ret;
+	LIST_HEAD(list);
 	
 	down_write(&crypto_alg_sem);
-	if (likely(!list_empty(&alg->cra_list))) {
-		list_del_init(&alg->cra_list);
-		ret = 0;
-	}
-	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg);
+	ret = crypto_remove_alg(alg, &list);
 	up_write(&crypto_alg_sem);
 
 	if (ret)
@@ -140,6 +234,7 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 	if (alg->cra_destroy)
 		alg->cra_destroy(alg);
 
+	crypto_remove_final(&list);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_alg);
@@ -170,6 +265,7 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 	struct crypto_instance *inst;
 	struct hlist_node *p, *n;
 	struct hlist_head *list;
+	LIST_HEAD(users);
 
 	down_write(&crypto_alg_sem);
 
@@ -178,9 +274,8 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 
 	list = &tmpl->instances;
 	hlist_for_each_entry(inst, p, list, list) {
-		BUG_ON(list_empty(&inst->alg.cra_list));
-		list_del_init(&inst->alg.cra_list);
-		crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg);
+		int err = crypto_remove_alg(&inst->alg, &users);
+		BUG_ON(err);
 	}
 
 	crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl);
@@ -191,6 +286,7 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 		BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1);
 		tmpl->free(inst);
 	}
+	crypto_remove_final(&users);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_template);
 
@@ -222,6 +318,7 @@ EXPORT_SYMBOL_GPL(crypto_lookup_template);
 int crypto_register_instance(struct crypto_template *tmpl,
 			     struct crypto_instance *inst)
 {
+	LIST_HEAD(list);
 	int err = -EINVAL;
 
 	if (inst->alg.cra_destroy)
@@ -235,7 +332,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
 
 	down_write(&crypto_alg_sem);
 
-	err = __crypto_register_alg(&inst->alg);
+	err = __crypto_register_alg(&inst->alg, &list);
 	if (err)
 		goto unlock;
 
@@ -245,11 +342,67 @@ int crypto_register_instance(struct crypto_template *tmpl,
 unlock:
 	up_write(&crypto_alg_sem);
 
+	crypto_remove_final(&list);
+
 err:
 	return err;
 }
 EXPORT_SYMBOL_GPL(crypto_register_instance);
 
+int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
+		      struct crypto_instance *inst)
+{
+	int err = -EAGAIN;
+
+	spawn->inst = inst;
+
+	down_write(&crypto_alg_sem);
+	if (!crypto_is_moribund(alg)) {
+		list_add(&spawn->list, &alg->cra_users);
+		spawn->alg = alg;
+		err = 0;
+	}
+	up_write(&crypto_alg_sem);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_init_spawn);
+
+void crypto_drop_spawn(struct crypto_spawn *spawn)
+{
+	down_write(&crypto_alg_sem);
+	list_del(&spawn->list);
+	up_write(&crypto_alg_sem);
+}
+EXPORT_SYMBOL_GPL(crypto_drop_spawn);
+
+struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn)
+{
+	struct crypto_alg *alg;
+	struct crypto_alg *alg2;
+	struct crypto_tfm *tfm;
+
+	down_read(&crypto_alg_sem);
+	alg = spawn->alg;
+	alg2 = alg;
+	if (alg2)
+		alg2 = crypto_mod_get(alg2);
+	up_read(&crypto_alg_sem);
+
+	if (!alg2) {
+		if (alg)
+			crypto_shoot_alg(alg);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	tfm = __crypto_alloc_tfm(alg, 0);
+	if (IS_ERR(tfm))
+		crypto_mod_put(alg);
+
+	return tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_spawn_tfm);
+
 int crypto_register_notifier(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_register(&crypto_chain, nb);
diff --git a/crypto/api.c b/crypto/api.c
index ddf6a767acdd7..7e5522cf856e0 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -15,11 +15,13 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/param.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include "internal.h"
@@ -38,12 +40,6 @@ static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 	return alg;
 }
 
-static inline void crypto_alg_put(struct crypto_alg *alg)
-{
-	if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
-		alg->cra_destroy(alg);
-}
-
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg)
 {
 	return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL;
@@ -65,6 +61,9 @@ struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask)
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		int exact, fuzzy;
 
+		if (crypto_is_moribund(q))
+			continue;
+
 		if ((q->cra_flags ^ type) & mask)
 			continue;
 
@@ -111,7 +110,7 @@ static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type,
 
 	larval = kzalloc(sizeof(*larval), GFP_KERNEL);
 	if (!larval)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	larval->mask = mask;
 	larval->alg.cra_flags = CRYPTO_ALG_LARVAL | type;
@@ -153,8 +152,11 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 
 	wait_for_completion_interruptible_timeout(&larval->completion, 60 * HZ);
 	alg = larval->adult;
-	if (alg && !crypto_mod_get(alg))
-		alg = NULL;
+	if (alg) {
+		if (!crypto_mod_get(alg))
+			alg = ERR_PTR(-EAGAIN);
+	} else
+		alg = ERR_PTR(-ENOENT);
 	crypto_mod_put(&larval->alg);
 
 	return alg;
@@ -165,9 +167,6 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
 {
 	struct crypto_alg *alg;
 
-	if (!name)
-		return NULL;
-
 	down_read(&crypto_alg_sem);
 	alg = __crypto_alg_lookup(name, type, mask);
 	up_read(&crypto_alg_sem);
@@ -181,7 +180,10 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 	struct crypto_alg *larval;
 	int ok;
 
-	mask &= ~CRYPTO_ALG_LARVAL;
+	if (!name)
+		return ERR_PTR(-ENOENT);
+
+	mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
 	type &= mask;
 
 	alg = try_then_request_module(crypto_alg_lookup(name, type, mask),
@@ -190,7 +192,7 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
 
 	larval = crypto_larval_alloc(name, type, mask);
-	if (!larval || !crypto_is_larval(larval))
+	if (IS_ERR(larval) || !crypto_is_larval(larval))
 		return larval;
 
 	ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval);
@@ -203,7 +205,7 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 		alg = crypto_larval_wait(larval);
 	else {
 		crypto_mod_put(larval);
-		alg = NULL;
+		alg = ERR_PTR(-ENOENT);
 	}
 	crypto_larval_kill(larval);
 	return alg;
@@ -298,31 +300,40 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags)
 	return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
 }
 
-struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
+void crypto_shoot_alg(struct crypto_alg *alg)
+{
+	down_write(&crypto_alg_sem);
+	alg->cra_flags |= CRYPTO_ALG_DYING;
+	up_write(&crypto_alg_sem);
+}
+EXPORT_SYMBOL_GPL(crypto_shoot_alg);
+
+struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags)
 {
 	struct crypto_tfm *tfm = NULL;
-	struct crypto_alg *alg;
 	unsigned int tfm_size;
-
-	alg = crypto_alg_mod_lookup(name, 0, 0);
-	if (alg == NULL)
-		goto out;
+	int err = -ENOMEM;
 
 	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags);
 	tfm = kzalloc(tfm_size, GFP_KERNEL);
 	if (tfm == NULL)
-		goto out_put;
+		goto out;
 
 	tfm->__crt_alg = alg;
-	
-	if (crypto_init_flags(tfm, flags))
+
+	err = crypto_init_flags(tfm, flags);
+	if (err)
 		goto out_free_tfm;
 		
-	if (crypto_init_ops(tfm))
+	err = crypto_init_ops(tfm);
+	if (err)
 		goto out_free_tfm;
 
-	if (alg->cra_init && alg->cra_init(tfm))
+	if (alg->cra_init && (err = alg->cra_init(tfm))) {
+		if (err == -EAGAIN)
+			crypto_shoot_alg(alg);
 		goto cra_init_failed;
+	}
 
 	goto out;
 
@@ -330,12 +341,36 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 	crypto_exit_ops(tfm);
 out_free_tfm:
 	kfree(tfm);
-	tfm = NULL;
-out_put:
-	crypto_mod_put(alg);
+	tfm = ERR_PTR(err);
 out:
 	return tfm;
 }
+EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
+
+struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
+{
+	struct crypto_tfm *tfm = NULL;
+	int err;
+
+	do {
+		struct crypto_alg *alg;
+
+		alg = crypto_alg_mod_lookup(name, 0, 0);
+		err = PTR_ERR(alg);
+		if (IS_ERR(alg))
+			continue;
+
+		tfm = __crypto_alloc_tfm(alg, flags);
+		err = 0;
+		if (IS_ERR(tfm)) {
+			crypto_mod_put(alg);
+			err = PTR_ERR(tfm);
+			tfm = NULL;
+		}
+	} while (err == -EAGAIN && !signal_pending(current));
+
+	return tfm;
+}
 
 void crypto_free_tfm(struct crypto_tfm *tfm)
 {
@@ -361,7 +396,7 @@ int crypto_alg_available(const char *name, u32 flags)
 	int ret = 0;
 	struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0);
 	
-	if (alg) {
+	if (!IS_ERR(alg)) {
 		crypto_mod_put(alg);
 		ret = 1;
 	}
diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c
index ae54942e3b310..9b5b156010689 100644
--- a/crypto/cryptomgr.c
+++ b/crypto/cryptomgr.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/rtnetlink.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
 
@@ -44,21 +45,25 @@ static void cryptomgr_probe(void *data)
 	struct cryptomgr_param *param = data;
 	struct crypto_template *tmpl;
 	struct crypto_instance *inst;
+	int err;
 
 	tmpl = crypto_lookup_template(param->template);
 	if (!tmpl)
 		goto err;
 
-	inst = tmpl->alloc(&param->alg, sizeof(param->alg));
-	if (IS_ERR(inst))
-		goto err;
-	else if ((err = crypto_register_instance(tmpl, inst))) {
-		tmpl->free(inst);
-		goto err;
-	}
+	do {
+		inst = tmpl->alloc(&param->alg, sizeof(param->alg));
+		if (IS_ERR(inst))
+			err = PTR_ERR(inst);
+		else if ((err = crypto_register_instance(tmpl, inst)))
+			tmpl->free(inst);
+	} while (err == -EAGAIN && !signal_pending(current));
 
 	crypto_tmpl_put(tmpl);
 
+	if (err)
+		goto err;
+
 out:
 	kfree(param);
 	return;
diff --git a/crypto/internal.h b/crypto/internal.h
index c08d93bdadc45..03c00b0e6b60c 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -142,12 +142,21 @@ void crypto_exit_compress_ops(struct crypto_tfm *tfm);
 
 void crypto_larval_error(const char *name, u32 type, u32 mask);
 
+void crypto_shoot_alg(struct crypto_alg *alg);
+struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags);
+
 int crypto_register_instance(struct crypto_template *tmpl,
 			     struct crypto_instance *inst);
 
 int crypto_register_notifier(struct notifier_block *nb);
 int crypto_unregister_notifier(struct notifier_block *nb);
 
+static inline void crypto_alg_put(struct crypto_alg *alg)
+{
+	if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
+		alg->cra_destroy(alg);
+}
+
 static inline int crypto_tmpl_get(struct crypto_template *tmpl)
 {
 	return try_module_get(tmpl->module);
@@ -163,6 +172,16 @@ static inline int crypto_is_larval(struct crypto_alg *alg)
 	return alg->cra_flags & CRYPTO_ALG_LARVAL;
 }
 
+static inline int crypto_is_dead(struct crypto_alg *alg)
+{
+	return alg->cra_flags & CRYPTO_ALG_DEAD;
+}
+
+static inline int crypto_is_moribund(struct crypto_alg *alg)
+{
+	return alg->cra_flags & (CRYPTO_ALG_DEAD | CRYPTO_ALG_DYING);
+}
+
 static inline int crypto_notify(unsigned long val, void *v)
 {
 	return blocking_notifier_call_chain(&crypto_chain, val, v);
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index ffec530d52fb5..b20f4bdb23bac 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -36,10 +36,21 @@ struct crypto_template {
 	char name[CRYPTO_MAX_ALG_NAME];
 };
 
+struct crypto_spawn {
+	struct list_head list;
+	struct crypto_alg *alg;
+	struct crypto_instance *inst;
+};
+
 int crypto_register_template(struct crypto_template *tmpl);
 void crypto_unregister_template(struct crypto_template *tmpl);
 struct crypto_template *crypto_lookup_template(const char *name);
 
+int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
+		      struct crypto_instance *inst);
+void crypto_drop_spawn(struct crypto_spawn *spawn);
+struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn);
+
 static inline void *crypto_instance_ctx(struct crypto_instance *inst)
 {
 	return inst->__ctx;
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 85f73c3819139..40a6330abc8d7 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -35,6 +35,8 @@
 #define CRYPTO_ALG_TYPE_COMPRESS	0x00000004
 
 #define CRYPTO_ALG_LARVAL		0x00000010
+#define CRYPTO_ALG_DEAD			0x00000020
+#define CRYPTO_ALG_DYING		0x00000040
 
 /*
  * Transform masks and values (for crt_flags).
@@ -145,6 +147,8 @@ struct compress_alg {
 
 struct crypto_alg {
 	struct list_head cra_list;
+	struct list_head cra_users;
+
 	u32 cra_flags;
 	unsigned int cra_blocksize;
 	unsigned int cra_ctxsize;
-- 
GitLab


From b3be9a6d9a78bb820f5242f43b98f38b0ca610a6 Mon Sep 17 00:00:00 2001
From: Michal Ludvig <michal@logix.cz>
Date: Sun, 9 Jul 2006 08:59:38 +1000
Subject: [PATCH 0289/1063] [CRYPTO] sha: Add module aliases for sha1 / sha256

Crypto modules should be loadable by their .cra_driver_name, so
we should make MODULE_ALIAS()es with these names. This patch adds
aliases for SHA1 and SHA256 only, as that's what we need for the
PadLock-SHA driver.

Signed-off-by: Michal Ludvig <michal@logix.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha1.c   | 3 +++
 crypto/sha256.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/crypto/sha1.c b/crypto/sha1.c
index 6c77b689f87ec..1bba551e5b456 100644
--- a/crypto/sha1.c
+++ b/crypto/sha1.c
@@ -109,6 +109,7 @@ static void sha1_final(struct crypto_tfm *tfm, u8 *out)
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha1",
+	.cra_driver_name=	"sha1-generic",
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA1_HMAC_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct sha1_ctx),
@@ -137,3 +138,5 @@ module_exit(fini);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
+
+MODULE_ALIAS("sha1-generic");
diff --git a/crypto/sha256.c b/crypto/sha256.c
index bc71d85a7d02d..716195bb54f24 100644
--- a/crypto/sha256.c
+++ b/crypto/sha256.c
@@ -309,6 +309,7 @@ static void sha256_final(struct crypto_tfm *tfm, u8 *out)
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha256",
+	.cra_driver_name=	"sha256-generic",
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA256_HMAC_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct sha256_ctx),
@@ -337,3 +338,5 @@ module_exit(fini);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm");
+
+MODULE_ALIAS("sha256-generic");
-- 
GitLab


From b14cdd6704c96474ba5c74b5959487beaa5ee1cd Mon Sep 17 00:00:00 2001
From: Michal Ludvig <michal@logix.cz>
Date: Sun, 9 Jul 2006 09:02:24 +1000
Subject: [PATCH 0290/1063] [CRYPTO] api: Add missing accessors for new
 crypto_alg fields

Add missing accessors for cra_driver_name and cra_priority.

Signed-off-by: Michal Ludvig <michal@logix.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 40a6330abc8d7..d6e184c876b50 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -297,6 +297,16 @@ static inline const char *crypto_tfm_alg_name(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_name;
 }
 
+static inline const char *crypto_tfm_alg_driver_name(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_driver_name;
+}
+
+static inline int crypto_tfm_alg_priority(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_priority;
+}
+
 static inline const char *crypto_tfm_alg_modname(struct crypto_tfm *tfm)
 {
 	return module_name(tfm->__crt_alg->cra_module);
-- 
GitLab


From 1191f0a49390caf16f4a2831a4fc373757471ad6 Mon Sep 17 00:00:00 2001
From: Michal Ludvig <michal@logix.cz>
Date: Sun, 6 Aug 2006 22:46:20 +1000
Subject: [PATCH 0291/1063] [CRYPTO] padlock: Get rid of padlock-generic.c

Merge padlock-generic.c into padlock-aes.c and compile
AES as a standalone module. We won't make a monolithic
padlock.ko with all supported algorithms; instead we'll
compile each driver into its own module.

Signed-off-by: Michal Ludvig <michal@logix.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig           | 16 +++++---
 drivers/crypto/Makefile          |  8 +---
 drivers/crypto/padlock-aes.c     | 34 +++++++++++++++--
 drivers/crypto/padlock-generic.c | 63 --------------------------------
 4 files changed, 42 insertions(+), 79 deletions(-)
 delete mode 100644 drivers/crypto/padlock-generic.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index ba23683ab8c48..d260c86218fa6 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -1,24 +1,30 @@
 menu "Hardware crypto devices"
 
 config CRYPTO_DEV_PADLOCK
-	tristate "Support for VIA PadLock ACE"
+	bool "Support for VIA PadLock ACE"
 	depends on X86_32
 	select CRYPTO_ALGAPI
+	default y
 	help
 	  Some VIA processors come with an integrated crypto engine
 	  (so called VIA PadLock ACE, Advanced Cryptography Engine)
-	  that provides instructions for very fast {en,de}cryption 
-	  with some algorithms.
+	  that provides instructions for very fast cryptographic
+	  operations with supported algorithms.
 	  
 	  The instructions are used only when the CPU supports them.
 	  Otherwise software encryption is used. If you are unsure,
 	  say Y.
 
 config CRYPTO_DEV_PADLOCK_AES
-	bool "Support for AES in VIA PadLock"
+	tristate "PadLock driver for AES algorithm"
 	depends on CRYPTO_DEV_PADLOCK
-	default y
+	default m
 	help
 	  Use VIA PadLock for AES algorithm.
 
+	  Available in VIA C3 and newer CPUs.
+
+	  If unsure say M. The compiled module will be
+	  called padlock-aes.ko
+
 endmenu
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 45426ca19a23b..5e7d7d5e805a0 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1,7 +1 @@
-
-obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o
-
-padlock-objs-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
-
-padlock-objs := padlock-generic.o $(padlock-objs-y)
-
+obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index b643d71298a91..ee33bd6c1b77b 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -495,15 +495,41 @@ static struct crypto_alg aes_alg = {
 	}
 };
 
-int __init padlock_init_aes(void)
+static int __init padlock_init(void)
 {
-	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
+	int ret;
+
+	if (!cpu_has_xcrypt) {
+		printk(KERN_ERR PFX "VIA PadLock not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_xcrypt_enabled) {
+		printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
+		return -ENODEV;
+	}
 
 	gen_tabs();
-	return crypto_register_alg(&aes_alg);
+	if ((ret = crypto_register_alg(&aes_alg))) {
+		printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
+		return ret;
+	}
+
+	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
+
+	return ret;
 }
 
-void __exit padlock_fini_aes(void)
+static void __exit padlock_fini(void)
 {
 	crypto_unregister_alg(&aes_alg);
 }
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("VIA PadLock AES algorithm support");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+
+MODULE_ALIAS("aes-padlock");
diff --git a/drivers/crypto/padlock-generic.c b/drivers/crypto/padlock-generic.c
deleted file mode 100644
index 18cf0e8274a7d..0000000000000
--- a/drivers/crypto/padlock-generic.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* 
- * Cryptographic API.
- *
- * Support for VIA PadLock hardware crypto engine.
- *
- * Copyright (c) 2004  Michal Ludvig <michal@logix.cz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include <asm/byteorder.h>
-#include "padlock.h"
-
-static int __init
-padlock_init(void)
-{
-	int ret = -ENOSYS;
-	
-	if (!cpu_has_xcrypt) {
-		printk(KERN_ERR PFX "VIA PadLock not detected.\n");
-		return -ENODEV;
-	}
-
-	if (!cpu_has_xcrypt_enabled) {
-		printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
-		return -ENODEV;
-	}
-
-#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES
-	if ((ret = padlock_init_aes())) {
-		printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
-		return ret;
-	}
-#endif
-
-	if (ret == -ENOSYS)
-		printk(KERN_ERR PFX "Hmm, VIA PadLock was compiled without any algorithm.\n");
-
-	return ret;
-}
-
-static void __exit
-padlock_fini(void)
-{
-#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES
-	padlock_fini_aes();
-#endif
-}
-
-module_init(padlock_init);
-module_exit(padlock_fini);
-
-MODULE_DESCRIPTION("VIA PadLock crypto engine support.");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Michal Ludvig");
-- 
GitLab


From db5e9a42373ae6d84c4b0179c2fe0aba866474e8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 9 Jul 2006 10:35:49 +1000
Subject: [PATCH 0292/1063] [CRYPTO] padlock: Add compatibility alias after
 rename

Whenever we rename modules we should add an alias to ensure that existing
users can still locate the new module.

This patch also gets rid of the now unused module function prototypes from
padlock.h.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/padlock-aes.c | 3 +++
 drivers/crypto/padlock.h     | 5 -----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index ee33bd6c1b77b..241052da27878 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -533,3 +533,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Ludvig");
 
 MODULE_ALIAS("aes-padlock");
+
+/* This module used to be called padlock. */
+MODULE_ALIAS("padlock");
diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h
index b78489bc298ac..e2ee3b689dbd8 100644
--- a/drivers/crypto/padlock.h
+++ b/drivers/crypto/padlock.h
@@ -28,9 +28,4 @@ struct cword {
 
 #define PFX	"padlock: "
 
-#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES
-int padlock_init_aes(void);
-void padlock_fini_aes(void);
-#endif
-
 #endif	/* _CRYPTO_PADLOCK_H */
-- 
GitLab


From ccc17c34d676f116bd09dd36a3b01627bc6a2f8a Mon Sep 17 00:00:00 2001
From: Michal Ludvig <michal@logix.cz>
Date: Sat, 15 Jul 2006 10:23:49 +1000
Subject: [PATCH 0293/1063] [CRYPTO] padlock: Update private header file

PADLOCK_CRA_PRIORITY is shared between padlock-aes and padlock-sha
so it should be in the header.

On the other hand, "struct cword" is only used in padlock-aes.c,
so it's unnecessary to have it in padlock.h.

Signed-off-by: Michal Ludvig <michal@logix.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/padlock-aes.c | 13 ++++++++++++-
 drivers/crypto/padlock.h     | 13 ++-----------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 241052da27878..149e54b0ea2e1 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -59,6 +59,17 @@
 #define AES_EXTENDED_KEY_SIZE	64	/* in uint32_t units */
 #define AES_EXTENDED_KEY_SIZE_B	(AES_EXTENDED_KEY_SIZE * sizeof(uint32_t))
 
+/* Control word. */
+struct cword {
+	unsigned int __attribute__ ((__packed__))
+		rounds:4,
+		algo:3,
+		keygen:1,
+		interm:1,
+		encdec:1,
+		ksize:2;
+} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
+
 /* Whenever making any changes to the following
  * structure *make sure* you keep E, d_data
  * and cword aligned on 16 Bytes boundaries!!! */
@@ -473,7 +484,7 @@ static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
 	.cra_driver_name	=	"aes-padlock",
-	.cra_priority		=	300,
+	.cra_priority		=	PADLOCK_CRA_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct aes_ctx),
diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h
index e2ee3b689dbd8..7e3385b0904db 100644
--- a/drivers/crypto/padlock.h
+++ b/drivers/crypto/padlock.h
@@ -15,17 +15,8 @@
 
 #define PADLOCK_ALIGNMENT 16
 
-/* Control word. */
-struct cword {
-	unsigned int __attribute__ ((__packed__))
-		rounds:4,
-		algo:3,
-		keygen:1,
-		interm:1,
-		encdec:1,
-		ksize:2;
-} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
-
 #define PFX	"padlock: "
 
+#define PADLOCK_CRA_PRIORITY	300
+
 #endif	/* _CRYPTO_PADLOCK_H */
-- 
GitLab


From 6c833275152b454d311f0e70b5e6bf028b4a2aaf Mon Sep 17 00:00:00 2001
From: Michal Ludvig <michal@logix.cz>
Date: Wed, 12 Jul 2006 12:29:38 +1000
Subject: [PATCH 0294/1063] [CRYPTO] padlock: Driver for SHA1 / SHA256
 algorithms

Support for SHA1 / SHA256 algorithms in VIA C7 processors.

Signed-off-by: Michal Ludvig <michal@logix.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig       |  14 ++
 drivers/crypto/Makefile      |   1 +
 drivers/crypto/padlock-sha.c | 339 +++++++++++++++++++++++++++++++++++
 3 files changed, 354 insertions(+)
 create mode 100644 drivers/crypto/padlock-sha.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index d260c86218fa6..910c715325be4 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -27,4 +27,18 @@ config CRYPTO_DEV_PADLOCK_AES
 	  If unsure say M. The compiled module will be
 	  called padlock-aes.ko
 
+config CRYPTO_DEV_PADLOCK_SHA
+	tristate "PadLock driver for SHA1 and SHA256 algorithms"
+	depends on CRYPTO_DEV_PADLOCK
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	default m
+	help
+	  Use VIA PadLock for SHA1/SHA256 algorithms.
+
+	  Available in VIA C7 and newer processors.
+
+	  If unsure say M. The compiled module will be
+	  called padlock-sha.ko
+
 endmenu
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 5e7d7d5e805a0..df498c7d97ab0 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
+obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
new file mode 100644
index 0000000000000..f7010038033be
--- /dev/null
+++ b/drivers/crypto/padlock-sha.c
@@ -0,0 +1,339 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for VIA PadLock hardware crypto engine.
+ *
+ * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include "padlock.h"
+
+#define SHA1_DEFAULT_FALLBACK	"sha1-generic"
+#define SHA1_DIGEST_SIZE        20
+#define SHA1_HMAC_BLOCK_SIZE    64
+
+#define SHA256_DEFAULT_FALLBACK "sha256-generic"
+#define SHA256_DIGEST_SIZE      32
+#define SHA256_HMAC_BLOCK_SIZE  64
+
+static char *sha1_fallback = SHA1_DEFAULT_FALLBACK;
+static char *sha256_fallback = SHA256_DEFAULT_FALLBACK;
+
+module_param(sha1_fallback, charp, 0644);
+module_param(sha256_fallback, charp, 0644);
+
+MODULE_PARM_DESC(sha1_fallback, "Fallback driver for SHA1. Default is "
+		 SHA1_DEFAULT_FALLBACK);
+MODULE_PARM_DESC(sha256_fallback, "Fallback driver for SHA256. Default is "
+		 SHA256_DEFAULT_FALLBACK);
+
+struct padlock_sha_ctx {
+	char		*data;
+	size_t		used;
+	int		bypass;
+	void (*f_sha_padlock)(const char *in, char *out, int count);
+	struct crypto_tfm *fallback_tfm;
+};
+
+static inline struct padlock_sha_ctx *ctx(struct crypto_tfm *tfm)
+{
+	return (struct padlock_sha_ctx *)(crypto_tfm_ctx(tfm));
+}
+
+/* We'll need aligned address on the stack */
+#define NEAREST_ALIGNED(ptr) \
+	((void *)ALIGN((size_t)(ptr), PADLOCK_ALIGNMENT))
+
+static struct crypto_alg sha1_alg, sha256_alg;
+
+static void padlock_sha_bypass(struct crypto_tfm *tfm)
+{
+	if (ctx(tfm)->bypass)
+		return;
+
+	BUG_ON(!ctx(tfm)->fallback_tfm);
+
+	crypto_digest_init(ctx(tfm)->fallback_tfm);
+	if (ctx(tfm)->data && ctx(tfm)->used) {
+		struct scatterlist sg;
+
+		sg_set_buf(&sg, ctx(tfm)->data, ctx(tfm)->used);
+		crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1);
+	}
+
+	ctx(tfm)->used = 0;
+	ctx(tfm)->bypass = 1;
+}
+
+static void padlock_sha_init(struct crypto_tfm *tfm)
+{
+	ctx(tfm)->used = 0;
+	ctx(tfm)->bypass = 0;
+}
+
+static void padlock_sha_update(struct crypto_tfm *tfm,
+			const uint8_t *data, unsigned int length)
+{
+	/* Our buffer is always one page. */
+	if (unlikely(!ctx(tfm)->bypass &&
+		     (ctx(tfm)->used + length > PAGE_SIZE)))
+		padlock_sha_bypass(tfm);
+
+	if (unlikely(ctx(tfm)->bypass)) {
+		struct scatterlist sg;
+		BUG_ON(!ctx(tfm)->fallback_tfm);
+		sg_set_buf(&sg, (uint8_t *)data, length);
+		crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1);
+		return;
+	}
+
+	memcpy(ctx(tfm)->data + ctx(tfm)->used, data, length);
+	ctx(tfm)->used += length;
+}
+
+static inline void padlock_output_block(uint32_t *src,
+		 	uint32_t *dst, size_t count)
+{
+	while (count--)
+		*dst++ = swab32(*src++);
+}
+
+void padlock_do_sha1(const char *in, char *out, int count)
+{
+	/* We can't store directly to *out as it may be unaligned. */
+	/* BTW Don't reduce the buffer size below 128 Bytes!
+	 *     PadLock microcode needs it that big. */
+	char buf[128+16];
+	char *result = NEAREST_ALIGNED(buf);
+
+	((uint32_t *)result)[0] = 0x67452301;
+	((uint32_t *)result)[1] = 0xEFCDAB89;
+	((uint32_t *)result)[2] = 0x98BADCFE;
+	((uint32_t *)result)[3] = 0x10325476;
+	((uint32_t *)result)[4] = 0xC3D2E1F0;
+ 
+	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
+		      : "+S"(in), "+D"(result)
+		      : "c"(count), "a"(0));
+
+	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
+}
+
+void padlock_do_sha256(const char *in, char *out, int count)
+{
+	/* We can't store directly to *out as it may be unaligned. */
+	/* BTW Don't reduce the buffer size below 128 Bytes!
+	 *     PadLock microcode needs it that big. */
+	char buf[128+16];
+	char *result = NEAREST_ALIGNED(buf);
+
+	((uint32_t *)result)[0] = 0x6A09E667;
+	((uint32_t *)result)[1] = 0xBB67AE85;
+	((uint32_t *)result)[2] = 0x3C6EF372;
+	((uint32_t *)result)[3] = 0xA54FF53A;
+	((uint32_t *)result)[4] = 0x510E527F;
+	((uint32_t *)result)[5] = 0x9B05688C;
+	((uint32_t *)result)[6] = 0x1F83D9AB;
+	((uint32_t *)result)[7] = 0x5BE0CD19;
+
+	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
+		      : "+S"(in), "+D"(result)
+		      : "c"(count), "a"(0));
+
+	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
+}
+
+static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out)
+{
+	if (unlikely(ctx(tfm)->bypass)) {
+		BUG_ON(!ctx(tfm)->fallback_tfm);
+		crypto_digest_final(ctx(tfm)->fallback_tfm, out);
+		ctx(tfm)->bypass = 0;
+		return;
+	}
+
+	/* Pass the input buffer to PadLock microcode... */
+	ctx(tfm)->f_sha_padlock(ctx(tfm)->data, out, ctx(tfm)->used);
+
+	ctx(tfm)->used = 0;
+}
+
+static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_name)
+{
+	/* For now we'll allocate one page. This
+	 * could eventually be configurable one day. */
+	ctx(tfm)->data = (char *)__get_free_page(GFP_KERNEL);
+	if (!ctx(tfm)->data)
+		return -ENOMEM;
+
+	/* Allocate a fallback and abort if it failed. */
+	ctx(tfm)->fallback_tfm = crypto_alloc_tfm(fallback_driver_name, 0);
+	if (!ctx(tfm)->fallback_tfm) {
+		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
+		       fallback_driver_name);
+		free_page((unsigned long)(ctx(tfm)->data));
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int padlock_sha1_cra_init(struct crypto_tfm *tfm)
+{
+	ctx(tfm)->f_sha_padlock = padlock_do_sha1;
+
+	return padlock_cra_init(tfm, sha1_fallback);
+}
+
+static int padlock_sha256_cra_init(struct crypto_tfm *tfm)
+{
+	ctx(tfm)->f_sha_padlock = padlock_do_sha256;
+
+	return padlock_cra_init(tfm, sha256_fallback);
+}
+
+static void padlock_cra_exit(struct crypto_tfm *tfm)
+{
+	if (ctx(tfm)->data) {
+		free_page((unsigned long)(ctx(tfm)->data));
+		ctx(tfm)->data = NULL;
+	}
+
+	BUG_ON(!ctx(tfm)->fallback_tfm);
+	crypto_free_tfm(ctx(tfm)->fallback_tfm);
+	ctx(tfm)->fallback_tfm = NULL;
+}
+
+static struct crypto_alg sha1_alg = {
+	.cra_name		=	"sha1",
+	.cra_driver_name	=	"sha1-padlock",
+	.cra_priority		=	PADLOCK_CRA_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST,
+	.cra_blocksize		=	SHA1_HMAC_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(sha1_alg.cra_list),
+	.cra_init		=	padlock_sha1_cra_init,
+	.cra_exit		=	padlock_cra_exit,
+	.cra_u			=	{
+		.digest = {
+			.dia_digestsize	=	SHA1_DIGEST_SIZE,
+			.dia_init   	= 	padlock_sha_init,
+			.dia_update 	=	padlock_sha_update,
+			.dia_final  	=	padlock_sha_final,
+		}
+	}
+};
+
+static struct crypto_alg sha256_alg = {
+	.cra_name		=	"sha256",
+	.cra_driver_name	=	"sha256-padlock",
+	.cra_priority		=	PADLOCK_CRA_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST,
+	.cra_blocksize		=	SHA256_HMAC_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(sha256_alg.cra_list),
+	.cra_init		=	padlock_sha256_cra_init,
+	.cra_exit		=	padlock_cra_exit,
+	.cra_u			=	{
+		.digest = {
+			.dia_digestsize	=	SHA256_DIGEST_SIZE,
+			.dia_init   	= 	padlock_sha_init,
+			.dia_update 	=	padlock_sha_update,
+			.dia_final  	=	padlock_sha_final,
+		}
+	}
+};
+
+static void __init padlock_sha_check_fallbacks(void)
+{
+	static struct crypto_tfm *tfm_sha1, *tfm_sha256;
+
+	/* We'll try to allocate one TFM for each fallback
+	 * to test that the modules are available. */
+	tfm_sha1 = crypto_alloc_tfm(sha1_fallback, 0);
+	if (!tfm_sha1) {
+		printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n",
+		       sha1_alg.cra_name, sha1_fallback);
+	} else {
+		printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha1_alg.cra_name,
+		       crypto_tfm_alg_driver_name(tfm_sha1), crypto_tfm_alg_priority(tfm_sha1));
+		crypto_free_tfm(tfm_sha1);
+	}
+
+	tfm_sha256 = crypto_alloc_tfm(sha256_fallback, 0);
+	if (!tfm_sha256) {
+		printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n",
+		       sha256_alg.cra_name, sha256_fallback);
+	} else {
+		printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha256_alg.cra_name,
+		       crypto_tfm_alg_driver_name(tfm_sha256), crypto_tfm_alg_priority(tfm_sha256));
+		crypto_free_tfm(tfm_sha256);
+	}
+}
+
+static int __init padlock_init(void)
+{
+	int rc = -ENODEV;
+
+	if (!cpu_has_phe) {
+		printk(KERN_ERR PFX "VIA PadLock Hash Engine not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_phe_enabled) {
+		printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
+		return -ENODEV;
+	}
+
+	padlock_sha_check_fallbacks();
+
+	rc = crypto_register_alg(&sha1_alg);
+	if (rc)
+		goto out;
+
+	rc = crypto_register_alg(&sha256_alg);
+	if (rc)
+		goto out_unreg1;
+
+	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
+
+	return 0;
+
+out_unreg1:
+	crypto_unregister_alg(&sha1_alg);
+out:
+	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
+	return rc;
+}
+
+static void __exit padlock_fini(void)
+{
+	crypto_unregister_alg(&sha1_alg);
+	crypto_unregister_alg(&sha256_alg);
+}
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+
+MODULE_ALIAS("sha1-padlock");
+MODULE_ALIAS("sha256-padlock");
-- 
GitLab


From cb17530b0a4e01bd595a7ac437467a1a9833a15c Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 15 Jul 2006 11:31:25 +1000
Subject: [PATCH 0295/1063] [CRYPTO] padlock-sha: Make 2 functions static

This patch makes two needlessly global functions static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/padlock-sha.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index f7010038033be..95e9971d1a70d 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -112,7 +112,7 @@ static inline void padlock_output_block(uint32_t *src,
 		*dst++ = swab32(*src++);
 }
 
-void padlock_do_sha1(const char *in, char *out, int count)
+static void padlock_do_sha1(const char *in, char *out, int count)
 {
 	/* We can't store directly to *out as it may be unaligned. */
 	/* BTW Don't reduce the buffer size below 128 Bytes!
@@ -133,7 +133,7 @@ void padlock_do_sha1(const char *in, char *out, int count)
 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
 }
 
-void padlock_do_sha256(const char *in, char *out, int count)
+static void padlock_do_sha256(const char *in, char *out, int count)
 {
 	/* We can't store directly to *out as it may be unaligned. */
 	/* BTW Don't reduce the buffer size below 128 Bytes!
-- 
GitLab


From 5644bda5d6aa17a70b8842eb56365d501a5da159 Mon Sep 17 00:00:00 2001
From: Michal Ludvig <michal@logix.cz>
Date: Sun, 6 Aug 2006 22:50:30 +1000
Subject: [PATCH 0296/1063] [CRYPTO] padlock: Helper module padlock.ko

Compile a helper module padlock.ko that will try
to autoload all configured padlock algorithms.

This also provides backward compatibility with
setups that predate the rename of padlock.ko
to padlock-aes.ko.

Signed-off-by: Michal Ludvig <michal@logix.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig       | 17 ++++++++---
 drivers/crypto/Makefile      |  1 +
 drivers/crypto/padlock-aes.c |  3 --
 drivers/crypto/padlock.c     | 58 ++++++++++++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 7 deletions(-)
 create mode 100644 drivers/crypto/padlock.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 910c715325be4..86c99cd333fae 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -1,10 +1,10 @@
 menu "Hardware crypto devices"
 
 config CRYPTO_DEV_PADLOCK
-	bool "Support for VIA PadLock ACE"
+	tristate "Support for VIA PadLock ACE"
 	depends on X86_32
 	select CRYPTO_ALGAPI
-	default y
+	default m
 	help
 	  Some VIA processors come with an integrated crypto engine
 	  (so called VIA PadLock ACE, Advanced Cryptography Engine)
@@ -12,8 +12,17 @@ config CRYPTO_DEV_PADLOCK
 	  operations with supported algorithms.
 	  
 	  The instructions are used only when the CPU supports them.
-	  Otherwise software encryption is used. If you are unsure,
-	  say Y.
+	  Otherwise software encryption is used.
+
+	  Selecting M for this option will compile a helper module
+	  padlock.ko that should autoload all below configured
+	  algorithms. Don't worry if your hardware does not support
+	  some or all of them. In such case padlock.ko will
+	  simply write a single line into the kernel log informing
+	  about its failure but everything will keep working fine.
+
+	  If you are unsure, say M. The compiled module will be
+	  called padlock.ko
 
 config CRYPTO_DEV_PADLOCK_AES
 	tristate "PadLock driver for AES algorithm"
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index df498c7d97ab0..4c3d0ec1cf805 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1,2 +1,3 @@
+obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 149e54b0ea2e1..3a2a71108d352 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -544,6 +544,3 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Ludvig");
 
 MODULE_ALIAS("aes-padlock");
-
-/* This module used to be called padlock. */
-MODULE_ALIAS("padlock");
diff --git a/drivers/crypto/padlock.c b/drivers/crypto/padlock.c
new file mode 100644
index 0000000000000..ce581684f4b4a
--- /dev/null
+++ b/drivers/crypto/padlock.c
@@ -0,0 +1,58 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for VIA PadLock hardware crypto engine.
+ *
+ * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include "padlock.h"
+
+static int __init padlock_init(void)
+{
+	int success = 0;
+
+	if (crypto_alg_available("aes-padlock", 0))
+		success++;
+
+	if (crypto_alg_available("sha1-padlock", 0))
+		success++;
+
+	if (crypto_alg_available("sha256-padlock", 0))
+		success++;
+
+	if (!success) {
+		printk(KERN_WARNING PFX "No VIA PadLock drivers have been loaded.\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_NOTICE PFX "%d drivers are available.\n", success);
+
+	return 0;
+}
+
+static void __exit padlock_fini(void)
+{
+}
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("Load all configured PadLock algorithms.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+
-- 
GitLab


From 58ec4152895b96f047dcf5e490ee49b4c574dec3 Mon Sep 17 00:00:00 2001
From: Michal Ludvig <michal@logix.cz>
Date: Mon, 17 Jul 2006 08:14:58 +1000
Subject: [PATCH 0297/1063] [CRYPTO] padlock-sha: TFMs don't need to be static

The TFMs are only used locally, so there is no need to declare
them static. A single pointer, reused for both fallback checks,
is enough.

Signed-off-by: Michal Ludvig <michal@logix.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/padlock-sha.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 95e9971d1a70d..b028db61c3010 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -262,28 +262,28 @@ static struct crypto_alg sha256_alg = {
 
 static void __init padlock_sha_check_fallbacks(void)
 {
-	static struct crypto_tfm *tfm_sha1, *tfm_sha256;
+	struct crypto_tfm *tfm;
 
 	/* We'll try to allocate one TFM for each fallback
 	 * to test that the modules are available. */
-	tfm_sha1 = crypto_alloc_tfm(sha1_fallback, 0);
-	if (!tfm_sha1) {
+	tfm = crypto_alloc_tfm(sha1_fallback, 0);
+	if (!tfm) {
 		printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n",
 		       sha1_alg.cra_name, sha1_fallback);
 	} else {
 		printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha1_alg.cra_name,
-		       crypto_tfm_alg_driver_name(tfm_sha1), crypto_tfm_alg_priority(tfm_sha1));
-		crypto_free_tfm(tfm_sha1);
+		       crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm));
+		crypto_free_tfm(tfm);
 	}
 
-	tfm_sha256 = crypto_alloc_tfm(sha256_fallback, 0);
-	if (!tfm_sha256) {
+	tfm = crypto_alloc_tfm(sha256_fallback, 0);
+	if (!tfm) {
 		printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n",
 		       sha256_alg.cra_name, sha256_fallback);
 	} else {
 		printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha256_alg.cra_name,
-		       crypto_tfm_alg_driver_name(tfm_sha256), crypto_tfm_alg_priority(tfm_sha256));
-		crypto_free_tfm(tfm_sha256);
+		       crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm));
+		crypto_free_tfm(tfm);
 	}
 }
 
-- 
GitLab


From 25cdbcd9e5d20e431f829cafce48a418830011f4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 6 Aug 2006 23:03:08 +1000
Subject: [PATCH 0298/1063] [CRYPTO] crc32c: Fix unconventional setkey usage

The convention for setkey is that once a key is set it should not
change; in particular, init must not wipe out the key set by setkey.
In fact, init should always be called after setkey and before any
digestion is performed.

The only user of crc32c that sets the key is tcrypt.  This patch adds
the necessary init calls there.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crc32c.c | 25 +++++++++++++++----------
 crypto/tcrypt.c |  4 ++++
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/crypto/crc32c.c b/crypto/crc32c.c
index f2660123aeb41..91ecd895e957a 100644
--- a/crypto/crc32c.c
+++ b/crypto/crc32c.c
@@ -16,14 +16,14 @@
 #include <linux/string.h>
 #include <linux/crypto.h>
 #include <linux/crc32c.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
 
 #define CHKSUM_BLOCK_SIZE	32
 #define CHKSUM_DIGEST_SIZE	4
 
 struct chksum_ctx {
 	u32 crc;
+	u32 key;
 };
 
 /*
@@ -35,7 +35,7 @@ static void chksum_init(struct crypto_tfm *tfm)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
-	mctx->crc = ~(u32)0;			/* common usage */
+	mctx->crc = mctx->key;
 }
 
 /*
@@ -53,7 +53,7 @@ static int chksum_setkey(struct crypto_tfm *tfm, const u8 *key,
 			*flags = CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
-	mctx->crc = __cpu_to_le32(*(u32 *)key);
+	mctx->key = le32_to_cpu(*(__le32 *)key);
 	return 0;
 }
 
@@ -61,19 +61,23 @@ static void chksum_update(struct crypto_tfm *tfm, const u8 *data,
 			  unsigned int length)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-	u32 mcrc;
 
-	mcrc = crc32c(mctx->crc, data, (size_t)length);
-
-	mctx->crc = mcrc;
+	mctx->crc = crc32c(mctx->crc, data, length);
 }
 
 static void chksum_final(struct crypto_tfm *tfm, u8 *out)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-	u32 mcrc = (mctx->crc ^ ~(u32)0);
 	
-	*(u32 *)out = __le32_to_cpu(mcrc);
+	*(__le32 *)out = ~cpu_to_le32(mctx->crc);
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
 }
 
 static struct crypto_alg alg = {
@@ -83,6 +87,7 @@ static struct crypto_alg alg = {
 	.cra_ctxsize	=	sizeof(struct chksum_ctx),
 	.cra_module	=	THIS_MODULE,
 	.cra_list	=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_init	=	crc32c_cra_init,
 	.cra_u		=	{
 		.digest = {
 			 .dia_digestsize=	CHKSUM_DIGEST_SIZE,
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index e52f56c5bd5e4..bed225e832316 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -810,6 +810,7 @@ static void test_crc32c(void)
 
 	seed = SEEDTESTVAL;
 	(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
+	crypto_digest_init(tfm);
 	crypto_digest_final(tfm, (u8*)&crc);
 	printk("testing crc32c setkey returns %08x : %s\n", crc, (crc == (SEEDTESTVAL ^ ~(u32)0)) ?
 	       "pass" : "ERROR");
@@ -821,6 +822,7 @@ static void test_crc32c(void)
 	for (i = 0; i < NUMVEC; i++) {
 		seed = ~(u32)0;
 		(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
+		crypto_digest_init(tfm);
 		crypto_digest_update(tfm, &sg[i], 1);
 		crypto_digest_final(tfm, (u8*)&crc);
 		if (crc == vec_results[i]) {
@@ -836,6 +838,7 @@ static void test_crc32c(void)
 	for (i = 0; i < NUMVEC; i++) {
 		seed = (crc ^ ~(u32)0);
 		(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
+		crypto_digest_init(tfm);
 		crypto_digest_update(tfm, &sg[i], 1);
 		crypto_digest_final(tfm, (u8*)&crc);
 	}
@@ -849,6 +852,7 @@ static void test_crc32c(void)
 	printk("\ntesting crc32c using digest:\n");
 	seed = ~(u32)0;
 	(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
+	crypto_digest_init(tfm);
 	crypto_digest_digest(tfm, sg, NUMVEC, (u8*)&crc);
 	if (crc == tot_vec_results) {
 		printk(" %08x:OK", crc);
-- 
GitLab


From 560c06ae1ab7c677002ea3b6ac83521bf12ee07d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2006 14:16:39 +1000
Subject: [PATCH 0299/1063] [CRYPTO] api: Get rid of flags argument to setkey

Now that the tfm is passed directly to setkey instead of the ctx, we no
longer need to pass the &tfm->crt_flags pointer.

This patch also gets rid of a few unnecessary checks on the key length
for ciphers as the cipher layer guarantees that the key length is within
the bounds specified by the algorithm.

Rather than testing dia_setkey every time, this patch does it only once
during crypto_alloc_tfm.  The redundant check from crypto_digest_setkey
is also removed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/i386/crypto/aes.c       |  3 ++-
 arch/s390/crypto/aes_s390.c  |  3 ++-
 arch/s390/crypto/des_s390.c  | 13 ++++++++-----
 arch/x86_64/crypto/aes.c     |  5 +++--
 crypto/aes.c                 |  5 +++--
 crypto/anubis.c              |  3 ++-
 crypto/arc4.c                |  2 +-
 crypto/blowfish.c            |  3 +--
 crypto/cast5.c               |  8 +-------
 crypto/cast6.c               |  5 +++--
 crypto/cipher.c              |  4 ++--
 crypto/crc32c.c              |  5 ++---
 crypto/crypto_null.c         |  2 +-
 crypto/des.c                 |  6 ++++--
 crypto/digest.c              | 15 ++++++++++-----
 crypto/khazad.c              |  8 +-------
 crypto/michael_mic.c         |  5 ++---
 crypto/serpent.c             | 19 +++----------------
 crypto/tcrypt.c              |  5 +----
 crypto/tea.c                 | 16 ++--------------
 crypto/twofish_common.c      |  6 +++---
 drivers/crypto/padlock-aes.c |  5 +++--
 include/crypto/twofish.h     |  3 +--
 include/linux/crypto.h       |  6 ++----
 24 files changed, 63 insertions(+), 92 deletions(-)

diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
index d3806daa3de3b..49aad9397f10a 100644
--- a/arch/i386/crypto/aes.c
+++ b/arch/i386/crypto/aes.c
@@ -379,12 +379,13 @@ static void gen_tabs(void)
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	int i;
 	u32 ss[8];
 	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 
 	/* encryption schedule */
 	
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 5713c7e5bd169..c7c43c9de0d90 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -38,9 +38,10 @@ struct s390_aes_ctx {
 };
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 
 	switch (key_len) {
 	case 16:
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index b3f7496a79b4a..170757b3451da 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -45,9 +45,10 @@ struct crypt_s390_des3_192_ctx {
 };
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
-		      unsigned int keylen, u32 *flags)
+		      unsigned int keylen)
 {
 	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 	int ret;
 
 	/* test if key is valid (not a weak key) */
@@ -167,11 +168,12 @@ static struct crypto_alg des_alg = {
  *
  */
 static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen, u32 *flags)
+			   unsigned int keylen)
 {
 	int i, ret;
 	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u8* temp_key = key;
+	const u8 *temp_key = key;
+	u32 *flags = &tfm->crt_flags;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED;
@@ -303,11 +305,12 @@ static struct crypto_alg des3_128_alg = {
  *
  */
 static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen, u32 *flags)
+			   unsigned int keylen)
 {
 	int i, ret;
 	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u8* temp_key = key;
+	const u8 *temp_key = key;
+	u32 *flags = &tfm->crt_flags;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
 	    memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
index 68866fab37aa1..5cdb13ea5cc2d 100644
--- a/arch/x86_64/crypto/aes.c
+++ b/arch/x86_64/crypto/aes.c
@@ -228,13 +228,14 @@ static void __init gen_tabs(void)
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	u32 i, j, t, u, v, w;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
+	if (key_len % 8) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
diff --git a/crypto/aes.c b/crypto/aes.c
index a038711831e75..e2440773878cc 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -249,13 +249,14 @@ gen_tabs (void)
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	u32 i, t, u, v, w;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
+	if (key_len % 8) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
diff --git a/crypto/anubis.c b/crypto/anubis.c
index 7e2e1a29800e3..1c771f7f4dc5e 100644
--- a/crypto/anubis.c
+++ b/crypto/anubis.c
@@ -461,10 +461,11 @@ static const u32 rc[] = {
 };
 
 static int anubis_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			 unsigned int key_len, u32 *flags)
+			 unsigned int key_len)
 {
 	struct anubis_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __be32 *key = (const __be32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	int N, R, i, r;
 	u32 kappa[ANUBIS_MAX_N];
 	u32 inter[ANUBIS_MAX_N];
diff --git a/crypto/arc4.c b/crypto/arc4.c
index 5edc6a65b987a..8be47e13a9e33 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -25,7 +25,7 @@ struct arc4_ctx {
 };
 
 static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-			unsigned int key_len, u32 *flags)
+			unsigned int key_len)
 {
 	struct arc4_ctx *ctx = crypto_tfm_ctx(tfm);
 	int i, j = 0, k = 0;
diff --git a/crypto/blowfish.c b/crypto/blowfish.c
index 490265f42b3ba..55238c4e37f03 100644
--- a/crypto/blowfish.c
+++ b/crypto/blowfish.c
@@ -399,8 +399,7 @@ static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 /* 
  * Calculates the blowfish S and P boxes for encryption and decryption.
  */
-static int bf_setkey(struct crypto_tfm *tfm, const u8 *key,
-		     unsigned int keylen, u32 *flags)
+static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 {
 	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *P = ctx->p;
diff --git a/crypto/cast5.c b/crypto/cast5.c
index 08eef58c1d3dd..13ea60abc19ab 100644
--- a/crypto/cast5.c
+++ b/crypto/cast5.c
@@ -769,8 +769,7 @@ static void key_schedule(u32 * x, u32 * z, u32 * k)
 }
 
 
-static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned key_len, u32 *flags)
+static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned key_len)
 {
 	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	int i;
@@ -778,11 +777,6 @@ static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key,
 	u32 z[4];
 	u32 k[16];
 	__be32 p_key[4];
-	
-	if (key_len < 5 || key_len > 16) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	c->rr = key_len <= 10 ? 1 : 0;
 
diff --git a/crypto/cast6.c b/crypto/cast6.c
index 08e33bfc3ad10..136ab6dfe8c56 100644
--- a/crypto/cast6.c
+++ b/crypto/cast6.c
@@ -382,14 +382,15 @@ static inline void W(u32 *key, unsigned int i) {
 }
 
 static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			unsigned key_len, u32 *flags)
+			unsigned key_len)
 {
 	int i;
 	u32 key[8];
 	__be32 p_key[8]; /* padded key */
 	struct cast6_ctx *c = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 
-	if (key_len < 16 || key_len > 32 || key_len % 4 != 0) {
+	if (key_len % 4 != 0) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}	
diff --git a/crypto/cipher.c b/crypto/cipher.c
index b899eb97abd7c..56406a4a88d4d 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -264,12 +264,12 @@ static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 {
 	struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher;
 	
+	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
 	if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	} else
-		return cia->cia_setkey(tfm, key, keylen,
-		                       &tfm->crt_flags);
+		return cia->cia_setkey(tfm, key, keylen);
 }
 
 static int ecb_encrypt(struct crypto_tfm *tfm,
diff --git a/crypto/crc32c.c b/crypto/crc32c.c
index 91ecd895e957a..0fa744392a4c3 100644
--- a/crypto/crc32c.c
+++ b/crypto/crc32c.c
@@ -44,13 +44,12 @@ static void chksum_init(struct crypto_tfm *tfm)
  * the seed.
  */
 static int chksum_setkey(struct crypto_tfm *tfm, const u8 *key,
-			 unsigned int keylen, u32 *flags)
+			 unsigned int keylen)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
 	if (keylen != sizeof(mctx->crc)) {
-		if (flags)
-			*flags = CRYPTO_TFM_RES_BAD_KEY_LEN;
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
 	mctx->key = le32_to_cpu(*(__le32 *)key);
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index a0d956b529498..24dbb5d8617e0 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -48,7 +48,7 @@ static void null_final(struct crypto_tfm *tfm, u8 *out)
 { }
 
 static int null_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen, u32 *flags)
+		       unsigned int keylen)
 { return 0; }
 
 static void null_crypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
diff --git a/crypto/des.c b/crypto/des.c
index a9d3c235a6af9..1df3a714fa47f 100644
--- a/crypto/des.c
+++ b/crypto/des.c
@@ -784,9 +784,10 @@ static void dkey(u32 *pe, const u8 *k)
 }
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
-		      unsigned int keylen, u32 *flags)
+		      unsigned int keylen)
 {
 	struct des_ctx *dctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 	u32 tmp[DES_EXPKEY_WORDS];
 	int ret;
 
@@ -864,11 +865,12 @@ static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
  *
  */
 static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen, u32 *flags)
+			   unsigned int keylen)
 {
 	const u32 *K = (const u32 *)key;
 	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 	u32 *expkey = dctx->expkey;
+	u32 *flags = &tfm->crt_flags;
 
 	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
 		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))))
diff --git a/crypto/digest.c b/crypto/digest.c
index 603006a7bef2a..0df7f392a56a8 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -76,12 +76,16 @@ static void final(struct crypto_tfm *tfm, u8 *out)
 		tfm->__crt_alg->cra_digest.dia_final(tfm, out);
 }
 
+static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+{
+	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+	return -ENOSYS;
+}
+
 static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 {
-	u32 flags;
-	if (tfm->__crt_alg->cra_digest.dia_setkey == NULL)
-		return -ENOSYS;
-	return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen, &flags);
+	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+	return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen);
 }
 
 static void digest(struct crypto_tfm *tfm,
@@ -100,12 +104,13 @@ int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags)
 int crypto_init_digest_ops(struct crypto_tfm *tfm)
 {
 	struct digest_tfm *ops = &tfm->crt_digest;
+	struct digest_alg *dalg = &tfm->__crt_alg->cra_digest;
 	
 	ops->dit_init	= init;
 	ops->dit_update	= update;
 	ops->dit_final	= final;
 	ops->dit_digest	= digest;
-	ops->dit_setkey	= setkey;
+	ops->dit_setkey	= dalg->dia_setkey ? setkey : nosetkey;
 	
 	return crypto_alloc_hmac_block(tfm);
 }
diff --git a/crypto/khazad.c b/crypto/khazad.c
index d4c9d3657b36f..9fa24a2dd6ffb 100644
--- a/crypto/khazad.c
+++ b/crypto/khazad.c
@@ -755,19 +755,13 @@ static const u64 c[KHAZAD_ROUNDS + 1] = {
 };
 
 static int khazad_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			 unsigned int key_len, u32 *flags)
+			 unsigned int key_len)
 {
 	struct khazad_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __be32 *key = (const __be32 *)in_key;
 	int r;
 	const u64 *S = T7;
 	u64 K2, K1;
-	
-	if (key_len != 16)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	/* key is supposed to be 32-bit aligned */
 	K2 = ((u64)be32_to_cpu(key[0]) << 32) | be32_to_cpu(key[1]);
diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c
index d061da21cfda0..094397b488496 100644
--- a/crypto/michael_mic.c
+++ b/crypto/michael_mic.c
@@ -123,14 +123,13 @@ static void michael_final(struct crypto_tfm *tfm, u8 *out)
 
 
 static int michael_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen, u32 *flags)
+			  unsigned int keylen)
 {
 	struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm);
 	const __le32 *data = (const __le32 *)key;
 
 	if (keylen != 8) {
-		if (flags)
-			*flags = CRYPTO_TFM_RES_BAD_KEY_LEN;
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
 
diff --git a/crypto/serpent.c b/crypto/serpent.c
index de60cdddbf4a1..465d091cd3ec3 100644
--- a/crypto/serpent.c
+++ b/crypto/serpent.c
@@ -216,7 +216,7 @@ struct serpent_ctx {
 
 
 static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen, u32 *flags)
+			  unsigned int keylen)
 {
 	struct serpent_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *k = ctx->expkey;
@@ -224,13 +224,6 @@ static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
 	u32 r0,r1,r2,r3,r4;
 	int i;
 
-	if ((keylen < SERPENT_MIN_KEY_SIZE)
-			|| (keylen > SERPENT_MAX_KEY_SIZE))
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-
 	/* Copy key, add padding */
 
 	for (i = 0; i < keylen; ++i)
@@ -497,21 +490,15 @@ static struct crypto_alg serpent_alg = {
 };
 
 static int tnepres_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen, u32 *flags)
+			  unsigned int keylen)
 {
 	u8 rev_key[SERPENT_MAX_KEY_SIZE];
 	int i;
 
-	if ((keylen < SERPENT_MIN_KEY_SIZE)
-	    || (keylen > SERPENT_MAX_KEY_SIZE)) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	} 
-
 	for (i = 0; i < keylen; ++i)
 		rev_key[keylen - i - 1] = key[i];
  
-	return serpent_setkey(tfm, rev_key, keylen, flags);
+	return serpent_setkey(tfm, rev_key, keylen);
 }
 
 static void tnepres_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index bed225e832316..6067770746717 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -118,10 +118,7 @@ static void test_hash(char *algo, struct hash_testvec *template,
 		sg_set_buf(&sg[0], hash_tv[i].plaintext, hash_tv[i].psize);
 
 		crypto_digest_init(tfm);
-		if (tfm->crt_u.digest.dit_setkey) {
-			crypto_digest_setkey(tfm, hash_tv[i].key,
-					     hash_tv[i].ksize);
-		}
+		crypto_digest_setkey(tfm, hash_tv[i].key, hash_tv[i].ksize);
 		crypto_digest_update(tfm, sg, 1);
 		crypto_digest_final(tfm, result);
 
diff --git a/crypto/tea.c b/crypto/tea.c
index 5367adc82fc9d..1c54e26fa5293 100644
--- a/crypto/tea.c
+++ b/crypto/tea.c
@@ -46,16 +46,10 @@ struct xtea_ctx {
 };
 
 static int tea_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-		      unsigned int key_len, u32 *flags)
+		      unsigned int key_len)
 {
 	struct tea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
-	
-	if (key_len != 16)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	ctx->KEY[0] = le32_to_cpu(key[0]);
 	ctx->KEY[1] = le32_to_cpu(key[1]);
@@ -125,16 +119,10 @@ static void tea_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 }
 
 static int xtea_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct xtea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
-	
-	if (key_len != 16)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	ctx->KEY[0] = le32_to_cpu(key[0]);
 	ctx->KEY[1] = le32_to_cpu(key[1]);
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
index 1ae0280c25135..b4b9c0c3f4ae7 100644
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
@@ -580,11 +580,11 @@ static const u8 calc_sb_tbl[512] = {
    ctx->a[(j) + 1] = rol32(y, 9)
 
 /* Perform the key setup. */
-int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		   unsigned int key_len, u32 *flags)
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len)
 {
 
 	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 
 	int i, j, k;
 
@@ -600,7 +600,7 @@ int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
 	u8 tmp;
 
 	/* Check key length. */
-	if (key_len != 16 && key_len != 24 && key_len != 32)
+	if (key_len % 8)
 	{
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL; /* unsupported key length */
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 3a2a71108d352..3e683709243e4 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -308,15 +308,16 @@ static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	uint32_t i, t, u, v, w;
 	uint32_t P[AES_EXTENDED_KEY_SIZE];
 	uint32_t rounds;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
+	if (key_len % 8) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
diff --git a/include/crypto/twofish.h b/include/crypto/twofish.h
index e4328cfaaf649..c408522595c68 100644
--- a/include/crypto/twofish.h
+++ b/include/crypto/twofish.h
@@ -17,7 +17,6 @@ struct twofish_ctx {
 	u32 s[4][256], w[8], k[32];
 };
 
-int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		   unsigned int key_len, u32 *flags);
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len);
 
 #endif
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index d6e184c876b50..053bfab43e8d9 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -106,7 +106,7 @@ struct cipher_alg {
 	unsigned int cia_min_keysize;
 	unsigned int cia_max_keysize;
 	int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key,
-	                  unsigned int keylen, u32 *flags);
+	                  unsigned int keylen);
 	void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 
@@ -131,7 +131,7 @@ struct digest_alg {
 			   unsigned int len);
 	void (*dia_final)(struct crypto_tfm *tfm, u8 *out);
 	int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key,
-	                  unsigned int keylen, u32 *flags);
+	                  unsigned int keylen);
 };
 
 struct compress_alg {
@@ -397,8 +397,6 @@ static inline int crypto_digest_setkey(struct crypto_tfm *tfm,
                                        const u8 *key, unsigned int keylen)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	if (tfm->crt_digest.dit_setkey == NULL)
-		return -ENOSYS;
 	return tfm->crt_digest.dit_setkey(tfm, key, keylen);
 }
 
-- 
GitLab


From ee7564166da9e218c3f605ee78ff16599d4d5a05 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 9 Jul 2006 14:49:42 +1000
Subject: [PATCH 0300/1063] [CRYPTO] digest: Store temporary digest in tfm

When the final result location is unaligned, we store the digest in a
temporary buffer before copying it to the final location.  Currently
that buffer sits on the stack.  This patch moves it to an area in the
tfm, just like the CBC IV buffer.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/digest.c   | 16 ++++++++++------
 crypto/internal.h |  9 ++++++++-
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/crypto/digest.c b/crypto/digest.c
index 0df7f392a56a8..19e75563776bb 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -66,14 +66,18 @@ static void update(struct crypto_tfm *tfm,
 static void final(struct crypto_tfm *tfm, u8 *out)
 {
 	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	struct digest_alg *digest = &tfm->__crt_alg->cra_digest;
+
 	if (unlikely((unsigned long)out & alignmask)) {
-		unsigned int size = crypto_tfm_alg_digestsize(tfm);
-		u8 buffer[size + alignmask];
-		u8 *dst = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
-		tfm->__crt_alg->cra_digest.dia_final(tfm, dst);
-		memcpy(out, dst, size);
+		unsigned long align = alignmask + 1;
+		unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm);
+		u8 *dst = (u8 *)ALIGN(addr, align) +
+			  ALIGN(tfm->__crt_alg->cra_ctxsize, align);
+
+		digest->dia_final(tfm, dst);
+		memcpy(out, dst, digest->dia_digestsize);
 	} else
-		tfm->__crt_alg->cra_digest.dia_final(tfm, out);
+		digest->dia_final(tfm, out);
 }
 
 static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
diff --git a/crypto/internal.h b/crypto/internal.h
index 03c00b0e6b60c..b110b979b9888 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -99,7 +99,14 @@ static inline void crypto_exit_proc(void)
 static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg,
 						 int flags)
 {
-	return alg->cra_ctxsize;
+	unsigned int len = alg->cra_ctxsize;
+
+	if (alg->cra_alignmask) {
+		len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1);
+		len += alg->cra_digest.dia_digestsize;
+	}
+
+	return len;
 }
 
 static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg,
-- 
GitLab


From c907ee76d8456fe1d98f40b5febfc7802a73b784 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 22:04:03 +1000
Subject: [PATCH 0301/1063] [CRYPTO] tcrypt: Use test_hash for crc32c

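Now that crc32c has been fixed to conform to standard digest semantics,
we can use test_hash for it.  I've turned the last test into a chunked
test (np = 2, tap = { 31, 209 }); to hold its 240-byte input, the
plaintext field of struct hash_testvec grows from 128 to 240 bytes.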

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 106 +---------------------------
 crypto/tcrypt.h | 179 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 180 insertions(+), 105 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 6067770746717..56d0d8b3bcf2b 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -762,108 +762,6 @@ static void test_deflate(void)
 	crypto_free_tfm(tfm);
 }
 
-static void test_crc32c(void)
-{
-#define NUMVEC 6
-#define VECSIZE 40
-
-	int i, j, pass;
-	u32 crc;
-	u8 b, test_vec[NUMVEC][VECSIZE];
-	static u32 vec_results[NUMVEC] = {
-		0x0e2c157f, 0xe980ebf6, 0xde74bded,
-		0xd579c862, 0xba979ad0, 0x2b29d913
-	};
-	static u32 tot_vec_results = 0x24c5d375;
-
-	struct scatterlist sg[NUMVEC];
-	struct crypto_tfm *tfm;
-	char *fmtdata = "testing crc32c initialized to %08x: %s\n";
-#define SEEDTESTVAL 0xedcba987
-	u32 seed;
-
-	printk("\ntesting crc32c\n");
-
-	tfm = crypto_alloc_tfm("crc32c", 0);
-	if (tfm == NULL) {
-		printk("failed to load transform for crc32c\n");
-		return;
-	}
-
-	crypto_digest_init(tfm);
-	crypto_digest_final(tfm, (u8*)&crc);
-	printk(fmtdata, crc, (crc == 0) ? "pass" : "ERROR");
-
-	/*
-	 * stuff test_vec with known values, simple incrementing
-	 * byte values.
-	 */
-	b = 0;
-	for (i = 0; i < NUMVEC; i++) {
-		for (j = 0; j < VECSIZE; j++)
-			test_vec[i][j] = ++b;
-		sg_set_buf(&sg[i], test_vec[i], VECSIZE);
-	}
-
-	seed = SEEDTESTVAL;
-	(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-	crypto_digest_init(tfm);
-	crypto_digest_final(tfm, (u8*)&crc);
-	printk("testing crc32c setkey returns %08x : %s\n", crc, (crc == (SEEDTESTVAL ^ ~(u32)0)) ?
-	       "pass" : "ERROR");
-
-	printk("testing crc32c using update/final:\n");
-
-	pass = 1;		    /* assume all is well */
-
-	for (i = 0; i < NUMVEC; i++) {
-		seed = ~(u32)0;
-		(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-		crypto_digest_init(tfm);
-		crypto_digest_update(tfm, &sg[i], 1);
-		crypto_digest_final(tfm, (u8*)&crc);
-		if (crc == vec_results[i]) {
-			printk(" %08x:OK", crc);
-		} else {
-			printk(" %08x:BAD, wanted %08x\n", crc, vec_results[i]);
-			pass = 0;
-		}
-	}
-
-	printk("\ntesting crc32c using incremental accumulator:\n");
-	crc = 0;
-	for (i = 0; i < NUMVEC; i++) {
-		seed = (crc ^ ~(u32)0);
-		(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-		crypto_digest_init(tfm);
-		crypto_digest_update(tfm, &sg[i], 1);
-		crypto_digest_final(tfm, (u8*)&crc);
-	}
-	if (crc == tot_vec_results) {
-		printk(" %08x:OK", crc);
-	} else {
-		printk(" %08x:BAD, wanted %08x\n", crc, tot_vec_results);
-		pass = 0;
-	}
-
-	printk("\ntesting crc32c using digest:\n");
-	seed = ~(u32)0;
-	(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-	crypto_digest_init(tfm);
-	crypto_digest_digest(tfm, sg, NUMVEC, (u8*)&crc);
-	if (crc == tot_vec_results) {
-		printk(" %08x:OK", crc);
-	} else {
-		printk(" %08x:BAD, wanted %08x\n", crc, tot_vec_results);
-		pass = 0;
-	}
-
-	printk("\n%s\n", pass ? "pass" : "ERROR");
-
-	crypto_free_tfm(tfm);
-	printk("crc32c test complete\n");
-}
-
 static void test_available(void)
 {
 	char **name = check;
@@ -969,7 +867,7 @@ static void do_test(void)
 		test_hash("tgr160", tgr160_tv_template, TGR160_TEST_VECTORS);
 		test_hash("tgr128", tgr128_tv_template, TGR128_TEST_VECTORS);
 		test_deflate();
-		test_crc32c();
+		test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS);
 #ifdef CONFIG_CRYPTO_HMAC
 		test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS);
 		test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS);
@@ -1065,7 +963,7 @@ static void do_test(void)
 		break;
 
 	case 18:
-		test_crc32c();
+		test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS);
 		break;
 
 	case 19:
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 1fac5602f6331..408d5aad58644 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -28,7 +28,7 @@
 struct hash_testvec {
 	/* only used with keyed hash algorithms */
 	char key[128] __attribute__ ((__aligned__(4)));
-	char plaintext[128];
+	char plaintext[240];
 	char digest[MAX_DIGEST_SIZE];
 	unsigned char tap[MAX_TAP];
 	unsigned char psize;
@@ -2896,6 +2896,183 @@ static struct hash_testvec michael_mic_tv_template[] = {
 	}
 };
 
+/*
+ * CRC32C test vectors
+ */
+#define CRC32C_TEST_VECTORS 14
+
+static struct hash_testvec crc32c_tv_template[] = {
+	{
+		.psize = 0,
+		.digest = { 0x00, 0x00, 0x00, 0x00 }
+	},
+	{
+		.key = { 0x87, 0xa9, 0xcb, 0xed },
+		.ksize = 4,
+		.psize = 0,
+		.digest = { 0x78, 0x56, 0x34, 0x12 },
+	},
+	{
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+			       0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+			       0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+			       0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+			       0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28 },
+		.psize = 40,
+		.digest = { 0x7f, 0x15, 0x2c, 0x0e }
+	},
+	{
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+			       0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+			       0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+			       0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+			       0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50 },
+		.psize = 40,
+		.digest = { 0xf6, 0xeb, 0x80, 0xe9 }
+	},
+	{
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+			       0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60,
+			       0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+			       0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
+			       0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78 },
+		.psize = 40,
+		.digest = { 0xed, 0xbd, 0x74, 0xde }
+	},
+	{
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+			       0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
+			       0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90,
+			       0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+			       0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0 },
+		.psize = 40,
+		.digest = { 0x62, 0xc8, 0x79, 0xd5 }
+	},
+	{
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8,
+			       0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+			       0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
+			       0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+			       0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8 },
+		.psize = 40,
+		.digest = { 0xd0, 0x9a, 0x97, 0xba }
+	},
+	{
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+			       0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8,
+			       0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+			       0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,
+			       0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 },
+		.psize = 40,
+		.digest = { 0x13, 0xd9, 0x29, 0x2b }
+	},
+	{
+		.key = { 0x80, 0xea, 0xd3, 0xf1 },
+		.ksize = 4,
+		.plaintext = { 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+			       0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+			       0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+			       0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+			       0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50 },
+		.psize = 40,
+		.digest = { 0x0c, 0xb5, 0xe2, 0xa2 }
+	},
+	{
+		.key = { 0xf3, 0x4a, 0x1d, 0x5d },
+		.ksize = 4,
+		.plaintext = { 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+			       0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60,
+			       0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+			       0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
+			       0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78 },
+		.psize = 40,
+		.digest = { 0xd1, 0x7f, 0xfb, 0xa6 }
+	},
+	{
+		.key = { 0x2e, 0x80, 0x04, 0x59 },
+		.ksize = 4,
+		.plaintext = { 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+			       0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
+			       0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90,
+			       0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+			       0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0 },
+		.psize = 40,
+		.digest = { 0x59, 0x33, 0xe6, 0x7a }
+	},
+	{
+		.key = { 0xa6, 0xcc, 0x19, 0x85 },
+		.ksize = 4,
+		.plaintext = { 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8,
+			       0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+			       0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
+			       0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+			       0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8 },
+		.psize = 40,
+		.digest = { 0xbe, 0x03, 0x01, 0xd2 }
+	},
+	{
+		.key = { 0x41, 0xfc, 0xfe, 0x2d },
+		.ksize = 4,
+		.plaintext = { 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+			       0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8,
+			       0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+			       0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,
+			       0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 },
+		.psize = 40,
+		.digest = { 0x75, 0xd3, 0xc5, 0x24 }
+	},
+	{
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+			       0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+			       0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+			       0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+			       0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+			       0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+			       0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+			       0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+			       0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+			       0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+			       0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+			       0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60,
+			       0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+			       0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
+			       0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+			       0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+			       0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
+			       0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90,
+			       0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+			       0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0,
+			       0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8,
+			       0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+			       0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
+			       0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+			       0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+			       0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+			       0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8,
+			       0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+			       0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,
+			       0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 },
+		.psize = 240,
+		.digest = { 0x75, 0xd3, 0xc5, 0x24 },
+		.np = 2,
+		.tap = { 31, 209 }
+	},
+};
+
 /*
  * Cipher speed tests
  */
-- 
GitLab


From df89820ebd5bbf4f3c6b5f8ee7d9e983107f6a91 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 14 Jul 2006 10:42:27 +1000
Subject: [PATCH 0302/1063] [CRYPTO] cipher: Removed special IV checks for ECB

This patch makes IV operations on ECB fail through nocrypt_iv rather than
calling BUG().  This is needed to generalise CBC/ECB using the template
mechanism.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/cipher.c        | 2 ++
 include/linux/crypto.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/cipher.c b/crypto/cipher.c
index 56406a4a88d4d..aebc4a2adc805 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 	case CRYPTO_TFM_MODE_ECB:
 		ops->cit_encrypt = ecb_encrypt;
 		ops->cit_decrypt = ecb_decrypt;
+		ops->cit_encrypt_iv = nocrypt_iv;
+		ops->cit_decrypt_iv = nocrypt_iv;
 		break;
 		
 	case CRYPTO_TFM_MODE_CBC:
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 053bfab43e8d9..dbdfc7c793673 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -422,7 +422,6 @@ static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
                                            unsigned int nbytes, u8 *iv)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB);
 	return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv);
 }                                        
 
@@ -441,7 +440,6 @@ static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
                                            unsigned int nbytes, u8 *iv)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB);
 	return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv);
 }
 
-- 
GitLab


From 7fed0bf271b374be4c98a5880faed4b1128e78e9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 6 Aug 2006 23:10:45 +1000
Subject: [PATCH 0303/1063] [CRYPTO] api: Add common instance initialisation
 code

This patch adds the helpers crypto_get_attr_alg and crypto_alloc_instance
which can be used by simple one-argument templates like hmac to process
input parameters and allocate instances.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         | 53 +++++++++++++++++++++++++++++++++++++++++
 include/crypto/algapi.h |  5 ++++
 2 files changed, 58 insertions(+)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 36c4f1bdb5214..c91530021e9ce 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
 #include <linux/string.h>
 
 #include "internal.h"
@@ -415,6 +416,58 @@ int crypto_unregister_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
 
+struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len,
+				       u32 type, u32 mask)
+{
+	struct rtattr *rta = param;
+	struct crypto_attr_alg *alga;
+
+	if (!RTA_OK(rta, len))
+		return ERR_PTR(-EBADR);
+	if (rta->rta_type != CRYPTOA_ALG || RTA_PAYLOAD(rta) < sizeof(*alga))
+		return ERR_PTR(-EINVAL);
+
+	alga = RTA_DATA(rta);
+	alga->name[CRYPTO_MAX_ALG_NAME - 1] = 0;
+
+	return crypto_alg_mod_lookup(alga->name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_get_attr_alg);
+
+struct crypto_instance *crypto_alloc_instance(const char *name,
+					      struct crypto_alg *alg)
+{
+	struct crypto_instance *inst;
+	struct crypto_spawn *spawn;
+	int err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name,
+		     alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
+
+	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+		     name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
+
+	spawn = crypto_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, inst);
+
+	if (err)
+		goto err_free_inst;
+
+	return inst;
+
+err_free_inst:
+	kfree(inst);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_instance);
+
 static int __init crypto_algapi_init(void)
 {
 	crypto_init_proc();
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index b20f4bdb23bac..1a598f8294176 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -51,6 +51,11 @@ int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
 void crypto_drop_spawn(struct crypto_spawn *spawn);
 struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn);
 
+struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len,
+				       u32 type, u32 mask);
+struct crypto_instance *crypto_alloc_instance(const char *name,
+					      struct crypto_alg *alg);
+
 static inline void *crypto_instance_ctx(struct crypto_instance *inst)
 {
 	return inst->__ctx;
-- 
GitLab


From f3f632d61ae9af85d436706ee8e33af1a7fb9c28 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 6 Aug 2006 23:12:59 +1000
Subject: [PATCH 0304/1063] [CRYPTO] api: Added asynchronous flag

This patch adds the asynchronous flag and changes all existing users to
only look up algorithms that are synchronous.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/api.c           | 5 +++--
 include/linux/crypto.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 7e5522cf856e0..1e4692a134744 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -355,7 +355,7 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 	do {
 		struct crypto_alg *alg;
 
-		alg = crypto_alg_mod_lookup(name, 0, 0);
+		alg = crypto_alg_mod_lookup(name, 0, CRYPTO_ALG_ASYNC);
 		err = PTR_ERR(alg);
 		if (IS_ERR(alg))
 			continue;
@@ -394,7 +394,8 @@ void crypto_free_tfm(struct crypto_tfm *tfm)
 int crypto_alg_available(const char *name, u32 flags)
 {
 	int ret = 0;
-	struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0);
+	struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0,
+						       CRYPTO_ALG_ASYNC);
 	
 	if (!IS_ERR(alg)) {
 		crypto_mod_put(alg);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index dbdfc7c793673..530dc4bf363cc 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -37,6 +37,7 @@
 #define CRYPTO_ALG_LARVAL		0x00000010
 #define CRYPTO_ALG_DEAD			0x00000020
 #define CRYPTO_ALG_DYING		0x00000040
+#define CRYPTO_ALG_ASYNC		0x00000080
 
 /*
  * Transform masks and values (for crt_flags).
-- 
GitLab


From 65b75c36f4e8422602826c75c803136e0da94122 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 21:18:50 +1000
Subject: [PATCH 0305/1063] [CRYPTO] s390: Added missing driver name and
 priority

Accelerated versions of crypto algorithms must carry a distinct driver name
and priority in order to distinguish themselves from their generic
counterpart.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/aes_s390.c    | 2 ++
 arch/s390/crypto/crypt_s390.h  | 2 ++
 arch/s390/crypto/des_s390.c    | 6 ++++++
 arch/s390/crypto/sha1_s390.c   | 2 ++
 arch/s390/crypto/sha256_s390.c | 2 ++
 5 files changed, 14 insertions(+)

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index c7c43c9de0d90..220300e760d8a 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -222,6 +222,8 @@ static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
 
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
+	.cra_driver_name	=	"aes-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index d1c259a7fe33a..d1d330797f75d 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -20,6 +20,8 @@
 #define CRYPT_S390_OP_MASK 0xFF00
 #define CRYPT_S390_FUNC_MASK 0x00FF
 
+#define CRYPT_S390_PRIORITY 300
+
 /* s930 cryptographic operations */
 enum crypt_s390_operations {
 	CRYPT_S390_KM   = 0x0100,
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 170757b3451da..3fd5d37d5e05b 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -135,6 +135,8 @@ static unsigned int des_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
 
 static struct crypto_alg des_alg = {
 	.cra_name		=	"des",
+	.cra_driver_name	=	"des-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
@@ -271,6 +273,8 @@ static unsigned int des3_128_decrypt_cbc(const struct cipher_desc *desc,
 
 static struct crypto_alg des3_128_alg = {
 	.cra_name		=	"des3_ede128",
+	.cra_driver_name	=	"des3_ede128-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
@@ -411,6 +415,8 @@ static unsigned int des3_192_decrypt_cbc(const struct cipher_desc *desc,
 
 static struct crypto_alg des3_192_alg = {
 	.cra_name		=	"des3_ede",
+	.cra_driver_name	=	"des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 9d34a35b1aa56..49ca8690ee39b 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -126,6 +126,8 @@ static void sha1_final(struct crypto_tfm *tfm, u8 *out)
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha1",
+	.cra_driver_name =	"sha1-s390",
+	.cra_priority	=	CRYPT_S390_PRIORITY,
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA1_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct crypt_s390_sha1_ctx),
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index f573df30f31d2..8e4e67503fe7f 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -127,6 +127,8 @@ static void sha256_final(struct crypto_tfm *tfm, u8 *out)
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha256",
+	.cra_driver_name =	"sha256-s390",
+	.cra_priority	=	CRYPT_S390_PRIORITY,
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA256_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct s390_sha256_ctx),
-- 
GitLab


From 6d7d684d635ac5a345f075015f2c84169c111c6a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 30 Jul 2006 11:53:01 +1000
Subject: [PATCH 0306/1063] [CRYPTO] api: Added crypto_alloc_base

Up until now all crypto transforms have been of the same type, struct
crypto_tfm, regardless of whether they are ciphers, digests, or other
types.  As a result of that, we check the types at run-time before
each crypto operation.

This is rather cumbersome.  We could instead use different C types for
each crypto type to ensure that the correct types are used at compile
time.  That is, we would have crypto_cipher/crypto_digest instead of
just crypto_tfm.  The appropriate type would then be required for the
actual operations such as crypto_digest_digest.

Now that we have the type/mask fields when looking up algorithms, it
is easy to request an algorithm of the precise type that the user
wants.  However, crypto_alloc_tfm currently does not expose these new
attributes.

This patch introduces the function crypto_alloc_base which will carry
these new parameters.  It will be renamed to crypto_alloc_tfm once
all existing users have been converted.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/api.c           | 60 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/crypto.h | 14 +++-------
 2 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 1e4692a134744..bc4b7901acdf4 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -372,6 +372,66 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 	return tfm;
 }
 
+/*
+ *	crypto_alloc_base - Locate algorithm and allocate transform
+ *	@alg_name: Name of algorithm
+ *	@type: Type of algorithm
+ *	@mask: Mask for type comparison
+ *
+ *	crypto_alloc_base() will first attempt to locate an already loaded
+ *	algorithm.  If that fails and the kernel supports dynamically loadable
+ *	modules, it will then attempt to load a module of the same name or
+ *	alias.  If that fails it will send a query to any loaded crypto manager
+ *	to construct an algorithm on the fly.  A refcount is grabbed on the
+ *	algorithm which is then associated with the new transform.
+ *
+ *	The returned transform is of a non-determinate type.  Most people
+ *	should use one of the more specific allocation functions such as
+ *	crypto_alloc_blkcipher.
+ *
+ *	In case of error the return value is an error pointer.
+ */
+struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask)
+{
+	struct crypto_tfm *tfm;
+	int err;
+
+	for (;;) {
+		struct crypto_alg *alg;
+
+		alg = crypto_alg_mod_lookup(alg_name, type, mask);
+		err = PTR_ERR(alg);
+		tfm = ERR_PTR(err);
+		if (IS_ERR(alg))
+			goto err;
+
+		tfm = __crypto_alloc_tfm(alg, 0);
+		if (!IS_ERR(tfm))
+			break;
+
+		crypto_mod_put(alg);
+		err = PTR_ERR(tfm);
+
+err:
+		if (err != -EAGAIN)
+			break;
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+	};
+
+	return tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_base);
+ 
+/*
+ *	crypto_free_tfm - Free crypto transform
+ *	@tfm: Transform to free
+ *
+ *	crypto_free_tfm() frees up the transform and any associated resources,
+ *	then drops the refcount on the associated algorithm.
+ */
 void crypto_free_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_alg *alg;
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 530dc4bf363cc..6847ab0ea30e1 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -194,8 +194,8 @@ static inline int crypto_alg_available(const char *name, u32 flags)
 
 /*
  * Transforms: user-instantiated objects which encapsulate algorithms
- * and core processing logic.  Managed via crypto_alloc_tfm() and
- * crypto_free_tfm(), as well as the various helpers below.
+ * and core processing logic.  Managed via crypto_alloc_*() and
+ * crypto_free_*(), as well as the various helpers below.
  */
 
 struct cipher_tfm {
@@ -278,16 +278,8 @@ struct crypto_attr_alg {
  * Transform user interface.
  */
  
-/*
- * crypto_alloc_tfm() will first attempt to locate an already loaded algorithm.
- * If that fails and the kernel supports dynamically loadable modules, it
- * will then attempt to load a module of the same name or alias.  A refcount
- * is grabbed on the algorithm which is then associated with the new transform.
- *
- * crypto_free_tfm() frees up the transform and any associated resources,
- * then drops the refcount on the associated algorithm.
- */
 struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags);
+struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask);
 void crypto_free_tfm(struct crypto_tfm *tfm);
 
 /*
-- 
GitLab


From 8f21cf0d2bae04ece761595036c9da8328b279aa Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 30 Jul 2006 11:53:45 +1000
Subject: [PATCH 0307/1063] [CRYPTO] api: Feed flag directly to crypto_yield

The sleeping flag used to determine whether crypto_yield can actually
yield is really a per-operation flag rather than a per-tfm flag.  This
patch changes crypto_yield to take a flag directly so that we can start
using a per-operation flag instead of the tfm flag.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/cipher.c   | 2 +-
 crypto/digest.c   | 2 +-
 crypto/internal.h | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/cipher.c b/crypto/cipher.c
index aebc4a2adc805..f573c59ed9dc7 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -145,7 +145,7 @@ static int crypt(const struct cipher_desc *desc,
 		if (!nbytes)
 			break;
 
-		crypto_yield(tfm);
+		crypto_yield(tfm->crt_flags);
 	}
 
 	if (buffer)
diff --git a/crypto/digest.c b/crypto/digest.c
index 19e75563776bb..96244a528844d 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -55,7 +55,7 @@ static void update(struct crypto_tfm *tfm,
 			tfm->__crt_alg->cra_digest.dia_update(tfm, p,
 							      bytes_from_page);
 			crypto_kunmap(src, 0);
-			crypto_yield(tfm);
+			crypto_yield(tfm->crt_flags);
 			offset = 0;
 			pg++;
 			l -= bytes_from_page;
diff --git a/crypto/internal.h b/crypto/internal.h
index b110b979b9888..7dc04efb55c6c 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -67,9 +67,9 @@ static inline void crypto_kunmap(void *vaddr, int out)
 	kunmap_atomic(vaddr, crypto_kmap_type(out));
 }
 
-static inline void crypto_yield(struct crypto_tfm *tfm)
+static inline void crypto_yield(u32 flags)
 {
-	if (tfm->crt_flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+	if (flags & CRYPTO_TFM_REQ_MAY_SLEEP)
 		cond_resched();
 }
 
-- 
GitLab


From e853c3cfa8cc24869ecd2526e589bcb176bc12e9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Aug 2006 00:06:54 +1000
Subject: [PATCH 0308/1063] [CRYPTO] api: Added crypto_type support

This patch adds the crypto_type structure which will be used for all new
crypto algorithm types, beginning with block ciphers.

The primary purpose of this abstraction is to allow different crypto_type
objects for crypto algorithms of the same type; in particular, there will
be different crypto_type objects for asynchronous algorithms.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/api.c            | 32 +++++++++++++++++++++++---------
 crypto/proc.c           |  5 ++++-
 include/crypto/algapi.h |  8 ++++++++
 include/linux/crypto.h  |  3 +++
 4 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index bc4b7901acdf4..edaa843d8e83b 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -226,17 +226,18 @@ static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags)
 		
 	case CRYPTO_ALG_TYPE_COMPRESS:
 		return crypto_init_compress_flags(tfm, flags);
-	
-	default:
-		break;
 	}
 	
-	BUG();
-	return -EINVAL;
+	return 0;
 }
 
 static int crypto_init_ops(struct crypto_tfm *tfm)
 {
+	const struct crypto_type *type = tfm->__crt_alg->cra_type;
+
+	if (type)
+		return type->init(tfm);
+
 	switch (crypto_tfm_alg_type(tfm)) {
 	case CRYPTO_ALG_TYPE_CIPHER:
 		return crypto_init_cipher_ops(tfm);
@@ -257,6 +258,14 @@ static int crypto_init_ops(struct crypto_tfm *tfm)
 
 static void crypto_exit_ops(struct crypto_tfm *tfm)
 {
+	const struct crypto_type *type = tfm->__crt_alg->cra_type;
+
+	if (type) {
+		if (type->exit)
+			type->exit(tfm);
+		return;
+	}
+
 	switch (crypto_tfm_alg_type(tfm)) {
 	case CRYPTO_ALG_TYPE_CIPHER:
 		crypto_exit_cipher_ops(tfm);
@@ -278,26 +287,31 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 
 static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags)
 {
+	const struct crypto_type *type = alg->cra_type;
 	unsigned int len;
 
+	len = alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1);
+	if (type)
+		return len + type->ctxsize(alg);
+
 	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	default:
 		BUG();
 
 	case CRYPTO_ALG_TYPE_CIPHER:
-		len = crypto_cipher_ctxsize(alg, flags);
+		len += crypto_cipher_ctxsize(alg, flags);
 		break;
 		
 	case CRYPTO_ALG_TYPE_DIGEST:
-		len = crypto_digest_ctxsize(alg, flags);
+		len += crypto_digest_ctxsize(alg, flags);
 		break;
 		
 	case CRYPTO_ALG_TYPE_COMPRESS:
-		len = crypto_compress_ctxsize(alg, flags);
+		len += crypto_compress_ctxsize(alg, flags);
 		break;
 	}
 
-	return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
+	return len;
 }
 
 void crypto_shoot_alg(struct crypto_alg *alg)
diff --git a/crypto/proc.c b/crypto/proc.c
index 9e573b17e8879..dabce0676f63a 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -78,7 +78,10 @@ static int c_show(struct seq_file *m, void *p)
 		seq_printf(m, "type         : compression\n");
 		break;
 	default:
-		seq_printf(m, "type         : unknown\n");
+		if (alg->cra_type && alg->cra_type->show)
+			alg->cra_type->show(m, alg);
+		else
+			seq_printf(m, "type         : unknown\n");
 		break;
 	}
 
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 1a598f8294176..c533c0a291af4 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -15,6 +15,14 @@
 #include <linux/crypto.h>
 
 struct module;
+struct seq_file;
+
+struct crypto_type {
+	unsigned int (*ctxsize)(struct crypto_alg *alg);
+	int (*init)(struct crypto_tfm *tfm);
+	void (*exit)(struct crypto_tfm *tfm);
+	void (*show)(struct seq_file *m, struct crypto_alg *alg);
+};
 
 struct crypto_instance {
 	struct crypto_alg alg;
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 6847ab0ea30e1..8e9c407b00d26 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -90,6 +90,7 @@
 
 struct scatterlist;
 struct crypto_tfm;
+struct crypto_type;
 
 struct cipher_desc {
 	struct crypto_tfm *tfm;
@@ -161,6 +162,8 @@ struct crypto_alg {
 	char cra_name[CRYPTO_MAX_ALG_NAME];
 	char cra_driver_name[CRYPTO_MAX_ALG_NAME];
 
+	const struct crypto_type *cra_type;
+
 	union {
 		struct cipher_alg cipher;
 		struct digest_alg digest;
-- 
GitLab


From f28776a369b12f9a03a822a8e1090ed670a41f4f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2006 20:58:18 +1000
Subject: [PATCH 0309/1063] [CRYPTO] cipher: Added encrypt_one/decrypt_one

This patch adds two new operations for the simple cipher that encrypts or
decrypts a single block at a time.  This will be the main interface after
the existing block operations have moved over to the new block ciphers.

It also adds the crypto_cipher type which is currently only used on the
new operations but will be extended to setkey as well once existing users
have been converted to use block ciphers where applicable.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/cipher.c         | 48 +++++++++++++++++++++
 include/crypto/algapi.h |  5 +++
 include/linux/crypto.h  | 96 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 149 insertions(+)

diff --git a/crypto/cipher.c b/crypto/cipher.c
index f573c59ed9dc7..d8ca0ec8d0be9 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -388,12 +388,60 @@ int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags)
 	return 0;
 }
 
+static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *,
+					      const u8 *),
+				   struct crypto_tfm *tfm,
+				   u8 *dst, const u8 *src)
+{
+	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	unsigned int size = crypto_tfm_alg_blocksize(tfm);
+	u8 buffer[size + alignmask];
+	u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+
+	memcpy(tmp, src, size);
+	fn(tfm, tmp, tmp);
+	memcpy(dst, tmp, size);
+}
+
+static void cipher_encrypt_unaligned(struct crypto_tfm *tfm,
+				     u8 *dst, const u8 *src)
+{
+	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+
+	if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) {
+		cipher_crypt_unaligned(cipher->cia_encrypt, tfm, dst, src);
+		return;
+	}
+
+	cipher->cia_encrypt(tfm, dst, src);
+}
+
+static void cipher_decrypt_unaligned(struct crypto_tfm *tfm,
+				     u8 *dst, const u8 *src)
+{
+	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+
+	if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) {
+		cipher_crypt_unaligned(cipher->cia_decrypt, tfm, dst, src);
+		return;
+	}
+
+	cipher->cia_decrypt(tfm, dst, src);
+}
+
 int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 {
 	int ret = 0;
 	struct cipher_tfm *ops = &tfm->crt_cipher;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	ops->cit_setkey = setkey;
+	ops->cit_encrypt_one = crypto_tfm_alg_alignmask(tfm) ?
+		cipher_encrypt_unaligned : cipher->cia_encrypt;
+	ops->cit_decrypt_one = crypto_tfm_alg_alignmask(tfm) ?
+		cipher_decrypt_unaligned : cipher->cia_decrypt;
 
 	switch (tfm->crt_cipher.cit_mode) {
 	case CRYPTO_TFM_MODE_ECB:
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index c533c0a291af4..6f9fb27b2071f 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -69,5 +69,10 @@ static inline void *crypto_instance_ctx(struct crypto_instance *inst)
 	return inst->__ctx;
 }
 
+static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm)
+{
+	return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher;
+}
+
 #endif	/* _CRYPTO_ALGAPI_H */
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 8e9c407b00d26..fdecee83878ca 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -224,6 +224,8 @@ struct cipher_tfm {
 			   struct scatterlist *src,
 			   unsigned int nbytes, u8 *iv);
 	void (*cit_xor_block)(u8 *dst, const u8 *src);
+	void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+	void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 };
 
 struct digest_tfm {
@@ -268,6 +270,8 @@ struct crypto_tfm {
 	void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
+#define crypto_cipher crypto_tfm
+
 enum {
 	CRYPTOA_UNSPEC,
 	CRYPTOA_ALG,
@@ -347,6 +351,21 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_alignmask;
 }
 
+static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm)
+{
+	return tfm->crt_flags;
+}
+
+static inline void crypto_tfm_set_flags(struct crypto_tfm *tfm, u32 flags)
+{
+	tfm->crt_flags |= flags;
+}
+
+static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags)
+{
+	tfm->crt_flags &= ~flags;
+}
+
 static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
 {
 	return tfm->__crt_ctx;
@@ -361,6 +380,83 @@ static inline unsigned int crypto_tfm_ctx_alignment(void)
 /*
  * API wrappers.
  */
+static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm)
+{
+	return (struct crypto_cipher *)tfm;
+}
+
+static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm)
+{
+	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
+	return __crypto_cipher_cast(tfm);
+}
+
+static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name,
+							u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_CIPHER;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask));
+}
+
+static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm)
+{
+	return tfm;
+}
+
+static inline void crypto_free_cipher(struct crypto_cipher *tfm)
+{
+	crypto_free_tfm(crypto_cipher_tfm(tfm));
+}
+
+static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm)
+{
+	return &crypto_cipher_tfm(tfm)->crt_cipher;
+}
+
+static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm));
+}
+
+static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm));
+}
+
+static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm)
+{
+	return crypto_tfm_get_flags(crypto_cipher_tfm(tfm));
+}
+
+static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm,
+					   u32 flags)
+{
+	crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags);
+}
+
+static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm,
+					     u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags);
+}
+
+static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm,
+					     u8 *dst, const u8 *src)
+{
+	crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm),
+						dst, src);
+}
+
+static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm,
+					     u8 *dst, const u8 *src)
+{
+	crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm),
+						dst, src);
+}
+
 static inline void crypto_digest_init(struct crypto_tfm *tfm)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-- 
GitLab


From 5c64097aa0f6dc4f27718ef47ca9a12538d62860 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 12 Aug 2006 21:56:17 +1000
Subject: [PATCH 0310/1063] [CRYPTO] scatterwalk: Prepare for block ciphers

This patch prepares the scatterwalk code for use by the new block cipher
type.

Firstly it halves the size of scatter_walk on 32-bit platforms.  This
is important as we allocate at least two of these objects on the stack
for each block cipher operation.

It also exports the symbols since the block cipher code can be built as
a module.

Finally there is a hack in scatterwalk_unmap that relies on progress
being made.  Unfortunately, for hardware crypto we can't guarantee
progress to be made since the hardware can fail.

So this also gets rid of the hack by not advancing the address returned
by scatterwalk_map.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/cipher.c         | 27 ++++++-------
 crypto/scatterwalk.c    | 89 ++++++++++++++++++-----------------------
 crypto/scatterwalk.h    | 48 +++++++++++++---------
 include/crypto/algapi.h |  5 +++
 4 files changed, 87 insertions(+), 82 deletions(-)

diff --git a/crypto/cipher.c b/crypto/cipher.c
index d8ca0ec8d0be9..3264617806735 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -45,15 +45,10 @@ static unsigned int crypt_slow(const struct cipher_desc *desc,
 	u8 buffer[bsize * 2 + alignmask];
 	u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
 	u8 *dst = src + bsize;
-	unsigned int n;
-
-	n = scatterwalk_copychunks(src, in, bsize, 0);
-	scatterwalk_advance(in, n);
 
+	scatterwalk_copychunks(src, in, bsize, 0);
 	desc->prfn(desc, dst, src, bsize);
-
-	n = scatterwalk_copychunks(dst, out, bsize, 1);
-	scatterwalk_advance(out, n);
+	scatterwalk_copychunks(dst, out, bsize, 1);
 
 	return bsize;
 }
@@ -64,12 +59,16 @@ static inline unsigned int crypt_fast(const struct cipher_desc *desc,
 				      unsigned int nbytes, u8 *tmp)
 {
 	u8 *src, *dst;
+	u8 *real_src, *real_dst;
+
+	real_src = scatterwalk_map(in, 0);
+	real_dst = scatterwalk_map(out, 1);
 
-	src = in->data;
-	dst = scatterwalk_samebuf(in, out) ? src : out->data;
+	src = real_src;
+	dst = scatterwalk_samebuf(in, out) ? src : real_dst;
 
 	if (tmp) {
-		memcpy(tmp, in->data, nbytes);
+		memcpy(tmp, src, nbytes);
 		src = tmp;
 		dst = tmp;
 	}
@@ -77,7 +76,10 @@ static inline unsigned int crypt_fast(const struct cipher_desc *desc,
 	nbytes = desc->prfn(desc, dst, src, nbytes);
 
 	if (tmp)
-		memcpy(out->data, tmp, nbytes);
+		memcpy(real_dst, tmp, nbytes);
+
+	scatterwalk_unmap(real_src, 0);
+	scatterwalk_unmap(real_dst, 1);
 
 	scatterwalk_advance(in, nbytes);
 	scatterwalk_advance(out, nbytes);
@@ -126,9 +128,6 @@ static int crypt(const struct cipher_desc *desc,
 			tmp = (u8 *)buffer;
 		}
 
-		scatterwalk_map(&walk_in, 0);
-		scatterwalk_map(&walk_out, 1);
-
 		n = scatterwalk_clamp(&walk_in, n);
 		n = scatterwalk_clamp(&walk_out, n);
 
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 2953e2cc56f08..35172d3f043b4 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -15,9 +15,11 @@
  */
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
+
 #include "internal.h"
 #include "scatterwalk.h"
 
@@ -27,88 +29,77 @@ enum km_type crypto_km_types[] = {
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
 };
+EXPORT_SYMBOL_GPL(crypto_km_types);
 
-static void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out)
+static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out)
 {
-	if (out)
-		memcpy(sgdata, buf, nbytes);
-	else
-		memcpy(buf, sgdata, nbytes);
+	void *src = out ? buf : sgdata;
+	void *dst = out ? sgdata : buf;
+
+	memcpy(dst, src, nbytes);
 }
 
 void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg)
 {
-	unsigned int rest_of_page;
-
 	walk->sg = sg;
 
-	walk->page = sg->page;
-	walk->len_this_segment = sg->length;
-
 	BUG_ON(!sg->length);
 
-	rest_of_page = PAGE_CACHE_SIZE - (sg->offset & (PAGE_CACHE_SIZE - 1));
-	walk->len_this_page = min(sg->length, rest_of_page);
 	walk->offset = sg->offset;
 }
+EXPORT_SYMBOL_GPL(scatterwalk_start);
 
-void scatterwalk_map(struct scatter_walk *walk, int out)
-{
-	walk->data = crypto_kmap(walk->page, out) + walk->offset;
-}
-
-static inline void scatterwalk_unmap(struct scatter_walk *walk, int out)
+void *scatterwalk_map(struct scatter_walk *walk, int out)
 {
-	/* walk->data may be pointing the first byte of the next page;
-	   however, we know we transfered at least one byte.  So,
-	   walk->data - 1 will be a virtual address in the mapped page. */
-	crypto_kunmap(walk->data - 1, out);
+	return crypto_kmap(scatterwalk_page(walk), out) +
+	       offset_in_page(walk->offset);
 }
+EXPORT_SYMBOL_GPL(scatterwalk_map);
 
 static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
 				 unsigned int more)
 {
 	if (out)
-		flush_dcache_page(walk->page);
+		flush_dcache_page(scatterwalk_page(walk));
 
 	if (more) {
-		walk->len_this_segment -= walk->len_this_page;
-
-		if (walk->len_this_segment) {
-			walk->page++;
-			walk->len_this_page = min(walk->len_this_segment,
-						  (unsigned)PAGE_CACHE_SIZE);
-			walk->offset = 0;
-		}
-		else
+		walk->offset += PAGE_SIZE - 1;
+		walk->offset &= PAGE_MASK;
+		if (walk->offset >= walk->sg->offset + walk->sg->length)
 			scatterwalk_start(walk, sg_next(walk->sg));
 	}
 }
 
 void scatterwalk_done(struct scatter_walk *walk, int out, int more)
 {
-	scatterwalk_unmap(walk, out);
-	if (walk->len_this_page == 0 || !more)
+	if (!offset_in_page(walk->offset) || !more)
 		scatterwalk_pagedone(walk, out, more);
 }
+EXPORT_SYMBOL_GPL(scatterwalk_done);
 
-/*
- * Do not call this unless the total length of all of the fragments
- * has been verified as multiple of the block size.
- */
-int scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
-			   size_t nbytes, int out)
+void scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
+			    size_t nbytes, int out)
 {
-	while (nbytes > walk->len_this_page) {
-		memcpy_dir(buf, walk->data, walk->len_this_page, out);
-		buf += walk->len_this_page;
-		nbytes -= walk->len_this_page;
+	for (;;) {
+		unsigned int len_this_page = scatterwalk_pagelen(walk);
+		u8 *vaddr;
+
+		if (len_this_page > nbytes)
+			len_this_page = nbytes;
+
+		vaddr = scatterwalk_map(walk, out);
+		memcpy_dir(buf, vaddr, len_this_page, out);
+		scatterwalk_unmap(vaddr, out);
+
+		if (nbytes == len_this_page)
+			break;
+
+		buf += len_this_page;
+		nbytes -= len_this_page;
 
-		scatterwalk_unmap(walk, out);
 		scatterwalk_pagedone(walk, out, 1);
-		scatterwalk_map(walk, out);
 	}
 
-	memcpy_dir(buf, walk->data, nbytes, out);
-	return nbytes;
+	scatterwalk_advance(walk, nbytes);
 }
+EXPORT_SYMBOL_GPL(scatterwalk_copychunks);
diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index e79925c474a3a..ace595a2e1191 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -14,17 +14,11 @@
 
 #ifndef _CRYPTO_SCATTERWALK_H
 #define _CRYPTO_SCATTERWALK_H
+
 #include <linux/mm.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
-struct scatter_walk {
-	struct scatterlist	*sg;
-	struct page		*page;
-	void			*data;
-	unsigned int		len_this_page;
-	unsigned int		len_this_segment;
-	unsigned int		offset;
-};
+#include "internal.h"
 
 /* Define sg_next is an inline routine now in case we want to change
    scatterlist to a linked list later. */
@@ -33,26 +27,31 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg)
 	return sg + 1;
 }
 
-static inline int scatterwalk_samebuf(struct scatter_walk *walk_in,
-				      struct scatter_walk *walk_out)
+static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,
+						struct scatter_walk *walk_out)
 {
-	return walk_in->page == walk_out->page &&
-	       walk_in->offset == walk_out->offset;
+	return !(((walk_in->sg->page - walk_out->sg->page) << PAGE_SHIFT) +
+		 (int)(walk_in->offset - walk_out->offset));
+}
+
+static inline unsigned int scatterwalk_pagelen(struct scatter_walk *walk)
+{
+	unsigned int len = walk->sg->offset + walk->sg->length - walk->offset;
+	unsigned int len_this_page = offset_in_page(~walk->offset) + 1;
+	return len_this_page > len ? len : len_this_page;
 }
 
 static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk,
 					     unsigned int nbytes)
 {
-	return nbytes > walk->len_this_page ? walk->len_this_page : nbytes;
+	unsigned int len_this_page = scatterwalk_pagelen(walk);
+	return nbytes > len_this_page ? len_this_page : nbytes;
 }
 
 static inline void scatterwalk_advance(struct scatter_walk *walk,
 				       unsigned int nbytes)
 {
-	walk->data += nbytes;
 	walk->offset += nbytes;
-	walk->len_this_page -= nbytes;
-	walk->len_this_segment -= nbytes;
 }
 
 static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk,
@@ -61,9 +60,20 @@ static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk,
 	return !(walk->offset & alignmask);
 }
 
+static inline struct page *scatterwalk_page(struct scatter_walk *walk)
+{
+	return walk->sg->page + (walk->offset >> PAGE_SHIFT);
+}
+
+static inline void scatterwalk_unmap(void *vaddr, int out)
+{
+	crypto_kunmap(vaddr, out);
+}
+
 void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg);
-int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out);
-void scatterwalk_map(struct scatter_walk *walk, int out);
+void scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
+			    size_t nbytes, int out);
+void *scatterwalk_map(struct scatter_walk *walk, int out);
 void scatterwalk_done(struct scatter_walk *walk, int out, int more);
 
 #endif  /* _CRYPTO_SCATTERWALK_H */
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 6f9fb27b2071f..f21ae672e8a8a 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -50,6 +50,11 @@ struct crypto_spawn {
 	struct crypto_instance *inst;
 };
 
+struct scatter_walk {
+	struct scatterlist *sg;
+	unsigned int offset;
+};
+
 int crypto_register_template(struct crypto_template *tmpl);
 void crypto_unregister_template(struct crypto_template *tmpl);
 struct crypto_template *crypto_lookup_template(const char *name);
-- 
GitLab


From 5cde0af2a9825dd1edaca233bd9590566579ef21 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Aug 2006 00:07:53 +1000
Subject: [PATCH 0311/1063] [CRYPTO] cipher: Added block cipher type

This patch adds the new type of block ciphers.  Unlike current cipher
algorithms which operate on a single block at a time, block ciphers
operate on an arbitrarily long linear area of data.  As it is block-based,
it will skip any data remaining at the end which cannot form a block.

The block cipher has one major difference when compared to the existing
block cipher implementation.  The sg walking is now performed by the
algorithm rather than the cipher mid-layer.  This is needed for drivers
that directly support sg lists.  It also improves performance for all
algorithms as it reduces the total number of indirect calls by one.

In future the existing cipher algorithm will be converted to only have
a single-block interface.  This will be done after all existing users
have switched over to the new block cipher type.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig          |   4 +
 crypto/Makefile         |   2 +
 crypto/blkcipher.c      | 405 ++++++++++++++++++++++++++++++++++++++++
 include/crypto/algapi.h |  65 +++++++
 include/linux/crypto.h  | 179 ++++++++++++++++++
 5 files changed, 655 insertions(+)
 create mode 100644 crypto/blkcipher.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 4ce509dba329f..68790ad7308de 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -16,6 +16,10 @@ config CRYPTO_ALGAPI
 	help
 	  This option provides the API for cryptographic algorithms.
 
+config CRYPTO_BLKCIPHER
+	tristate
+	select CRYPTO_ALGAPI
+
 config CRYPTO_MANAGER
 	tristate "Cryptographic algorithm manager"
 	select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index b8745f3d35950..b5051951c636b 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -8,6 +8,8 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o
 crypto_algapi-objs := algapi.o $(crypto_algapi-y)
 obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o
 
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o
+
 obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
new file mode 100644
index 0000000000000..034c939bf91a2
--- /dev/null
+++ b/crypto/blkcipher.c
@@ -0,0 +1,405 @@
+/*
+ * Block chaining cipher operations.
+ * 
+ * Generic encrypt/decrypt wrapper for ciphers, handles operations across
+ * multiple page boundaries by using temporary blocks.  In user context,
+ * the kernel is given a chance to schedule us once per page.
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "internal.h"
+#include "scatterwalk.h"
+/* Bits kept in blkcipher_walk.flags describing the current walk step. */
+enum {
+	BLKCIPHER_WALK_PHYS = 1 << 0,	/* set by blkcipher_walk_phys() */
+	BLKCIPHER_WALK_SLOW = 1 << 1,	/* step bounces a single block */
+	BLKCIPHER_WALK_COPY = 1 << 2,	/* step staged in walk->page */
+	BLKCIPHER_WALK_DIFF = 1 << 3,	/* dst mapped separately from src */
+};
+
+static int blkcipher_walk_next(struct blkcipher_desc *desc,
+			       struct blkcipher_walk *walk);
+static int blkcipher_walk_first(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk);
+/* Map the current input position and store the address in src.virt.addr. */
+static inline void blkcipher_map_src(struct blkcipher_walk *walk)
+{
+	walk->src.virt.addr = scatterwalk_map(&walk->in, 0);
+}
+/* Map the current output position and store the address in dst.virt.addr. */
+static inline void blkcipher_map_dst(struct blkcipher_walk *walk)
+{
+	walk->dst.virt.addr = scatterwalk_map(&walk->out, 1);
+}
+/* Undo blkcipher_map_src() for the address held in src.virt.addr. */
+static inline void blkcipher_unmap_src(struct blkcipher_walk *walk)
+{
+	scatterwalk_unmap(walk->src.virt.addr, 0);
+}
+/* Undo blkcipher_map_dst() for the address held in dst.virt.addr. */
+static inline void blkcipher_unmap_dst(struct blkcipher_walk *walk)
+{
+	scatterwalk_unmap(walk->dst.virt.addr, 1);
+}
+/* Return the first len-byte spot at or after start that stays in one page. */
+static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
+{
+	/* Page holding the last byte; start + len itself is one past the end. */
+	u8 *end_page = (u8 *)((unsigned long)(start + len - 1) & PAGE_MASK);
+	return start > end_page ? start : end_page;
+}
+/* Finish a slow step: copy the block from the bounce buffer to the output. */
+static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm,
+					       struct blkcipher_walk *walk,
+					       unsigned int bsize)
+{
+	u8 *addr;
+	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
+	/* Same computation blkcipher_next_slow() used for dst.virt.addr. */
+	addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	addr = blkcipher_get_spot(addr, bsize);
+	scatterwalk_copychunks(addr, &walk->out, bsize, 1);
+	return bsize;
+}
+/* Finish a fast or copy step; n is the byte count the caller left undone. */
+static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
+					       unsigned int n)
+{
+	n = walk->nbytes - n;
+	/* From here on n is the number of bytes consumed in this step. */
+	if (walk->flags & BLKCIPHER_WALK_COPY) {
+		blkcipher_map_dst(walk);
+		memcpy(walk->dst.virt.addr, walk->page, n);
+		blkcipher_unmap_dst(walk);
+	} else if (!(walk->flags & BLKCIPHER_WALK_PHYS)) {
+		blkcipher_unmap_src(walk);
+		if (walk->flags & BLKCIPHER_WALK_DIFF)
+			blkcipher_unmap_dst(walk);
+	}
+
+	scatterwalk_advance(&walk->in, n);
+	scatterwalk_advance(&walk->out, n);
+
+	return n;
+}
+/* End one walk step: err >= 0 is the unprocessed byte count, < 0 an error. */
+int blkcipher_walk_done(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk, int err)
+{
+	struct crypto_blkcipher *tfm = desc->tfm;
+	unsigned int nbytes = 0;
+
+	if (likely(err >= 0)) {
+		unsigned int bsize = crypto_blkcipher_blocksize(tfm);
+		unsigned int n;
+
+		if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW)))
+			n = blkcipher_done_fast(walk, err);
+		else
+			n = blkcipher_done_slow(tfm, walk, bsize);
+
+		nbytes = walk->total - n;
+		err = 0;
+	}
+	/* nbytes doubles as the "more" flag: zero on completion or error. */
+	scatterwalk_done(&walk->in, 0, nbytes);
+	scatterwalk_done(&walk->out, 1, nbytes);
+
+	walk->total = nbytes;
+	walk->nbytes = nbytes;
+
+	if (nbytes) {
+		crypto_yield(desc->flags);
+		return blkcipher_walk_next(desc, walk);
+	}
+	/* Done or failed: copy back a relocated IV and free scratch memory. */
+	if (walk->iv != desc->info)
+		memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm));
+	if (walk->buffer != walk->page)
+		kfree(walk->buffer);
+	if (walk->page)
+		free_page((unsigned long)walk->page);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(blkcipher_walk_done);
+/* Slow step: bounce one block that spans a page or segment edge. */
+static inline int blkcipher_next_slow(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      unsigned int bsize,
+				      unsigned int alignmask)
+{
+	unsigned int n;
+
+	if (walk->buffer)
+		goto ok;
+
+	walk->buffer = walk->page;
+	if (walk->buffer)
+		goto ok;
+
+	/*
+	 * Room for dst and src, up to one more block that
+	 * blkcipher_get_spot() may skip, and slack to realign the buffer.
+	 */
+	n = bsize * 3 + (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
+	walk->buffer = kmalloc(n, GFP_ATOMIC);
+	if (!walk->buffer)
+		return blkcipher_walk_done(desc, walk, -ENOMEM);
+ok:
+	walk->dst.virt.addr = (u8 *)ALIGN((unsigned long)walk->buffer,
+					  alignmask + 1);
+	walk->dst.virt.addr = blkcipher_get_spot(walk->dst.virt.addr, bsize);
+	walk->src.virt.addr = blkcipher_get_spot(walk->dst.virt.addr + bsize,
+						 bsize);
+	scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0);
+	walk->nbytes = bsize;
+	walk->flags |= BLKCIPHER_WALK_SLOW;
+	return 0;
+}
+/* Copy step: stage walk->nbytes input bytes in walk->page, src == dst. */
+static inline int blkcipher_next_copy(struct blkcipher_walk *walk)
+{
+	u8 *tmp = walk->page;
+
+	blkcipher_map_src(walk);
+	memcpy(tmp, walk->src.virt.addr, walk->nbytes);
+	blkcipher_unmap_src(walk);
+	/* Both addresses point at the staging page; done_fast copies it out. */
+	walk->src.virt.addr = tmp;
+	walk->dst.virt.addr = tmp;
+
+	return 0;
+}
+/* Fast step: record src/dst page and offset, mapping them unless PHYS. */
+static inline int blkcipher_next_fast(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk)
+{
+	unsigned long diff;
+
+	walk->src.phys.page = scatterwalk_page(&walk->in);
+	walk->src.phys.offset = offset_in_page(walk->in.offset);
+	walk->dst.phys.page = scatterwalk_page(&walk->out);
+	walk->dst.phys.offset = offset_in_page(walk->out.offset);
+
+	if (walk->flags & BLKCIPHER_WALK_PHYS)
+		return 0;
+	/* Non-zero iff in and out differ in page or in-page offset. */
+	diff = walk->src.phys.offset - walk->dst.phys.offset;
+	diff |= walk->src.virt.page - walk->dst.virt.page;
+	/* Mapping src overwrites the union, so diff had to be taken first. */
+	blkcipher_map_src(walk);
+	walk->dst.virt.addr = walk->src.virt.addr;
+
+	if (diff) {
+		walk->flags |= BLKCIPHER_WALK_DIFF;
+		blkcipher_map_dst(walk);
+	}
+
+	return 0;
+}
+/* Set up the next step of the walk via the slow, copy or fast path. */
+static int blkcipher_walk_next(struct blkcipher_desc *desc,
+			       struct blkcipher_walk *walk)
+{
+	struct crypto_blkcipher *tfm = desc->tfm;
+	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
+	unsigned int bsize = crypto_blkcipher_blocksize(tfm);
+	unsigned int n;
+	int err;
+
+	n = walk->total;
+	if (unlikely(n < bsize)) {
+		desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
+		return blkcipher_walk_done(desc, walk, -EINVAL);
+	}
+	/* Misaligned offsets need a bounce page; with none, n = 0 forces slow. */
+	walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
+			 BLKCIPHER_WALK_DIFF);
+	if (!scatterwalk_aligned(&walk->in, alignmask) ||
+	    !scatterwalk_aligned(&walk->out, alignmask)) {
+		walk->flags |= BLKCIPHER_WALK_COPY;
+		if (!walk->page) {
+			walk->page = (void *)__get_free_page(GFP_ATOMIC);
+			if (!walk->page)
+				n = 0;
+		}
+	}
+	/* Clamp the step to what is left of the current in and out pages. */
+	n = scatterwalk_clamp(&walk->in, n);
+	n = scatterwalk_clamp(&walk->out, n);
+	/* No whole block available in one piece: bounce a single block. */
+	if (unlikely(n < bsize)) {
+		err = blkcipher_next_slow(desc, walk, bsize, alignmask);
+		goto set_phys_lowmem;
+	}
+
+	walk->nbytes = n;
+	if (walk->flags & BLKCIPHER_WALK_COPY) {
+		err = blkcipher_next_copy(walk);
+		goto set_phys_lowmem;
+	}
+
+	return blkcipher_next_fast(desc, walk);
+	/* PHYS on bounce memory: phys.offset overlays virt.addr, so mask it. */
+set_phys_lowmem:
+	if (walk->flags & BLKCIPHER_WALK_PHYS) {
+		walk->src.phys.page = virt_to_page(walk->src.virt.addr);
+		walk->dst.phys.page = virt_to_page(walk->dst.virt.addr);
+		walk->src.phys.offset &= PAGE_SIZE - 1;
+		walk->dst.phys.offset &= PAGE_SIZE - 1;
+	}
+	return err;
+}
+/* Copy walk->iv into a newly allocated buffer at an aligned address. */
+static inline int blkcipher_copy_iv(struct blkcipher_walk *walk,
+				    struct crypto_blkcipher *tfm,
+				    unsigned int alignmask)
+{
+	unsigned bs = crypto_blkcipher_blocksize(tfm);
+	unsigned int ivsize = crypto_blkcipher_ivsize(tfm);
+	unsigned int size = bs * 2 + ivsize + max(bs, ivsize) - (alignmask + 1);
+	u8 *iv;
+	/* NOTE(review): presumably slack for realigning the kmalloc result. */
+	size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);
+	walk->buffer = kmalloc(size, GFP_ATOMIC);
+	if (!walk->buffer)
+		return -ENOMEM;
+	/* Step over two block-sized spots, then pick a spot for the IV. */
+	iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	iv = blkcipher_get_spot(iv, bs) + bs;
+	iv = blkcipher_get_spot(iv, bs) + bs;
+	iv = blkcipher_get_spot(iv, ivsize);
+
+	walk->iv = memcpy(iv, walk->iv, ivsize);
+	return 0;
+}
+/* Start a walk with BLKCIPHER_WALK_PHYS cleared (mapped addresses). */
+int blkcipher_walk_virt(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk)
+{
+	walk->flags &= ~BLKCIPHER_WALK_PHYS;
+	return blkcipher_walk_first(desc, walk);
+}
+EXPORT_SYMBOL_GPL(blkcipher_walk_virt);
+/* Start a walk with BLKCIPHER_WALK_PHYS set (page/offset pairs). */
+int blkcipher_walk_phys(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk)
+{
+	walk->flags |= BLKCIPHER_WALK_PHYS;
+	return blkcipher_walk_first(desc, walk);
+}
+EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
+/* Common start: set up the IV and both scatterwalks, then the first step. */
+static int blkcipher_walk_first(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct crypto_blkcipher *tfm = desc->tfm;
+	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
+	/* Nothing to set up for an empty request; walk->nbytes stays 0. */
+	walk->nbytes = walk->total;
+	if (unlikely(!walk->total))
+		return 0;
+	/* A misaligned IV is copied into walk->buffer (freed in walk_done). */
+	walk->buffer = NULL;
+	walk->iv = desc->info;
+	if (unlikely(((unsigned long)walk->iv & alignmask))) {
+		int err = blkcipher_copy_iv(walk, tfm, alignmask);
+		if (err)
+			return err;
+	}
+
+	scatterwalk_start(&walk->in, walk->in.sg);
+	scatterwalk_start(&walk->out, walk->out.sg);
+	walk->page = NULL;
+
+	return blkcipher_walk_next(desc, walk);
+}
+/* Reject key lengths outside [min_keysize, max_keysize], else set the key. */
+static int setkey(struct crypto_tfm *tfm, const u8 *key,
+		  unsigned int keylen)
+{
+	struct blkcipher_alg *cipher = &tfm->__crt_alg->cra_blkcipher;
+
+	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	return cipher->setkey(tfm, key, keylen);
+}
+/* Context size: cra_ctxsize, plus aligned room for an IV if there is one. */
+static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg)
+{
+	struct blkcipher_alg *cipher = &alg->cra_blkcipher;
+	unsigned int len = alg->cra_ctxsize;
+
+	if (cipher->ivsize) {
+		len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1);
+		len += cipher->ivsize;
+	}
+
+	return len;
+}
+/* Fill in the tfm's blkcipher ops and point crt->iv just past the context. */
+static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm)
+{
+	struct blkcipher_tfm *crt = &tfm->crt_blkcipher;
+	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
+	unsigned long align = crypto_tfm_alg_alignmask(tfm) + 1;
+	unsigned long addr;
+
+	if (alg->ivsize > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	crt->setkey = setkey;
+	crt->encrypt = alg->encrypt;
+	crt->decrypt = alg->decrypt;
+	/* Mirrors crypto_blkcipher_ctxsize(): IV follows the aligned context. */
+	addr = (unsigned long)crypto_tfm_ctx(tfm);
+	addr = ALIGN(addr, align);
+	addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align);
+	crt->iv = (void *)addr;
+
+	return 0;
+}
+/* seq_file show hook; marked used since .show is only set with PROC_FS. */
+static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute_used__;
+static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_printf(m, "type         : blkcipher\n");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "min keysize  : %u\n", alg->cra_blkcipher.min_keysize);
+	seq_printf(m, "max keysize  : %u\n", alg->cra_blkcipher.max_keysize);
+	seq_printf(m, "ivsize       : %u\n", alg->cra_blkcipher.ivsize);
+}
+/* Type hooks for block ciphers: context sizing, tfm init and show. */
+const struct crypto_type crypto_blkcipher_type = {
+	.ctxsize = crypto_blkcipher_ctxsize,
+	.init = crypto_init_blkcipher_ops,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_blkcipher_show,
+#endif
+};
+EXPORT_SYMBOL_GPL(crypto_blkcipher_type);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic block chaining cipher type");
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index f21ae672e8a8a..f3946baf0c078 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -55,6 +55,34 @@ struct scatter_walk {
 	unsigned int offset;
 };
 
+struct blkcipher_walk {
+	union {
+		struct {
+			struct page *page;
+			unsigned long offset;
+		} phys;
+		/* virt overlays phys: only one view is valid at a time. */
+		struct {
+			u8 *page;
+			u8 *addr;
+		} virt;
+	} src, dst;
+
+	struct scatter_walk in;
+	unsigned int nbytes;	/* bytes offered to the caller this step */
+
+	struct scatter_walk out;
+	unsigned int total;	/* bytes still to be walked overall */
+
+	void *page;		/* bounce page for misaligned data */
+	u8 *buffer;		/* scratch for slow path / aligned IV copy */
+	u8 *iv;			/* IV in use; may point into buffer */
+
+	int flags;		/* BLKCIPHER_WALK_* bits */
+};
+
+extern const struct crypto_type crypto_blkcipher_type;
+
 int crypto_register_template(struct crypto_template *tmpl);
 void crypto_unregister_template(struct crypto_template *tmpl);
 struct crypto_template *crypto_lookup_template(const char *name);
@@ -69,15 +97,52 @@ struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len,
 struct crypto_instance *crypto_alloc_instance(const char *name,
 					      struct crypto_alg *alg);
 
+int blkcipher_walk_done(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk, int err);
+int blkcipher_walk_virt(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk);
+int blkcipher_walk_phys(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk);
+
+static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm)
+{
+	unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm);
+	unsigned long align = crypto_tfm_alg_alignmask(tfm);
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return (void *)ALIGN(addr, align);
+}
+
 static inline void *crypto_instance_ctx(struct crypto_instance *inst)
 {
 	return inst->__ctx;
 }
 
+static inline void *crypto_blkcipher_ctx(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_ctx(&tfm->base);
+}
+
+static inline void *crypto_blkcipher_ctx_aligned(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_ctx_aligned(&tfm->base);
+}
+
 static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm)
 {
 	return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher;
 }
 
+static inline void blkcipher_walk_init(struct blkcipher_walk *walk,
+				       struct scatterlist *dst,
+				       struct scatterlist *src,
+				       unsigned int nbytes)
+{
+	walk->in.sg = src;
+	walk->out.sg = dst;
+	walk->total = nbytes;
+}
+
 #endif	/* _CRYPTO_ALGAPI_H */
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index fdecee83878ca..5a5466d518e8e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -32,6 +32,7 @@
 #define CRYPTO_ALG_TYPE_MASK		0x0000000f
 #define CRYPTO_ALG_TYPE_CIPHER		0x00000001
 #define CRYPTO_ALG_TYPE_DIGEST		0x00000002
+#define CRYPTO_ALG_TYPE_BLKCIPHER	0x00000003
 #define CRYPTO_ALG_TYPE_COMPRESS	0x00000004
 
 #define CRYPTO_ALG_LARVAL		0x00000010
@@ -89,9 +90,16 @@
 #endif
 
 struct scatterlist;
+struct crypto_blkcipher;
 struct crypto_tfm;
 struct crypto_type;
 
+struct blkcipher_desc {
+	struct crypto_blkcipher *tfm;
+	void *info;
+	u32 flags;
+};
+
 struct cipher_desc {
 	struct crypto_tfm *tfm;
 	void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
@@ -104,6 +112,21 @@ struct cipher_desc {
  * Algorithms: modular crypto algorithm implementations, managed
  * via crypto_register_alg() and crypto_unregister_alg().
  */
+struct blkcipher_alg {
+	int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
+	              unsigned int keylen);
+	int (*encrypt)(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes);
+	int (*decrypt)(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes);
+
+	unsigned int min_keysize;
+	unsigned int max_keysize;
+	unsigned int ivsize;
+};
+
 struct cipher_alg {
 	unsigned int cia_min_keysize;
 	unsigned int cia_max_keysize;
@@ -143,6 +166,7 @@ struct compress_alg {
 			      unsigned int slen, u8 *dst, unsigned int *dlen);
 };
 
+#define cra_blkcipher	cra_u.blkcipher
 #define cra_cipher	cra_u.cipher
 #define cra_digest	cra_u.digest
 #define cra_compress	cra_u.compress
@@ -165,6 +189,7 @@ struct crypto_alg {
 	const struct crypto_type *cra_type;
 
 	union {
+		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
 		struct digest_alg digest;
 		struct compress_alg compress;
@@ -201,6 +226,16 @@ static inline int crypto_alg_available(const char *name, u32 flags)
  * crypto_free_*(), as well as the various helpers below.
  */
 
+struct blkcipher_tfm {
+	void *iv;
+	int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
+		      unsigned int keylen);
+	int (*encrypt)(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes);
+	int (*decrypt)(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes);
+};
+
 struct cipher_tfm {
 	void *cit_iv;
 	unsigned int cit_ivsize;
@@ -251,6 +286,7 @@ struct compress_tfm {
 	                      u8 *dst, unsigned int *dlen);
 };
 
+#define crt_blkcipher	crt_u.blkcipher
 #define crt_cipher	crt_u.cipher
 #define crt_digest	crt_u.digest
 #define crt_compress	crt_u.compress
@@ -260,6 +296,7 @@ struct crypto_tfm {
 	u32 crt_flags;
 	
 	union {
+		struct blkcipher_tfm blkcipher;
 		struct cipher_tfm cipher;
 		struct digest_tfm digest;
 		struct compress_tfm compress;
@@ -272,6 +309,10 @@ struct crypto_tfm {
 
 #define crypto_cipher crypto_tfm
 
+struct crypto_blkcipher {
+	struct crypto_tfm base;
+};
+
 enum {
 	CRYPTOA_UNSPEC,
 	CRYPTOA_ALG,
@@ -380,6 +421,144 @@ static inline unsigned int crypto_tfm_ctx_alignment(void)
 /*
  * API wrappers.
  */
+static inline struct crypto_blkcipher *__crypto_blkcipher_cast(
+	struct crypto_tfm *tfm)
+{
+	return (struct crypto_blkcipher *)tfm;
+}
+
+static inline struct crypto_blkcipher *crypto_blkcipher_cast(
+	struct crypto_tfm *tfm)
+{
+	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_BLKCIPHER);
+	return __crypto_blkcipher_cast(tfm);
+}
+
+static inline struct crypto_blkcipher *crypto_alloc_blkcipher(
+	const char *alg_name, u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_BLKCIPHER;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return __crypto_blkcipher_cast(crypto_alloc_base(alg_name, type, mask));
+}
+
+static inline struct crypto_tfm *crypto_blkcipher_tfm(
+	struct crypto_blkcipher *tfm)
+{
+	return &tfm->base;
+}
+
+static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm)
+{
+	crypto_free_tfm(crypto_blkcipher_tfm(tfm));
+}
+
+static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm));
+}
+
+static inline struct blkcipher_tfm *crypto_blkcipher_crt(
+	struct crypto_blkcipher *tfm)
+{
+	return &crypto_blkcipher_tfm(tfm)->crt_blkcipher;
+}
+
+static inline struct blkcipher_alg *crypto_blkcipher_alg(
+	struct crypto_blkcipher *tfm)
+{
+	return &crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher;
+}
+
+static inline unsigned int crypto_blkcipher_ivsize(struct crypto_blkcipher *tfm)
+{
+	return crypto_blkcipher_alg(tfm)->ivsize;
+}
+
+static inline unsigned int crypto_blkcipher_blocksize(
+	struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_blkcipher_tfm(tfm));
+}
+
+static inline unsigned int crypto_blkcipher_alignmask(
+	struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_blkcipher_tfm(tfm));
+}
+
+static inline u32 crypto_blkcipher_get_flags(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_get_flags(crypto_blkcipher_tfm(tfm));
+}
+
+static inline void crypto_blkcipher_set_flags(struct crypto_blkcipher *tfm,
+					      u32 flags)
+{
+	crypto_tfm_set_flags(crypto_blkcipher_tfm(tfm), flags);
+}
+
+static inline void crypto_blkcipher_clear_flags(struct crypto_blkcipher *tfm,
+						u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_blkcipher_tfm(tfm), flags);
+}
+
+static inline int crypto_blkcipher_setkey(struct crypto_blkcipher *tfm,
+					  const u8 *key, unsigned int keylen)
+{
+	return crypto_blkcipher_crt(tfm)->setkey(crypto_blkcipher_tfm(tfm),
+						 key, keylen);
+}
+
+static inline int crypto_blkcipher_encrypt(struct blkcipher_desc *desc,
+					   struct scatterlist *dst,
+					   struct scatterlist *src,
+					   unsigned int nbytes)
+{
+	desc->info = crypto_blkcipher_crt(desc->tfm)->iv;
+	return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
+}
+
+static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc,
+					      struct scatterlist *dst,
+					      struct scatterlist *src,
+					      unsigned int nbytes)
+{
+	return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
+}
+
+static inline int crypto_blkcipher_decrypt(struct blkcipher_desc *desc,
+					   struct scatterlist *dst,
+					   struct scatterlist *src,
+					   unsigned int nbytes)
+{
+	desc->info = crypto_blkcipher_crt(desc->tfm)->iv;
+	return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes);
+}
+
+static inline int crypto_blkcipher_decrypt_iv(struct blkcipher_desc *desc,
+					      struct scatterlist *dst,
+					      struct scatterlist *src,
+					      unsigned int nbytes)
+{
+	return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes);
+}
+
+static inline void crypto_blkcipher_set_iv(struct crypto_blkcipher *tfm,
+					   const u8 *src, unsigned int len)
+{
+	memcpy(crypto_blkcipher_crt(tfm)->iv, src, len);
+}
+
+static inline void crypto_blkcipher_get_iv(struct crypto_blkcipher *tfm,
+					   u8 *dst, unsigned int len)
+{
+	memcpy(dst, crypto_blkcipher_crt(tfm)->iv, len);
+}
+
 static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm)
 {
 	return (struct crypto_cipher *)tfm;
-- 
GitLab


From db131ef9084110d9e82549c0a627e157e8bb99d7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 21 Sep 2006 11:44:08 +1000
Subject: [PATCH 0312/1063] [CRYPTO] cipher: Added block ciphers for CBC/ECB

This patch adds two block cipher algorithms, CBC and ECB.  These
are implemented as templates on top of existing single-block cipher
algorithms.  They invoke the single-block cipher through the new
encrypt_one/decrypt_one interface.

This also optimises the in-place encryption and decryption to remove
the cost of an IV copy each round.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig          |  17 ++
 crypto/Makefile         |   2 +
 crypto/cbc.c            | 344 ++++++++++++++++++++++++++++++++++++++++
 crypto/ecb.c            | 181 +++++++++++++++++++++
 crypto/internal.h       |   1 -
 include/crypto/algapi.h |   2 +
 6 files changed, 546 insertions(+), 1 deletion(-)
 create mode 100644 crypto/cbc.c
 create mode 100644 crypto/ecb.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 68790ad7308de..90d467c99c2cd 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -123,6 +123,23 @@ config CRYPTO_TGR192
 	  See also:
 	  <http://www.cs.technion.ac.il/~biham/Reports/Tiger/>.
 
+config CRYPTO_ECB
+	tristate "ECB support"
+	select CRYPTO_BLKCIPHER
+	default m
+	help
+	  ECB: Electronic CodeBook mode
+	  This is the simplest block cipher algorithm.  It simply encrypts
+	  the input block by block.
+
+config CRYPTO_CBC
+	tristate "CBC support"
+	select CRYPTO_BLKCIPHER
+	default m
+	help
+	  CBC: Cipher Block Chaining mode
+	  This block cipher algorithm is required for IPSec.
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index b5051951c636b..5e1ff4e0b1fcc 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -20,6 +20,8 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256.o
 obj-$(CONFIG_CRYPTO_SHA512) += sha512.o
 obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
+obj-$(CONFIG_CRYPTO_ECB) += ecb.o
+obj-$(CONFIG_CRYPTO_CBC) += cbc.o
 obj-$(CONFIG_CRYPTO_DES) += des.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
 obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
diff --git a/crypto/cbc.c b/crypto/cbc.c
new file mode 100644
index 0000000000000..f5542b4db387e
--- /dev/null
+++ b/crypto/cbc.c
@@ -0,0 +1,344 @@
+/*
+ * CBC: Cipher Block Chaining mode
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct crypto_cbc_ctx {
+	struct crypto_cipher *child;
+	void (*xor)(u8 *dst, const u8 *src, unsigned int bs);
+};
+
+static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
+				     CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-encrypt the whole blocks of one walk chunk from src into a separate dst. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_encrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		xor(iv, src, bsize);	/* iv ^= plaintext block; src itself is not written */
+		fn(crypto_cipher_tfm(tfm), dst, iv);	/* dst = E(iv ^ plaintext) */
+		memcpy(iv, dst, bsize);	/* ciphertext block becomes the next IV */
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	return nbytes;	/* bytes left over, fewer than one block */
+}
+
+static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-encrypt the whole blocks of one walk chunk, overwriting the source buffer. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_encrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		xor(src, iv, bsize);	/* plaintext ^= previous ciphertext (walk->iv for the first block) */
+		fn(crypto_cipher_tfm(tfm), src, src);	/* encrypt the block where it lies */
+		iv = src;	/* chain by pointer: no IV copy per block */
+
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);	/* store the last ciphertext block back into walk->iv */
+
+	return nbytes;	/* bytes left over, fewer than one block */
+}
+
+static int crypto_cbc_encrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+	void (*xor)(u8 *, const u8 *, unsigned int bs) = ctx->xor;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child,
+							    xor);
+		else
+			nbytes = crypto_cbc_encrypt_segment(desc, &walk, child,
+							    xor);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-decrypt the whole blocks of one walk chunk from src into a separate dst. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_decrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		fn(crypto_cipher_tfm(tfm), dst, src);	/* dst = D(ciphertext block) */
+		xor(dst, iv, bsize);	/* dst ^= previous ciphertext (walk->iv for the first block) */
+		iv = src;	/* src is never written here, so it can serve as the next IV */
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);	/* store the last ciphertext block back into walk->iv */
+
+	return nbytes;	/* bytes left over, fewer than one block */
+}
+
+static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-decrypt the whole blocks of one walk chunk, overwriting the source buffer. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_decrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned long alignmask = crypto_cipher_alignmask(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 stack[bsize + alignmask];	/* VLA with alignmask bytes of slack so first_iv can be aligned */
+	u8 *first_iv = (u8 *)ALIGN((unsigned long)stack, alignmask + 1);
+
+	memcpy(first_iv, walk->iv, bsize);	/* keep the incoming IV: walk->iv is overwritten below */
+
+	/* Start of the last block; blocks are decrypted back to front. */
+	src += nbytes - nbytes % bsize - bsize;
+	memcpy(walk->iv, src, bsize);	/* save the last ciphertext block before it is decrypted in place */
+
+	for (;;) {
+		fn(crypto_cipher_tfm(tfm), src, src);
+		if ((nbytes -= bsize) < bsize)
+			break;	/* src now points at the first block of the chunk */
+		xor(src, src - bsize, bsize);	/* the preceding block is still ciphertext */
+		src -= bsize;
+	}
+
+	xor(src, first_iv, bsize);	/* the first block chains off the original IV */
+
+	return nbytes;	/* bytes left over, fewer than one block */
+}
+
+static int crypto_cbc_decrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+	void (*xor)(u8 *, const u8 *, unsigned int bs) = ctx->xor;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child,
+							    xor);
+		else
+			nbytes = crypto_cbc_decrypt_segment(desc, &walk, child,
+							    xor);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static void xor_byte(u8 *a, const u8 *b, unsigned int bs)
+{
+	do {
+		*a++ ^= *b++;
+	} while (--bs);
+}
+
+static void xor_quad(u8 *dst, const u8 *src, unsigned int bs)	/* dst ^= src over bs bytes */
+{
+	u32 *a = (u32 *)dst;
+	const u32 *b = (const u32 *)src;	/* read-only operand: do not cast const away */
+
+	do {
+		*a++ ^= *b++;	/* one 32-bit word per step */
+	} while ((bs -= 4));	/* bs must be a non-zero multiple of 4 or this never hits 0 cleanly */
+}
+
+static void xor_64(u8 *a, const u8 *b, unsigned int bs)	/* a ^= b; bs is unused, always 8 bytes */
+{
+	((u32 *)a)[0] ^= ((const u32 *)b)[0];	/* b is read-only: keep its const qualifier */
+	((u32 *)a)[1] ^= ((const u32 *)b)[1];
+}
+
+static void xor_128(u8 *a, const u8 *b, unsigned int bs)	/* a ^= b; bs is unused, always 16 bytes */
+{
+	((u32 *)a)[0] ^= ((const u32 *)b)[0];	/* b is read-only: keep its const qualifier */
+	((u32 *)a)[1] ^= ((const u32 *)b)[1];
+	((u32 *)a)[2] ^= ((const u32 *)b)[2];
+	((u32 *)a)[3] ^= ((const u32 *)b)[3];
+}
+
+static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
+{	/* Choose the XOR helper for this block size, then obtain the child cipher from the spawn. */
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	switch (crypto_tfm_alg_blocksize(tfm)) {
+	case 8:
+		ctx->xor = xor_64;
+		break;
+
+	case 16:
+		ctx->xor = xor_128;
+		break;
+
+	default:
+		if (crypto_tfm_alg_blocksize(tfm) % 4)
+			ctx->xor = xor_byte;	/* not a multiple of 4: go byte by byte */
+		else
+			ctx->xor = xor_quad;	/* multiple of 4: go 32 bits at a time */
+	}
+
+	tfm = crypto_spawn_tfm(spawn);	/* from here on tfm is the child, not the CBC transform */
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	ctx->child = crypto_cipher_cast(tfm);	/* released again in crypto_cbc_exit_tfm() */
+	return 0;
+}
+
+static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_cipher(ctx->child);
+}
+
+static struct crypto_instance *crypto_cbc_alloc(void *param, unsigned int len)
+{	/* Build a "cbc" blkcipher instance around the cipher algorithm obtained from param. */
+	struct crypto_instance *inst;
+	struct crypto_alg *alg;
+
+	alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER,
+				  CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(alg))
+		return ERR_PTR(PTR_ERR(alg));	/* re-wrap the error as a crypto_instance pointer */
+
+	inst = crypto_alloc_instance("cbc", alg);
+	if (IS_ERR(inst))
+		goto out_put_alg;	/* inst itself carries the error back to the caller */
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
+	inst->alg.cra_priority = alg->cra_priority;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+	inst->alg.cra_type = &crypto_blkcipher_type;
+
+	if (!(alg->cra_blocksize % 4))
+		inst->alg.cra_alignmask |= 3;	/* these sizes use the u32-based XOR helpers */
+	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;	/* the IV is one cipher block */
+	inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.cra_ctxsize = sizeof(struct crypto_cbc_ctx);
+
+	inst->alg.cra_init = crypto_cbc_init_tfm;
+	inst->alg.cra_exit = crypto_cbc_exit_tfm;
+
+	inst->alg.cra_blkcipher.setkey = crypto_cbc_setkey;
+	inst->alg.cra_blkcipher.encrypt = crypto_cbc_encrypt;
+	inst->alg.cra_blkcipher.decrypt = crypto_cbc_decrypt;
+
+out_put_alg:	/* reached on success as well as on failure */
+	crypto_mod_put(alg);
+	return inst;
+}
+
+static void crypto_cbc_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));
+	kfree(inst);
+}
+
+static struct crypto_template crypto_cbc_tmpl = {
+	.name = "cbc",
+	.alloc = crypto_cbc_alloc,
+	.free = crypto_cbc_free,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_cbc_module_init(void)
+{
+	return crypto_register_template(&crypto_cbc_tmpl);
+}
+
+static void __exit crypto_cbc_module_exit(void)
+{
+	crypto_unregister_template(&crypto_cbc_tmpl);
+}
+
+module_init(crypto_cbc_module_init);
+module_exit(crypto_cbc_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CBC block cipher algorithm");
diff --git a/crypto/ecb.c b/crypto/ecb.c
new file mode 100644
index 0000000000000..f239aa9c40178
--- /dev/null
+++ b/crypto/ecb.c
@@ -0,0 +1,181 @@
+/*
+ * ECB: Electronic CodeBook mode
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct crypto_ecb_ctx {
+	struct crypto_cipher *child;
+};
+
+static int crypto_ecb_setkey(struct crypto_tfm *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
+				     CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_ecb_crypt(struct blkcipher_desc *desc,
+			    struct blkcipher_walk *walk,
+			    struct crypto_cipher *tfm,
+			    void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+{	/* Apply fn (the child's encrypt or decrypt) to every whole block of the walk. */
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((nbytes = walk->nbytes)) {	/* one chunk of the walk per iteration */
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		do {
+			fn(crypto_cipher_tfm(tfm), wdst, wsrc);	/* blocks are independent in ECB */
+
+			wsrc += bsize;
+			wdst += bsize;
+		} while ((nbytes -= bsize) >= bsize);
+
+		err = blkcipher_walk_done(desc, walk, nbytes);	/* nbytes = bytes left unprocessed */
+	}
+
+	return err;	/* result of the last walk call */
+}
+
+static int crypto_ecb_encrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct crypto_ecb_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return crypto_ecb_crypt(desc, &walk, child,
+				crypto_cipher_alg(child)->cia_encrypt);
+}
+
+static int crypto_ecb_decrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct crypto_ecb_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return crypto_ecb_crypt(desc, &walk, child,
+				crypto_cipher_alg(child)->cia_decrypt);
+}
+
+static int crypto_ecb_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	tfm = crypto_spawn_tfm(spawn);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	ctx->child = crypto_cipher_cast(tfm);
+	return 0;
+}
+
+static void crypto_ecb_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_cipher(ctx->child);
+}
+
+static struct crypto_instance *crypto_ecb_alloc(void *param, unsigned int len)
+{
+	struct crypto_instance *inst;
+	struct crypto_alg *alg;
+
+	alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER,
+				  CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(alg))
+		return ERR_PTR(PTR_ERR(alg));
+
+	inst = crypto_alloc_instance("ecb", alg);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
+	inst->alg.cra_priority = alg->cra_priority;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+	inst->alg.cra_type = &crypto_blkcipher_type;
+
+	inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.cra_ctxsize = sizeof(struct crypto_ecb_ctx);
+
+	inst->alg.cra_init = crypto_ecb_init_tfm;
+	inst->alg.cra_exit = crypto_ecb_exit_tfm;
+
+	inst->alg.cra_blkcipher.setkey = crypto_ecb_setkey;
+	inst->alg.cra_blkcipher.encrypt = crypto_ecb_encrypt;
+	inst->alg.cra_blkcipher.decrypt = crypto_ecb_decrypt;
+
+out_put_alg:
+	crypto_mod_put(alg);
+	return inst;
+}
+
+static void crypto_ecb_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));
+	kfree(inst);
+}
+
+static struct crypto_template crypto_ecb_tmpl = {
+	.name = "ecb",
+	.alloc = crypto_ecb_alloc,
+	.free = crypto_ecb_free,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_ecb_module_init(void)
+{
+	return crypto_register_template(&crypto_ecb_tmpl);
+}
+
+static void __exit crypto_ecb_module_exit(void)
+{
+	crypto_unregister_template(&crypto_ecb_tmpl);
+}
+
+module_init(crypto_ecb_module_init);
+module_exit(crypto_ecb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ECB block cipher algorithm");
diff --git a/crypto/internal.h b/crypto/internal.h
index 7dc04efb55c6c..93d9b10ff9145 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -131,7 +131,6 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg,
 }
 
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
-void crypto_mod_put(struct crypto_alg *alg);
 struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask);
 struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index f3946baf0c078..444f602724db5 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -83,6 +83,8 @@ struct blkcipher_walk {
 
 extern const struct crypto_type crypto_blkcipher_type;
 
+void crypto_mod_put(struct crypto_alg *alg);
+
 int crypto_register_template(struct crypto_template *tmpl);
 void crypto_unregister_template(struct crypto_template *tmpl);
 struct crypto_template *crypto_lookup_template(const char *name);
-- 
GitLab


From 28ce728a90cce3a0c6c0ed00354299de52db94b1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 21:38:42 +1000
Subject: [PATCH 0313/1063] [CRYPTO] padlock: Added block cipher versions of
 CBC/ECB

This patch adds block cipher algorithms for cbc(aes) and ecb(aes) for
the PadLock device.  Once all users of the old cipher type have been
converted the old cbc/ecb PadLock operations will be removed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig       |   1 +
 drivers/crypto/padlock-aes.c | 174 +++++++++++++++++++++++++++++++++--
 drivers/crypto/padlock.h     |   1 +
 3 files changed, 169 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 86c99cd333fae..adb554153f672 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -27,6 +27,7 @@ config CRYPTO_DEV_PADLOCK
 config CRYPTO_DEV_PADLOCK_AES
 	tristate "PadLock driver for AES algorithm"
 	depends on CRYPTO_DEV_PADLOCK
+	select CRYPTO_BLKCIPHER
 	default m
 	help
 	  Use VIA PadLock for AES algorithm.
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 3e683709243e4..f53301e836d98 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -43,11 +43,11 @@
  * ---------------------------------------------------------------------------
  */
 
+#include <crypto/algapi.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <asm/byteorder.h>
@@ -297,9 +297,9 @@ aes_hw_extkey_available(uint8_t key_len)
 	return 0;
 }
 
-static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
+static inline struct aes_ctx *aes_ctx_common(void *ctx)
 {
-	unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm);
+	unsigned long addr = (unsigned long)ctx;
 	unsigned long align = PADLOCK_ALIGNMENT;
 
 	if (align <= crypto_tfm_ctx_alignment())
@@ -307,6 +307,16 @@ static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
 	return (struct aes_ctx *)ALIGN(addr, align);
 }
 
+static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
+{
+	return aes_ctx_common(crypto_tfm_ctx(tfm));
+}
+
+static inline struct aes_ctx *blk_aes_ctx(struct crypto_blkcipher *tfm)
+{
+	return aes_ctx_common(crypto_blkcipher_ctx(tfm));
+}
+
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
@@ -507,6 +517,141 @@ static struct crypto_alg aes_alg = {
 	}
 };
 
+static int ecb_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
+				   ctx->E, &ctx->cword.encrypt,
+				   nbytes / AES_BLOCK_SIZE);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static int ecb_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
+				   ctx->D, &ctx->cword.decrypt,
+				   nbytes / AES_BLOCK_SIZE);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static struct crypto_alg ecb_aes_alg = {
+	.cra_name		=	"ecb(aes)",
+	.cra_driver_name	=	"ecb-aes-padlock",
+	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct aes_ctx),
+	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.setkey	   		= 	aes_set_key,
+			.encrypt		=	ecb_aes_encrypt,
+			.decrypt		=	ecb_aes_decrypt,
+		}
+	}
+};
+
+static int cbc_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{	/* cbc(aes) encryption through padlock_xcrypt_cbc(), one walk chunk at a time. */
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
+					    walk.dst.virt.addr, ctx->E,
+					    walk.iv, &ctx->cword.encrypt,
+					    nbytes / AES_BLOCK_SIZE);	/* count is in whole blocks */
+		memcpy(walk.iv, iv, AES_BLOCK_SIZE);	/* carry the returned IV into the next chunk */
+		nbytes &= AES_BLOCK_SIZE - 1;	/* remainder that is not a whole block */
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static int cbc_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
+				   ctx->D, walk.iv, &ctx->cword.decrypt,
+				   nbytes / AES_BLOCK_SIZE);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static struct crypto_alg cbc_aes_alg = {
+	.cra_name		=	"cbc(aes)",
+	.cra_driver_name	=	"cbc-aes-padlock",
+	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct aes_ctx),
+	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey	   		= 	aes_set_key,
+			.encrypt		=	cbc_aes_encrypt,
+			.decrypt		=	cbc_aes_decrypt,
+		}
+	}
+};
+
 static int __init padlock_init(void)
 {
 	int ret;
@@ -522,18 +667,33 @@ static int __init padlock_init(void)
 	}
 
 	gen_tabs();
-	if ((ret = crypto_register_alg(&aes_alg))) {
-		printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
-		return ret;
-	}
+	if ((ret = crypto_register_alg(&aes_alg)))
+		goto aes_err;
+
+	if ((ret = crypto_register_alg(&ecb_aes_alg)))
+		goto ecb_aes_err;
+
+	if ((ret = crypto_register_alg(&cbc_aes_alg)))
+		goto cbc_aes_err;
 
 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
 
+out:
 	return ret;
+
+cbc_aes_err:
+	crypto_unregister_alg(&ecb_aes_alg);
+ecb_aes_err:
+	crypto_unregister_alg(&aes_alg);
+aes_err:
+	printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
+	goto out;
 }
 
 static void __exit padlock_fini(void)
 {
+	crypto_unregister_alg(&cbc_aes_alg);
+	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
 }
 
diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h
index 7e3385b0904db..b728e4518bd15 100644
--- a/drivers/crypto/padlock.h
+++ b/drivers/crypto/padlock.h
@@ -18,5 +18,6 @@
 #define PFX	"padlock: "
 
 #define PADLOCK_CRA_PRIORITY	300
+#define PADLOCK_COMPOSITE_PRIORITY 400
 
 #endif	/* _CRYPTO_PADLOCK_H */
-- 
GitLab


From a9e62fadf0b02ba4a1d945d1a75652507da94319 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 21:39:24 +1000
Subject: [PATCH 0314/1063] [CRYPTO] s390: Added block cipher versions of
 CBC/ECB

This patch adds block cipher algorithms for S390.  Once all users of the
old cipher type have been converted the existing CBC/ECB non-block cipher
operations will be removed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/aes_s390.c   | 218 ++++++++++++++++++-
 arch/s390/crypto/crypt_s390.h |   1 +
 arch/s390/crypto/des_s390.c   | 385 +++++++++++++++++++++++++++++++++-
 crypto/Kconfig                |   2 +
 4 files changed, 592 insertions(+), 14 deletions(-)

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 220300e760d8a..8f04b4e41b557 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -16,9 +16,9 @@
  *
  */
 
+#include <crypto/algapi.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/crypto.h>
 #include "crypt_s390.h"
 
 #define AES_MIN_KEY_SIZE	16
@@ -34,6 +34,8 @@ int has_aes_256 = 0;
 struct s390_aes_ctx {
 	u8 iv[AES_BLOCK_SIZE];
 	u8 key[AES_MAX_KEY_SIZE];
+	long enc;
+	long dec;
 	int key_len;
 };
 
@@ -244,6 +246,189 @@ static struct crypto_alg aes_alg = {
 	}
 };
 
+static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case 16:
+		sctx->enc = KM_AES_128_ENCRYPT;
+		sctx->dec = KM_AES_128_DECRYPT;
+		break;
+	case 24:
+		sctx->enc = KM_AES_192_ENCRYPT;
+		sctx->dec = KM_AES_192_DECRYPT;
+		break;
+	case 32:
+		sctx->enc = KM_AES_256_ENCRYPT;
+		sctx->dec = KM_AES_256_DECRYPT;
+		break;
+	}
+
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+			 struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes;
+
+	while ((nbytes = walk->nbytes)) {
+		/* only use complete blocks */
+		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_km(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));
+
+		nbytes &= AES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	return ret;
+}
+
+static int ecb_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk);
+}
+
+static int ecb_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_aes_alg = {
+	.cra_name		=	"ecb(aes)",
+	.cra_driver_name	=	"ecb-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.setkey			=	ecb_aes_set_key,
+			.encrypt		=	ecb_aes_encrypt,
+			.decrypt		=	ecb_aes_decrypt,
+		}
+	}
+};
+
+static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case 16:
+		sctx->enc = KMC_AES_128_ENCRYPT;
+		sctx->dec = KMC_AES_128_DECRYPT;
+		break;
+	case 24:
+		sctx->enc = KMC_AES_192_ENCRYPT;
+		sctx->dec = KMC_AES_192_DECRYPT;
+		break;
+	case 32:
+		sctx->enc = KMC_AES_256_ENCRYPT;
+		sctx->dec = KMC_AES_256_DECRYPT;
+		break;
+	}
+
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+			 struct blkcipher_walk *walk)
+{	/* Run the KMC function code func over every whole block of the walk. */
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes = walk->nbytes;
+
+	if (!nbytes)
+		goto out;	/* nothing to process: return blkcipher_walk_virt()'s result */
+
+	memcpy(param, walk->iv, AES_BLOCK_SIZE);	/* callers pass sctx->iv as param: load the IV */
+	do {
+		/* only use complete blocks */
+		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_kmc(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));	/* anything but n bytes processed is fatal */
+
+		nbytes &= AES_BLOCK_SIZE - 1;	/* remainder that is not a whole block */
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	} while ((nbytes = walk->nbytes));
+	memcpy(walk->iv, param, AES_BLOCK_SIZE);	/* presumably KMC left the chaining value in param - confirm */
+
+out:
+	return ret;
+}
+
+static int cbc_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk);
+}
+
+static int cbc_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_aes_alg = {
+	.cra_name		=	"cbc(aes)",
+	.cra_driver_name	=	"cbc-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	cbc_aes_set_key,
+			.encrypt		=	cbc_aes_encrypt,
+			.decrypt		=	cbc_aes_decrypt,
+		}
+	}
+};
+
 static int __init aes_init(void)
 {
 	int ret;
@@ -259,13 +444,40 @@ static int __init aes_init(void)
 		return -ENOSYS;
 
 	ret = crypto_register_alg(&aes_alg);
-	if (ret != 0)
-		printk(KERN_INFO "crypt_s390: aes_s390 couldn't be loaded.\n");
+	if (ret != 0) {
+		printk(KERN_INFO "crypt_s390: aes-s390 couldn't be loaded.\n");
+		goto aes_err;
+	}
+
+	ret = crypto_register_alg(&ecb_aes_alg);
+	if (ret != 0) {
+		printk(KERN_INFO
+		       "crypt_s390: ecb-aes-s390 couldn't be loaded.\n");
+		goto ecb_aes_err;
+	}
+
+	ret = crypto_register_alg(&cbc_aes_alg);
+	if (ret != 0) {
+		printk(KERN_INFO
+		       "crypt_s390: cbc-aes-s390 couldn't be loaded.\n");
+		goto cbc_aes_err;
+	}
+
+out:
 	return ret;
+
+cbc_aes_err:
+	crypto_unregister_alg(&ecb_aes_alg);
+ecb_aes_err:
+	crypto_unregister_alg(&aes_alg);
+aes_err:
+	goto out;
 }
 
 static void __exit aes_fini(void)
 {
+	crypto_unregister_alg(&cbc_aes_alg);
+	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
 }
 
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index d1d330797f75d..efd836c2e4a6c 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -21,6 +21,7 @@
 #define CRYPT_S390_FUNC_MASK 0x00FF
 
 #define CRYPT_S390_PRIORITY 300
+#define CRYPT_S390_COMPOSITE_PRIORITY 400
 
 /* s930 cryptographic operations */
 enum crypt_s390_operations {
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 3fd5d37d5e05b..a6d2385ccb7af 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -13,9 +13,10 @@
  * (at your option) any later version.
  *
  */
+
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/crypto.h>
 
 #include "crypt_s390.h"
 #include "crypto_des.h"
@@ -157,6 +158,143 @@ static struct crypto_alg des_alg = {
 	}
 };
 
+static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
+			    void *param, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes;
+
+	while ((nbytes = walk->nbytes)) {
+		/* whole blocks only; leftover count goes to walk_done() */
+		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_km(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));
+
+		nbytes &= DES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	return ret;
+}
+
+static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
+			    void *param, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes = walk->nbytes;
+
+	if (!nbytes)
+		goto out;
+
+	memcpy(param, walk->iv, DES_BLOCK_SIZE);
+	do {
+		/* whole blocks only; leftover count goes to walk_done() */
+		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_kmc(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));
+
+		nbytes &= DES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	} while ((nbytes = walk->nbytes));
+	memcpy(walk->iv, param, DES_BLOCK_SIZE);
+
+out:
+	return ret;
+}
+
+static int ecb_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk);
+}
+
+static int ecb_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_des_alg = {
+	.cra_name		=	"ecb(des)",
+	.cra_driver_name	=	"ecb-des-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ecb_des_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES_KEY_SIZE,
+			.max_keysize		=	DES_KEY_SIZE,
+			.setkey			=	des_setkey,
+			.encrypt		=	ecb_des_encrypt,
+			.decrypt		=	ecb_des_decrypt,
+		}
+	}
+};
+
+static int cbc_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk);
+}
+
+static int cbc_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_des_alg = {
+	.cra_name		=	"cbc(des)",
+	.cra_driver_name	=	"cbc-des-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(cbc_des_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES_KEY_SIZE,
+			.max_keysize		=	DES_KEY_SIZE,
+			.ivsize			=	DES_BLOCK_SIZE,
+			.setkey			=	des_setkey,
+			.encrypt		=	cbc_des_encrypt,
+			.decrypt		=	cbc_des_decrypt,
+		}
+	}
+};
+
 /*
  * RFC2451:
  *
@@ -295,6 +433,95 @@ static struct crypto_alg des3_128_alg = {
 	}
 };
 
+static int ecb_des3_128_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_128_ENCRYPT, sctx->key, &walk);
+}
+
+static int ecb_des3_128_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_128_DECRYPT, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_des3_128_alg = {
+	.cra_name		=	"ecb(des3_ede128)",
+	.cra_driver_name	=	"ecb-des3_ede128-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						ecb_des3_128_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_128_KEY_SIZE,
+			.max_keysize		=	DES3_128_KEY_SIZE,
+			.setkey			=	des3_128_setkey,
+			.encrypt		=	ecb_des3_128_encrypt,
+			.decrypt		=	ecb_des3_128_decrypt,
+		}
+	}
+};
+
+static int cbc_des3_128_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_128_ENCRYPT, sctx->iv, &walk);
+}
+
+static int cbc_des3_128_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_128_DECRYPT, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_des3_128_alg = {
+	.cra_name		=	"cbc(des3_ede128)",
+	.cra_driver_name	=	"cbc-des3_ede128-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						cbc_des3_128_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_128_KEY_SIZE,
+			.max_keysize		=	DES3_128_KEY_SIZE,
+			.ivsize			=	DES3_128_BLOCK_SIZE,
+			.setkey			=	des3_128_setkey,
+			.encrypt		=	cbc_des3_128_encrypt,
+			.decrypt		=	cbc_des3_128_decrypt,
+		}
+	}
+};
+
 /*
  * RFC2451:
  *
@@ -437,6 +664,95 @@ static struct crypto_alg des3_192_alg = {
 	}
 };
 
+static int ecb_des3_192_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk);
+}
+
+static int ecb_des3_192_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_des3_192_alg = {
+	.cra_name		=	"ecb(des3_ede)",
+	.cra_driver_name	=	"ecb-des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						ecb_des3_192_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_192_KEY_SIZE,
+			.max_keysize		=	DES3_192_KEY_SIZE,
+			.setkey			=	des3_192_setkey,
+			.encrypt		=	ecb_des3_192_encrypt,
+			.decrypt		=	ecb_des3_192_decrypt,
+		}
+	}
+};
+
+static int cbc_des3_192_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk);
+}
+
+static int cbc_des3_192_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_des3_192_alg = {
+	.cra_name		=	"cbc(des3_ede)",
+	.cra_driver_name	=	"cbc-des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						cbc_des3_192_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_192_KEY_SIZE,
+			.max_keysize		=	DES3_192_KEY_SIZE,
+			.ivsize			=	DES3_192_BLOCK_SIZE,
+			.setkey			=	des3_192_setkey,
+			.encrypt		=	cbc_des3_192_encrypt,
+			.decrypt		=	cbc_des3_192_decrypt,
+		}
+	}
+};
+
 static int init(void)
 {
 	int ret = 0;
@@ -446,22 +762,69 @@ static int init(void)
 	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
 		return -ENOSYS;
 
-	ret |= (crypto_register_alg(&des_alg) == 0) ? 0:1;
-	ret |= (crypto_register_alg(&des3_128_alg) == 0) ? 0:2;
-	ret |= (crypto_register_alg(&des3_192_alg) == 0) ? 0:4;
-	if (ret) {
-		crypto_unregister_alg(&des3_192_alg);
-		crypto_unregister_alg(&des3_128_alg);
-		crypto_unregister_alg(&des_alg);
-		return -EEXIST;
-	}
-	return 0;
+	ret = crypto_register_alg(&des_alg);
+	if (ret)
+		goto des_err;
+	ret = crypto_register_alg(&ecb_des_alg);
+	if (ret)
+		goto ecb_des_err;
+	ret = crypto_register_alg(&cbc_des_alg);
+	if (ret)
+		goto cbc_des_err;
+
+	ret = crypto_register_alg(&des3_128_alg);
+	if (ret)
+		goto des3_128_err;
+	ret = crypto_register_alg(&ecb_des3_128_alg);
+	if (ret)
+		goto ecb_des3_128_err;
+	ret = crypto_register_alg(&cbc_des3_128_alg);
+	if (ret)
+		goto cbc_des3_128_err;
+
+	ret = crypto_register_alg(&des3_192_alg);
+	if (ret)
+		goto des3_192_err;
+	ret = crypto_register_alg(&ecb_des3_192_alg);
+	if (ret)
+		goto ecb_des3_192_err;
+	ret = crypto_register_alg(&cbc_des3_192_alg);
+	if (ret)
+		goto cbc_des3_192_err;
+
+out:
+	return ret;
+
+cbc_des3_192_err:
+	crypto_unregister_alg(&ecb_des3_192_alg);
+ecb_des3_192_err:
+	crypto_unregister_alg(&des3_192_alg);
+des3_192_err:
+	crypto_unregister_alg(&cbc_des3_128_alg);
+cbc_des3_128_err:
+	crypto_unregister_alg(&ecb_des3_128_alg);
+ecb_des3_128_err:
+	crypto_unregister_alg(&des3_128_alg);
+des3_128_err:
+	crypto_unregister_alg(&cbc_des_alg);
+cbc_des_err:
+	crypto_unregister_alg(&ecb_des_alg);
+ecb_des_err:
+	crypto_unregister_alg(&des_alg);
+des_err:
+	goto out;
 }
 
 static void __exit fini(void)
 {
+	crypto_unregister_alg(&cbc_des3_192_alg);
+	crypto_unregister_alg(&ecb_des3_192_alg);
 	crypto_unregister_alg(&des3_192_alg);
+	crypto_unregister_alg(&cbc_des3_128_alg);
+	crypto_unregister_alg(&ecb_des3_128_alg);
 	crypto_unregister_alg(&des3_128_alg);
+	crypto_unregister_alg(&cbc_des_alg);
+	crypto_unregister_alg(&ecb_des_alg);
 	crypto_unregister_alg(&des_alg);
 }
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 90d467c99c2cd..be5eb0cb7c30e 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -150,6 +150,7 @@ config CRYPTO_DES_S390
 	tristate "DES and Triple DES cipher algorithms (s390)"
 	depends on S390
 	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
@@ -298,6 +299,7 @@ config CRYPTO_AES_S390
 	tristate "AES cipher algorithms (s390)"
 	depends on S390
 	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
-- 
GitLab


From cba83564d112e4aec52227f68670f8dbd4d4ac89 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2006 08:26:09 +1000
Subject: [PATCH 0315/1063] [CRYPTO] tcrypt: Use block ciphers where applicable

This patch converts tcrypt to use the new block cipher type where
applicable.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 435 ++++++++++++++++++++++++++++--------------------
 1 file changed, 259 insertions(+), 176 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 56d0d8b3bcf2b..5e2278069d226 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -17,6 +17,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -54,8 +55,6 @@
 */
 #define ENCRYPT 1
 #define DECRYPT 0
-#define MODE_ECB 1
-#define MODE_CBC 0
 
 static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 };
 
@@ -250,28 +249,27 @@ static void test_hmac(char *algo, struct hmac_testvec *template,
 
 #endif	/* CONFIG_CRYPTO_HMAC */
 
-static void test_cipher(char *algo, int mode, int enc,
+static void test_cipher(char *algo, int enc,
 			struct cipher_testvec *template, unsigned int tcount)
 {
 	unsigned int ret, i, j, k, temp;
 	unsigned int tsize;
+	unsigned int iv_len;
+	unsigned int len;
 	char *q;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 	char *key;
 	struct cipher_testvec *cipher_tv;
+	struct blkcipher_desc desc;
 	struct scatterlist sg[8];
-	const char *e, *m;
+	const char *e;
 
 	if (enc == ENCRYPT)
 	        e = "encryption";
 	else
 		e = "decryption";
-	if (mode == MODE_ECB)
-		m = "ECB";
-	else
-		m = "CBC";
 
-	printk("\ntesting %s %s %s\n", algo, m, e);
+	printk("\ntesting %s %s\n", algo, e);
 
 	tsize = sizeof (struct cipher_testvec);
 	tsize *= tcount;
@@ -285,15 +283,15 @@ static void test_cipher(char *algo, int mode, int enc,
 	memcpy(tvmem, template, tsize);
 	cipher_tv = (void *)tvmem;
 
-	if (mode)
-		tfm = crypto_alloc_tfm(algo, 0);
-	else
-		tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC);
+	tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC);
 
-	if (tfm == NULL) {
-		printk("failed to load transform for %s %s\n", algo, m);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
+	desc.tfm = tfm;
+	desc.flags = 0;
 
 	j = 0;
 	for (i = 0; i < tcount; i++) {
@@ -302,14 +300,17 @@ static void test_cipher(char *algo, int mode, int enc,
 			printk("test %u (%d bit key):\n",
 			j, cipher_tv[i].klen * 8);
 
-			tfm->crt_flags = 0;
+			crypto_blkcipher_clear_flags(tfm, ~0);
 			if (cipher_tv[i].wk)
-				tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY;
+				crypto_blkcipher_set_flags(
+					tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 			key = cipher_tv[i].key;
 
-			ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen);
+			ret = crypto_blkcipher_setkey(tfm, key,
+						      cipher_tv[i].klen);
 			if (ret) {
-				printk("setkey() failed flags=%x\n", tfm->crt_flags);
+				printk("setkey() failed flags=%x\n",
+				       crypto_blkcipher_get_flags(tfm));
 
 				if (!cipher_tv[i].fail)
 					goto out;
@@ -318,19 +319,19 @@ static void test_cipher(char *algo, int mode, int enc,
 			sg_set_buf(&sg[0], cipher_tv[i].input,
 				   cipher_tv[i].ilen);
 
-			if (!mode) {
-				crypto_cipher_set_iv(tfm, cipher_tv[i].iv,
-					crypto_tfm_alg_ivsize(tfm));
-			}
-
-			if (enc)
-				ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen);
-			else
-				ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen);
+			iv_len = crypto_blkcipher_ivsize(tfm);
+			if (iv_len)
+				crypto_blkcipher_set_iv(tfm, cipher_tv[i].iv,
+							iv_len);
 
+			len = cipher_tv[i].ilen;
+			ret = enc ?
+				crypto_blkcipher_encrypt(&desc, sg, sg, len) :
+				crypto_blkcipher_decrypt(&desc, sg, sg, len);
 
 			if (ret) {
-				printk("%s () failed flags=%x\n", e, tfm->crt_flags);
+				printk("%s () failed flags=%x\n", e,
+				       desc.flags);
 				goto out;
 			}
 
@@ -343,7 +344,7 @@ static void test_cipher(char *algo, int mode, int enc,
 		}
 	}
 
-	printk("\ntesting %s %s %s across pages (chunking)\n", algo, m, e);
+	printk("\ntesting %s %s across pages (chunking)\n", algo, e);
 	memset(xbuf, 0, XBUFSIZE);
 
 	j = 0;
@@ -353,14 +354,17 @@ static void test_cipher(char *algo, int mode, int enc,
 			printk("test %u (%d bit key):\n",
 			j, cipher_tv[i].klen * 8);
 
-			tfm->crt_flags = 0;
+			crypto_blkcipher_clear_flags(tfm, ~0);
 			if (cipher_tv[i].wk)
-				tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY;
+				crypto_blkcipher_set_flags(
+					tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 			key = cipher_tv[i].key;
 
-			ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen);
+			ret = crypto_blkcipher_setkey(tfm, key,
+						      cipher_tv[i].klen);
 			if (ret) {
-				printk("setkey() failed flags=%x\n", tfm->crt_flags);
+				printk("setkey() failed flags=%x\n",
+				       crypto_blkcipher_get_flags(tfm));
 
 				if (!cipher_tv[i].fail)
 					goto out;
@@ -376,18 +380,19 @@ static void test_cipher(char *algo, int mode, int enc,
 					   cipher_tv[i].tap[k]);
 			}
 
-			if (!mode) {
-				crypto_cipher_set_iv(tfm, cipher_tv[i].iv,
-						crypto_tfm_alg_ivsize(tfm));
-			}
+			iv_len = crypto_blkcipher_ivsize(tfm);
+			if (iv_len)
+				crypto_blkcipher_set_iv(tfm, cipher_tv[i].iv,
+							iv_len);
 
-			if (enc)
-				ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen);
-			else
-				ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen);
+			len = cipher_tv[i].ilen;
+			ret = enc ?
+				crypto_blkcipher_encrypt(&desc, sg, sg, len) :
+				crypto_blkcipher_decrypt(&desc, sg, sg, len);
 
 			if (ret) {
-				printk("%s () failed flags=%x\n", e, tfm->crt_flags);
+				printk("%s () failed flags=%x\n", e,
+				       desc.flags);
 				goto out;
 			}
 
@@ -406,10 +411,10 @@ static void test_cipher(char *algo, int mode, int enc,
 	}
 
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 }
 
-static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p,
+static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc, char *p,
 			       int blen, int sec)
 {
 	struct scatterlist sg[1];
@@ -422,9 +427,9 @@ static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p,
 	for (start = jiffies, end = start + sec * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
 		if (enc)
-			ret = crypto_cipher_encrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
 		else
-			ret = crypto_cipher_decrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);
 
 		if (ret)
 			return ret;
@@ -435,7 +440,7 @@ static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p,
 	return 0;
 }
 
-static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p,
+static int test_cipher_cycles(struct blkcipher_desc *desc, int enc, char *p,
 			      int blen)
 {
 	struct scatterlist sg[1];
@@ -451,9 +456,9 @@ static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p,
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
 		if (enc)
-			ret = crypto_cipher_encrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
 		else
-			ret = crypto_cipher_decrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);
 
 		if (ret)
 			goto out;
@@ -465,9 +470,9 @@ static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p,
 
 		start = get_cycles();
 		if (enc)
-			ret = crypto_cipher_encrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
 		else
-			ret = crypto_cipher_decrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);
 		end = get_cycles();
 
 		if (ret)
@@ -487,35 +492,32 @@ static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p,
 	return ret;
 }
 
-static void test_cipher_speed(char *algo, int mode, int enc, unsigned int sec,
+static void test_cipher_speed(char *algo, int enc, unsigned int sec,
 			      struct cipher_testvec *template,
 			      unsigned int tcount, struct cipher_speed *speed)
 {
 	unsigned int ret, i, j, iv_len;
 	unsigned char *key, *p, iv[128];
-	struct crypto_tfm *tfm;
-	const char *e, *m;
+	struct crypto_blkcipher *tfm;
+	struct blkcipher_desc desc;
+	const char *e;
 
 	if (enc == ENCRYPT)
 	        e = "encryption";
 	else
 		e = "decryption";
-	if (mode == MODE_ECB)
-		m = "ECB";
-	else
-		m = "CBC";
 
-	printk("\ntesting speed of %s %s %s\n", algo, m, e);
+	printk("\ntesting speed of %s %s\n", algo, e);
 
-	if (mode)
-		tfm = crypto_alloc_tfm(algo, 0);
-	else
-		tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC);
+	tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC);
 
-	if (tfm == NULL) {
-		printk("failed to load transform for %s %s\n", algo, m);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
+	desc.tfm = tfm;
+	desc.flags = 0;
 
 	for (i = 0; speed[i].klen != 0; i++) {
 		if ((speed[i].blen + speed[i].klen) > TVMEMSIZE) {
@@ -539,32 +541,33 @@ static void test_cipher_speed(char *algo, int mode, int enc, unsigned int sec,
 		}
 		p = (unsigned char *)tvmem + speed[i].klen;
 
-		ret = crypto_cipher_setkey(tfm, key, speed[i].klen);
+		ret = crypto_blkcipher_setkey(tfm, key, speed[i].klen);
 		if (ret) {
-			printk("setkey() failed flags=%x\n", tfm->crt_flags);
+			printk("setkey() failed flags=%x\n",
+			       crypto_blkcipher_get_flags(tfm));
 			goto out;
 		}
 
-		if (!mode) {
-			iv_len = crypto_tfm_alg_ivsize(tfm);
+		iv_len = crypto_blkcipher_ivsize(tfm);
+		if (iv_len) {
 			memset(&iv, 0xff, iv_len);
-			crypto_cipher_set_iv(tfm, iv, iv_len);
+			crypto_blkcipher_set_iv(tfm, iv, iv_len);
 		}
 
 		if (sec)
-			ret = test_cipher_jiffies(tfm, enc, p, speed[i].blen,
+			ret = test_cipher_jiffies(&desc, enc, p, speed[i].blen,
 						  sec);
 		else
-			ret = test_cipher_cycles(tfm, enc, p, speed[i].blen);
+			ret = test_cipher_cycles(&desc, enc, p, speed[i].blen);
 
 		if (ret) {
-			printk("%s() failed flags=%x\n", e, tfm->crt_flags);
+			printk("%s() failed flags=%x\n", e, desc.flags);
 			break;
 		}
 	}
 
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 }
 
 static void test_digest_jiffies(struct crypto_tfm *tfm, char *p, int blen,
@@ -784,79 +787,119 @@ static void do_test(void)
 		test_hash("sha1", sha1_tv_template, SHA1_TEST_VECTORS);
 
 		//DES
-		test_cipher ("des", MODE_ECB, ENCRYPT, des_enc_tv_template, DES_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(des)", ENCRYPT, des_enc_tv_template,
+			    DES_ENC_TEST_VECTORS);
+		test_cipher("ecb(des)", DECRYPT, des_dec_tv_template,
+			    DES_DEC_TEST_VECTORS);
+		test_cipher("cbc(des)", ENCRYPT, des_cbc_enc_tv_template,
+			    DES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(des)", DECRYPT, des_cbc_dec_tv_template,
+			    DES_CBC_DEC_TEST_VECTORS);
 
 		//DES3_EDE
-		test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS);
-		test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", ENCRYPT, des3_ede_enc_tv_template,
+			    DES3_EDE_ENC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", DECRYPT, des3_ede_dec_tv_template,
+			    DES3_EDE_DEC_TEST_VECTORS);
 
 		test_hash("md4", md4_tv_template, MD4_TEST_VECTORS);
 
 		test_hash("sha256", sha256_tv_template, SHA256_TEST_VECTORS);
 
 		//BLOWFISH
-		test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, ENCRYPT, bf_cbc_enc_tv_template, BF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, DECRYPT, bf_cbc_dec_tv_template, BF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", ENCRYPT, bf_enc_tv_template,
+			    BF_ENC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", DECRYPT, bf_dec_tv_template,
+			    BF_DEC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", ENCRYPT, bf_cbc_enc_tv_template,
+			    BF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", DECRYPT, bf_cbc_dec_tv_template,
+			    BF_CBC_DEC_TEST_VECTORS);
 
 		//TWOFISH
-		test_cipher ("twofish", MODE_ECB, ENCRYPT, tf_enc_tv_template, TF_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_ECB, DECRYPT, tf_dec_tv_template, TF_DEC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", ENCRYPT, tf_enc_tv_template,
+			    TF_ENC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", DECRYPT, tf_dec_tv_template,
+			    TF_DEC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", ENCRYPT, tf_cbc_enc_tv_template,
+			    TF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", DECRYPT, tf_cbc_dec_tv_template,
+			    TF_CBC_DEC_TEST_VECTORS);
 
 		//SERPENT
-		test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS);
-		test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", ENCRYPT, serpent_enc_tv_template,
+			    SERPENT_ENC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", DECRYPT, serpent_dec_tv_template,
+			    SERPENT_DEC_TEST_VECTORS);
 
 		//TNEPRES
-		test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS);
-		test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", ENCRYPT, tnepres_enc_tv_template,
+			    TNEPRES_ENC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", DECRYPT, tnepres_dec_tv_template,
+			    TNEPRES_DEC_TEST_VECTORS);
 
 		//AES
-		test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, ENCRYPT, aes_cbc_enc_tv_template, AES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(aes)", ENCRYPT, aes_enc_tv_template,
+			    AES_ENC_TEST_VECTORS);
+		test_cipher("ecb(aes)", DECRYPT, aes_dec_tv_template,
+			    AES_DEC_TEST_VECTORS);
+		test_cipher("cbc(aes)", ENCRYPT, aes_cbc_enc_tv_template,
+			    AES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template,
+			    AES_CBC_DEC_TEST_VECTORS);
 
 		//CAST5
-		test_cipher ("cast5", MODE_ECB, ENCRYPT, cast5_enc_tv_template, CAST5_ENC_TEST_VECTORS);
-		test_cipher ("cast5", MODE_ECB, DECRYPT, cast5_dec_tv_template, CAST5_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", ENCRYPT, cast5_enc_tv_template,
+			    CAST5_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", DECRYPT, cast5_dec_tv_template,
+			    CAST5_DEC_TEST_VECTORS);
 
 		//CAST6
-		test_cipher ("cast6", MODE_ECB, ENCRYPT, cast6_enc_tv_template, CAST6_ENC_TEST_VECTORS);
-		test_cipher ("cast6", MODE_ECB, DECRYPT, cast6_dec_tv_template, CAST6_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", ENCRYPT, cast6_enc_tv_template,
+			    CAST6_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", DECRYPT, cast6_dec_tv_template,
+			    CAST6_DEC_TEST_VECTORS);
 
 		//ARC4
-		test_cipher ("arc4", MODE_ECB, ENCRYPT, arc4_enc_tv_template, ARC4_ENC_TEST_VECTORS);
-		test_cipher ("arc4", MODE_ECB, DECRYPT, arc4_dec_tv_template, ARC4_DEC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", ENCRYPT, arc4_enc_tv_template,
+			    ARC4_ENC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", DECRYPT, arc4_dec_tv_template,
+			    ARC4_DEC_TEST_VECTORS);
 
 		//TEA
-		test_cipher ("tea", MODE_ECB, ENCRYPT, tea_enc_tv_template, TEA_ENC_TEST_VECTORS);
-		test_cipher ("tea", MODE_ECB, DECRYPT, tea_dec_tv_template, TEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(tea)", ENCRYPT, tea_enc_tv_template,
+			    TEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(tea)", DECRYPT, tea_dec_tv_template,
+			    TEA_DEC_TEST_VECTORS);
 
 
 		//XTEA
-		test_cipher ("xtea", MODE_ECB, ENCRYPT, xtea_enc_tv_template, XTEA_ENC_TEST_VECTORS);
-		test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", ENCRYPT, xtea_enc_tv_template,
+			    XTEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", DECRYPT, xtea_dec_tv_template,
+			    XTEA_DEC_TEST_VECTORS);
 
 		//KHAZAD
-		test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS);
-		test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", ENCRYPT, khazad_enc_tv_template,
+			    KHAZAD_ENC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", DECRYPT, khazad_dec_tv_template,
+			    KHAZAD_DEC_TEST_VECTORS);
 
 		//ANUBIS
-		test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", ENCRYPT, anubis_enc_tv_template,
+			    ANUBIS_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", DECRYPT, anubis_dec_tv_template,
+			    ANUBIS_DEC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", ENCRYPT, anubis_cbc_enc_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", DECRYPT, anubis_cbc_dec_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
 
 		//XETA
-		test_cipher ("xeta", MODE_ECB, ENCRYPT, xeta_enc_tv_template, XETA_ENC_TEST_VECTORS);
-		test_cipher ("xeta", MODE_ECB, DECRYPT, xeta_dec_tv_template, XETA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", ENCRYPT, xeta_enc_tv_template,
+			    XETA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", DECRYPT, xeta_dec_tv_template,
+			    XETA_DEC_TEST_VECTORS);
 
 		test_hash("sha384", sha384_tv_template, SHA384_TEST_VECTORS);
 		test_hash("sha512", sha512_tv_template, SHA512_TEST_VECTORS);
@@ -886,15 +929,21 @@ static void do_test(void)
 		break;
 
 	case 3:
-		test_cipher ("des", MODE_ECB, ENCRYPT, des_enc_tv_template, DES_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(des)", ENCRYPT, des_enc_tv_template,
+			    DES_ENC_TEST_VECTORS);
+		test_cipher("ecb(des)", DECRYPT, des_dec_tv_template,
+			    DES_DEC_TEST_VECTORS);
+		test_cipher("cbc(des)", ENCRYPT, des_cbc_enc_tv_template,
+			    DES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(des)", DECRYPT, des_cbc_dec_tv_template,
+			    DES_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 4:
-		test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS);
-		test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", ENCRYPT, des3_ede_enc_tv_template,
+			    DES3_EDE_ENC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", DECRYPT, des3_ede_dec_tv_template,
+			    DES3_EDE_DEC_TEST_VECTORS);
 		break;
 
 	case 5:
@@ -906,29 +955,43 @@ static void do_test(void)
 		break;
 
 	case 7:
-		test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, ENCRYPT, bf_cbc_enc_tv_template, BF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, DECRYPT, bf_cbc_dec_tv_template, BF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", ENCRYPT, bf_enc_tv_template,
+			    BF_ENC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", DECRYPT, bf_dec_tv_template,
+			    BF_DEC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", ENCRYPT, bf_cbc_enc_tv_template,
+			    BF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", DECRYPT, bf_cbc_dec_tv_template,
+			    BF_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 8:
-		test_cipher ("twofish", MODE_ECB, ENCRYPT, tf_enc_tv_template, TF_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_ECB, DECRYPT, tf_dec_tv_template, TF_DEC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", ENCRYPT, tf_enc_tv_template,
+			    TF_ENC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", DECRYPT, tf_dec_tv_template,
+			    TF_DEC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", ENCRYPT, tf_cbc_enc_tv_template,
+			    TF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", DECRYPT, tf_cbc_dec_tv_template,
+			    TF_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 9:
-		test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS);
-		test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", ENCRYPT, serpent_enc_tv_template,
+			    SERPENT_ENC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", DECRYPT, serpent_dec_tv_template,
+			    SERPENT_DEC_TEST_VECTORS);
 		break;
 
 	case 10:
-		test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, ENCRYPT, aes_cbc_enc_tv_template, AES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(aes)", ENCRYPT, aes_enc_tv_template,
+			    AES_ENC_TEST_VECTORS);
+		test_cipher("ecb(aes)", DECRYPT, aes_dec_tv_template,
+			    AES_DEC_TEST_VECTORS);
+		test_cipher("cbc(aes)", ENCRYPT, aes_cbc_enc_tv_template,
+			    AES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template,
+			    AES_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 11:
@@ -944,18 +1007,24 @@ static void do_test(void)
 		break;
 
 	case 14:
-		test_cipher ("cast5", MODE_ECB, ENCRYPT, cast5_enc_tv_template, CAST5_ENC_TEST_VECTORS);
-		test_cipher ("cast5", MODE_ECB, DECRYPT, cast5_dec_tv_template, CAST5_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", ENCRYPT, cast5_enc_tv_template,
+			    CAST5_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", DECRYPT, cast5_dec_tv_template,
+			    CAST5_DEC_TEST_VECTORS);
 		break;
 
 	case 15:
-		test_cipher ("cast6", MODE_ECB, ENCRYPT, cast6_enc_tv_template, CAST6_ENC_TEST_VECTORS);
-		test_cipher ("cast6", MODE_ECB, DECRYPT, cast6_dec_tv_template, CAST6_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", ENCRYPT, cast6_enc_tv_template,
+			    CAST6_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", DECRYPT, cast6_dec_tv_template,
+			    CAST6_DEC_TEST_VECTORS);
 		break;
 
 	case 16:
-		test_cipher ("arc4", MODE_ECB, ENCRYPT, arc4_enc_tv_template, ARC4_ENC_TEST_VECTORS);
-		test_cipher ("arc4", MODE_ECB, DECRYPT, arc4_dec_tv_template, ARC4_DEC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", ENCRYPT, arc4_enc_tv_template,
+			    ARC4_ENC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", DECRYPT, arc4_dec_tv_template,
+			    ARC4_DEC_TEST_VECTORS);
 		break;
 
 	case 17:
@@ -967,18 +1036,24 @@ static void do_test(void)
 		break;
 
 	case 19:
-		test_cipher ("tea", MODE_ECB, ENCRYPT, tea_enc_tv_template, TEA_ENC_TEST_VECTORS);
-		test_cipher ("tea", MODE_ECB, DECRYPT, tea_dec_tv_template, TEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(tea)", ENCRYPT, tea_enc_tv_template,
+			    TEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(tea)", DECRYPT, tea_dec_tv_template,
+			    TEA_DEC_TEST_VECTORS);
 		break;
 
 	case 20:
-		test_cipher ("xtea", MODE_ECB, ENCRYPT, xtea_enc_tv_template, XTEA_ENC_TEST_VECTORS);
-		test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", ENCRYPT, xtea_enc_tv_template,
+			    XTEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", DECRYPT, xtea_dec_tv_template,
+			    XTEA_DEC_TEST_VECTORS);
 		break;
 
 	case 21:
-		test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS);
-		test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", ENCRYPT, khazad_enc_tv_template,
+			    KHAZAD_ENC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", DECRYPT, khazad_dec_tv_template,
+			    KHAZAD_DEC_TEST_VECTORS);
 		break;
 
 	case 22:
@@ -994,15 +1069,21 @@ static void do_test(void)
 		break;
 
 	case 25:
-		test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS);
-		test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", ENCRYPT, tnepres_enc_tv_template,
+			    TNEPRES_ENC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", DECRYPT, tnepres_dec_tv_template,
+			    TNEPRES_DEC_TEST_VECTORS);
 		break;
 
 	case 26:
-		test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", ENCRYPT, anubis_enc_tv_template,
+			    ANUBIS_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", DECRYPT, anubis_dec_tv_template,
+			    ANUBIS_DEC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", ENCRYPT, anubis_cbc_enc_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", DECRYPT, anubis_cbc_dec_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
 		break;
 
 	case 27:
@@ -1019,8 +1100,10 @@ static void do_test(void)
 		break;
 		
 	case 30:
-		test_cipher ("xeta", MODE_ECB, ENCRYPT, xeta_enc_tv_template, XETA_ENC_TEST_VECTORS);
-		test_cipher ("xeta", MODE_ECB, DECRYPT, xeta_dec_tv_template, XETA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", ENCRYPT, xeta_enc_tv_template,
+			    XETA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", DECRYPT, xeta_dec_tv_template,
+			    XETA_DEC_TEST_VECTORS);
 		break;
 
 #ifdef CONFIG_CRYPTO_HMAC
@@ -1039,65 +1122,65 @@ static void do_test(void)
 #endif
 
 	case 200:
-		test_cipher_speed("aes", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				  aes_speed_template);
-		test_cipher_speed("aes", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(aes)", DECRYPT, sec, NULL, 0,
 				  aes_speed_template);
-		test_cipher_speed("aes", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(aes)", ENCRYPT, sec, NULL, 0,
 				  aes_speed_template);
-		test_cipher_speed("aes", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(aes)", DECRYPT, sec, NULL, 0,
 				  aes_speed_template);
 		break;
 
 	case 201:
-		test_cipher_speed("des3_ede", MODE_ECB, ENCRYPT, sec,
+		test_cipher_speed("ecb(des3_ede)", ENCRYPT, sec,
 				  des3_ede_enc_tv_template,
 				  DES3_EDE_ENC_TEST_VECTORS,
 				  des3_ede_speed_template);
-		test_cipher_speed("des3_ede", MODE_ECB, DECRYPT, sec,
+		test_cipher_speed("ecb(des3_ede)", DECRYPT, sec,
 				  des3_ede_dec_tv_template,
 				  DES3_EDE_DEC_TEST_VECTORS,
 				  des3_ede_speed_template);
-		test_cipher_speed("des3_ede", MODE_CBC, ENCRYPT, sec,
+		test_cipher_speed("cbc(des3_ede)", ENCRYPT, sec,
 				  des3_ede_enc_tv_template,
 				  DES3_EDE_ENC_TEST_VECTORS,
 				  des3_ede_speed_template);
-		test_cipher_speed("des3_ede", MODE_CBC, DECRYPT, sec,
+		test_cipher_speed("cbc(des3_ede)", DECRYPT, sec,
 				  des3_ede_dec_tv_template,
 				  DES3_EDE_DEC_TEST_VECTORS,
 				  des3_ede_speed_template);
 		break;
 
 	case 202:
-		test_cipher_speed("twofish", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0,
 				  twofish_speed_template);
-		test_cipher_speed("twofish", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0,
 				  twofish_speed_template);
-		test_cipher_speed("twofish", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0,
 				  twofish_speed_template);
-		test_cipher_speed("twofish", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0,
 				  twofish_speed_template);
 		break;
 
 	case 203:
-		test_cipher_speed("blowfish", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(blowfish)", ENCRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
-		test_cipher_speed("blowfish", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(blowfish)", DECRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
-		test_cipher_speed("blowfish", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(blowfish)", ENCRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
-		test_cipher_speed("blowfish", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
 		break;
 
 	case 204:
-		test_cipher_speed("des", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(des)", ENCRYPT, sec, NULL, 0,
 				  des_speed_template);
-		test_cipher_speed("des", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(des)", DECRYPT, sec, NULL, 0,
 				  des_speed_template);
-		test_cipher_speed("des", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(des)", ENCRYPT, sec, NULL, 0,
 				  des_speed_template);
-		test_cipher_speed("des", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(des)", DECRYPT, sec, NULL, 0,
 				  des_speed_template);
 		break;
 
-- 
GitLab


From 69affe7fc52c14e4b81408a2076e9e58ba4af60a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 21 Sep 2006 11:45:53 +1000
Subject: [PATCH 0316/1063] [BLOCK] cryptoloop: Use block ciphers where
 applicable

This patch converts cryptoloop to use the new block cipher type where
applicable.  As a result the ECB-specific and CBC-specific transfer
functions have been merged.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/block/cryptoloop.c | 160 +++++++++++++------------------------
 1 file changed, 54 insertions(+), 106 deletions(-)

diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 3d4261c39f16d..40535036e8936 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -40,11 +40,13 @@ static int
 cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
 {
 	int err = -EINVAL;
+	int cipher_len;
+	int mode_len;
 	char cms[LO_NAME_SIZE];			/* cipher-mode string */
 	char *cipher;
 	char *mode;
 	char *cmsp = cms;			/* c-m string pointer */
-	struct crypto_tfm *tfm = NULL;
+	struct crypto_blkcipher *tfm;
 
 	/* encryption breaks for non sector aligned offsets */
 
@@ -53,20 +55,39 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
 
 	strncpy(cms, info->lo_crypt_name, LO_NAME_SIZE);
 	cms[LO_NAME_SIZE - 1] = 0;
-	cipher = strsep(&cmsp, "-");
-	mode = strsep(&cmsp, "-");
-
-	if (mode == NULL || strcmp(mode, "cbc") == 0)
-		tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_CBC |
-					       CRYPTO_TFM_REQ_MAY_SLEEP);
-	else if (strcmp(mode, "ecb") == 0)
-		tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_ECB |
-					       CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL)
+
+	cipher = cmsp;
+	cipher_len = strcspn(cmsp, "-");
+
+	mode = cmsp + cipher_len;
+	mode_len = 0;
+	if (*mode) {
+		mode++;
+		mode_len = strcspn(mode, "-");
+	}
+
+	if (!mode_len) {
+		mode = "cbc";
+		mode_len = 3;
+	}
+
+	if (cipher_len + mode_len + 3 > LO_NAME_SIZE)
 		return -EINVAL;
 
-	err = tfm->crt_u.cipher.cit_setkey(tfm, info->lo_encrypt_key,
-					   info->lo_encrypt_key_size);
+	memmove(cms, mode, mode_len);
+	cmsp = cms + mode_len;
+	*cmsp++ = '(';
+	memcpy(cmsp, info->lo_crypt_name, cipher_len);
+	cmsp += cipher_len;
+	*cmsp++ = ')';
+	*cmsp = 0;
+
+	tfm = crypto_alloc_blkcipher(cms, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	err = crypto_blkcipher_setkey(tfm, info->lo_encrypt_key,
+				      info->lo_encrypt_key_size);
 	
 	if (err != 0)
 		goto out_free_tfm;
@@ -75,99 +96,49 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
 	return 0;
 
  out_free_tfm:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 
  out:
 	return err;
 }
 
 
-typedef int (*encdec_ecb_t)(struct crypto_tfm *tfm,
+typedef int (*encdec_cbc_t)(struct blkcipher_desc *desc,
 			struct scatterlist *sg_out,
 			struct scatterlist *sg_in,
 			unsigned int nsg);
 
-
-static int
-cryptoloop_transfer_ecb(struct loop_device *lo, int cmd,
-			struct page *raw_page, unsigned raw_off,
-			struct page *loop_page, unsigned loop_off,
-			int size, sector_t IV)
-{
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
-	struct scatterlist sg_out = { NULL, };
-	struct scatterlist sg_in = { NULL, };
-
-	encdec_ecb_t encdecfunc;
-	struct page *in_page, *out_page;
-	unsigned in_offs, out_offs;
-
-	if (cmd == READ) {
-		in_page = raw_page;
-		in_offs = raw_off;
-		out_page = loop_page;
-		out_offs = loop_off;
-		encdecfunc = tfm->crt_u.cipher.cit_decrypt;
-	} else {
-		in_page = loop_page;
-		in_offs = loop_off;
-		out_page = raw_page;
-		out_offs = raw_off;
-		encdecfunc = tfm->crt_u.cipher.cit_encrypt;
-	}
-
-	while (size > 0) {
-		const int sz = min(size, LOOP_IV_SECTOR_SIZE);
-
-		sg_in.page = in_page;
-		sg_in.offset = in_offs;
-		sg_in.length = sz;
-
-		sg_out.page = out_page;
-		sg_out.offset = out_offs;
-		sg_out.length = sz;
-
-		encdecfunc(tfm, &sg_out, &sg_in, sz);
-
-		size -= sz;
-		in_offs += sz;
-		out_offs += sz;
-	}
-
-	return 0;
-}
-
-typedef int (*encdec_cbc_t)(struct crypto_tfm *tfm,
-			struct scatterlist *sg_out,
-			struct scatterlist *sg_in,
-			unsigned int nsg, u8 *iv);
-
 static int
-cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
-			struct page *raw_page, unsigned raw_off,
-			struct page *loop_page, unsigned loop_off,
-			int size, sector_t IV)
+cryptoloop_transfer(struct loop_device *lo, int cmd,
+		    struct page *raw_page, unsigned raw_off,
+		    struct page *loop_page, unsigned loop_off,
+		    int size, sector_t IV)
 {
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
+	struct crypto_blkcipher *tfm = lo->key_data;
+	struct blkcipher_desc desc = {
+		.tfm = tfm,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP,
+	};
 	struct scatterlist sg_out = { NULL, };
 	struct scatterlist sg_in = { NULL, };
 
 	encdec_cbc_t encdecfunc;
 	struct page *in_page, *out_page;
 	unsigned in_offs, out_offs;
+	int err;
 
 	if (cmd == READ) {
 		in_page = raw_page;
 		in_offs = raw_off;
 		out_page = loop_page;
 		out_offs = loop_off;
-		encdecfunc = tfm->crt_u.cipher.cit_decrypt_iv;
+		encdecfunc = crypto_blkcipher_crt(tfm)->decrypt;
 	} else {
 		in_page = loop_page;
 		in_offs = loop_off;
 		out_page = raw_page;
 		out_offs = raw_off;
-		encdecfunc = tfm->crt_u.cipher.cit_encrypt_iv;
+		encdecfunc = crypto_blkcipher_crt(tfm)->encrypt;
 	}
 
 	while (size > 0) {
@@ -183,7 +154,10 @@ cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
 		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
-		encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv);
+		desc.info = iv;
+		err = encdecfunc(&desc, &sg_out, &sg_in, sz);
+		if (err)
+			return err;
 
 		IV++;
 		size -= sz;
@@ -194,32 +168,6 @@ cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
 	return 0;
 }
 
-static int
-cryptoloop_transfer(struct loop_device *lo, int cmd,
-		    struct page *raw_page, unsigned raw_off,
-		    struct page *loop_page, unsigned loop_off,
-		    int size, sector_t IV)
-{
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
-	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB)
-	{
-		lo->transfer = cryptoloop_transfer_ecb;
-		return cryptoloop_transfer_ecb(lo, cmd, raw_page, raw_off,
-					       loop_page, loop_off, size, IV);
-	}	
-	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC)
-	{	
-		lo->transfer = cryptoloop_transfer_cbc;
-		return cryptoloop_transfer_cbc(lo, cmd, raw_page, raw_off,
-					       loop_page, loop_off, size, IV);
-	}
-	
-	/*  This is not supposed to happen */
-
-	printk( KERN_ERR "cryptoloop: unsupported cipher mode in cryptoloop_transfer!\n");
-	return -EINVAL;
-}
-
 static int
 cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg)
 {
@@ -229,9 +177,9 @@ cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg)
 static int
 cryptoloop_release(struct loop_device *lo)
 {
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
+	struct crypto_blkcipher *tfm = lo->key_data;
 	if (tfm != NULL) {
-		crypto_free_tfm(tfm);
+		crypto_free_blkcipher(tfm);
 		lo->key_data = NULL;
 		return 0;
 	}
-- 
GitLab


From d1806f6a97a536b043fe50e6d8a25b061755cf50 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Aug 2006 20:29:17 +1000
Subject: [PATCH 0317/1063] [BLOCK] dm-crypt: Use block ciphers where
 applicable

This patch converts dm-crypt to use the new block cipher type where
applicable.  It also changes simple cipher operations to use the new
encrypt_one/decrypt_one interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/md/dm-crypt.c | 108 ++++++++++++++++++------------------------
 1 file changed, 47 insertions(+), 61 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6022ed12a795d..91d4081cb00ed 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -5,6 +5,7 @@
  * This file is released under the GPL.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -78,11 +79,13 @@ struct crypt_config {
 	 */
 	struct crypt_iv_operations *iv_gen_ops;
 	char *iv_mode;
-	void *iv_gen_private;
+	struct crypto_cipher *iv_gen_private;
 	sector_t iv_offset;
 	unsigned int iv_size;
 
-	struct crypto_tfm *tfm;
+	char cipher[CRYPTO_MAX_ALG_NAME];
+	char chainmode[CRYPTO_MAX_ALG_NAME];
+	struct crypto_blkcipher *tfm;
 	unsigned int key_size;
 	u8 key[0];
 };
@@ -118,11 +121,12 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 	                      const char *opts)
 {
-	struct crypto_tfm *essiv_tfm;
+	struct crypto_cipher *essiv_tfm;
 	struct crypto_tfm *hash_tfm;
 	struct scatterlist sg;
 	unsigned int saltsize;
 	u8 *salt;
+	int err;
 
 	if (opts == NULL) {
 		ti->error = "Digest algorithm missing for ESSIV mode";
@@ -155,51 +159,44 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 	crypto_free_tfm(hash_tfm);
 
 	/* Setup the essiv_tfm with the given salt */
-	essiv_tfm = crypto_alloc_tfm(crypto_tfm_alg_name(cc->tfm),
-	                             CRYPTO_TFM_MODE_ECB |
-	                             CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (essiv_tfm == NULL) {
+	essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(essiv_tfm)) {
 		ti->error = "Error allocating crypto tfm for ESSIV";
 		kfree(salt);
-		return -EINVAL;
+		return PTR_ERR(essiv_tfm);
 	}
-	if (crypto_tfm_alg_blocksize(essiv_tfm)
-	    != crypto_tfm_alg_ivsize(cc->tfm)) {
+	if (crypto_cipher_blocksize(essiv_tfm) !=
+	    crypto_blkcipher_ivsize(cc->tfm)) {
 		ti->error = "Block size of ESSIV cipher does "
 			        "not match IV size of block cipher";
-		crypto_free_tfm(essiv_tfm);
+		crypto_free_cipher(essiv_tfm);
 		kfree(salt);
 		return -EINVAL;
 	}
-	if (crypto_cipher_setkey(essiv_tfm, salt, saltsize) < 0) {
+	err = crypto_cipher_setkey(essiv_tfm, salt, saltsize);
+	if (err) {
 		ti->error = "Failed to set key for ESSIV cipher";
-		crypto_free_tfm(essiv_tfm);
+		crypto_free_cipher(essiv_tfm);
 		kfree(salt);
-		return -EINVAL;
+		return err;
 	}
 	kfree(salt);
 
-	cc->iv_gen_private = (void *)essiv_tfm;
+	cc->iv_gen_private = essiv_tfm;
 	return 0;
 }
 
 static void crypt_iv_essiv_dtr(struct crypt_config *cc)
 {
-	crypto_free_tfm((struct crypto_tfm *)cc->iv_gen_private);
+	crypto_free_cipher(cc->iv_gen_private);
 	cc->iv_gen_private = NULL;
 }
 
 static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 {
-	struct scatterlist sg;
-
 	memset(iv, 0, cc->iv_size);
 	*(u64 *)iv = cpu_to_le64(sector);
-
-	sg_set_buf(&sg, iv, cc->iv_size);
-	crypto_cipher_encrypt((struct crypto_tfm *)cc->iv_gen_private,
-	                      &sg, &sg, cc->iv_size);
-
+	crypto_cipher_encrypt_one(cc->iv_gen_private, iv, iv);
 	return 0;
 }
 
@@ -220,6 +217,11 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
                           int write, sector_t sector)
 {
 	u8 iv[cc->iv_size];
+	struct blkcipher_desc desc = {
+		.tfm = cc->tfm,
+		.info = iv,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP,
+	};
 	int r;
 
 	if (cc->iv_gen_ops) {
@@ -228,14 +230,14 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
 			return r;
 
 		if (write)
-			r = crypto_cipher_encrypt_iv(cc->tfm, out, in, length, iv);
+			r = crypto_blkcipher_encrypt_iv(&desc, out, in, length);
 		else
-			r = crypto_cipher_decrypt_iv(cc->tfm, out, in, length, iv);
+			r = crypto_blkcipher_decrypt_iv(&desc, out, in, length);
 	} else {
 		if (write)
-			r = crypto_cipher_encrypt(cc->tfm, out, in, length);
+			r = crypto_blkcipher_encrypt(&desc, out, in, length);
 		else
-			r = crypto_cipher_decrypt(cc->tfm, out, in, length);
+			r = crypto_blkcipher_decrypt(&desc, out, in, length);
 	}
 
 	return r;
@@ -510,13 +512,12 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size)
 static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct crypt_config *cc;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 	char *tmp;
 	char *cipher;
 	char *chainmode;
 	char *ivmode;
 	char *ivopts;
-	unsigned int crypto_flags;
 	unsigned int key_size;
 	unsigned long long tmpll;
 
@@ -556,31 +557,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		ivmode = "plain";
 	}
 
-	/* Choose crypto_flags according to chainmode */
-	if (strcmp(chainmode, "cbc") == 0)
-		crypto_flags = CRYPTO_TFM_MODE_CBC;
-	else if (strcmp(chainmode, "ecb") == 0)
-		crypto_flags = CRYPTO_TFM_MODE_ECB;
-	else {
-		ti->error = "Unknown chaining mode";
+	if (strcmp(chainmode, "ecb") && !ivmode) {
+		ti->error = "This chaining mode requires an IV mechanism";
 		goto bad1;
 	}
 
-	if (crypto_flags != CRYPTO_TFM_MODE_ECB && !ivmode) {
-		ti->error = "This chaining mode requires an IV mechanism";
+	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, 
+		     cipher) >= CRYPTO_MAX_ALG_NAME) {
+		ti->error = "Chain mode + cipher name is too long";
 		goto bad1;
 	}
 
-	tfm = crypto_alloc_tfm(cipher, crypto_flags | CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (!tfm) {
+	tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
 		ti->error = "Error allocating crypto tfm";
 		goto bad1;
 	}
-	if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER) {
-		ti->error = "Expected cipher algorithm";
-		goto bad2;
-	}
 
+	strcpy(cc->cipher, cipher);
+	strcpy(cc->chainmode, chainmode);
 	cc->tfm = tfm;
 
 	/*
@@ -603,12 +598,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	    cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
 		goto bad2;
 
-	if (tfm->crt_cipher.cit_decrypt_iv && tfm->crt_cipher.cit_encrypt_iv)
+	cc->iv_size = crypto_blkcipher_ivsize(tfm);
+	if (cc->iv_size)
 		/* at least a 64 bit sector number should fit in our buffer */
-		cc->iv_size = max(crypto_tfm_alg_ivsize(tfm),
+		cc->iv_size = max(cc->iv_size,
 		                  (unsigned int)(sizeof(u64) / sizeof(u8)));
 	else {
-		cc->iv_size = 0;
 		if (cc->iv_gen_ops) {
 			DMWARN("Selected cipher does not support IVs");
 			if (cc->iv_gen_ops->dtr)
@@ -629,7 +624,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad4;
 	}
 
-	if (tfm->crt_cipher.cit_setkey(tfm, cc->key, key_size) < 0) {
+	if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) {
 		ti->error = "Error setting key";
 		goto bad5;
 	}
@@ -675,7 +670,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
 bad2:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 bad1:
 	/* Must zero key material before freeing */
 	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
@@ -693,7 +688,7 @@ static void crypt_dtr(struct dm_target *ti)
 	kfree(cc->iv_mode);
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
-	crypto_free_tfm(cc->tfm);
+	crypto_free_blkcipher(cc->tfm);
 	dm_put_device(ti, cc->dev);
 
 	/* Must zero key material before freeing */
@@ -858,18 +853,9 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 		break;
 
 	case STATUSTYPE_TABLE:
-		cipher = crypto_tfm_alg_name(cc->tfm);
+		cipher = crypto_blkcipher_name(cc->tfm);
 
-		switch(cc->tfm->crt_cipher.cit_mode) {
-		case CRYPTO_TFM_MODE_CBC:
-			chainmode = "cbc";
-			break;
-		case CRYPTO_TFM_MODE_ECB:
-			chainmode = "ecb";
-			break;
-		default:
-			BUG();
-		}
+		chainmode = cc->chainmode;
 
 		if (cc->iv_mode)
 			DMEMIT("%s-%s-%s ", cipher, chainmode, cc->iv_mode);
-- 
GitLab


From 04ff12609445c7b462d7fc7f2d30dad442c922f3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2006 08:50:00 +1000
Subject: [PATCH 0318/1063] [IPSEC]: Add compatibility algorithm name support

This patch adds a compatibility name field for each IPsec algorithm.  This
is needed when parameterised algorithms are used.  For example, "md5" will
become "hmac(md5)", and "aes" will become "cbc(aes)".

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/net/xfrm.h   | 1 +
 net/xfrm/xfrm_algo.c | 3 ++-
 net/xfrm/xfrm_user.c | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10396b4bde147..e9114e41affce 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -854,6 +854,7 @@ struct xfrm_algo_comp_info {
 
 struct xfrm_algo_desc {
 	char *name;
+	char *compat;
 	u8 available:1;
 	union {
 		struct xfrm_algo_auth_info auth;
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 04e1aea58bc94..b68974b387416 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -359,7 +359,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
 		return NULL;
 
 	for (i = 0; i < entries; i++) {
-		if (strcmp(name, list[i].name))
+		if (strcmp(name, list[i].name) &&
+		    (!list[i].compat || strcmp(name, list[i].compat)))
 			continue;
 
 		if (list[i].available)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7d18ca03c80d3..fa79ddc4239e0 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -213,6 +213,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
 		return -ENOMEM;
 
 	memcpy(p, ualg, len);
+	strcpy(p->alg_name, algo->name);
 	*algpp = p;
 	return 0;
 }
-- 
GitLab


From 6b7326c8497f954c2cfcb4c49fe42be5b80887bc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 30 Jul 2006 15:41:01 +1000
Subject: [PATCH 0319/1063] [IPSEC] ESP: Use block ciphers where applicable

This patch converts IPsec/ESP to use the new block cipher type where
applicable.  Similar to the HMAC conversion, existing algorithm names
have been kept for compatibility.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/net/esp.h    |  2 +-
 net/ipv4/Kconfig     |  1 +
 net/ipv4/esp4.c      | 49 ++++++++++++++++++++++++++------------------
 net/ipv6/Kconfig     |  1 +
 net/ipv6/esp6.c      | 48 +++++++++++++++++++++++++------------------
 net/xfrm/xfrm_algo.c | 24 ++++++++++++++--------
 6 files changed, 76 insertions(+), 49 deletions(-)

diff --git a/include/net/esp.h b/include/net/esp.h
index 6eb837973c84f..af2ff18700c70 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -22,7 +22,7 @@ struct esp_data
 		 * >= crypto_tfm_alg_ivsize(tfm). */
 		int			ivlen;
 		int			padlen;		/* 0..255 */
-		struct crypto_tfm	*tfm;		/* crypto handle */
+		struct crypto_blkcipher	*tfm;		/* crypto handle */
 	} conf;
 
 	/* Integrity. It is active when icv_full_len != 0 */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8514106761b0c..3b5d504a74be6 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -386,6 +386,7 @@ config INET_ESP
 	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
+	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
 	---help---
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fc2f8ce441def..7c63ae4947429 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,3 +1,4 @@
+#include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -16,7 +17,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int err;
 	struct iphdr *top_iph;
 	struct ip_esp_hdr *esph;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
+	struct blkcipher_desc desc;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
 	int blksize;
@@ -36,7 +38,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	esp = x->data;
 	alen = esp->auth.icv_trunc_len;
 	tfm = esp->conf.tfm;
-	blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+	desc.tfm = tfm;
+	desc.flags = 0;
+	blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	clen = ALIGN(clen + 2, blksize);
 	if (esp->conf.padlen)
 		clen = ALIGN(clen, esp->conf.padlen);
@@ -92,7 +96,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	xfrm_aevent_doreplay(x);
 
 	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
 
 	do {
 		struct scatterlist *sg = &esp->sgbuf[0];
@@ -103,14 +107,17 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 				goto error;
 		}
 		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
-		crypto_cipher_encrypt(tfm, sg, sg, clen);
+		err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
 	} while (0);
 
+	if (unlikely(err))
+		goto error;
+
 	if (esp->conf.ivlen) {
-		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
-		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
+		crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
 	}
 
 	if (esp->auth.icv_full_len) {
@@ -121,8 +128,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	ip_send_check(top_iph);
 
-	err = 0;
-
 error:
 	return err;
 }
@@ -137,8 +142,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph;
 	struct ip_esp_hdr *esph;
 	struct esp_data *esp = x->data;
+	struct crypto_blkcipher *tfm = esp->conf.tfm;
+	struct blkcipher_desc desc = { .tfm = tfm };
 	struct sk_buff *trailer;
-	int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	int alen = esp->auth.icv_trunc_len;
 	int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
 	int nfrags;
@@ -146,6 +153,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	u8 nexthdr[2];
 	struct scatterlist *sg;
 	int padlen;
+	int err;
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
 		goto out;
@@ -178,7 +186,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* Get ivec. This can be wrong, check against another impls. */
 	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+		crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
 
 	sg = &esp->sgbuf[0];
 
@@ -188,9 +196,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 			goto out;
 	}
 	skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
-	crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+	err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 	if (unlikely(sg != &esp->sgbuf[0]))
 		kfree(sg);
+	if (unlikely(err))
+		return err;
 
 	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 		BUG();
@@ -254,7 +264,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
 
 	if (x->props.mode) {
 		mtu = ALIGN(mtu + 2, blksize);
@@ -293,7 +303,7 @@ static void esp_destroy(struct xfrm_state *x)
 	if (!esp)
 		return;
 
-	crypto_free_tfm(esp->conf.tfm);
+	crypto_free_blkcipher(esp->conf.tfm);
 	esp->conf.tfm = NULL;
 	kfree(esp->conf.ivec);
 	esp->conf.ivec = NULL;
@@ -307,6 +317,7 @@ static void esp_destroy(struct xfrm_state *x)
 static int esp_init_state(struct xfrm_state *x)
 {
 	struct esp_data *esp = NULL;
+	struct crypto_blkcipher *tfm;
 
 	/* null auth and encryption can have zero length keys */
 	if (x->aalg) {
@@ -351,13 +362,11 @@ static int esp_init_state(struct xfrm_state *x)
 	}
 	esp->conf.key = x->ealg->alg_key;
 	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
-	if (x->props.ealgo == SADB_EALG_NULL)
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
-	else
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
-	if (esp->conf.tfm == NULL)
+	tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		goto error;
-	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	esp->conf.tfm = tfm;
+	esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
 	esp->conf.padlen = 0;
 	if (esp->conf.ivlen) {
 		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
@@ -365,7 +374,7 @@ static int esp_init_state(struct xfrm_state *x)
 			goto error;
 		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
 	}
-	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
 	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
 	if (x->props.mode)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e923d4dea4188..0ba06c0c5d390 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -77,6 +77,7 @@ config INET6_ESP
 	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
+	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
 	---help---
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a278d5e862fe3..46a7e687948ec 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -24,6 +24,7 @@
  * 	This file is derived from net/ipv4/esp.c
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -44,7 +45,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdr_len;
 	struct ipv6hdr *top_iph;
 	struct ipv6_esp_hdr *esph;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
+	struct blkcipher_desc desc;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
 	int blksize;
@@ -67,7 +69,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	alen = esp->auth.icv_trunc_len;
 	tfm = esp->conf.tfm;
-	blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+	desc.tfm = tfm;
+	desc.flags = 0;
+	blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	clen = ALIGN(clen + 2, blksize);
 	if (esp->conf.padlen)
 		clen = ALIGN(clen, esp->conf.padlen);
@@ -96,7 +100,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	xfrm_aevent_doreplay(x);
 
 	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
 
 	do {
 		struct scatterlist *sg = &esp->sgbuf[0];
@@ -107,14 +111,17 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 				goto error;
 		}
 		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
-		crypto_cipher_encrypt(tfm, sg, sg, clen);
+		err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
 	} while (0);
 
+	if (unlikely(err))
+		goto error;
+
 	if (esp->conf.ivlen) {
-		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
-		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
+		crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
 	}
 
 	if (esp->auth.icv_full_len) {
@@ -123,8 +130,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 		pskb_put(skb, trailer, alen);
 	}
 
-	err = 0;
-
 error:
 	return err;
 }
@@ -134,8 +139,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct ipv6hdr *iph;
 	struct ipv6_esp_hdr *esph;
 	struct esp_data *esp = x->data;
+	struct crypto_blkcipher *tfm = esp->conf.tfm;
+	struct blkcipher_desc desc = { .tfm = tfm };
 	struct sk_buff *trailer;
-	int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	int alen = esp->auth.icv_trunc_len;
 	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
 
@@ -182,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* Get ivec. This can be wrong, check against another impls. */
 	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+		crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
 
         {
 		u8 nexthdr[2];
@@ -197,9 +204,11 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 			}
 		}
 		skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
-		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+		ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
+		if (unlikely(ret))
+			goto out;
 
 		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 			BUG();
@@ -225,7 +234,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
 
 	if (x->props.mode) {
 		mtu = ALIGN(mtu + 2, blksize);
@@ -266,7 +275,7 @@ static void esp6_destroy(struct xfrm_state *x)
 	if (!esp)
 		return;
 
-	crypto_free_tfm(esp->conf.tfm);
+	crypto_free_blkcipher(esp->conf.tfm);
 	esp->conf.tfm = NULL;
 	kfree(esp->conf.ivec);
 	esp->conf.ivec = NULL;
@@ -280,6 +289,7 @@ static void esp6_destroy(struct xfrm_state *x)
 static int esp6_init_state(struct xfrm_state *x)
 {
 	struct esp_data *esp = NULL;
+	struct crypto_blkcipher *tfm;
 
 	/* null auth and encryption can have zero length keys */
 	if (x->aalg) {
@@ -327,13 +337,11 @@ static int esp6_init_state(struct xfrm_state *x)
 	}
 	esp->conf.key = x->ealg->alg_key;
 	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
-	if (x->props.ealgo == SADB_EALG_NULL)
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
-	else
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
-	if (esp->conf.tfm == NULL)
+	tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		goto error;
-	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	esp->conf.tfm = tfm;
+	esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
 	esp->conf.padlen = 0;
 	if (esp->conf.ivlen) {
 		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
@@ -341,7 +349,7 @@ static int esp6_init_state(struct xfrm_state *x)
 			goto error;
 		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
 	}
-	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
 	x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
 	if (x->props.mode)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index b68974b387416..9b03d8497fbaa 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -118,7 +118,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 
 static struct xfrm_algo_desc ealg_list[] = {
 {
-	.name = "cipher_null",
+	.name = "ecb(cipher_null)",
+	.compat = "cipher_null",
 	
 	.uinfo = {
 		.encr = {
@@ -135,7 +136,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 	}
 },
 {
-	.name = "des",
+	.name = "cbc(des)",
+	.compat = "des",
 
 	.uinfo = {
 		.encr = {
@@ -152,7 +154,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 	}
 },
 {
-	.name = "des3_ede",
+	.name = "cbc(des3_ede)",
+	.compat = "des3_ede",
 
 	.uinfo = {
 		.encr = {
@@ -169,7 +172,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 	}
 },
 {
-	.name = "cast128",
+	.name = "cbc(cast128)",
+	.compat = "cast128",
 
 	.uinfo = {
 		.encr = {
@@ -186,7 +190,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 	}
 },
 {
-	.name = "blowfish",
+	.name = "cbc(blowfish)",
+	.compat = "blowfish",
 
 	.uinfo = {
 		.encr = {
@@ -203,7 +208,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 	}
 },
 {
-	.name = "aes",
+	.name = "cbc(aes)",
+	.compat = "aes",
 
 	.uinfo = {
 		.encr = {
@@ -220,7 +226,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 	}
 },
 {
-        .name = "serpent",
+        .name = "cbc(serpent)",
+        .compat = "serpent",
 
         .uinfo = {
                 .encr = {
@@ -237,7 +244,8 @@ static struct xfrm_algo_desc ealg_list[] = {
         }
 },
 {
-        .name = "twofish",
+        .name = "cbc(twofish)",
+        .compat = "twofish",
                  
         .uinfo = {
                 .encr = {
-- 
GitLab


From 378c6697a282c383d89428380a3405bf95189347 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Aug 2006 20:33:54 +1000
Subject: [PATCH 0320/1063] [SUNRPC] GSS: Use block ciphers where applicable

This patch converts SUNRPC/GSS to use the new block cipher type where
applicable.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/sunrpc/gss_krb5.h       | 19 ++++-----
 include/linux/sunrpc/gss_spkm3.h      |  4 +-
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 57 +++++++++++++++------------
 net/sunrpc/auth_gss/gss_krb5_mech.c   | 24 +++++------
 net/sunrpc/auth_gss/gss_krb5_seqnum.c |  4 +-
 net/sunrpc/auth_gss/gss_krb5_wrap.c   |  4 +-
 net/sunrpc/auth_gss/gss_spkm3_mech.c  | 29 +++++++-------
 7 files changed, 76 insertions(+), 65 deletions(-)

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 1279280d71966..e30ba201910ae 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -46,8 +46,8 @@ struct krb5_ctx {
 	unsigned char		seed[16];
 	int			signalg;
 	int			sealalg;
-	struct crypto_tfm	*enc;
-	struct crypto_tfm	*seq;
+	struct crypto_blkcipher	*enc;
+	struct crypto_blkcipher	*seq;
 	s32			endtime;
 	u32			seq_send;
 	struct xdr_netobj	mech_used;
@@ -136,26 +136,27 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset,
 
 
 u32
-krb5_encrypt(struct crypto_tfm * key,
+krb5_encrypt(struct crypto_blkcipher *key,
 	     void *iv, void *in, void *out, int length);
 
 u32
-krb5_decrypt(struct crypto_tfm * key,
+krb5_decrypt(struct crypto_blkcipher *key,
 	     void *iv, void *in, void *out, int length); 
 
 int
-gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset,
-		struct page **pages);
+gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *outbuf,
+		    int offset, struct page **pages);
 
 int
-gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset);
+gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf,
+		    int offset);
 
 s32
-krb5_make_seq_num(struct crypto_tfm * key,
+krb5_make_seq_num(struct crypto_blkcipher *key,
 		int direction,
 		s32 seqnum, unsigned char *cksum, unsigned char *buf);
 
 s32
-krb5_get_seq_num(struct crypto_tfm * key,
+krb5_get_seq_num(struct crypto_blkcipher *key,
 	       unsigned char *cksum,
 	       unsigned char *buf, int *direction, s32 * seqnum);
diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h
index 336e218c27827..2cf3fbb40b4fe 100644
--- a/include/linux/sunrpc/gss_spkm3.h
+++ b/include/linux/sunrpc/gss_spkm3.h
@@ -19,9 +19,9 @@ struct spkm3_ctx {
 	unsigned int		req_flags ;
 	struct xdr_netobj	share_key;
 	int			conf_alg;
-	struct crypto_tfm*	derived_conf_key;
+	struct crypto_blkcipher	*derived_conf_key;
 	int			intg_alg;
-	struct crypto_tfm*	derived_integ_key;
+	struct crypto_blkcipher	*derived_integ_key;
 	int			keyestb_alg;   /* alg used to get share_key */
 	int			owf_alg;   /* one way function */
 };
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 76b969e6904fd..57192dfe30656 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -49,7 +49,7 @@
 
 u32
 krb5_encrypt(
-	struct crypto_tfm *tfm,
+	struct crypto_blkcipher *tfm,
 	void * iv,
 	void * in,
 	void * out,
@@ -58,26 +58,27 @@ krb5_encrypt(
 	u32 ret = -EINVAL;
         struct scatterlist sg[1];
 	u8 local_iv[16] = {0};
+	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
 	dprintk("RPC:      krb5_encrypt: input data:\n");
 	print_hexl((u32 *)in, length, 0);
 
-	if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+	if (length % crypto_blkcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_tfm_alg_ivsize(tfm) > 16) {
+	if (crypto_blkcipher_ivsize(tfm) > 16) {
 		dprintk("RPC:      gss_k5encrypt: tfm iv size to large %d\n",
-		         crypto_tfm_alg_ivsize(tfm));
+		         crypto_blkcipher_ivsize(tfm));
 		goto out;
 	}
 
 	if (iv)
-		memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_set_buf(sg, out, length);
 
-	ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv);
+	ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
 
 	dprintk("RPC:      krb5_encrypt: output data:\n");
 	print_hexl((u32 *)out, length, 0);
@@ -90,7 +91,7 @@ EXPORT_SYMBOL(krb5_encrypt);
 
 u32
 krb5_decrypt(
-     struct crypto_tfm *tfm,
+     struct crypto_blkcipher *tfm,
      void * iv,
      void * in,
      void * out,
@@ -99,25 +100,26 @@ krb5_decrypt(
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[16] = {0};
+	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
 	dprintk("RPC:      krb5_decrypt: input data:\n");
 	print_hexl((u32 *)in, length, 0);
 
-	if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+	if (length % crypto_blkcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_tfm_alg_ivsize(tfm) > 16) {
+	if (crypto_blkcipher_ivsize(tfm) > 16) {
 		dprintk("RPC:      gss_k5decrypt: tfm iv size to large %d\n",
-			crypto_tfm_alg_ivsize(tfm));
+			crypto_blkcipher_ivsize(tfm));
 		goto out;
 	}
 	if (iv)
-		memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm));
+		memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_set_buf(sg, out, length);
 
-	ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv);
+	ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
 
 	dprintk("RPC:      krb5_decrypt: output_data:\n");
 	print_hexl((u32 *)out, length, 0);
@@ -240,7 +242,7 @@ EXPORT_SYMBOL(make_checksum);
 
 struct encryptor_desc {
 	u8 iv[8]; /* XXX hard-coded blocksize */
-	struct crypto_tfm *tfm;
+	struct blkcipher_desc desc;
 	int pos;
 	struct xdr_buf *outbuf;
 	struct page **pages;
@@ -285,8 +287,8 @@ encryptor(struct scatterlist *sg, void *data)
 	if (thislen == 0)
 		return 0;
 
-	ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
-					thislen, desc->iv);
+	ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags,
+					  desc->infrags, thislen);
 	if (ret)
 		return ret;
 	if (fraglen) {
@@ -305,16 +307,18 @@ encryptor(struct scatterlist *sg, void *data)
 }
 
 int
-gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
-		struct page **pages)
+gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+		    int offset, struct page **pages)
 {
 	int ret;
 	struct encryptor_desc desc;
 
-	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
-	desc.tfm = tfm;
+	desc.desc.tfm = tfm;
+	desc.desc.info = desc.iv;
+	desc.desc.flags = 0;
 	desc.pos = offset;
 	desc.outbuf = buf;
 	desc.pages = pages;
@@ -329,7 +333,7 @@ EXPORT_SYMBOL(gss_encrypt_xdr_buf);
 
 struct decryptor_desc {
 	u8 iv[8]; /* XXX hard-coded blocksize */
-	struct crypto_tfm *tfm;
+	struct blkcipher_desc desc;
 	struct scatterlist frags[4];
 	int fragno;
 	int fraglen;
@@ -355,8 +359,8 @@ decryptor(struct scatterlist *sg, void *data)
 	if (thislen == 0)
 		return 0;
 
-	ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
-					thislen, desc->iv);
+	ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags,
+					  desc->frags, thislen);
 	if (ret)
 		return ret;
 	if (fraglen) {
@@ -373,15 +377,18 @@ decryptor(struct scatterlist *sg, void *data)
 }
 
 int
-gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
+gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+		    int offset)
 {
 	struct decryptor_desc desc;
 
 	/* XXXJBF: */
-	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
-	desc.tfm = tfm;
+	desc.desc.tfm = tfm;
+	desc.desc.info = desc.iv;
+	desc.desc.flags = 0;
 	desc.fragno = 0;
 	desc.fraglen = 0;
 	return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 70e1e53a632b1..325e72e4fd31c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -34,6 +34,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -78,10 +79,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 }
 
 static inline const void *
-get_key(const void *p, const void *end, struct crypto_tfm **res)
+get_key(const void *p, const void *end, struct crypto_blkcipher **res)
 {
 	struct xdr_netobj	key;
-	int			alg, alg_mode;
+	int			alg;
 	char			*alg_name;
 
 	p = simple_get_bytes(p, end, &alg, sizeof(alg));
@@ -93,18 +94,19 @@ get_key(const void *p, const void *end, struct crypto_tfm **res)
 
 	switch (alg) {
 		case ENCTYPE_DES_CBC_RAW:
-			alg_name = "des";
-			alg_mode = CRYPTO_TFM_MODE_CBC;
+			alg_name = "cbc(des)";
 			break;
 		default:
 			printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
 			goto out_err_free_key;
 	}
-	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(*res)) {
 		printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
+		*res = NULL;
 		goto out_err_free_key;
 	}
-	if (crypto_cipher_setkey(*res, key.data, key.len)) {
+	if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
 		printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
 		goto out_err_free_tfm;
 	}
@@ -113,7 +115,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res)
 	return p;
 
 out_err_free_tfm:
-	crypto_free_tfm(*res);
+	crypto_free_blkcipher(*res);
 out_err_free_key:
 	kfree(key.data);
 	p = ERR_PTR(-EINVAL);
@@ -172,9 +174,9 @@ gss_import_sec_context_kerberos(const void *p,
 	return 0;
 
 out_err_free_key2:
-	crypto_free_tfm(ctx->seq);
+	crypto_free_blkcipher(ctx->seq);
 out_err_free_key1:
-	crypto_free_tfm(ctx->enc);
+	crypto_free_blkcipher(ctx->enc);
 out_err_free_mech:
 	kfree(ctx->mech_used.data);
 out_err_free_ctx:
@@ -187,8 +189,8 @@ static void
 gss_delete_sec_context_kerberos(void *internal_ctx) {
 	struct krb5_ctx *kctx = internal_ctx;
 
-	crypto_free_tfm(kctx->seq);
-	crypto_free_tfm(kctx->enc);
+	crypto_free_blkcipher(kctx->seq);
+	crypto_free_blkcipher(kctx->enc);
 	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c53ead39118df..c604baf3a5f69 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -41,7 +41,7 @@
 #endif
 
 s32
-krb5_make_seq_num(struct crypto_tfm *key,
+krb5_make_seq_num(struct crypto_blkcipher *key,
 		int direction,
 		s32 seqnum,
 		unsigned char *cksum, unsigned char *buf)
@@ -62,7 +62,7 @@ krb5_make_seq_num(struct crypto_tfm *key,
 }
 
 s32
-krb5_get_seq_num(struct crypto_tfm *key,
+krb5_get_seq_num(struct crypto_blkcipher *key,
 	       unsigned char *cksum,
 	       unsigned char *buf,
 	       int *direction, s32 * seqnum)
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 89d1f3e14128c..f179415d0c38e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 		goto out_err;
 	}
 
-	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	blocksize = crypto_blkcipher_blocksize(kctx->enc);
 	gss_krb5_add_padding(buf, offset, blocksize);
 	BUG_ON((buf->len - offset) % blocksize);
 	plainlen = blocksize + buf->len - offset;
@@ -346,7 +346,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 	/* Copy the data back to the right position.  XXX: Would probably be
 	 * better to copy and encrypt at the same time. */
 
-	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	blocksize = crypto_blkcipher_blocksize(kctx->enc);
 	data_start = ptr + 22 + blocksize;
 	orig_start = buf->head[0].iov_base + offset;
 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 88dcb52d171b6..bdedf456bc17d 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -34,6 +34,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -83,10 +84,11 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 }
 
 static inline const void *
-get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
+get_key(const void *p, const void *end, struct crypto_blkcipher **res,
+	int *resalg)
 {
 	struct xdr_netobj	key = { 0 };
-	int			alg_mode,setkey = 0;
+	int			setkey = 0;
 	char			*alg_name;
 
 	p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
@@ -98,14 +100,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
 
 	switch (*resalg) {
 		case NID_des_cbc:
-			alg_name = "des";
-			alg_mode = CRYPTO_TFM_MODE_CBC;
+			alg_name = "cbc(des)";
 			setkey = 1;
 			break;
 		case NID_cast5_cbc:
 			/* XXXX here in name only, not used */
-			alg_name = "cast5";
-			alg_mode = CRYPTO_TFM_MODE_CBC;
+			alg_name = "cbc(cast5)";
 			setkey = 0; /* XXX will need to set to 1 */
 			break;
 		case NID_md5:
@@ -113,19 +113,20 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
 				dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
 			}
 			alg_name = "md5";
-			alg_mode = 0;
 			setkey = 0;
 			break;
 		default:
 			dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
 			goto out_err_free_key;
 	}
-	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(*res)) {
 		printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name);
+		*res = NULL;
 		goto out_err_free_key;
 	}
 	if (setkey) {
-		if (crypto_cipher_setkey(*res, key.data, key.len)) {
+		if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
 			printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name);
 			goto out_err_free_tfm;
 		}
@@ -136,7 +137,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
 	return p;
 
 out_err_free_tfm:
-	crypto_free_tfm(*res);
+	crypto_free_blkcipher(*res);
 out_err_free_key:
 	if(key.len > 0)
 		kfree(key.data);
@@ -204,9 +205,9 @@ gss_import_sec_context_spkm3(const void *p, size_t len,
 	return 0;
 
 out_err_free_key2:
-	crypto_free_tfm(ctx->derived_integ_key);
+	crypto_free_blkcipher(ctx->derived_integ_key);
 out_err_free_key1:
-	crypto_free_tfm(ctx->derived_conf_key);
+	crypto_free_blkcipher(ctx->derived_conf_key);
 out_err_free_s_key:
 	kfree(ctx->share_key.data);
 out_err_free_mech:
@@ -223,8 +224,8 @@ static void
 gss_delete_sec_context_spkm3(void *internal_ctx) {
 	struct spkm3_ctx *sctx = internal_ctx;
 
-	crypto_free_tfm(sctx->derived_integ_key);
-	crypto_free_tfm(sctx->derived_conf_key);
+	crypto_free_blkcipher(sctx->derived_integ_key);
+	crypto_free_blkcipher(sctx->derived_conf_key);
 	kfree(sctx->share_key.data);
 	kfree(sctx->mech_used.data);
 	kfree(sctx);
-- 
GitLab


From f12cc2090d721647c23dfce20834f4306db3b77d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Aug 2006 20:36:13 +1000
Subject: [PATCH 0321/1063] [CRYPTO] users: Use block ciphers where applicable

This patch converts all remaining users to use the new block cipher type
where applicable.  It also changes all simple cipher operations to use
the new encrypt_one/decrypt_one interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/net/ppp_mppe.c               | 32 +++++++++++++++-----------
 drivers/net/wireless/airo.c          | 22 ++++++++++--------
 net/ieee80211/ieee80211_crypt_ccmp.c | 32 ++++++++++----------------
 net/ieee80211/ieee80211_crypt_tkip.c | 34 ++++++++++++++++++----------
 net/ieee80211/ieee80211_crypt_wep.c  | 25 +++++++++++---------
 5 files changed, 79 insertions(+), 66 deletions(-)

diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index 51ff9a9d1bb5f..495d8667419a9 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -43,6 +43,7 @@
  *                    deprecated in 2.6
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/version.h>
@@ -95,7 +96,7 @@ static inline void sha_pad_init(struct sha_pad *shapad)
  * State for an MPPE (de)compressor.
  */
 struct ppp_mppe_state {
-	struct crypto_tfm *arc4;
+	struct crypto_blkcipher *arc4;
 	struct crypto_tfm *sha1;
 	unsigned char *sha1_digest;
 	unsigned char master_key[MPPE_MAX_KEY_LEN];
@@ -156,14 +157,15 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
 {
 	unsigned char InterimKey[MPPE_MAX_KEY_LEN];
 	struct scatterlist sg_in[1], sg_out[1];
+	struct blkcipher_desc desc = { .tfm = state->arc4 };
 
 	get_new_key_from_sha(state, InterimKey);
 	if (!initial_key) {
-		crypto_cipher_setkey(state->arc4, InterimKey, state->keylen);
+		crypto_blkcipher_setkey(state->arc4, InterimKey, state->keylen);
 		setup_sg(sg_in, InterimKey, state->keylen);
 		setup_sg(sg_out, state->session_key, state->keylen);
-		if (crypto_cipher_encrypt(state->arc4, sg_out, sg_in,
-				      state->keylen) != 0) {
+		if (crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+					     state->keylen) != 0) {
     		    printk(KERN_WARNING "mppe_rekey: cipher_encrypt failed\n");
 		}
 	} else {
@@ -175,7 +177,7 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
 		state->session_key[1] = 0x26;
 		state->session_key[2] = 0x9e;
 	}
-	crypto_cipher_setkey(state->arc4, state->session_key, state->keylen);
+	crypto_blkcipher_setkey(state->arc4, state->session_key, state->keylen);
 }
 
 /*
@@ -196,9 +198,11 @@ static void *mppe_alloc(unsigned char *options, int optlen)
 
 	memset(state, 0, sizeof(*state));
 
-	state->arc4 = crypto_alloc_tfm("arc4", 0);
-	if (!state->arc4)
+	state->arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(state->arc4)) {
+		state->arc4 = NULL;
 		goto out_free;
+	}
 
 	state->sha1 = crypto_alloc_tfm("sha1", 0);
 	if (!state->sha1)
@@ -231,7 +235,7 @@ static void *mppe_alloc(unsigned char *options, int optlen)
 	    if (state->sha1)
 		crypto_free_tfm(state->sha1);
 	    if (state->arc4)
-		crypto_free_tfm(state->arc4);
+		crypto_free_blkcipher(state->arc4);
 	    kfree(state);
 	out:
 	return NULL;
@@ -249,7 +253,7 @@ static void mppe_free(void *arg)
 	    if (state->sha1)
 		crypto_free_tfm(state->sha1);
 	    if (state->arc4)
-		crypto_free_tfm(state->arc4);
+		crypto_free_blkcipher(state->arc4);
 	    kfree(state);
 	}
 }
@@ -356,6 +360,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf,
 	      int isize, int osize)
 {
 	struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
+	struct blkcipher_desc desc = { .tfm = state->arc4 };
 	int proto;
 	struct scatterlist sg_in[1], sg_out[1];
 
@@ -413,7 +418,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf,
 	/* Encrypt packet */
 	setup_sg(sg_in, ibuf, isize);
 	setup_sg(sg_out, obuf, osize);
-	if (crypto_cipher_encrypt(state->arc4, sg_out, sg_in, isize) != 0) {
+	if (crypto_blkcipher_encrypt(&desc, sg_out, sg_in, isize) != 0) {
 		printk(KERN_DEBUG "crypto_cypher_encrypt failed\n");
 		return -1;
 	}
@@ -462,6 +467,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
 		int osize)
 {
 	struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
+	struct blkcipher_desc desc = { .tfm = state->arc4 };
 	unsigned ccount;
 	int flushed = MPPE_BITS(ibuf) & MPPE_BIT_FLUSHED;
 	int sanity = 0;
@@ -599,7 +605,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
 	 */
 	setup_sg(sg_in, ibuf, 1);
 	setup_sg(sg_out, obuf, 1);
-	if (crypto_cipher_decrypt(state->arc4, sg_out, sg_in, 1) != 0) {
+	if (crypto_blkcipher_decrypt(&desc, sg_out, sg_in, 1) != 0) {
 		printk(KERN_DEBUG "crypto_cypher_decrypt failed\n");
 		return DECOMP_ERROR;
 	}
@@ -619,7 +625,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
 	/* And finally, decrypt the rest of the packet. */
 	setup_sg(sg_in, ibuf + 1, isize - 1);
 	setup_sg(sg_out, obuf + 1, osize - 1);
-	if (crypto_cipher_decrypt(state->arc4, sg_out, sg_in, isize - 1) != 0) {
+	if (crypto_blkcipher_decrypt(&desc, sg_out, sg_in, isize - 1)) {
 		printk(KERN_DEBUG "crypto_cypher_decrypt failed\n");
 		return DECOMP_ERROR;
 	}
@@ -694,7 +700,7 @@ static struct compressor ppp_mppe = {
 static int __init ppp_mppe_init(void)
 {
 	int answer;
-	if (!(crypto_alg_available("arc4", 0) &&
+	if (!(crypto_alg_available("ecb(arc4)", 0) &&
 	      crypto_alg_available("sha1", 0)))
 		return -ENODEV;
 
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index a4dd139427144..170c500169dac 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -19,6 +19,7 @@
 
 ======================================================================*/
 
+#include <linux/err.h>
 #include <linux/init.h>
 
 #include <linux/kernel.h>
@@ -1203,7 +1204,7 @@ struct airo_info {
 	struct iw_spy_data	spy_data;
 	struct iw_public_data	wireless_data;
 	/* MIC stuff */
-	struct crypto_tfm	*tfm;
+	struct crypto_cipher	*tfm;
 	mic_module		mod[2];
 	mic_statistics		micstats;
 	HostRxDesc rxfids[MPI_MAX_FIDS]; // rx/tx/config MPI350 descriptors
@@ -1271,7 +1272,8 @@ static int flashrestart(struct airo_info *ai,struct net_device *dev);
 
 static int RxSeqValid (struct airo_info *ai,miccntx *context,int mcast,u32 micSeq);
 static void MoveWindow(miccntx *context, u32 micSeq);
-static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct crypto_tfm *);
+static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen,
+			   struct crypto_cipher *tfm);
 static void emmh32_init(emmh32_context *context);
 static void emmh32_update(emmh32_context *context, u8 *pOctets, int len);
 static void emmh32_final(emmh32_context *context, u8 digest[4]);
@@ -1339,10 +1341,11 @@ static int micsetup(struct airo_info *ai) {
 	int i;
 
 	if (ai->tfm == NULL)
-	        ai->tfm = crypto_alloc_tfm("aes", CRYPTO_TFM_REQ_MAY_SLEEP);
+	        ai->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
 
-        if (ai->tfm == NULL) {
+        if (IS_ERR(ai->tfm)) {
                 airo_print_err(ai->dev->name, "failed to load transform for AES");
+                ai->tfm = NULL;
                 return ERROR;
         }
 
@@ -1608,7 +1611,8 @@ static void MoveWindow(miccntx *context, u32 micSeq)
 static unsigned char aes_counter[16];
 
 /* expand the key to fill the MMH coefficient array */
-static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct crypto_tfm *tfm)
+static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen,
+			   struct crypto_cipher *tfm)
 {
   /* take the keying material, expand if necessary, truncate at 16-bytes */
   /* run through AES counter mode to generate context->coeff[] */
@@ -1616,7 +1620,6 @@ static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct
 	int i,j;
 	u32 counter;
 	u8 *cipher, plain[16];
-	struct scatterlist sg[1];
 
 	crypto_cipher_setkey(tfm, pkey, 16);
 	counter = 0;
@@ -1627,9 +1630,8 @@ static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct
 		aes_counter[12] = (u8)(counter >> 24);
 		counter++;
 		memcpy (plain, aes_counter, 16);
-		sg_set_buf(sg, plain, 16);
-		crypto_cipher_encrypt(tfm, sg, sg, 16);
-		cipher = kmap(sg->page) + sg->offset;
+		crypto_cipher_encrypt_one(tfm, plain, plain);
+		cipher = plain;
 		for (j=0; (j<16) && (i< (sizeof(context->coeff)/sizeof(context->coeff[0]))); ) {
 			context->coeff[i++] = ntohl(*(u32 *)&cipher[j]);
 			j += 4;
@@ -2432,7 +2434,7 @@ void stop_airo_card( struct net_device *dev, int freeres )
 				ai->shared, ai->shared_dma);
 		}
         }
-	crypto_free_tfm(ai->tfm);
+	crypto_free_cipher(ai->tfm);
 	del_airo_dev( dev );
 	free_netdev( dev );
 }
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index ed90a8af14449..fdfe7704a4692 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -9,6 +9,7 @@
  * more details.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -48,7 +49,7 @@ struct ieee80211_ccmp_data {
 
 	int key_idx;
 
-	struct crypto_tfm *tfm;
+	struct crypto_cipher *tfm;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
 	u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
@@ -56,20 +57,10 @@ struct ieee80211_ccmp_data {
 	u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
 };
 
-static void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm,
-				       const u8 pt[16], u8 ct[16])
+static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm,
+					      const u8 pt[16], u8 ct[16])
 {
-	struct scatterlist src, dst;
-
-	src.page = virt_to_page(pt);
-	src.offset = offset_in_page(pt);
-	src.length = AES_BLOCK_LEN;
-
-	dst.page = virt_to_page(ct);
-	dst.offset = offset_in_page(ct);
-	dst.length = AES_BLOCK_LEN;
-
-	crypto_cipher_encrypt(tfm, &dst, &src, AES_BLOCK_LEN);
+	crypto_cipher_encrypt_one(tfm, ct, pt);
 }
 
 static void *ieee80211_ccmp_init(int key_idx)
@@ -81,10 +72,11 @@ static void *ieee80211_ccmp_init(int key_idx)
 		goto fail;
 	priv->key_idx = key_idx;
 
-	priv->tfm = crypto_alloc_tfm("aes", 0);
-	if (priv->tfm == NULL) {
+	priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm)) {
 		printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate "
 		       "crypto API aes\n");
+		priv->tfm = NULL;
 		goto fail;
 	}
 
@@ -93,7 +85,7 @@ static void *ieee80211_ccmp_init(int key_idx)
       fail:
 	if (priv) {
 		if (priv->tfm)
-			crypto_free_tfm(priv->tfm);
+			crypto_free_cipher(priv->tfm);
 		kfree(priv);
 	}
 
@@ -104,7 +96,7 @@ static void ieee80211_ccmp_deinit(void *priv)
 {
 	struct ieee80211_ccmp_data *_priv = priv;
 	if (_priv && _priv->tfm)
-		crypto_free_tfm(_priv->tfm);
+		crypto_free_cipher(_priv->tfm);
 	kfree(priv);
 }
 
@@ -115,7 +107,7 @@ static inline void xor_block(u8 * b, u8 * a, size_t len)
 		b[i] ^= a[i];
 }
 
-static void ccmp_init_blocks(struct crypto_tfm *tfm,
+static void ccmp_init_blocks(struct crypto_cipher *tfm,
 			     struct ieee80211_hdr_4addr *hdr,
 			     u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0)
 {
@@ -377,7 +369,7 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
 {
 	struct ieee80211_ccmp_data *data = priv;
 	int keyidx;
-	struct crypto_tfm *tfm = data->tfm;
+	struct crypto_cipher *tfm = data->tfm;
 
 	keyidx = data->key_idx;
 	memset(data, 0, sizeof(*data));
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 34dba0ba545de..d60ce9b49b4fe 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -9,6 +9,7 @@
  * more details.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -52,7 +53,7 @@ struct ieee80211_tkip_data {
 
 	int key_idx;
 
-	struct crypto_tfm *tfm_arc4;
+	struct crypto_blkcipher *tfm_arc4;
 	struct crypto_tfm *tfm_michael;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
@@ -85,10 +86,12 @@ static void *ieee80211_tkip_init(int key_idx)
 
 	priv->key_idx = key_idx;
 
-	priv->tfm_arc4 = crypto_alloc_tfm("arc4", 0);
-	if (priv->tfm_arc4 == NULL) {
+	priv->tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
+						CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm_arc4)) {
 		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
 		       "crypto API arc4\n");
+		priv->tfm_arc4 = NULL;
 		goto fail;
 	}
 
@@ -106,7 +109,7 @@ static void *ieee80211_tkip_init(int key_idx)
 		if (priv->tfm_michael)
 			crypto_free_tfm(priv->tfm_michael);
 		if (priv->tfm_arc4)
-			crypto_free_tfm(priv->tfm_arc4);
+			crypto_free_blkcipher(priv->tfm_arc4);
 		kfree(priv);
 	}
 
@@ -119,7 +122,7 @@ static void ieee80211_tkip_deinit(void *priv)
 	if (_priv && _priv->tfm_michael)
 		crypto_free_tfm(_priv->tfm_michael);
 	if (_priv && _priv->tfm_arc4)
-		crypto_free_tfm(_priv->tfm_arc4);
+		crypto_free_blkcipher(_priv->tfm_arc4);
 	kfree(priv);
 }
 
@@ -318,6 +321,7 @@ static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
 static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct ieee80211_tkip_data *tkey = priv;
+	struct blkcipher_desc desc = { .tfm = tkey->tfm_arc4 };
 	int len;
 	u8 rc4key[16], *pos, *icv;
 	u32 crc;
@@ -351,18 +355,17 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	icv[2] = crc >> 16;
 	icv[3] = crc >> 24;
 
-	crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
+	crypto_blkcipher_setkey(tkey->tfm_arc4, rc4key, 16);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = len + 4;
-	crypto_cipher_encrypt(tkey->tfm_arc4, &sg, &sg, len + 4);
-
-	return 0;
+	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
 }
 
 static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct ieee80211_tkip_data *tkey = priv;
+	struct blkcipher_desc desc = { .tfm = tkey->tfm_arc4 };
 	u8 rc4key[16];
 	u8 keyidx, *pos;
 	u32 iv32;
@@ -434,11 +437,18 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 
 	plen = skb->len - hdr_len - 12;
 
-	crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
+	crypto_blkcipher_setkey(tkey->tfm_arc4, rc4key, 16);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = plen + 4;
-	crypto_cipher_decrypt(tkey->tfm_arc4, &sg, &sg, plen + 4);
+	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG ": TKIP: failed to decrypt "
+			       "received packet from " MAC_FMT "\n",
+			       MAC_ARG(hdr->addr2));
+		}
+		return -7;
+	}
 
 	crc = ~crc32_le(~0, pos, plen);
 	icv[0] = crc;
@@ -619,7 +629,7 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
 	struct ieee80211_tkip_data *tkey = priv;
 	int keyidx;
 	struct crypto_tfm *tfm = tkey->tfm_michael;
-	struct crypto_tfm *tfm2 = tkey->tfm_arc4;
+	struct crypto_blkcipher *tfm2 = tkey->tfm_arc4;
 
 	keyidx = tkey->key_idx;
 	memset(tkey, 0, sizeof(*tkey));
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 0ebf235f69393..3d46d3efe1dd9 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -9,6 +9,7 @@
  * more details.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -32,7 +33,7 @@ struct prism2_wep_data {
 	u8 key[WEP_KEY_LEN + 1];
 	u8 key_len;
 	u8 key_idx;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 };
 
 static void *prism2_wep_init(int keyidx)
@@ -44,10 +45,11 @@ static void *prism2_wep_init(int keyidx)
 		goto fail;
 	priv->key_idx = keyidx;
 
-	priv->tfm = crypto_alloc_tfm("arc4", 0);
-	if (priv->tfm == NULL) {
+	priv->tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm)) {
 		printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate "
 		       "crypto API arc4\n");
+		priv->tfm = NULL;
 		goto fail;
 	}
 
@@ -59,7 +61,7 @@ static void *prism2_wep_init(int keyidx)
       fail:
 	if (priv) {
 		if (priv->tfm)
-			crypto_free_tfm(priv->tfm);
+			crypto_free_blkcipher(priv->tfm);
 		kfree(priv);
 	}
 	return NULL;
@@ -69,7 +71,7 @@ static void prism2_wep_deinit(void *priv)
 {
 	struct prism2_wep_data *_priv = priv;
 	if (_priv && _priv->tfm)
-		crypto_free_tfm(_priv->tfm);
+		crypto_free_blkcipher(_priv->tfm);
 	kfree(priv);
 }
 
@@ -120,6 +122,7 @@ static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len,
 static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
+	struct blkcipher_desc desc = { .tfm = wep->tfm };
 	u32 crc, klen, len;
 	u8 *pos, *icv;
 	struct scatterlist sg;
@@ -151,13 +154,11 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	icv[2] = crc >> 16;
 	icv[3] = crc >> 24;
 
-	crypto_cipher_setkey(wep->tfm, key, klen);
+	crypto_blkcipher_setkey(wep->tfm, key, klen);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = len + 4;
-	crypto_cipher_encrypt(wep->tfm, &sg, &sg, len + 4);
-
-	return 0;
+	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
 }
 
 /* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -170,6 +171,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
+	struct blkcipher_desc desc = { .tfm = wep->tfm };
 	u32 crc, klen, plen;
 	u8 key[WEP_KEY_LEN + 3];
 	u8 keyidx, *pos, icv[4];
@@ -194,11 +196,12 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	/* Apply RC4 to data and compute CRC32 over decrypted data */
 	plen = skb->len - hdr_len - 8;
 
-	crypto_cipher_setkey(wep->tfm, key, klen);
+	crypto_blkcipher_setkey(wep->tfm, key, klen);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = plen + 4;
-	crypto_cipher_decrypt(wep->tfm, &sg, &sg, plen + 4);
+	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4))
+		return -7;
 
 	crc = ~crc32_le(~0, pos, plen);
 	icv[0] = crc;
-- 
GitLab


From efcf8023e299be605f217dc2c1b2754b5534569c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 5 Aug 2006 16:28:19 +1000
Subject: [PATCH 0322/1063] [CRYPTO] drivers: Remove obsolete block cipher
 operations

This patch removes obsolete block operations of the simple cipher type
from drivers.  These were preserved so that existing users could make a
smooth transition.  Now that the transition is complete, they are no
longer needed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/aes_s390.c  | 112 -------------------
 arch/s390/crypto/des_s390.c  | 203 -----------------------------------
 drivers/crypto/padlock-aes.c |  44 --------
 3 files changed, 359 deletions(-)

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 8f04b4e41b557..15c9eec02928c 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -113,114 +113,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	}
 }
 
-static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_km(KM_AES_192_ENCRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_km(KM_AES_256_ENCRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	return nbytes;
-}
-
-static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_km(KM_AES_192_DECRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_km(KM_AES_256_DECRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	return nbytes;
-}
-
-static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE);
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_kmc(KMC_AES_128_ENCRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_kmc(KMC_AES_192_ENCRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_kmc(KMC_AES_256_ENCRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	memcpy(desc->info, &sctx->iv, AES_BLOCK_SIZE);
-
-	return nbytes;
-}
-
-static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE);
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_kmc(KMC_AES_128_DECRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_kmc(KMC_AES_192_DECRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_kmc(KMC_AES_256_DECRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	return nbytes;
-}
-
 
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
@@ -238,10 +130,6 @@ static struct crypto_alg aes_alg = {
 			.cia_setkey		=	aes_set_key,
 			.cia_encrypt		=	aes_encrypt,
 			.cia_decrypt		=	aes_decrypt,
-			.cia_encrypt_ecb	=	aes_encrypt_ecb,
-			.cia_decrypt_ecb	=	aes_decrypt_ecb,
-			.cia_encrypt_cbc	=	aes_encrypt_cbc,
-			.cia_decrypt_cbc	=	aes_decrypt_cbc,
 		}
 	}
 };
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index a6d2385ccb7af..2aba04852fe3e 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -73,67 +73,6 @@ static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
 }
 
-static unsigned int des_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_DEA_ENCRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_DEA_DECRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-
-	memcpy(sctx->iv, desc->info, DES_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_DEA_ENCRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	memcpy(desc->info, sctx->iv, DES_BLOCK_SIZE);
-	return nbytes;
-}
-
-static unsigned int des_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, DES_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_DEA_DECRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
 static struct crypto_alg des_alg = {
 	.cra_name		=	"des",
 	.cra_driver_name	=	"des-s390",
@@ -150,10 +89,6 @@ static struct crypto_alg des_alg = {
 			.cia_setkey		=	des_setkey,
 			.cia_encrypt		=	des_encrypt,
 			.cia_decrypt		=	des_decrypt,
-			.cia_encrypt_ecb	=	des_encrypt_ecb,
-			.cia_decrypt_ecb	=	des_decrypt_ecb,
-			.cia_encrypt_cbc	=	des_encrypt_cbc,
-			.cia_decrypt_cbc	=	des_decrypt_cbc,
 		}
 	}
 };
@@ -344,71 +279,6 @@ static void des3_128_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 		      DES3_128_BLOCK_SIZE);
 }
 
-static unsigned int des3_128_encrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_128_ENCRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_128_decrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_128_DECRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_128_encrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-
-	memcpy(sctx->iv, desc->info, DES3_128_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_128_ENCRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	memcpy(desc->info, sctx->iv, DES3_128_BLOCK_SIZE);
-	return nbytes;
-}
-
-static unsigned int des3_128_decrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, DES3_128_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_128_DECRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
 static struct crypto_alg des3_128_alg = {
 	.cra_name		=	"des3_ede128",
 	.cra_driver_name	=	"des3_ede128-s390",
@@ -425,10 +295,6 @@ static struct crypto_alg des3_128_alg = {
 			.cia_setkey		=	des3_128_setkey,
 			.cia_encrypt		=	des3_128_encrypt,
 			.cia_decrypt		=	des3_128_decrypt,
-			.cia_encrypt_ecb	=	des3_128_encrypt_ecb,
-			.cia_decrypt_ecb	=	des3_128_decrypt_ecb,
-			.cia_encrypt_cbc	=	des3_128_encrypt_cbc,
-			.cia_decrypt_cbc	=	des3_128_decrypt_cbc,
 		}
 	}
 };
@@ -575,71 +441,6 @@ static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 		      DES3_192_BLOCK_SIZE);
 }
 
-static unsigned int des3_192_encrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_192_ENCRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_192_decrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_192_DECRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_192_encrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-
-	memcpy(sctx->iv, desc->info, DES3_192_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_192_ENCRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	memcpy(desc->info, sctx->iv, DES3_192_BLOCK_SIZE);
-	return nbytes;
-}
-
-static unsigned int des3_192_decrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, DES3_192_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_192_DECRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
 static struct crypto_alg des3_192_alg = {
 	.cra_name		=	"des3_ede",
 	.cra_driver_name	=	"des3_ede-s390",
@@ -656,10 +457,6 @@ static struct crypto_alg des3_192_alg = {
 			.cia_setkey		=	des3_192_setkey,
 			.cia_encrypt		=	des3_192_encrypt,
 			.cia_decrypt		=	des3_192_decrypt,
-			.cia_encrypt_ecb	=	des3_192_encrypt_ecb,
-			.cia_decrypt_ecb	=	des3_192_decrypt_ecb,
-			.cia_encrypt_cbc	=	des3_192_encrypt_cbc,
-			.cia_decrypt_cbc	=	des3_192_decrypt_cbc,
 		}
 	}
 };
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index f53301e836d98..d4501dc7e6502 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -452,46 +452,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1);
 }
 
-static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt,
-			   nbytes / AES_BLOCK_SIZE);
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
-static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt,
-			   nbytes / AES_BLOCK_SIZE);
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
-static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	u8 *iv;
-
-	iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info,
-				&ctx->cword.encrypt, nbytes / AES_BLOCK_SIZE);
-	memcpy(desc->info, iv, AES_BLOCK_SIZE);
-
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
-static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt,
-			   nbytes / AES_BLOCK_SIZE);
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
 	.cra_driver_name	=	"aes-padlock",
@@ -509,10 +469,6 @@ static struct crypto_alg aes_alg = {
 			.cia_setkey	   	= 	aes_set_key,
 			.cia_encrypt	 	=	aes_encrypt,
 			.cia_decrypt	  	=	aes_decrypt,
-			.cia_encrypt_ecb 	=	aes_encrypt_ecb,
-			.cia_decrypt_ecb  	=	aes_decrypt_ecb,
-			.cia_encrypt_cbc 	=	aes_encrypt_cbc,
-			.cia_decrypt_cbc  	=	aes_decrypt_cbc,
 		}
 	}
 };
-- 
GitLab


From 03fd9cee7f46dddcd2562bc175d2c348502ce281 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 14 Aug 2006 23:11:53 +1000
Subject: [PATCH 0323/1063] [PATCH] scatterlist: Add const to
 sg_set_buf/sg_init_one pointer argument

This patch adds a const modifier to the buf argument of sg_set_buf and
sg_init_one.  This lets people call it with pointers that are const.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/scatterlist.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 66ff545552f71..4efbd9c445f5b 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -5,7 +5,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 
-static inline void sg_set_buf(struct scatterlist *sg, void *buf,
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 			      unsigned int buflen)
 {
 	sg->page = virt_to_page(buf);
@@ -13,7 +13,7 @@ static inline void sg_set_buf(struct scatterlist *sg, void *buf,
 	sg->length = buflen;
 }
 
-static inline void sg_init_one(struct scatterlist *sg, void *buf,
+static inline void sg_init_one(struct scatterlist *sg, const void *buf,
 			       unsigned int buflen)
 {
 	memset(sg, 0, sizeof(*sg));
-- 
GitLab


From 7226bc877a22244e8003924031435a4bffd52654 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 21:40:49 +1000
Subject: [PATCH 0324/1063] [CRYPTO] api: Mark parts of cipher interface as
 deprecated

Mark the parts of the cipher interface that have been replaced by
block ciphers as deprecated.  Thanks to Andrew Morton for suggesting
that this be done before they are removed completely.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/cipher.c        | 34 ++++++++++++++++++++++++------
 include/linux/crypto.h | 48 +++++++++++++++++++++++++++++++-----------
 2 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/crypto/cipher.c b/crypto/cipher.c
index 3264617806735..9e03701cfdcc6 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -23,6 +23,28 @@
 #include "internal.h"
 #include "scatterwalk.h"
 
+struct cipher_alg_compat {
+	unsigned int cia_min_keysize;
+	unsigned int cia_max_keysize;
+	int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key,
+	                  unsigned int keylen);
+	void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+};
+
 static inline void xor_64(u8 *a, const u8 *b)
 {
 	((u32 *)a)[0] ^= ((u32 *)b)[0];
@@ -276,7 +298,7 @@ static int ecb_encrypt(struct crypto_tfm *tfm,
                        struct scatterlist *src, unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_encrypt;
@@ -291,7 +313,7 @@ static int ecb_decrypt(struct crypto_tfm *tfm,
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_decrypt;
@@ -306,7 +328,7 @@ static int cbc_encrypt(struct crypto_tfm *tfm,
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_encrypt;
@@ -322,7 +344,7 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm,
                           unsigned int nbytes, u8 *iv)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_encrypt;
@@ -338,7 +360,7 @@ static int cbc_decrypt(struct crypto_tfm *tfm,
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_decrypt;
@@ -354,7 +376,7 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm,
                           unsigned int nbytes, u8 *iv)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_decrypt;
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 5a5466d518e8e..0be666b504632 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -20,7 +20,6 @@
 #include <asm/atomic.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/types.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -137,16 +136,16 @@ struct cipher_alg {
 
 	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 	unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 	unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 	unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 };
 
 struct digest_alg {
@@ -358,18 +357,23 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK;
 }
 
+static unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm)
+	__deprecated;
 static inline unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
 	return tfm->__crt_alg->cra_cipher.cia_min_keysize;
 }
 
+static unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm)
+	__deprecated;
 static inline unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
 	return tfm->__crt_alg->cra_cipher.cia_max_keysize;
 }
 
+static unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm) __deprecated;
 static inline unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
@@ -622,6 +626,13 @@ static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm,
 	crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags);
 }
 
+static inline int crypto_cipher_setkey(struct crypto_cipher *tfm,
+                                       const u8 *key, unsigned int keylen)
+{
+	return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm),
+						  key, keylen);
+}
+
 static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm,
 					     u8 *dst, const u8 *src)
 {
@@ -671,13 +682,10 @@ static inline int crypto_digest_setkey(struct crypto_tfm *tfm,
 	return tfm->crt_digest.dit_setkey(tfm, key, keylen);
 }
 
-static inline int crypto_cipher_setkey(struct crypto_tfm *tfm,
-                                       const u8 *key, unsigned int keylen)
-{
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->crt_cipher.cit_setkey(tfm, key, keylen);
-}
-
+static int crypto_cipher_encrypt(struct crypto_tfm *tfm,
+				 struct scatterlist *dst,
+				 struct scatterlist *src,
+				 unsigned int nbytes) __deprecated;
 static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm,
                                         struct scatterlist *dst,
                                         struct scatterlist *src,
@@ -687,6 +695,10 @@ static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm,
 	return tfm->crt_cipher.cit_encrypt(tfm, dst, src, nbytes);
 }                                        
 
+static int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
+				    struct scatterlist *dst,
+				    struct scatterlist *src,
+				    unsigned int nbytes, u8 *iv) __deprecated;
 static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
                                            struct scatterlist *dst,
                                            struct scatterlist *src,
@@ -696,6 +708,10 @@ static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
 	return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv);
 }                                        
 
+static int crypto_cipher_decrypt(struct crypto_tfm *tfm,
+				 struct scatterlist *dst,
+				 struct scatterlist *src,
+				 unsigned int nbytes) __deprecated;
 static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm,
                                         struct scatterlist *dst,
                                         struct scatterlist *src,
@@ -705,6 +721,10 @@ static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm,
 	return tfm->crt_cipher.cit_decrypt(tfm, dst, src, nbytes);
 }
 
+static int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
+				    struct scatterlist *dst,
+				    struct scatterlist *src,
+				    unsigned int nbytes, u8 *iv) __deprecated;
 static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
                                            struct scatterlist *dst,
                                            struct scatterlist *src,
@@ -714,6 +734,8 @@ static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
 	return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv);
 }
 
+static void crypto_cipher_set_iv(struct crypto_tfm *tfm,
+				 const u8 *src, unsigned int len) __deprecated;
 static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm,
                                         const u8 *src, unsigned int len)
 {
@@ -721,6 +743,8 @@ static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm,
 	memcpy(tfm->crt_cipher.cit_iv, src, len);
 }
 
+static void crypto_cipher_get_iv(struct crypto_tfm *tfm,
+				 u8 *dst, unsigned int len) __deprecated;
 static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm,
                                         u8 *dst, unsigned int len)
 {
-- 
GitLab


From 055bcee3102dc35f019b69df9c2618e9d6dd1c09 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 19 Aug 2006 22:24:23 +1000
Subject: [PATCH 0325/1063] [CRYPTO] digest: Added user API for new hash type

The existing digest user interface is inadequate for supporting asynchronous
operations.  For one it doesn't return a value to indicate success or
failure, nor does it take a per-operation descriptor which is essential
for the issuing of requests while other requests are still outstanding.

This patch is the first in a series of steps to remodel the interface
for asynchronous operations.

For the ease of transition the new interface will be known as "hash"
while the old one will remain as "digest".

This patch also changes sg_next to allow chaining.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig          |   4 +
 crypto/Makefile         |   3 +
 crypto/digest.c         | 129 +++++++++++++++++++++++-------
 crypto/hash.c           |  61 ++++++++++++++
 crypto/hmac.c           |  12 +--
 crypto/scatterwalk.h    |   4 +-
 include/crypto/algapi.h |   6 ++
 include/linux/crypto.h  | 172 ++++++++++++++++++++++++++++++++--------
 8 files changed, 321 insertions(+), 70 deletions(-)
 create mode 100644 crypto/hash.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index be5eb0cb7c30e..69c5f992bcd4b 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -20,6 +20,10 @@ config CRYPTO_BLKCIPHER
 	tristate
 	select CRYPTO_ALGAPI
 
+config CRYPTO_HASH
+	tristate
+	select CRYPTO_ALGAPI
+
 config CRYPTO_MANAGER
 	tristate "Cryptographic algorithm manager"
 	select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index 5e1ff4e0b1fcc..72366208e291c 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -10,6 +10,9 @@ obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o
 
 obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o
 
+crypto_hash-objs := hash.o
+obj-$(CONFIG_CRYPTO_HASH) += crypto_hash.o
+
 obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o
diff --git a/crypto/digest.c b/crypto/digest.c
index 96244a528844d..5873063db8400 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -11,29 +11,89 @@
  * any later version.
  *
  */
-#include <linux/crypto.h>
+
 #include <linux/mm.h>
 #include <linux/errno.h>
 #include <linux/highmem.h>
-#include <asm/scatterlist.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
 #include "internal.h"
+#include "scatterwalk.h"
 
-static void init(struct crypto_tfm *tfm)
+void crypto_digest_init(struct crypto_tfm *tfm)
 {
-	tfm->__crt_alg->cra_digest.dia_init(tfm);
+	struct crypto_hash *hash = crypto_hash_cast(tfm);
+	struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags };
+
+	crypto_hash_init(&desc);
 }
+EXPORT_SYMBOL_GPL(crypto_digest_init);
 
-static void update(struct crypto_tfm *tfm,
-                   struct scatterlist *sg, unsigned int nsg)
+void crypto_digest_update(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg)
 {
+	struct crypto_hash *hash = crypto_hash_cast(tfm);
+	struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags };
+	unsigned int nbytes = 0;
 	unsigned int i;
+
+	for (i = 0; i < nsg; i++)
+		nbytes += sg[i].length;
+
+	crypto_hash_update(&desc, sg, nbytes);
+}
+EXPORT_SYMBOL_GPL(crypto_digest_update);
+
+void crypto_digest_final(struct crypto_tfm *tfm, u8 *out)
+{
+	struct crypto_hash *hash = crypto_hash_cast(tfm);
+	struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags };
+
+	crypto_hash_final(&desc, out);
+}
+EXPORT_SYMBOL_GPL(crypto_digest_final);
+
+void crypto_digest_digest(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg, u8 *out)
+{
+	struct crypto_hash *hash = crypto_hash_cast(tfm);
+	struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags };
+	unsigned int nbytes = 0;
+	unsigned int i;
+
+	for (i = 0; i < nsg; i++)
+		nbytes += sg[i].length;
+
+	crypto_hash_digest(&desc, sg, nbytes, out);
+}
+EXPORT_SYMBOL_GPL(crypto_digest_digest);
+
+static int init(struct hash_desc *desc)
+{
+	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
+
+	tfm->__crt_alg->cra_digest.dia_init(tfm);
+	return 0;
+}
+
+static int update(struct hash_desc *desc,
+		  struct scatterlist *sg, unsigned int nbytes)
+{
+	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
 	unsigned int alignmask = crypto_tfm_alg_alignmask(tfm);
 
-	for (i = 0; i < nsg; i++) {
+	if (!nbytes)
+		return 0;
+
+	for (;;) {
+		struct page *pg = sg->page;
+		unsigned int offset = sg->offset;
+		unsigned int l = sg->length;
 
-		struct page *pg = sg[i].page;
-		unsigned int offset = sg[i].offset;
-		unsigned int l = sg[i].length;
+		if (unlikely(l > nbytes))
+			l = nbytes;
+		nbytes -= l;
 
 		do {
 			unsigned int bytes_from_page = min(l, ((unsigned int)
@@ -55,16 +115,23 @@ static void update(struct crypto_tfm *tfm,
 			tfm->__crt_alg->cra_digest.dia_update(tfm, p,
 							      bytes_from_page);
 			crypto_kunmap(src, 0);
-			crypto_yield(tfm->crt_flags);
+			crypto_yield(desc->flags);
 			offset = 0;
 			pg++;
 			l -= bytes_from_page;
 		} while (l > 0);
+
+		if (!nbytes)
+			break;
+		sg = sg_next(sg);
 	}
+
+	return 0;
 }
 
-static void final(struct crypto_tfm *tfm, u8 *out)
+static int final(struct hash_desc *desc, u8 *out)
 {
+	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
 	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
 	struct digest_alg *digest = &tfm->__crt_alg->cra_digest;
 
@@ -78,26 +145,30 @@ static void final(struct crypto_tfm *tfm, u8 *out)
 		memcpy(out, dst, digest->dia_digestsize);
 	} else
 		digest->dia_final(tfm, out);
+
+	return 0;
 }
 
-static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen)
 {
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+	crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK);
 	return -ENOSYS;
 }
 
-static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen)
 {
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+	struct crypto_tfm *tfm = crypto_hash_tfm(hash);
+
+	crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK);
 	return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen);
 }
 
-static void digest(struct crypto_tfm *tfm,
-                   struct scatterlist *sg, unsigned int nsg, u8 *out)
+static int digest(struct hash_desc *desc,
+		  struct scatterlist *sg, unsigned int nbytes, u8 *out)
 {
-	init(tfm);
-	update(tfm, sg, nsg);
-	final(tfm, out);
+	init(desc);
+	update(desc, sg, nbytes);
+	return final(desc, out);
 }
 
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags)
@@ -107,14 +178,18 @@ int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags)
 
 int crypto_init_digest_ops(struct crypto_tfm *tfm)
 {
-	struct digest_tfm *ops = &tfm->crt_digest;
+	struct hash_tfm *ops = &tfm->crt_hash;
 	struct digest_alg *dalg = &tfm->__crt_alg->cra_digest;
+
+	if (dalg->dia_digestsize > crypto_tfm_alg_blocksize(tfm))
+		return -EINVAL;
 	
-	ops->dit_init	= init;
-	ops->dit_update	= update;
-	ops->dit_final	= final;
-	ops->dit_digest	= digest;
-	ops->dit_setkey	= dalg->dia_setkey ? setkey : nosetkey;
+	ops->init	= init;
+	ops->update	= update;
+	ops->final	= final;
+	ops->digest	= digest;
+	ops->setkey	= dalg->dia_setkey ? setkey : nosetkey;
+	ops->digestsize	= dalg->dia_digestsize;
 	
 	return crypto_alloc_hmac_block(tfm);
 }
diff --git a/crypto/hash.c b/crypto/hash.c
new file mode 100644
index 0000000000000..cdec23d885fed
--- /dev/null
+++ b/crypto/hash.c
@@ -0,0 +1,61 @@
+/*
+ * Cryptographic Hash operations.
+ * 
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include "internal.h"
+
+static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg)
+{
+	return alg->cra_ctxsize;
+}
+
+static int crypto_init_hash_ops(struct crypto_tfm *tfm)
+{
+	struct hash_tfm *crt = &tfm->crt_hash;
+	struct hash_alg *alg = &tfm->__crt_alg->cra_hash;
+
+	if (alg->digestsize > crypto_tfm_alg_blocksize(tfm))
+		return -EINVAL;
+
+	crt->init = alg->init;
+	crt->update = alg->update;
+	crt->final = alg->final;
+	crt->digest = alg->digest;
+	crt->setkey = alg->setkey;
+	crt->digestsize = alg->digestsize;
+
+	return 0;
+}
+
+static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute_used__;
+static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_printf(m, "type         : hash\n");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "digestsize   : %u\n", alg->cra_hash.digestsize);
+}
+
+const struct crypto_type crypto_hash_type = {
+	.ctxsize = crypto_hash_ctxsize,
+	.init = crypto_init_hash_ops,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_hash_show,
+#endif
+};
+EXPORT_SYMBOL_GPL(crypto_hash_type);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic cryptographic hash type");
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 46120dee5ada6..ecf7b0a95b564 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -35,9 +35,9 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm)
 
 	BUG_ON(!crypto_tfm_alg_blocksize(tfm));
 	
-	tfm->crt_digest.dit_hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm),
-	                                         GFP_KERNEL);
-	if (tfm->crt_digest.dit_hmac_block == NULL)
+	tfm->crt_hash.hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm),
+					   GFP_KERNEL);
+	if (tfm->crt_hash.hmac_block == NULL)
 		ret = -ENOMEM;
 
 	return ret;
@@ -46,14 +46,14 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm)
 
 void crypto_free_hmac_block(struct crypto_tfm *tfm)
 {
-	kfree(tfm->crt_digest.dit_hmac_block);
+	kfree(tfm->crt_hash.hmac_block);
 }
 
 void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen)
 {
 	unsigned int i;
 	struct scatterlist tmp;
-	char *ipad = tfm->crt_digest.dit_hmac_block;
+	char *ipad = tfm->crt_hash.hmac_block;
 	
 	if (*keylen > crypto_tfm_alg_blocksize(tfm)) {
 		hash_key(tfm, key, *keylen);
@@ -83,7 +83,7 @@ void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key,
 {
 	unsigned int i;
 	struct scatterlist tmp;
-	char *opad = tfm->crt_digest.dit_hmac_block;
+	char *opad = tfm->crt_hash.hmac_block;
 	
 	if (*keylen > crypto_tfm_alg_blocksize(tfm)) {
 		hash_key(tfm, key, *keylen);
diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index ace595a2e1191..f1592cc2d0f42 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -20,11 +20,9 @@
 
 #include "internal.h"
 
-/* Define sg_next is an inline routine now in case we want to change
-   scatterlist to a linked list later. */
 static inline struct scatterlist *sg_next(struct scatterlist *sg)
 {
-	return sg + 1;
+	return (++sg)->length ? sg : (void *)sg->page;
 }
 
 static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 444f602724db5..5748aecdb4147 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -82,6 +82,7 @@ struct blkcipher_walk {
 };
 
 extern const struct crypto_type crypto_blkcipher_type;
+extern const struct crypto_type crypto_hash_type;
 
 void crypto_mod_put(struct crypto_alg *alg);
 
@@ -136,6 +137,11 @@ static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm)
 	return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher;
 }
 
+static inline void *crypto_hash_ctx_aligned(struct crypto_hash *tfm)
+{
+	return crypto_tfm_ctx_aligned(&tfm->base);
+}
+
 static inline void blkcipher_walk_init(struct blkcipher_walk *walk,
 				       struct scatterlist *dst,
 				       struct scatterlist *src,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 0be666b504632..40c0aab8ad4c9 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -31,8 +31,11 @@
 #define CRYPTO_ALG_TYPE_MASK		0x0000000f
 #define CRYPTO_ALG_TYPE_CIPHER		0x00000001
 #define CRYPTO_ALG_TYPE_DIGEST		0x00000002
-#define CRYPTO_ALG_TYPE_BLKCIPHER	0x00000003
-#define CRYPTO_ALG_TYPE_COMPRESS	0x00000004
+#define CRYPTO_ALG_TYPE_HASH		0x00000003
+#define CRYPTO_ALG_TYPE_BLKCIPHER	0x00000004
+#define CRYPTO_ALG_TYPE_COMPRESS	0x00000005
+
+#define CRYPTO_ALG_TYPE_HASH_MASK	0x0000000e
 
 #define CRYPTO_ALG_LARVAL		0x00000010
 #define CRYPTO_ALG_DEAD			0x00000020
@@ -90,6 +93,7 @@
 
 struct scatterlist;
 struct crypto_blkcipher;
+struct crypto_hash;
 struct crypto_tfm;
 struct crypto_type;
 
@@ -107,6 +111,11 @@ struct cipher_desc {
 	void *info;
 };
 
+struct hash_desc {
+	struct crypto_hash *tfm;
+	u32 flags;
+};
+
 /*
  * Algorithms: modular crypto algorithm implementations, managed
  * via crypto_register_alg() and crypto_unregister_alg().
@@ -158,6 +167,19 @@ struct digest_alg {
 	                  unsigned int keylen);
 };
 
+struct hash_alg {
+	int (*init)(struct hash_desc *desc);
+	int (*update)(struct hash_desc *desc, struct scatterlist *sg,
+		      unsigned int nbytes);
+	int (*final)(struct hash_desc *desc, u8 *out);
+	int (*digest)(struct hash_desc *desc, struct scatterlist *sg,
+		      unsigned int nbytes, u8 *out);
+	int (*setkey)(struct crypto_hash *tfm, const u8 *key,
+		      unsigned int keylen);
+
+	unsigned int digestsize;
+};
+
 struct compress_alg {
 	int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src,
 			    unsigned int slen, u8 *dst, unsigned int *dlen);
@@ -168,6 +190,7 @@ struct compress_alg {
 #define cra_blkcipher	cra_u.blkcipher
 #define cra_cipher	cra_u.cipher
 #define cra_digest	cra_u.digest
+#define cra_hash	cra_u.hash
 #define cra_compress	cra_u.compress
 
 struct crypto_alg {
@@ -191,6 +214,7 @@ struct crypto_alg {
 		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
 		struct digest_alg digest;
+		struct hash_alg hash;
 		struct compress_alg compress;
 	} cra_u;
 
@@ -262,18 +286,19 @@ struct cipher_tfm {
 	void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 };
 
-struct digest_tfm {
-	void (*dit_init)(struct crypto_tfm *tfm);
-	void (*dit_update)(struct crypto_tfm *tfm,
-	                   struct scatterlist *sg, unsigned int nsg);
-	void (*dit_final)(struct crypto_tfm *tfm, u8 *out);
-	void (*dit_digest)(struct crypto_tfm *tfm, struct scatterlist *sg,
-	                   unsigned int nsg, u8 *out);
-	int (*dit_setkey)(struct crypto_tfm *tfm,
-	                  const u8 *key, unsigned int keylen);
+struct hash_tfm {
+	int (*init)(struct hash_desc *desc);
+	int (*update)(struct hash_desc *desc,
+		      struct scatterlist *sg, unsigned int nsg);
+	int (*final)(struct hash_desc *desc, u8 *out);
+	int (*digest)(struct hash_desc *desc, struct scatterlist *sg,
+		      unsigned int nsg, u8 *out);
+	int (*setkey)(struct crypto_hash *tfm, const u8 *key,
+		      unsigned int keylen);
 #ifdef CONFIG_CRYPTO_HMAC
-	void *dit_hmac_block;
+	void *hmac_block;
 #endif
+	unsigned int digestsize;
 };
 
 struct compress_tfm {
@@ -287,7 +312,7 @@ struct compress_tfm {
 
 #define crt_blkcipher	crt_u.blkcipher
 #define crt_cipher	crt_u.cipher
-#define crt_digest	crt_u.digest
+#define crt_hash	crt_u.hash
 #define crt_compress	crt_u.compress
 
 struct crypto_tfm {
@@ -297,7 +322,7 @@ struct crypto_tfm {
 	union {
 		struct blkcipher_tfm blkcipher;
 		struct cipher_tfm cipher;
-		struct digest_tfm digest;
+		struct hash_tfm hash;
 		struct compress_tfm compress;
 	} crt_u;
 	
@@ -312,6 +337,10 @@ struct crypto_blkcipher {
 	struct crypto_tfm base;
 };
 
+struct crypto_hash {
+	struct crypto_tfm base;
+};
+
 enum {
 	CRYPTOA_UNSPEC,
 	CRYPTOA_ALG,
@@ -647,39 +676,114 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm,
 						dst, src);
 }
 
-static inline void crypto_digest_init(struct crypto_tfm *tfm)
+void crypto_digest_init(struct crypto_tfm *tfm);
+void crypto_digest_update(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg);
+void crypto_digest_final(struct crypto_tfm *tfm, u8 *out);
+void crypto_digest_digest(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg, u8 *out);
+
+static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_init(tfm);
+	return (struct crypto_hash *)tfm;
 }
 
-static inline void crypto_digest_update(struct crypto_tfm *tfm,
-                                        struct scatterlist *sg,
-                                        unsigned int nsg)
+static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_update(tfm, sg, nsg);
+	BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_HASH) &
+	       CRYPTO_ALG_TYPE_HASH_MASK);
+	return __crypto_hash_cast(tfm);
 }
 
-static inline void crypto_digest_final(struct crypto_tfm *tfm, u8 *out)
+static inline int crypto_digest_setkey(struct crypto_tfm *tfm,
+                                       const u8 *key, unsigned int keylen)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_final(tfm, out);
+	return tfm->crt_hash.setkey(crypto_hash_cast(tfm), key, keylen);
 }
 
-static inline void crypto_digest_digest(struct crypto_tfm *tfm,
-                                        struct scatterlist *sg,
-                                        unsigned int nsg, u8 *out)
+static inline struct crypto_hash *crypto_alloc_hash(const char *alg_name,
+						    u32 type, u32 mask)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_digest(tfm, sg, nsg, out);
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_HASH;
+	mask |= CRYPTO_ALG_TYPE_HASH_MASK;
+
+	return __crypto_hash_cast(crypto_alloc_base(alg_name, type, mask));
 }
 
-static inline int crypto_digest_setkey(struct crypto_tfm *tfm,
-                                       const u8 *key, unsigned int keylen)
+static inline struct crypto_tfm *crypto_hash_tfm(struct crypto_hash *tfm)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	return tfm->crt_digest.dit_setkey(tfm, key, keylen);
+	return &tfm->base;
+}
+
+static inline void crypto_free_hash(struct crypto_hash *tfm)
+{
+	crypto_free_tfm(crypto_hash_tfm(tfm));
+}
+
+static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm)
+{
+	return &crypto_hash_tfm(tfm)->crt_hash;
+}
+
+static inline unsigned int crypto_hash_blocksize(struct crypto_hash *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_hash_tfm(tfm));
+}
+
+static inline unsigned int crypto_hash_alignmask(struct crypto_hash *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_hash_tfm(tfm));
+}
+
+static inline unsigned int crypto_hash_digestsize(struct crypto_hash *tfm)
+{
+	return crypto_hash_crt(tfm)->digestsize;
+}
+
+static inline u32 crypto_hash_get_flags(struct crypto_hash *tfm)
+{
+	return crypto_tfm_get_flags(crypto_hash_tfm(tfm));
+}
+
+static inline void crypto_hash_set_flags(struct crypto_hash *tfm, u32 flags)
+{
+	crypto_tfm_set_flags(crypto_hash_tfm(tfm), flags);
+}
+
+static inline void crypto_hash_clear_flags(struct crypto_hash *tfm, u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_hash_tfm(tfm), flags);
+}
+
+static inline int crypto_hash_init(struct hash_desc *desc)
+{
+	return crypto_hash_crt(desc->tfm)->init(desc);
+}
+
+static inline int crypto_hash_update(struct hash_desc *desc,
+				     struct scatterlist *sg,
+				     unsigned int nbytes)
+{
+	return crypto_hash_crt(desc->tfm)->update(desc, sg, nbytes);
+}
+
+static inline int crypto_hash_final(struct hash_desc *desc, u8 *out)
+{
+	return crypto_hash_crt(desc->tfm)->final(desc, out);
+}
+
+static inline int crypto_hash_digest(struct hash_desc *desc,
+				     struct scatterlist *sg,
+				     unsigned int nbytes, u8 *out)
+{
+	return crypto_hash_crt(desc->tfm)->digest(desc, sg, nbytes, out);
+}
+
+static inline int crypto_hash_setkey(struct crypto_hash *hash,
+				     const u8 *key, unsigned int keylen)
+{
+	return crypto_hash_crt(hash)->setkey(hash, key, keylen);
 }
 
 static int crypto_cipher_encrypt(struct crypto_tfm *tfm,
-- 
GitLab


From 0796ae061e6da5de7cfc1af57dfd42a73908b1bf Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 21 Aug 2006 20:50:52 +1000
Subject: [PATCH 0326/1063] [CRYPTO] hmac: Add crypto template implementation

This patch rewrites HMAC as a crypto template.  This means that HMAC is no
longer a hard-coded part of the API.  It's now a template that generates
standard digest algorithms like any other.

The old HMAC is preserved until all current users are converted.

The same structure can be used by other MACs such as AES-XCBC-MAC.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/Kconfig |   1 +
 crypto/hmac.c  | 241 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 236 insertions(+), 6 deletions(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 69c5f992bcd4b..f07d9237950f5 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -34,6 +34,7 @@ config CRYPTO_MANAGER
 
 config CRYPTO_HMAC
 	bool "HMAC support"
+	select CRYPTO_HASH
 	help
 	  HMAC: Keyed-Hashing for Message Authentication (RFC2104).
 	  This is required for IPSec.
diff --git a/crypto/hmac.c b/crypto/hmac.c
index ecf7b0a95b564..eac77e2947407 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -4,22 +4,30 @@
  * HMAC: Keyed-Hashing for Message Authentication (RFC2104).
  *
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * The HMAC implementation is derived from USAGI.
  * Copyright (c) 2002 Kazunori Miyazawa <miyazawa@linux-ipv6.org> / USAGI
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option) 
+ * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  *
  */
-#include <linux/crypto.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/scatterlist.h>
-#include "internal.h"
+#include <linux/slab.h>
+#include <linux/string.h>
+
+struct hmac_ctx {
+	struct crypto_hash *child;
+};
 
 static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen)
 {
@@ -122,3 +130,224 @@ EXPORT_SYMBOL_GPL(crypto_hmac_update);
 EXPORT_SYMBOL_GPL(crypto_hmac_final);
 EXPORT_SYMBOL_GPL(crypto_hmac);
 
+static inline void *align_ptr(void *p, unsigned int align)
+{
+	return (void *)ALIGN((unsigned long)p, align);
+}
+
+static inline struct hmac_ctx *hmac_ctx(struct crypto_hash *tfm)
+{
+	return align_ptr(crypto_hash_ctx_aligned(tfm) +
+			 crypto_hash_blocksize(tfm) * 2 +
+			 crypto_hash_digestsize(tfm), sizeof(void *));
+}
+
+static int hmac_setkey(struct crypto_hash *parent,
+		       const u8 *inkey, unsigned int keylen)
+{
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *ipad = crypto_hash_ctx_aligned(parent);
+	char *opad = ipad + bs;
+	char *digest = opad + bs;
+	struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *));
+	struct crypto_hash *tfm = ctx->child;
+	unsigned int i;
+
+	if (keylen > bs) {
+		struct hash_desc desc;
+		struct scatterlist tmp;
+		int err;
+
+		desc.tfm = tfm;
+		desc.flags = crypto_hash_get_flags(parent);
+		desc.flags &= CRYPTO_TFM_REQ_MAY_SLEEP;
+		sg_set_buf(&tmp, inkey, keylen);
+
+		err = crypto_hash_digest(&desc, &tmp, keylen, digest);
+		if (err)
+			return err;
+
+		inkey = digest;
+		keylen = ds;
+	}
+
+	memcpy(ipad, inkey, keylen);
+	memset(ipad + keylen, 0, bs - keylen);
+	memcpy(opad, ipad, bs);
+
+	for (i = 0; i < bs; i++) {
+		ipad[i] ^= 0x36;
+		opad[i] ^= 0x5c;
+	}
+
+	return 0;
+}
+
+static int hmac_init(struct hash_desc *pdesc)
+{
+	struct crypto_hash *parent = pdesc->tfm;
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *ipad = crypto_hash_ctx_aligned(parent);
+	struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *));
+	struct hash_desc desc;
+	struct scatterlist tmp;
+
+	desc.tfm = ctx->child;
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+	sg_set_buf(&tmp, ipad, bs);
+
+	return unlikely(crypto_hash_init(&desc)) ?:
+	       crypto_hash_update(&desc, &tmp, 1);
+}
+
+static int hmac_update(struct hash_desc *pdesc,
+		       struct scatterlist *sg, unsigned int nbytes)
+{
+	struct hmac_ctx *ctx = hmac_ctx(pdesc->tfm);
+	struct hash_desc desc;
+
+	desc.tfm = ctx->child;
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	return crypto_hash_update(&desc, sg, nbytes);
+}
+
+static int hmac_final(struct hash_desc *pdesc, u8 *out)
+{
+	struct crypto_hash *parent = pdesc->tfm;
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *opad = crypto_hash_ctx_aligned(parent) + bs;
+	char *digest = opad + bs;
+	struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *));
+	struct hash_desc desc;
+	struct scatterlist tmp;
+
+	desc.tfm = ctx->child;
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+	sg_set_buf(&tmp, opad, bs + ds);
+
+	return unlikely(crypto_hash_final(&desc, digest)) ?:
+	       crypto_hash_digest(&desc, &tmp, bs + ds, out);
+}
+
+static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
+		       unsigned int nbytes, u8 *out)
+{
+	struct crypto_hash *parent = pdesc->tfm;
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *ipad = crypto_hash_ctx_aligned(parent);
+	char *opad = ipad + bs;
+	char *digest = opad + bs;
+	struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *));
+	struct hash_desc desc;
+	struct scatterlist sg1[2];
+	struct scatterlist sg2[1];
+
+	desc.tfm = ctx->child;
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	sg_set_buf(sg1, ipad, bs);
+	sg1[1].page = (void *)sg;
+	sg1[1].length = 0;
+	sg_set_buf(sg2, opad, bs + ds);
+
+	return unlikely(crypto_hash_digest(&desc, sg1, nbytes + bs, digest)) ?:
+	       crypto_hash_digest(&desc, sg2, bs + ds, out);
+}
+
+static int hmac_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct hmac_ctx *ctx = hmac_ctx(__crypto_hash_cast(tfm));
+
+	tfm = crypto_spawn_tfm(spawn);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	ctx->child = crypto_hash_cast(tfm);
+	return 0;
+}
+
+static void hmac_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct hmac_ctx *ctx = hmac_ctx(__crypto_hash_cast(tfm));
+	crypto_free_hash(ctx->child);
+}
+
+static void hmac_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));
+	kfree(inst);
+}
+
+static struct crypto_instance *hmac_alloc(void *param, unsigned int len)
+{
+	struct crypto_instance *inst;
+	struct crypto_alg *alg;
+
+	alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_HASH,
+				  CRYPTO_ALG_TYPE_HASH_MASK | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(alg))
+		return ERR_PTR(PTR_ERR(alg));
+
+	inst = crypto_alloc_instance("hmac", alg);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_HASH;
+	inst->alg.cra_priority = alg->cra_priority;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+	inst->alg.cra_type = &crypto_hash_type;
+
+	inst->alg.cra_hash.digestsize =
+		(alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+		CRYPTO_ALG_TYPE_HASH ? alg->cra_hash.digestsize :
+				       alg->cra_digest.dia_digestsize;
+
+	inst->alg.cra_ctxsize = sizeof(struct hmac_ctx) +
+				ALIGN(inst->alg.cra_blocksize * 2 +
+				      inst->alg.cra_hash.digestsize,
+				      sizeof(void *));
+
+	inst->alg.cra_init = hmac_init_tfm;
+	inst->alg.cra_exit = hmac_exit_tfm;
+
+	inst->alg.cra_hash.init = hmac_init;
+	inst->alg.cra_hash.update = hmac_update;
+	inst->alg.cra_hash.final = hmac_final;
+	inst->alg.cra_hash.digest = hmac_digest;
+	inst->alg.cra_hash.setkey = hmac_setkey;
+
+out_put_alg:
+	crypto_mod_put(alg);
+	return inst;
+}
+
+static struct crypto_template hmac_tmpl = {
+	.name = "hmac",
+	.alloc = hmac_alloc,
+	.free = hmac_free,
+	.module = THIS_MODULE,
+};
+
+static int __init hmac_module_init(void)
+{
+	return crypto_register_template(&hmac_tmpl);
+}
+
+static void __exit hmac_module_exit(void)
+{
+	crypto_unregister_template(&hmac_tmpl);
+}
+
+module_init(hmac_module_init);
+module_exit(hmac_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HMAC hash algorithm");
-- 
GitLab


From e9d41164e2fdd897fe4520c2079ea0000f6e0ec3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 19 Aug 2006 21:38:49 +1000
Subject: [PATCH 0327/1063] [CRYPTO] tcrypt: Use HMAC template and hash
 interface

This patch converts tcrypt to use the new HMAC template rather than the
hard-coded version of HMAC.  It also converts all digest users to use
the new hash interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/tcrypt.c | 355 ++++++++++++++++++++++++++++--------------------
 crypto/tcrypt.h |  23 +---
 2 files changed, 213 insertions(+), 165 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 5e2278069d226..840ab8be0b967 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -88,9 +88,11 @@ static void test_hash(char *algo, struct hash_testvec *template,
 	unsigned int i, j, k, temp;
 	struct scatterlist sg[8];
 	char result[64];
-	struct crypto_tfm *tfm;
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
 	struct hash_testvec *hash_tv;
 	unsigned int tsize;
+	int ret;
 
 	printk("\ntesting %s\n", algo);
 
@@ -104,27 +106,42 @@ static void test_hash(char *algo, struct hash_testvec *template,
 
 	memcpy(tvmem, template, tsize);
 	hash_tv = (void *)tvmem;
-	tfm = crypto_alloc_tfm(algo, 0);
-	if (tfm == NULL) {
-		printk("failed to load transform for %s\n", algo);
+
+	tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
 
+	desc.tfm = tfm;
+	desc.flags = 0;
+
 	for (i = 0; i < tcount; i++) {
 		printk("test %u:\n", i + 1);
 		memset(result, 0, 64);
 
 		sg_set_buf(&sg[0], hash_tv[i].plaintext, hash_tv[i].psize);
 
-		crypto_digest_init(tfm);
-		crypto_digest_setkey(tfm, hash_tv[i].key, hash_tv[i].ksize);
-		crypto_digest_update(tfm, sg, 1);
-		crypto_digest_final(tfm, result);
+		if (hash_tv[i].ksize) {
+			ret = crypto_hash_setkey(tfm, hash_tv[i].key,
+						 hash_tv[i].ksize);
+			if (ret) {
+				printk("setkey() failed ret=%d\n", ret);
+				goto out;
+			}
+		}
+
+		ret = crypto_hash_digest(&desc, sg, hash_tv[i].psize, result);
+		if (ret) {
+			printk("digest () failed ret=%d\n", ret);
+			goto out;
+		}
 
-		hexdump(result, crypto_tfm_alg_digestsize(tfm));
+		hexdump(result, crypto_hash_digestsize(tfm));
 		printk("%s\n",
 		       memcmp(result, hash_tv[i].digest,
-			      crypto_tfm_alg_digestsize(tfm)) ?
+			      crypto_hash_digestsize(tfm)) ?
 		       "fail" : "pass");
 	}
 
@@ -150,105 +167,35 @@ static void test_hash(char *algo, struct hash_testvec *template,
 					    hash_tv[i].tap[k]);
 			}
 
-			crypto_digest_digest(tfm, sg, hash_tv[i].np, result);
-
-			hexdump(result, crypto_tfm_alg_digestsize(tfm));
-			printk("%s\n",
-			       memcmp(result, hash_tv[i].digest,
-				      crypto_tfm_alg_digestsize(tfm)) ?
-			       "fail" : "pass");
-		}
-	}
-
-	crypto_free_tfm(tfm);
-}
-
-
-#ifdef CONFIG_CRYPTO_HMAC
-
-static void test_hmac(char *algo, struct hmac_testvec *template,
-		      unsigned int tcount)
-{
-	unsigned int i, j, k, temp;
-	struct scatterlist sg[8];
-	char result[64];
-	struct crypto_tfm *tfm;
-	struct hmac_testvec *hmac_tv;
-	unsigned int tsize, klen;
-
-	tfm = crypto_alloc_tfm(algo, 0);
-	if (tfm == NULL) {
-		printk("failed to load transform for %s\n", algo);
-		return;
-	}
-
-	printk("\ntesting hmac_%s\n", algo);
+			if (hash_tv[i].ksize) {
+				ret = crypto_hash_setkey(tfm, hash_tv[i].key,
+							 hash_tv[i].ksize);
 
-	tsize = sizeof(struct hmac_testvec);
-	tsize *= tcount;
-	if (tsize > TVMEMSIZE) {
-		printk("template (%u) too big for tvmem (%u)\n", tsize,
-		       TVMEMSIZE);
-		goto out;
-	}
-
-	memcpy(tvmem, template, tsize);
-	hmac_tv = (void *)tvmem;
-
-	for (i = 0; i < tcount; i++) {
-		printk("test %u:\n", i + 1);
-		memset(result, 0, sizeof (result));
-
-		klen = hmac_tv[i].ksize;
-		sg_set_buf(&sg[0], hmac_tv[i].plaintext, hmac_tv[i].psize);
-
-		crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, 1, result);
-
-		hexdump(result, crypto_tfm_alg_digestsize(tfm));
-		printk("%s\n",
-		       memcmp(result, hmac_tv[i].digest,
-			      crypto_tfm_alg_digestsize(tfm)) ? "fail" :
-		       "pass");
-	}
-
-	printk("\ntesting hmac_%s across pages\n", algo);
-
-	memset(xbuf, 0, XBUFSIZE);
-
-	j = 0;
-	for (i = 0; i < tcount; i++) {
-		if (hmac_tv[i].np) {
-			j++;
-			printk("test %u:\n",j);
-			memset(result, 0, 64);
-
-			temp = 0;
-			klen = hmac_tv[i].ksize;
-			for (k = 0; k < hmac_tv[i].np; k++) {
-				memcpy(&xbuf[IDX[k]],
-				       hmac_tv[i].plaintext + temp,
-				       hmac_tv[i].tap[k]);
-				temp += hmac_tv[i].tap[k];
-				sg_set_buf(&sg[k], &xbuf[IDX[k]],
-					    hmac_tv[i].tap[k]);
+				if (ret) {
+					printk("setkey() failed ret=%d\n", ret);
+					goto out;
+				}
 			}
 
-			crypto_hmac(tfm, hmac_tv[i].key, &klen, sg,
-				    hmac_tv[i].np, result);
-			hexdump(result, crypto_tfm_alg_digestsize(tfm));
+			ret = crypto_hash_digest(&desc, sg, hash_tv[i].psize,
+						 result);
+			if (ret) {
+				printk("digest () failed ret=%d\n", ret);
+				goto out;
+			}
 
+			hexdump(result, crypto_hash_digestsize(tfm));
 			printk("%s\n",
-			       memcmp(result, hmac_tv[i].digest,
-				      crypto_tfm_alg_digestsize(tfm)) ?
+			       memcmp(result, hash_tv[i].digest,
+				      crypto_hash_digestsize(tfm)) ?
 			       "fail" : "pass");
 		}
 	}
+
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 }
 
-#endif	/* CONFIG_CRYPTO_HMAC */
-
 static void test_cipher(char *algo, int enc,
 			struct cipher_testvec *template, unsigned int tcount)
 {
@@ -570,97 +517,202 @@ static void test_cipher_speed(char *algo, int enc, unsigned int sec,
 	crypto_free_blkcipher(tfm);
 }
 
-static void test_digest_jiffies(struct crypto_tfm *tfm, char *p, int blen,
-				int plen, char *out, int sec)
+static int test_hash_jiffies_digest(struct hash_desc *desc, char *p, int blen,
+				    char *out, int sec)
+{
+	struct scatterlist sg[1];
+	unsigned long start, end;
+	int bcount;
+	int ret;
+
+	for (start = jiffies, end = start + sec * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		sg_set_buf(sg, p, blen);
+		ret = crypto_hash_digest(desc, sg, blen, out);
+		if (ret)
+			return ret;
+	}
+
+	printk("%6u opers/sec, %9lu bytes/sec\n",
+	       bcount / sec, ((long)bcount * blen) / sec);
+
+	return 0;
+}
+
+static int test_hash_jiffies(struct hash_desc *desc, char *p, int blen,
+			     int plen, char *out, int sec)
 {
 	struct scatterlist sg[1];
 	unsigned long start, end;
 	int bcount, pcount;
+	int ret;
+
+	if (plen == blen)
+		return test_hash_jiffies_digest(desc, p, blen, out, sec);
 
 	for (start = jiffies, end = start + sec * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		crypto_digest_init(tfm);
+		ret = crypto_hash_init(desc);
+		if (ret)
+			return ret;
 		for (pcount = 0; pcount < blen; pcount += plen) {
 			sg_set_buf(sg, p + pcount, plen);
-			crypto_digest_update(tfm, sg, 1);
+			ret = crypto_hash_update(desc, sg, plen);
+			if (ret)
+				return ret;
 		}
 		/* we assume there is enough space in 'out' for the result */
-		crypto_digest_final(tfm, out);
+		ret = crypto_hash_final(desc, out);
+		if (ret)
+			return ret;
 	}
 
 	printk("%6u opers/sec, %9lu bytes/sec\n",
 	       bcount / sec, ((long)bcount * blen) / sec);
 
-	return;
+	return 0;
+}
+
+static int test_hash_cycles_digest(struct hash_desc *desc, char *p, int blen,
+				   char *out)
+{
+	struct scatterlist sg[1];
+	unsigned long cycles = 0;
+	int i;
+	int ret;
+
+	local_bh_disable();
+	local_irq_disable();
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		sg_set_buf(sg, p, blen);
+		ret = crypto_hash_digest(desc, sg, blen, out);
+		if (ret)
+			goto out;
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+
+		sg_set_buf(sg, p, blen);
+		ret = crypto_hash_digest(desc, sg, blen, out);
+		if (ret)
+			goto out;
+
+		end = get_cycles();
+
+		cycles += end - start;
+	}
+
+out:
+	local_irq_enable();
+	local_bh_enable();
+
+	if (ret)
+		return ret;
+
+	printk("%6lu cycles/operation, %4lu cycles/byte\n",
+	       cycles / 8, cycles / (8 * blen));
+
+	return 0;
 }
 
-static void test_digest_cycles(struct crypto_tfm *tfm, char *p, int blen,
-			       int plen, char *out)
+static int test_hash_cycles(struct hash_desc *desc, char *p, int blen,
+			    int plen, char *out)
 {
 	struct scatterlist sg[1];
 	unsigned long cycles = 0;
 	int i, pcount;
+	int ret;
+
+	if (plen == blen)
+		return test_hash_cycles_digest(desc, p, blen, out);
 
 	local_bh_disable();
 	local_irq_disable();
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		crypto_digest_init(tfm);
+		ret = crypto_hash_init(desc);
+		if (ret)
+			goto out;
 		for (pcount = 0; pcount < blen; pcount += plen) {
 			sg_set_buf(sg, p + pcount, plen);
-			crypto_digest_update(tfm, sg, 1);
+			ret = crypto_hash_update(desc, sg, plen);
+			if (ret)
+				goto out;
 		}
-		crypto_digest_final(tfm, out);
+		ret = crypto_hash_final(desc, out);
+		if (ret)
+			goto out;
 	}
 
 	/* The real thing. */
 	for (i = 0; i < 8; i++) {
 		cycles_t start, end;
 
-		crypto_digest_init(tfm);
-
 		start = get_cycles();
 
+		ret = crypto_hash_init(desc);
+		if (ret)
+			goto out;
 		for (pcount = 0; pcount < blen; pcount += plen) {
 			sg_set_buf(sg, p + pcount, plen);
-			crypto_digest_update(tfm, sg, 1);
+			ret = crypto_hash_update(desc, sg, plen);
+			if (ret)
+				goto out;
 		}
-		crypto_digest_final(tfm, out);
+		ret = crypto_hash_final(desc, out);
+		if (ret)
+			goto out;
 
 		end = get_cycles();
 
 		cycles += end - start;
 	}
 
+out:
 	local_irq_enable();
 	local_bh_enable();
 
+	if (ret)
+		return ret;
+
 	printk("%6lu cycles/operation, %4lu cycles/byte\n",
 	       cycles / 8, cycles / (8 * blen));
 
-	return;
+	return 0;
 }
 
-static void test_digest_speed(char *algo, unsigned int sec,
-			      struct digest_speed *speed)
+static void test_hash_speed(char *algo, unsigned int sec,
+			      struct hash_speed *speed)
 {
-	struct crypto_tfm *tfm;
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
 	char output[1024];
 	int i;
+	int ret;
 
 	printk("\ntesting speed of %s\n", algo);
 
-	tfm = crypto_alloc_tfm(algo, 0);
+	tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC);
 
-	if (tfm == NULL) {
-		printk("failed to load transform for %s\n", algo);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
 
-	if (crypto_tfm_alg_digestsize(tfm) > sizeof(output)) {
+	desc.tfm = tfm;
+	desc.flags = 0;
+
+	if (crypto_hash_digestsize(tfm) > sizeof(output)) {
 		printk("digestsize(%u) > outputbuffer(%zu)\n",
-		       crypto_tfm_alg_digestsize(tfm), sizeof(output));
+		       crypto_hash_digestsize(tfm), sizeof(output));
 		goto out;
 	}
 
@@ -677,13 +729,20 @@ static void test_digest_speed(char *algo, unsigned int sec,
 		memset(tvmem, 0xff, speed[i].blen);
 
 		if (sec)
-			test_digest_jiffies(tfm, tvmem, speed[i].blen, speed[i].plen, output, sec);
+			ret = test_hash_jiffies(&desc, tvmem, speed[i].blen,
+						speed[i].plen, output, sec);
 		else
-			test_digest_cycles(tfm, tvmem, speed[i].blen, speed[i].plen, output);
+			ret = test_hash_cycles(&desc, tvmem, speed[i].blen,
+					       speed[i].plen, output);
+
+		if (ret) {
+			printk("hashing failed ret=%d\n", ret);
+			break;
+		}
 	}
 
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 }
 
 static void test_deflate(void)
@@ -911,11 +970,12 @@ static void do_test(void)
 		test_hash("tgr128", tgr128_tv_template, TGR128_TEST_VECTORS);
 		test_deflate();
 		test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS);
-#ifdef CONFIG_CRYPTO_HMAC
-		test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS);
-		test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS);
-		test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS);
-#endif
+		test_hash("hmac(md5)", hmac_md5_tv_template,
+			  HMAC_MD5_TEST_VECTORS);
+		test_hash("hmac(sha1)", hmac_sha1_tv_template,
+			  HMAC_SHA1_TEST_VECTORS);
+		test_hash("hmac(sha256)", hmac_sha256_tv_template,
+			  HMAC_SHA256_TEST_VECTORS);
 
 		test_hash("michael_mic", michael_mic_tv_template, MICHAEL_MIC_TEST_VECTORS);
 		break;
@@ -1106,20 +1166,21 @@ static void do_test(void)
 			    XETA_DEC_TEST_VECTORS);
 		break;
 
-#ifdef CONFIG_CRYPTO_HMAC
 	case 100:
-		test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS);
+		test_hash("hmac(md5)", hmac_md5_tv_template,
+			  HMAC_MD5_TEST_VECTORS);
 		break;
 
 	case 101:
-		test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS);
+		test_hash("hmac(sha1)", hmac_sha1_tv_template,
+			  HMAC_SHA1_TEST_VECTORS);
 		break;
 
 	case 102:
-		test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS);
+		test_hash("hmac(sha256)", hmac_sha256_tv_template,
+			  HMAC_SHA256_TEST_VECTORS);
 		break;
 
-#endif
 
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
@@ -1188,51 +1249,51 @@ static void do_test(void)
 		/* fall through */
 
 	case 301:
-		test_digest_speed("md4", sec, generic_digest_speed_template);
+		test_hash_speed("md4", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 302:
-		test_digest_speed("md5", sec, generic_digest_speed_template);
+		test_hash_speed("md5", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 303:
-		test_digest_speed("sha1", sec, generic_digest_speed_template);
+		test_hash_speed("sha1", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 304:
-		test_digest_speed("sha256", sec, generic_digest_speed_template);
+		test_hash_speed("sha256", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 305:
-		test_digest_speed("sha384", sec, generic_digest_speed_template);
+		test_hash_speed("sha384", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 306:
-		test_digest_speed("sha512", sec, generic_digest_speed_template);
+		test_hash_speed("sha512", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 307:
-		test_digest_speed("wp256", sec, generic_digest_speed_template);
+		test_hash_speed("wp256", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 308:
-		test_digest_speed("wp384", sec, generic_digest_speed_template);
+		test_hash_speed("wp384", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 309:
-		test_digest_speed("wp512", sec, generic_digest_speed_template);
+		test_hash_speed("wp512", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 310:
-		test_digest_speed("tgr128", sec, generic_digest_speed_template);
+		test_hash_speed("tgr128", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 311:
-		test_digest_speed("tgr160", sec, generic_digest_speed_template);
+		test_hash_speed("tgr160", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 312:
-		test_digest_speed("tgr192", sec, generic_digest_speed_template);
+		test_hash_speed("tgr192", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 399:
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 408d5aad58644..a40c4411729ee 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -36,16 +36,6 @@ struct hash_testvec {
 	unsigned char ksize;
 };
 
-struct hmac_testvec {
-	char key[128];
-	char plaintext[128];
-	char digest[MAX_DIGEST_SIZE];
-	unsigned char tap[MAX_TAP];
-	unsigned char ksize;
-	unsigned char psize;
-	unsigned char np;
-};
-
 struct cipher_testvec {
 	char key[MAX_KEYLEN] __attribute__ ((__aligned__(4)));
 	char iv[MAX_IVLEN];
@@ -65,7 +55,7 @@ struct cipher_speed {
 	unsigned int blen;
 };
 
-struct digest_speed {
+struct hash_speed {
 	unsigned int blen;	/* buffer length */
 	unsigned int plen;	/* per-update length */
 };
@@ -697,14 +687,13 @@ static struct hash_testvec tgr128_tv_template[] = {
 	},
 };
 
-#ifdef CONFIG_CRYPTO_HMAC
 /*
  * HMAC-MD5 test vectors from RFC2202
  * (These need to be fixed to not use strlen).
  */
 #define HMAC_MD5_TEST_VECTORS	7
 
-static struct hmac_testvec hmac_md5_tv_template[] =
+static struct hash_testvec hmac_md5_tv_template[] =
 {
 	{
 		.key	= { [0 ... 15] =  0x0b },
@@ -768,7 +757,7 @@ static struct hmac_testvec hmac_md5_tv_template[] =
  */
 #define HMAC_SHA1_TEST_VECTORS	7
 
-static struct hmac_testvec hmac_sha1_tv_template[] = {
+static struct hash_testvec hmac_sha1_tv_template[] = {
 	{
 		.key	= { [0 ... 19] = 0x0b },
 		.ksize	= 20,
@@ -833,7 +822,7 @@ static struct hmac_testvec hmac_sha1_tv_template[] = {
  */
 #define HMAC_SHA256_TEST_VECTORS	10
 
-static struct hmac_testvec hmac_sha256_tv_template[] = {
+static struct hash_testvec hmac_sha256_tv_template[] = {
 	{
 		.key	= { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
 			    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
@@ -944,8 +933,6 @@ static struct hmac_testvec hmac_sha256_tv_template[] = {
 	},
 };
 
-#endif	/* CONFIG_CRYPTO_HMAC */
-
 /*
  * DES test vectors.
  */
@@ -3160,7 +3147,7 @@ static struct cipher_speed des_speed_template[] = {
 /*
  * Digest speed tests
  */
-static struct digest_speed generic_digest_speed_template[] = {
+static struct hash_speed generic_hash_speed_template[] = {
 	{ .blen = 16, 	.plen = 16, },
 	{ .blen = 64,	.plen = 16, },
 	{ .blen = 64,	.plen = 64, },
-- 
GitLab


From 07d4ee583e21830ec5604d31f65cdc60a6eca19e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 20 Aug 2006 14:24:50 +1000
Subject: [PATCH 0328/1063] [IPSEC]: Use HMAC template and hash interface

This patch converts IPsec to use the new HMAC template.  The names of
existing simple digest algorithms may still be used to refer to their
HMAC composites.

The same structure can be used by other MACs such as AES-XCBC-MAC.

This patch also switches from the digest interface to hash.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ah.h     | 29 ++++++++++++++++++-----------
 include/net/esp.h    | 28 ++++++++++++++++------------
 include/net/xfrm.h   |  9 +++++----
 net/ipv4/ah4.c       | 36 ++++++++++++++++++++++++------------
 net/ipv4/esp4.c      | 36 +++++++++++++++++++++---------------
 net/ipv6/ah6.c       | 35 +++++++++++++++++++++++------------
 net/ipv6/esp6.c      | 42 ++++++++++++++++++++++++------------------
 net/xfrm/xfrm_algo.c | 40 +++++++++++++++++++++++++++-------------
 8 files changed, 158 insertions(+), 97 deletions(-)

diff --git a/include/net/ah.h b/include/net/ah.h
index 8e27c9ba8b84d..8f257c1599028 100644
--- a/include/net/ah.h
+++ b/include/net/ah.h
@@ -15,22 +15,29 @@ struct ah_data
 	int			icv_full_len;
 	int			icv_trunc_len;
 
-	void			(*icv)(struct ah_data*,
-	                               struct sk_buff *skb, u8 *icv);
-
-	struct crypto_tfm	*tfm;
+	struct crypto_hash	*tfm;
 };
 
-static inline void
-ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb, u8 *auth_data)
+static inline int ah_mac_digest(struct ah_data *ahp, struct sk_buff *skb,
+				u8 *auth_data)
 {
-	struct crypto_tfm *tfm = ahp->tfm;
+	struct hash_desc desc;
+	int err;
+
+	desc.tfm = ahp->tfm;
+	desc.flags = 0;
 
 	memset(auth_data, 0, ahp->icv_trunc_len);
-	crypto_hmac_init(tfm, ahp->key, &ahp->key_len);
-	skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update);
-	crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv);
-	memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len);
+	err = crypto_hash_init(&desc);
+	if (unlikely(err))
+		goto out;
+	err = skb_icv_walk(skb, &desc, 0, skb->len, crypto_hash_update);
+	if (unlikely(err))
+		goto out;
+	err = crypto_hash_final(&desc, ahp->work_icv);
+
+out:
+	return err;
 }
 
 #endif
diff --git a/include/net/esp.h b/include/net/esp.h
index af2ff18700c70..064366d66eead 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -35,7 +35,7 @@ struct esp_data
 		void			(*icv)(struct esp_data*,
 		                               struct sk_buff *skb,
 		                               int offset, int len, u8 *icv);
-		struct crypto_tfm	*tfm;
+		struct crypto_hash	*tfm;
 	} auth;
 };
 
@@ -43,18 +43,22 @@ extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
 extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
 extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
 
-static inline void
-esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset,
-                int len, u8 *auth_data)
+static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb,
+				 int offset, int len)
 {
-	struct crypto_tfm *tfm = esp->auth.tfm;
-	char *icv = esp->auth.work_icv;
-
-	memset(auth_data, 0, esp->auth.icv_trunc_len);
-	crypto_hmac_init(tfm, esp->auth.key, &esp->auth.key_len);
-	skb_icv_walk(skb, tfm, offset, len, crypto_hmac_update);
-	crypto_hmac_final(tfm, esp->auth.key, &esp->auth.key_len, icv);
-	memcpy(auth_data, icv, esp->auth.icv_trunc_len);
+	struct hash_desc desc;
+	int err;
+
+	desc.tfm = esp->auth.tfm;
+	desc.flags = 0;
+
+	err = crypto_hash_init(&desc);
+	if (unlikely(err))
+		return err;
+	err = skb_icv_walk(skb, &desc, offset, len, crypto_hash_update);
+	if (unlikely(err))
+		return err;
+	return crypto_hash_final(&desc, esp->auth.work_icv);
 }
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e9114e41affce..3ecd9fa1ed4b9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -984,12 +984,13 @@ extern struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe);
 extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe);
 extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe);
 
-struct crypto_tfm;
+struct hash_desc;
 struct scatterlist;
-typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int);
+typedef int (icv_update_fn_t)(struct hash_desc *, struct scatterlist *,
+			      unsigned int);
 
-extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
-			 int offset, int len, icv_update_fn_t icv_update);
+extern int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *tfm,
+			int offset, int len, icv_update_fn_t icv_update);
 
 static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b,
 				int family)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 1366bc6ce6a5c..2b98943e6b025 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,3 +1,4 @@
+#include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -97,7 +98,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
-	ahp->icv(ahp, skb, ah->auth_data);
+	err = ah_mac_digest(ahp, skb, ah->auth_data);
+	if (err)
+		goto error;
+	memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
 
 	top_iph->tos = iph->tos;
 	top_iph->ttl = iph->ttl;
@@ -119,6 +123,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int ah_hlen;
 	int ihl;
+	int err = -EINVAL;
 	struct iphdr *iph;
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
@@ -166,8 +171,11 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		
 		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
 		skb_push(skb, ihl);
-		ahp->icv(ahp, skb, ah->auth_data);
-		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+		err = ah_mac_digest(ahp, skb, ah->auth_data);
+		if (err)
+			goto out;
+		err = -EINVAL;
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
 			x->stats.integrity_failed++;
 			goto out;
 		}
@@ -179,7 +187,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 
 out:
-	return -EINVAL;
+	return err;
 }
 
 static void ah4_err(struct sk_buff *skb, u32 info)
@@ -204,6 +212,7 @@ static int ah_init_state(struct xfrm_state *x)
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
+	struct crypto_hash *tfm;
 
 	if (!x->aalg)
 		goto error;
@@ -221,24 +230,27 @@ static int ah_init_state(struct xfrm_state *x)
 
 	ahp->key = x->aalg->alg_key;
 	ahp->key_len = (x->aalg->alg_key_len+7)/8;
-	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-	if (!ahp->tfm)
+	tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		goto error;
+
+	ahp->tfm = tfm;
+	if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len))
 		goto error;
-	ahp->icv = ah_hmac_digest;
 	
 	/*
 	 * Lookup the algorithm description maintained by xfrm_algo,
 	 * verify crypto transform properties, and store information
 	 * we need for AH processing.  This lookup cannot fail here
-	 * after a successful crypto_alloc_tfm().
+	 * after a successful crypto_alloc_hash().
 	 */
 	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 	BUG_ON(!aalg_desc);
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+	    crypto_hash_digestsize(tfm)) {
 		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       x->aalg->alg_name, crypto_hash_digestsize(tfm),
 		       aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
@@ -262,7 +274,7 @@ static int ah_init_state(struct xfrm_state *x)
 error:
 	if (ahp) {
 		kfree(ahp->work_icv);
-		crypto_free_tfm(ahp->tfm);
+		crypto_free_hash(ahp->tfm);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -277,7 +289,7 @@ static void ah_destroy(struct xfrm_state *x)
 
 	kfree(ahp->work_icv);
 	ahp->work_icv = NULL;
-	crypto_free_tfm(ahp->tfm);
+	crypto_free_hash(ahp->tfm);
 	ahp->tfm = NULL;
 	kfree(ahp);
 }
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 7c63ae4947429..b428489f6ccd9 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -121,9 +121,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	if (esp->auth.icv_full_len) {
-		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
-		              sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
-		pskb_put(skb, trailer, alen);
+		err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
+				     sizeof(*esph) + esp->conf.ivlen + clen);
+		memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
 	}
 
 	ip_send_check(top_iph);
@@ -163,15 +163,16 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* If integrity check is required, do this. */
 	if (esp->auth.icv_full_len) {
-		u8 sum[esp->auth.icv_full_len];
-		u8 sum1[alen];
-		
-		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+		u8 sum[alen];
 
-		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+		err = esp_mac_digest(esp, skb, 0, skb->len - alen);
+		if (err)
+			goto out;
+
+		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();
 
-		if (unlikely(memcmp(sum, sum1, alen))) {
+		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
 			x->stats.integrity_failed++;
 			goto out;
 		}
@@ -307,7 +308,7 @@ static void esp_destroy(struct xfrm_state *x)
 	esp->conf.tfm = NULL;
 	kfree(esp->conf.ivec);
 	esp->conf.ivec = NULL;
-	crypto_free_tfm(esp->auth.tfm);
+	crypto_free_hash(esp->auth.tfm);
 	esp->auth.tfm = NULL;
 	kfree(esp->auth.work_icv);
 	esp->auth.work_icv = NULL;
@@ -333,22 +334,27 @@ static int esp_init_state(struct xfrm_state *x)
 
 	if (x->aalg) {
 		struct xfrm_algo_desc *aalg_desc;
+		struct crypto_hash *hash;
 
 		esp->auth.key = x->aalg->alg_key;
 		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
-		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-		if (esp->auth.tfm == NULL)
+		hash = crypto_alloc_hash(x->aalg->alg_name, 0,
+					 CRYPTO_ALG_ASYNC);
+		if (IS_ERR(hash))
+			goto error;
+
+		esp->auth.tfm = hash;
+		if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len))
 			goto error;
-		esp->auth.icv = esp_hmac_digest;
 
 		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 		BUG_ON(!aalg_desc);
 
 		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-		    crypto_tfm_alg_digestsize(esp->auth.tfm)) {
+		    crypto_hash_digestsize(hash)) {
 			NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
 				 x->aalg->alg_name,
-				 crypto_tfm_alg_digestsize(esp->auth.tfm),
+				 crypto_hash_digestsize(hash),
 				 aalg_desc->uinfo.auth.icv_fullbits/8);
 			goto error;
 		}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 9d4831bd43355..00ffa7bc6c9f4 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -213,7 +213,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
-	ahp->icv(ahp, skb, ah->auth_data);
+	err = ah_mac_digest(ahp, skb, ah->auth_data);
+	if (err)
+		goto error_free_iph;
+	memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
 
 	err = 0;
 
@@ -251,6 +254,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	u16 hdr_len;
 	u16 ah_hlen;
 	int nexthdr;
+	int err = -EINVAL;
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
 		goto out;
@@ -292,8 +296,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
 		memset(ah->auth_data, 0, ahp->icv_trunc_len);
 		skb_push(skb, hdr_len);
-		ahp->icv(ahp, skb, ah->auth_data);
-		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+		err = ah_mac_digest(ahp, skb, ah->auth_data);
+		if (err)
+			goto free_out;
+		err = -EINVAL;
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
 			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
 			x->stats.integrity_failed++;
 			goto free_out;
@@ -310,7 +317,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 free_out:
 	kfree(tmp_hdr);
 out:
-	return -EINVAL;
+	return err;
 }
 
 static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 
@@ -338,6 +345,7 @@ static int ah6_init_state(struct xfrm_state *x)
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
+	struct crypto_hash *tfm;
 
 	if (!x->aalg)
 		goto error;
@@ -355,24 +363,27 @@ static int ah6_init_state(struct xfrm_state *x)
 
 	ahp->key = x->aalg->alg_key;
 	ahp->key_len = (x->aalg->alg_key_len+7)/8;
-	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-	if (!ahp->tfm)
+	tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		goto error;
+
+	ahp->tfm = tfm;
+	if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len))
 		goto error;
-	ahp->icv = ah_hmac_digest;
 	
 	/*
 	 * Lookup the algorithm description maintained by xfrm_algo,
 	 * verify crypto transform properties, and store information
 	 * we need for AH processing.  This lookup cannot fail here
-	 * after a successful crypto_alloc_tfm().
+	 * after a successful crypto_alloc_hash().
 	 */
 	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 	BUG_ON(!aalg_desc);
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+	    crypto_hash_digestsize(tfm)) {
 		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       x->aalg->alg_name, crypto_hash_digestsize(tfm),
 		       aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
@@ -396,7 +407,7 @@ static int ah6_init_state(struct xfrm_state *x)
 error:
 	if (ahp) {
 		kfree(ahp->work_icv);
-		crypto_free_tfm(ahp->tfm);
+		crypto_free_hash(ahp->tfm);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -411,7 +422,7 @@ static void ah6_destroy(struct xfrm_state *x)
 
 	kfree(ahp->work_icv);
 	ahp->work_icv = NULL;
-	crypto_free_tfm(ahp->tfm);
+	crypto_free_hash(ahp->tfm);
 	ahp->tfm = NULL;
 	kfree(ahp);
 }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 46a7e687948ec..2ebfd281e7218 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -125,9 +125,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	if (esp->auth.icv_full_len) {
-		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
-			sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
-		pskb_put(skb, trailer, alen);
+		err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
+				     sizeof(*esph) + esp->conf.ivlen + clen);
+		memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
 	}
 
 error:
@@ -162,15 +162,16 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* If integrity check is required, do this. */
         if (esp->auth.icv_full_len) {
-		u8 sum[esp->auth.icv_full_len];
-		u8 sum1[alen];
+		u8 sum[alen];
 
-		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+		ret = esp_mac_digest(esp, skb, 0, skb->len - alen);
+		if (ret)
+			goto out;
 
-		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();
 
-		if (unlikely(memcmp(sum, sum1, alen))) {
+		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
 			x->stats.integrity_failed++;
 			ret = -EINVAL;
 			goto out;
@@ -279,7 +280,7 @@ static void esp6_destroy(struct xfrm_state *x)
 	esp->conf.tfm = NULL;
 	kfree(esp->conf.ivec);
 	esp->conf.ivec = NULL;
-	crypto_free_tfm(esp->auth.tfm);
+	crypto_free_hash(esp->auth.tfm);
 	esp->auth.tfm = NULL;
 	kfree(esp->auth.work_icv);
 	esp->auth.work_icv = NULL;
@@ -308,24 +309,29 @@ static int esp6_init_state(struct xfrm_state *x)
 
 	if (x->aalg) {
 		struct xfrm_algo_desc *aalg_desc;
+		struct crypto_hash *hash;
 
 		esp->auth.key = x->aalg->alg_key;
 		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
-		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-		if (esp->auth.tfm == NULL)
+		hash = crypto_alloc_hash(x->aalg->alg_name, 0,
+					 CRYPTO_ALG_ASYNC);
+		if (IS_ERR(hash))
+			goto error;
+
+		esp->auth.tfm = hash;
+		if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len))
 			goto error;
-		esp->auth.icv = esp_hmac_digest;
  
 		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 		BUG_ON(!aalg_desc);
  
 		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-			crypto_tfm_alg_digestsize(esp->auth.tfm)) {
-				printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
-					x->aalg->alg_name,
-					crypto_tfm_alg_digestsize(esp->auth.tfm),
-					aalg_desc->uinfo.auth.icv_fullbits/8);
-				goto error;
+		    crypto_hash_digestsize(hash)) {
+			NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+				 x->aalg->alg_name,
+				 crypto_hash_digestsize(hash),
+				 aalg_desc->uinfo.auth.icv_fullbits/8);
+			goto error;
 		}
  
 		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 9b03d8497fbaa..87918f281bb43 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -30,7 +30,8 @@
  */
 static struct xfrm_algo_desc aalg_list[] = {
 {
-	.name = "digest_null",
+	.name = "hmac(digest_null)",
+	.compat = "digest_null",
 	
 	.uinfo = {
 		.auth = {
@@ -47,7 +48,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 	}
 },
 {
-	.name = "md5",
+	.name = "hmac(md5)",
+	.compat = "md5",
 
 	.uinfo = {
 		.auth = {
@@ -64,7 +66,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 	}
 },
 {
-	.name = "sha1",
+	.name = "hmac(sha1)",
+	.compat = "sha1",
 
 	.uinfo = {
 		.auth = {
@@ -81,7 +84,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 	}
 },
 {
-	.name = "sha256",
+	.name = "hmac(sha256)",
+	.compat = "sha256",
 
 	.uinfo = {
 		.auth = {
@@ -98,7 +102,8 @@ static struct xfrm_algo_desc aalg_list[] = {
 	}
 },
 {
-	.name = "ripemd160",
+	.name = "hmac(ripemd160)",
+	.compat = "ripemd160",
 
 	.uinfo = {
 		.auth = {
@@ -480,11 +485,12 @@ EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
 
 /* Move to common area: it is shared with AH. */
 
-void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
-		  int offset, int len, icv_update_fn_t icv_update)
+int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
+		 int offset, int len, icv_update_fn_t icv_update)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
+	int err;
 	struct scatterlist sg;
 
 	/* Checksum header. */
@@ -496,10 +502,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
 		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
 		sg.length = copy;
 		
-		icv_update(tfm, &sg, 1);
+		err = icv_update(desc, &sg, copy);
+		if (unlikely(err))
+			return err;
 		
 		if ((len -= copy) == 0)
-			return;
+			return 0;
 		offset += copy;
 	}
 
@@ -519,10 +527,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
 			sg.offset = frag->page_offset + offset-start;
 			sg.length = copy;
 			
-			icv_update(tfm, &sg, 1);
+			err = icv_update(desc, &sg, copy);
+			if (unlikely(err))
+				return err;
 
 			if (!(len -= copy))
-				return;
+				return 0;
 			offset += copy;
 		}
 		start = end;
@@ -540,15 +550,19 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				skb_icv_walk(list, tfm, offset-start, copy, icv_update);
+				err = skb_icv_walk(list, desc, offset-start,
+						   copy, icv_update);
+				if (unlikely(err))
+					return err;
 				if ((len -= copy) == 0)
-					return;
+					return 0;
 				offset += copy;
 			}
 			start = end;
 		}
 	}
 	BUG_ON(len);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(skb_icv_walk);
 
-- 
GitLab


From 1b489e11d4df82514792f9f981f31976f8a94ddf Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 20 Aug 2006 15:07:14 +1000
Subject: [PATCH 0329/1063] [SCTP]: Use HMAC template and hash interface

This patch converts SCTP to use the new HMAC template and hash interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  4 ++--
 include/net/sctp/sctp.h      | 11 -----------
 include/net/sctp/structs.h   |  3 ++-
 net/sctp/endpointola.c       |  2 +-
 net/sctp/sm_make_chunk.c     | 37 ++++++++++++++++++++++++++----------
 net/sctp/socket.c            |  6 +++---
 6 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index c51541ee0247c..57166bfdf8eb0 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -312,9 +312,9 @@ enum { SCTP_MAX_GABS = 16 };
 				 */
 
 #if defined (CONFIG_SCTP_HMAC_MD5)
-#define SCTP_COOKIE_HMAC_ALG "md5"
+#define SCTP_COOKIE_HMAC_ALG "hmac(md5)"
 #elif defined (CONFIG_SCTP_HMAC_SHA1)
-#define SCTP_COOKIE_HMAC_ALG "sha1"
+#define SCTP_COOKIE_HMAC_ALG "hmac(sha1)"
 #else
 #define SCTP_COOKIE_HMAC_ALG NULL
 #endif
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 92eae0e0f3f15..1c1abce5f6b64 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -330,17 +330,6 @@ static inline void sctp_v6_exit(void) { return; }
 
 #endif /* #if defined(CONFIG_IPV6) */
 
-/* Some wrappers, in case crypto not available. */
-#if defined (CONFIG_CRYPTO_HMAC)
-#define sctp_crypto_alloc_tfm crypto_alloc_tfm
-#define sctp_crypto_free_tfm crypto_free_tfm
-#define sctp_crypto_hmac crypto_hmac
-#else
-#define sctp_crypto_alloc_tfm(x...) NULL
-#define sctp_crypto_free_tfm(x...)
-#define sctp_crypto_hmac(x...)
-#endif
-
 
 /* Map an association to an assoc_id. */
 static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e5aa7ff1f5b5c..0412e730c7653 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -87,6 +87,7 @@ struct sctp_bind_addr;
 struct sctp_ulpq;
 struct sctp_ep_common;
 struct sctp_ssnmap;
+struct crypto_hash;
 
 
 #include <net/sctp/tsnmap.h>
@@ -264,7 +265,7 @@ struct sctp_sock {
 	struct sctp_pf *pf;
 
 	/* Access to HMAC transform. */
-	struct crypto_tfm *hmac;
+	struct crypto_hash *hmac;
 
 	/* What is our base endpointer? */
 	struct sctp_endpoint *ep;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ffda1d680529c..35c49ff2d0621 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -173,7 +173,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 	SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
 
 	/* Free up the HMAC transform. */
-	sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
+	crypto_free_hash(sctp_sk(ep->base.sk)->hmac);
 
 	/* Cleanup. */
 	sctp_inq_free(&ep->base.inqueue);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 17b509282cf20..7745bdea7817a 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1282,10 +1282,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 
 	retval = kmalloc(*cookie_len, GFP_ATOMIC);
 
-	if (!retval) {
-		*cookie_len = 0;
+	if (!retval)
 		goto nodata;
-	}
 
 	/* Clear this memory since we are sending this data structure
 	 * out on the network.
@@ -1321,19 +1319,29 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 	       ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
 
   	if (sctp_sk(ep->base.sk)->hmac) {
+		struct hash_desc desc;
+
 		/* Sign the message.  */
 		sg.page = virt_to_page(&cookie->c);
 		sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE;
 		sg.length = bodysize;
 		keylen = SCTP_SECRET_SIZE;
 		key = (char *)ep->secret_key[ep->current_key];
+  		desc.tfm = sctp_sk(ep->base.sk)->hmac;
+  		desc.flags = 0;
 
-		sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
-				 &sg, 1, cookie->signature);
+		if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+		    crypto_hash_digest(&desc, &sg, bodysize, cookie->signature))
+			goto free_cookie;
 	}
 
-nodata:
 	return retval;
+
+free_cookie:
+	kfree(retval);
+nodata:
+	*cookie_len = 0;
+	return NULL;
 }
 
 /* Unpack the cookie from COOKIE ECHO chunk, recreating the association.  */
@@ -1354,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie(
 	sctp_scope_t scope;
 	struct sk_buff *skb = chunk->skb;
 	struct timeval tv;
+	struct hash_desc desc;
 
 	/* Header size is static data prior to the actual cookie, including
 	 * any padding.
@@ -1389,17 +1398,25 @@ struct sctp_association *sctp_unpack_cookie(
 	sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE;
 	sg.length = bodysize;
 	key = (char *)ep->secret_key[ep->current_key];
+	desc.tfm = sctp_sk(ep->base.sk)->hmac;
+	desc.flags = 0;
 
 	memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
-	sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg,
-			 1, digest);
+	if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+	    crypto_hash_digest(&desc, &sg, bodysize, digest)) {
+		*error = -SCTP_IERROR_NOMEM;
+		goto fail;
+	}
 
 	if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
 		/* Try the previous key. */
 		key = (char *)ep->secret_key[ep->last_key];
 		memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
-		sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
-				 &sg, 1, digest);
+		if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+		    crypto_hash_digest(&desc, &sg, bodysize, digest)) {
+			*error = -SCTP_IERROR_NOMEM;
+			goto fail;
+		}
 
 		if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
 			/* Yikes!  Still bad signature! */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index dab15949958e9..85caf79638867 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4898,7 +4898,7 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
 int sctp_inet_listen(struct socket *sock, int backlog)
 {
 	struct sock *sk = sock->sk;
-	struct crypto_tfm *tfm=NULL;
+	struct crypto_hash *tfm = NULL;
 	int err = -EINVAL;
 
 	if (unlikely(backlog < 0))
@@ -4911,7 +4911,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 
 	/* Allocate HMAC for generating cookie. */
 	if (sctp_hmac_alg) {
-		tfm = sctp_crypto_alloc_tfm(sctp_hmac_alg, 0);
+		tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
 		if (!tfm) {
 			err = -ENOSYS;
 			goto out;
@@ -4937,7 +4937,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 	sctp_release_sock(sk);
 	return err;
 cleanup:
-	sctp_crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 	goto out;
 }
 
-- 
GitLab


From 878b9014666217555d16073764f30e825cf18d2f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 20 Aug 2006 15:17:04 +1000
Subject: [PATCH 0330/1063] [CRYPTO] doc: Update documentation for hash and me

This patch updates the documentation to reflect the switch from digest
to hash.  It also replaces notes about emailing James Morris to refer
to me instead.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/api-intro.txt | 36 ++++++++++++++++++------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt
index 74dffc68ff9f7..5a03a2801d676 100644
--- a/Documentation/crypto/api-intro.txt
+++ b/Documentation/crypto/api-intro.txt
@@ -19,15 +19,14 @@ At the lowest level are algorithms, which register dynamically with the
 API.
 
 'Transforms' are user-instantiated objects, which maintain state, handle all
-of the implementation logic (e.g. manipulating page vectors), provide an 
-abstraction to the underlying algorithms, and handle common logical 
-operations (e.g. cipher modes, HMAC for digests).  However, at the user 
+of the implementation logic (e.g. manipulating page vectors) and provide an 
+abstraction to the underlying algorithms.  However, at the user 
 level they are very simple.
 
 Conceptually, the API layering looks like this:
 
   [transform api]  (user interface)
-  [transform ops]  (per-type logic glue e.g. cipher.c, digest.c)
+  [transform ops]  (per-type logic glue e.g. cipher.c, compress.c)
   [algorithm api]  (for registering algorithms)
   
 The idea is to make the user interface and algorithm registration API
@@ -44,22 +43,27 @@ under development.
 Here's an example of how to use the API:
 
 	#include <linux/crypto.h>
+	#include <linux/err.h>
+	#include <linux/scatterlist.h>
 	
 	struct scatterlist sg[2];
 	char result[128];
-	struct crypto_tfm *tfm;
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
 	
-	tfm = crypto_alloc_tfm("md5", 0);
-	if (tfm == NULL)
+	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		fail();
 		
 	/* ... set up the scatterlists ... */
+
+	desc.tfm = tfm;
+	desc.flags = 0;
 	
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &sg, 2);
-	crypto_digest_final(tfm, result);
+	if (crypto_hash_digest(&desc, sg, 2, result))
+		fail();
 	
-	crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 
     
 Many real examples are available in the regression test module (tcrypt.c).
@@ -126,7 +130,7 @@ might already be working on.
 BUGS
 
 Send bug reports to:
-James Morris <jmorris@redhat.com>
+Herbert Xu <herbert@gondor.apana.org.au>
 Cc: David S. Miller <davem@redhat.com>
 
 
@@ -134,13 +138,14 @@ FURTHER INFORMATION
 
 For further patches and various updates, including the current TODO
 list, see:
-http://samba.org/~jamesm/crypto/
+http://gondor.apana.org.au/~herbert/crypto/
 
 
 AUTHORS
 
 James Morris
 David S. Miller
+Herbert Xu
 
 
 CREDITS
@@ -238,8 +243,11 @@ Anubis algorithm contributors:
 Tiger algorithm contributors:
   Aaron Grothe
 
+VIA PadLock contributors:
+  Michal Ludvig
+
 Generic scatterwalk code by Adam J. Richter <adam@yggdrasil.com>
 
 Please send any credits updates or corrections to:
-James Morris <jmorris@redhat.com>
+Herbert Xu <herbert@gondor.apana.org.au>
 
-- 
GitLab


From 8425165dfed27945e8509c141cea245d1739e372 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 20 Aug 2006 15:25:22 +1000
Subject: [PATCH 0331/1063] [CRYPTO] digest: Remove old HMAC implementation

This patch removes the old HMAC implementation now that nobody uses it
anymore.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/Kconfig         |   2 +-
 crypto/digest.c        |   3 +-
 crypto/hmac.c          | 101 -----------------------------------------
 crypto/internal.h      |  13 ------
 include/linux/crypto.h |  16 -------
 5 files changed, 2 insertions(+), 133 deletions(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index f07d9237950f5..1e2f39c211801 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -33,7 +33,7 @@ config CRYPTO_MANAGER
 	  cbc(aes).
 
 config CRYPTO_HMAC
-	bool "HMAC support"
+	tristate "HMAC support"
 	select CRYPTO_HASH
 	help
 	  HMAC: Keyed-Hashing for Message Authentication (RFC2104).
diff --git a/crypto/digest.c b/crypto/digest.c
index 5873063db8400..0155a94e4b154 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -191,10 +191,9 @@ int crypto_init_digest_ops(struct crypto_tfm *tfm)
 	ops->setkey	= dalg->dia_setkey ? setkey : nosetkey;
 	ops->digestsize	= dalg->dia_digestsize;
 	
-	return crypto_alloc_hmac_block(tfm);
+	return 0;
 }
 
 void crypto_exit_digest_ops(struct crypto_tfm *tfm)
 {
-	crypto_free_hmac_block(tfm);
 }
diff --git a/crypto/hmac.c b/crypto/hmac.c
index eac77e2947407..f403b69460471 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -29,107 +29,6 @@ struct hmac_ctx {
 	struct crypto_hash *child;
 };
 
-static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen)
-{
-	struct scatterlist tmp;
-	
-	sg_set_buf(&tmp, key, keylen);
-	crypto_digest_digest(tfm, &tmp, 1, key);
-}
-
-int crypto_alloc_hmac_block(struct crypto_tfm *tfm)
-{
-	int ret = 0;
-
-	BUG_ON(!crypto_tfm_alg_blocksize(tfm));
-	
-	tfm->crt_hash.hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm),
-					   GFP_KERNEL);
-	if (tfm->crt_hash.hmac_block == NULL)
-		ret = -ENOMEM;
-
-	return ret;
-		
-}
-
-void crypto_free_hmac_block(struct crypto_tfm *tfm)
-{
-	kfree(tfm->crt_hash.hmac_block);
-}
-
-void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen)
-{
-	unsigned int i;
-	struct scatterlist tmp;
-	char *ipad = tfm->crt_hash.hmac_block;
-	
-	if (*keylen > crypto_tfm_alg_blocksize(tfm)) {
-		hash_key(tfm, key, *keylen);
-		*keylen = crypto_tfm_alg_digestsize(tfm);
-	}
-
-	memset(ipad, 0, crypto_tfm_alg_blocksize(tfm));
-	memcpy(ipad, key, *keylen);
-
-	for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++)
-		ipad[i] ^= 0x36;
-
-	sg_set_buf(&tmp, ipad, crypto_tfm_alg_blocksize(tfm));
-	
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &tmp, 1);
-}
-
-void crypto_hmac_update(struct crypto_tfm *tfm,
-                        struct scatterlist *sg, unsigned int nsg)
-{
-	crypto_digest_update(tfm, sg, nsg);
-}
-
-void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key,
-                       unsigned int *keylen, u8 *out)
-{
-	unsigned int i;
-	struct scatterlist tmp;
-	char *opad = tfm->crt_hash.hmac_block;
-	
-	if (*keylen > crypto_tfm_alg_blocksize(tfm)) {
-		hash_key(tfm, key, *keylen);
-		*keylen = crypto_tfm_alg_digestsize(tfm);
-	}
-
-	crypto_digest_final(tfm, out);
-
-	memset(opad, 0, crypto_tfm_alg_blocksize(tfm));
-	memcpy(opad, key, *keylen);
-		
-	for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++)
-		opad[i] ^= 0x5c;
-
-	sg_set_buf(&tmp, opad, crypto_tfm_alg_blocksize(tfm));
-
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &tmp, 1);
-	
-	sg_set_buf(&tmp, out, crypto_tfm_alg_digestsize(tfm));
-	
-	crypto_digest_update(tfm, &tmp, 1);
-	crypto_digest_final(tfm, out);
-}
-
-void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen,
-                 struct scatterlist *sg, unsigned int nsg, u8 *out)
-{
-	crypto_hmac_init(tfm, key, keylen);
-	crypto_hmac_update(tfm, sg, nsg);
-	crypto_hmac_final(tfm, key, keylen, out);
-}
-
-EXPORT_SYMBOL_GPL(crypto_hmac_init);
-EXPORT_SYMBOL_GPL(crypto_hmac_update);
-EXPORT_SYMBOL_GPL(crypto_hmac_final);
-EXPORT_SYMBOL_GPL(crypto_hmac);
-
 static inline void *align_ptr(void *p, unsigned int align)
 {
 	return (void *)ALIGN((unsigned long)p, align);
diff --git a/crypto/internal.h b/crypto/internal.h
index 93d9b10ff9145..2da6ad4f3593a 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -73,19 +73,6 @@ static inline void crypto_yield(u32 flags)
 		cond_resched();
 }
 
-#ifdef CONFIG_CRYPTO_HMAC
-int crypto_alloc_hmac_block(struct crypto_tfm *tfm);
-void crypto_free_hmac_block(struct crypto_tfm *tfm);
-#else
-static inline int crypto_alloc_hmac_block(struct crypto_tfm *tfm)
-{
-	return 0;
-}
-
-static inline void crypto_free_hmac_block(struct crypto_tfm *tfm)
-{ }
-#endif
-
 #ifdef CONFIG_PROC_FS
 void __init crypto_init_proc(void);
 void __exit crypto_exit_proc(void);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 40c0aab8ad4c9..929fb9ad13147 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -295,9 +295,6 @@ struct hash_tfm {
 		      unsigned int nsg, u8 *out);
 	int (*setkey)(struct crypto_hash *tfm, const u8 *key,
 		      unsigned int keylen);
-#ifdef CONFIG_CRYPTO_HMAC
-	void *hmac_block;
-#endif
 	unsigned int digestsize;
 };
 
@@ -872,18 +869,5 @@ static inline int crypto_comp_decompress(struct crypto_tfm *tfm,
 	return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen);
 }
 
-/*
- * HMAC support.
- */
-#ifdef CONFIG_CRYPTO_HMAC
-void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen);
-void crypto_hmac_update(struct crypto_tfm *tfm,
-                        struct scatterlist *sg, unsigned int nsg);
-void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key,
-                       unsigned int *keylen, u8 *out);
-void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen,
-                 struct scatterlist *sg, unsigned int nsg, u8 *out);
-#endif	/* CONFIG_CRYPTO_HMAC */
-
 #endif	/* _LINUX_CRYPTO_H */
 
-- 
GitLab


From dc64ddf4918f0da52df10d83c2a5941a547c2035 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 24 Aug 2006 18:45:50 +1000
Subject: [PATCH 0332/1063] [SCSI] iscsi: Use crypto_hash interface instead of
 crypto_digest

This patch converts ISCSI to use the new crypto_hash interface instead
of crypto_digest.  It's a fairly straightforward substitution.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/scsi/iscsi_tcp.c | 134 +++++++++++++++++++++------------------
 drivers/scsi/iscsi_tcp.h |   9 +--
 2 files changed, 78 insertions(+), 65 deletions(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 058f094f945ab..66a1ae1d6982b 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -26,6 +26,7 @@
  *	Zhenyu Wang
  */
 
+#include <linux/err.h>
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/inet.h>
@@ -107,8 +108,11 @@ iscsi_hdr_digest(struct iscsi_conn *conn, struct iscsi_buf *buf,
 		 u8* crc)
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct hash_desc desc;
 
-	crypto_digest_digest(tcp_conn->tx_tfm, &buf->sg, 1, crc);
+	desc.tfm = tcp_conn->tx_tfm;
+	desc.flags = 0;
+	crypto_hash_digest(&desc, &buf->sg, buf->sg.length, crc);
 	buf->sg.length += sizeof(uint32_t);
 }
 
@@ -452,11 +456,14 @@ iscsi_tcp_hdr_recv(struct iscsi_conn *conn)
 	}
 
 	if (conn->hdrdgst_en) {
+		struct hash_desc desc;
 		struct scatterlist sg;
 
 		sg_init_one(&sg, (u8 *)hdr,
 			    sizeof(struct iscsi_hdr) + ahslen);
-		crypto_digest_digest(tcp_conn->rx_tfm, &sg, 1, (u8 *)&cdgst);
+		desc.tfm = tcp_conn->rx_tfm;
+		desc.flags = 0;
+		crypto_hash_digest(&desc, &sg, sg.length, (u8 *)&cdgst);
 		rdgst = *(uint32_t*)((char*)hdr + sizeof(struct iscsi_hdr) +
 				     ahslen);
 		if (cdgst != rdgst) {
@@ -673,7 +680,7 @@ partial_sg_digest_update(struct iscsi_tcp_conn *tcp_conn,
 	memcpy(&temp, sg, sizeof(struct scatterlist));
 	temp.offset = offset;
 	temp.length = length;
-	crypto_digest_update(tcp_conn->data_rx_tfm, &temp, 1);
+	crypto_hash_update(&tcp_conn->data_rx_hash, &temp, length);
 }
 
 static void
@@ -682,7 +689,7 @@ iscsi_recv_digest_update(struct iscsi_tcp_conn *tcp_conn, char* buf, int len)
 	struct scatterlist tmp;
 
 	sg_init_one(&tmp, buf, len);
-	crypto_digest_update(tcp_conn->data_rx_tfm, &tmp, 1);
+	crypto_hash_update(&tcp_conn->data_rx_hash, &tmp, len);
 }
 
 static int iscsi_scsi_data_in(struct iscsi_conn *conn)
@@ -736,9 +743,9 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 		if (!rc) {
 			if (conn->datadgst_en) {
 				if (!offset)
-					crypto_digest_update(
-							tcp_conn->data_rx_tfm,
-							&sg[i], 1);
+					crypto_hash_update(
+							&tcp_conn->data_rx_hash,
+							&sg[i], sg[i].length);
 				else
 					partial_sg_digest_update(tcp_conn,
 							&sg[i],
@@ -877,8 +884,7 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 		rc = iscsi_tcp_hdr_recv(conn);
 		if (!rc && tcp_conn->in.datalen) {
 			if (conn->datadgst_en) {
-				BUG_ON(!tcp_conn->data_rx_tfm);
-				crypto_digest_init(tcp_conn->data_rx_tfm);
+				crypto_hash_init(&tcp_conn->data_rx_hash);
 			}
 			tcp_conn->in_progress = IN_PROGRESS_DATA_RECV;
 		} else if (rc) {
@@ -931,11 +937,11 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 					  tcp_conn->in.padding);
 				memset(pad, 0, tcp_conn->in.padding);
 				sg_init_one(&sg, pad, tcp_conn->in.padding);
-				crypto_digest_update(tcp_conn->data_rx_tfm,
-						     &sg, 1);
+				crypto_hash_update(&tcp_conn->data_rx_hash,
+						   &sg, sg.length);
 			}
-			crypto_digest_final(tcp_conn->data_rx_tfm,
-					    (u8 *) & tcp_conn->in.datadgst);
+			crypto_hash_final(&tcp_conn->data_rx_hash,
+					  (u8 *)&tcp_conn->in.datadgst);
 			debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
 			tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
 		} else
@@ -1181,8 +1187,7 @@ iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn,
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 
-	BUG_ON(!tcp_conn->data_tx_tfm);
-	crypto_digest_init(tcp_conn->data_tx_tfm);
+	crypto_hash_init(&tcp_conn->data_tx_hash);
 	tcp_ctask->digest_count = 4;
 }
 
@@ -1196,7 +1201,7 @@ iscsi_digest_final_send(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 	int sent = 0;
 
 	if (final)
-		crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest);
+		crypto_hash_final(&tcp_conn->data_tx_hash, (u8 *)digest);
 
 	iscsi_buf_init_iov(buf, (char*)digest, 4);
 	rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent);
@@ -1491,16 +1496,17 @@ handle_xmstate_imm_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		if (rc) {
 			tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
 			if (conn->datadgst_en) {
-				crypto_digest_final(tcp_conn->data_tx_tfm,
-						(u8*)&tcp_ctask->immdigest);
+				crypto_hash_final(&tcp_conn->data_tx_hash,
+						  (u8 *)&tcp_ctask->immdigest);
 				debug_tcp("tx imm sendpage fail 0x%x\n",
 					  tcp_ctask->datadigest);
 			}
 			return rc;
 		}
 		if (conn->datadgst_en)
-			crypto_digest_update(tcp_conn->data_tx_tfm,
-					     &tcp_ctask->sendbuf.sg, 1);
+			crypto_hash_update(&tcp_conn->data_tx_hash,
+					   &tcp_ctask->sendbuf.sg,
+					   tcp_ctask->sendbuf.sg.length);
 
 		if (!ctask->imm_count)
 			break;
@@ -1577,8 +1583,8 @@ handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 			tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
 			/* will continue with this ctask later.. */
 			if (conn->datadgst_en) {
-				crypto_digest_final(tcp_conn->data_tx_tfm,
-						(u8 *)&dtask->digest);
+				crypto_hash_final(&tcp_conn->data_tx_hash,
+						  (u8 *)&dtask->digest);
 				debug_tcp("tx uns data fail 0x%x\n",
 					  dtask->digest);
 			}
@@ -1593,8 +1599,9 @@ handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		 * so pass it
 		 */
 		if (conn->datadgst_en && tcp_ctask->sent - start > 0)
-			crypto_digest_update(tcp_conn->data_tx_tfm,
-					     &tcp_ctask->sendbuf.sg, 1);
+			crypto_hash_update(&tcp_conn->data_tx_hash,
+					   &tcp_ctask->sendbuf.sg,
+					   tcp_ctask->sendbuf.sg.length);
 
 		if (!ctask->data_count)
 			break;
@@ -1668,7 +1675,7 @@ handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
 		/* will continue with this ctask later.. */
 		if (conn->datadgst_en) {
-			crypto_digest_final(tcp_conn->data_tx_tfm,
+			crypto_hash_final(&tcp_conn->data_tx_hash,
 					  (u8 *)&dtask->digest);
 			debug_tcp("r2t data send fail 0x%x\n", dtask->digest);
 		}
@@ -1677,8 +1684,8 @@ handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 
 	BUG_ON(r2t->data_count < 0);
 	if (conn->datadgst_en)
-		crypto_digest_update(tcp_conn->data_tx_tfm, &r2t->sendbuf.sg,
-				     1);
+		crypto_hash_update(&tcp_conn->data_tx_hash, &r2t->sendbuf.sg,
+				   r2t->sendbuf.sg.length);
 
 	if (r2t->data_count) {
 		BUG_ON(ctask->sc->use_sg == 0);
@@ -1766,8 +1773,9 @@ handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 	}
 
 	if (conn->datadgst_en) {
-		crypto_digest_update(tcp_conn->data_tx_tfm,
-				     &tcp_ctask->sendbuf.sg, 1);
+		crypto_hash_update(&tcp_conn->data_tx_hash,
+				   &tcp_ctask->sendbuf.sg,
+				   tcp_ctask->sendbuf.sg.length);
 		/* imm data? */
 		if (!dtask) {
 			rc = iscsi_digest_final_send(conn, ctask,
@@ -1963,13 +1971,13 @@ iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
 	/* now free tcp_conn */
 	if (digest) {
 		if (tcp_conn->tx_tfm)
-			crypto_free_tfm(tcp_conn->tx_tfm);
+			crypto_free_hash(tcp_conn->tx_tfm);
 		if (tcp_conn->rx_tfm)
-			crypto_free_tfm(tcp_conn->rx_tfm);
-		if (tcp_conn->data_tx_tfm)
-			crypto_free_tfm(tcp_conn->data_tx_tfm);
-		if (tcp_conn->data_rx_tfm)
-			crypto_free_tfm(tcp_conn->data_rx_tfm);
+			crypto_free_hash(tcp_conn->rx_tfm);
+		if (tcp_conn->data_tx_hash.tfm)
+			crypto_free_hash(tcp_conn->data_tx_hash.tfm);
+		if (tcp_conn->data_rx_hash.tfm)
+			crypto_free_hash(tcp_conn->data_rx_hash.tfm);
 	}
 
 	kfree(tcp_conn);
@@ -2130,44 +2138,48 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
 		if (conn->hdrdgst_en) {
 			tcp_conn->hdr_size += sizeof(__u32);
 			if (!tcp_conn->tx_tfm)
-				tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c",
-								    0);
-			if (!tcp_conn->tx_tfm)
-				return -ENOMEM;
+				tcp_conn->tx_tfm =
+					crypto_alloc_hash("crc32c", 0,
+							  CRYPTO_ALG_ASYNC);
+			if (IS_ERR(tcp_conn->tx_tfm))
+				return PTR_ERR(tcp_conn->tx_tfm);
 			if (!tcp_conn->rx_tfm)
-				tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c",
-								    0);
-			if (!tcp_conn->rx_tfm) {
-				crypto_free_tfm(tcp_conn->tx_tfm);
-				return -ENOMEM;
+				tcp_conn->rx_tfm =
+					crypto_alloc_hash("crc32c", 0,
+							  CRYPTO_ALG_ASYNC);
+			if (IS_ERR(tcp_conn->rx_tfm)) {
+				crypto_free_hash(tcp_conn->tx_tfm);
+				return PTR_ERR(tcp_conn->rx_tfm);
 			}
 		} else {
 			if (tcp_conn->tx_tfm)
-				crypto_free_tfm(tcp_conn->tx_tfm);
+				crypto_free_hash(tcp_conn->tx_tfm);
 			if (tcp_conn->rx_tfm)
-				crypto_free_tfm(tcp_conn->rx_tfm);
+				crypto_free_hash(tcp_conn->rx_tfm);
 		}
 		break;
 	case ISCSI_PARAM_DATADGST_EN:
 		iscsi_set_param(cls_conn, param, buf, buflen);
 		if (conn->datadgst_en) {
-			if (!tcp_conn->data_tx_tfm)
-				tcp_conn->data_tx_tfm =
-				    crypto_alloc_tfm("crc32c", 0);
-			if (!tcp_conn->data_tx_tfm)
-				return -ENOMEM;
-			if (!tcp_conn->data_rx_tfm)
-				tcp_conn->data_rx_tfm =
-				    crypto_alloc_tfm("crc32c", 0);
-			if (!tcp_conn->data_rx_tfm) {
-				crypto_free_tfm(tcp_conn->data_tx_tfm);
-				return -ENOMEM;
+			if (!tcp_conn->data_tx_hash.tfm)
+				tcp_conn->data_tx_hash.tfm =
+				    crypto_alloc_hash("crc32c", 0,
+						      CRYPTO_ALG_ASYNC);
+			if (IS_ERR(tcp_conn->data_tx_hash.tfm))
+				return PTR_ERR(tcp_conn->data_tx_hash.tfm);
+			if (!tcp_conn->data_rx_hash.tfm)
+				tcp_conn->data_rx_hash.tfm =
+				    crypto_alloc_hash("crc32c", 0,
+						      CRYPTO_ALG_ASYNC);
+			if (IS_ERR(tcp_conn->data_rx_hash.tfm)) {
+				crypto_free_hash(tcp_conn->data_tx_hash.tfm);
+				return PTR_ERR(tcp_conn->data_rx_hash.tfm);
 			}
 		} else {
-			if (tcp_conn->data_tx_tfm)
-				crypto_free_tfm(tcp_conn->data_tx_tfm);
-			if (tcp_conn->data_rx_tfm)
-				crypto_free_tfm(tcp_conn->data_rx_tfm);
+			if (tcp_conn->data_tx_hash.tfm)
+				crypto_free_hash(tcp_conn->data_tx_hash.tfm);
+			if (tcp_conn->data_rx_hash.tfm)
+				crypto_free_hash(tcp_conn->data_rx_hash.tfm);
 		}
 		tcp_conn->sendpage = conn->datadgst_en ?
 			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h
index 6a4ee704e46e8..e35701305fc96 100644
--- a/drivers/scsi/iscsi_tcp.h
+++ b/drivers/scsi/iscsi_tcp.h
@@ -51,6 +51,7 @@
 #define ISCSI_SG_TABLESIZE		SG_ALL
 #define ISCSI_TCP_MAX_CMD_LEN		16
 
+struct crypto_hash;
 struct socket;
 
 /* Socket connection recieve helper */
@@ -84,8 +85,8 @@ struct iscsi_tcp_conn {
 	/* iSCSI connection-wide sequencing */
 	int			hdr_size;	/* PDU header size */
 
-	struct crypto_tfm	*rx_tfm;	/* CRC32C (Rx) */
-	struct crypto_tfm	*data_rx_tfm;	/* CRC32C (Rx) for data */
+	struct crypto_hash	*rx_tfm;	/* CRC32C (Rx) */
+	struct hash_desc	data_rx_hash;	/* CRC32C (Rx) for data */
 
 	/* control data */
 	struct iscsi_tcp_recv	in;		/* TCP receive context */
@@ -97,8 +98,8 @@ struct iscsi_tcp_conn {
 	void			(*old_write_space)(struct sock *);
 
 	/* xmit */
-	struct crypto_tfm	*tx_tfm;	/* CRC32C (Tx) */
-	struct crypto_tfm	*data_tx_tfm;	/* CRC32C (Tx) for data */
+	struct crypto_hash	*tx_tfm;	/* CRC32C (Tx) */
+	struct hash_desc	data_tx_hash;	/* CRC32C (Tx) for data */
 
 	/* MIB custom statistics */
 	uint32_t		sendpage_failures_cnt;
-- 
GitLab


From 35058687912aa2f0b4554383cc10be4e0683b9a4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 24 Aug 2006 19:10:20 +1000
Subject: [PATCH 0333/1063] [CRYPTO] users: Use crypto_hash interface instead
 of crypto_digest

This patch converts all remaining crypto_digest users to use the new
crypto_hash interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/md/dm-crypt.c                 | 30 +++++++++++----------
 drivers/net/ppp_mppe.c                | 34 +++++++++++++++---------
 fs/nfsd/nfs4recover.c                 | 21 ++++++++-------
 net/ieee80211/ieee80211_crypt_tkip.c  | 25 ++++++++++--------
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 38 +++++++++++++++++----------
 security/seclvl.c                     | 18 +++++++------
 6 files changed, 97 insertions(+), 69 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 91d4081cb00ed..73f8be837a458 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -122,7 +122,8 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 	                      const char *opts)
 {
 	struct crypto_cipher *essiv_tfm;
-	struct crypto_tfm *hash_tfm;
+	struct crypto_hash *hash_tfm;
+	struct hash_desc desc;
 	struct scatterlist sg;
 	unsigned int saltsize;
 	u8 *salt;
@@ -134,29 +135,30 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 	}
 
 	/* Hash the cipher key with the given hash algorithm */
-	hash_tfm = crypto_alloc_tfm(opts, CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (hash_tfm == NULL) {
+	hash_tfm = crypto_alloc_hash(opts, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hash_tfm)) {
 		ti->error = "Error initializing ESSIV hash";
-		return -EINVAL;
-	}
-
-	if (crypto_tfm_alg_type(hash_tfm) != CRYPTO_ALG_TYPE_DIGEST) {
-		ti->error = "Expected digest algorithm for ESSIV hash";
-		crypto_free_tfm(hash_tfm);
-		return -EINVAL;
+		return PTR_ERR(hash_tfm);
 	}
 
-	saltsize = crypto_tfm_alg_digestsize(hash_tfm);
+	saltsize = crypto_hash_digestsize(hash_tfm);
 	salt = kmalloc(saltsize, GFP_KERNEL);
 	if (salt == NULL) {
 		ti->error = "Error kmallocing salt storage in ESSIV";
-		crypto_free_tfm(hash_tfm);
+		crypto_free_hash(hash_tfm);
 		return -ENOMEM;
 	}
 
 	sg_set_buf(&sg, cc->key, cc->key_size);
-	crypto_digest_digest(hash_tfm, &sg, 1, salt);
-	crypto_free_tfm(hash_tfm);
+	desc.tfm = hash_tfm;
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = crypto_hash_digest(&desc, &sg, cc->key_size, salt);
+	crypto_free_hash(hash_tfm);
+
+	if (err) {
+		ti->error = "Error calculating hash in ESSIV";
+		return err;
+	}
 
 	/* Setup the essiv_tfm with the given salt */
 	essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index 495d8667419a9..e7a0eb4fca603 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -65,12 +65,13 @@ MODULE_LICENSE("Dual BSD/GPL");
 MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE));
 MODULE_VERSION("1.0.2");
 
-static void
+static unsigned int
 setup_sg(struct scatterlist *sg, const void *address, unsigned int length)
 {
 	sg[0].page = virt_to_page(address);
 	sg[0].offset = offset_in_page(address);
 	sg[0].length = length;
+	return length;
 }
 
 #define SHA1_PAD_SIZE 40
@@ -97,7 +98,7 @@ static inline void sha_pad_init(struct sha_pad *shapad)
  */
 struct ppp_mppe_state {
 	struct crypto_blkcipher *arc4;
-	struct crypto_tfm *sha1;
+	struct crypto_hash *sha1;
 	unsigned char *sha1_digest;
 	unsigned char master_key[MPPE_MAX_KEY_LEN];
 	unsigned char session_key[MPPE_MAX_KEY_LEN];
@@ -137,14 +138,21 @@ struct ppp_mppe_state {
  */
 static void get_new_key_from_sha(struct ppp_mppe_state * state, unsigned char *InterimKey)
 {
+	struct hash_desc desc;
 	struct scatterlist sg[4];
+	unsigned int nbytes;
 
-	setup_sg(&sg[0], state->master_key, state->keylen);
-	setup_sg(&sg[1], sha_pad->sha_pad1, sizeof(sha_pad->sha_pad1));
-	setup_sg(&sg[2], state->session_key, state->keylen);
-	setup_sg(&sg[3], sha_pad->sha_pad2, sizeof(sha_pad->sha_pad2));
+	nbytes = setup_sg(&sg[0], state->master_key, state->keylen);
+	nbytes += setup_sg(&sg[1], sha_pad->sha_pad1,
+			   sizeof(sha_pad->sha_pad1));
+	nbytes += setup_sg(&sg[2], state->session_key, state->keylen);
+	nbytes += setup_sg(&sg[3], sha_pad->sha_pad2,
+			   sizeof(sha_pad->sha_pad2));
 
-	crypto_digest_digest (state->sha1, sg, 4, state->sha1_digest);
+	desc.tfm = state->sha1;
+	desc.flags = 0;
+
+	crypto_hash_digest(&desc, sg, nbytes, state->sha1_digest);
 
 	memcpy(InterimKey, state->sha1_digest, state->keylen);
 }
@@ -204,11 +212,13 @@ static void *mppe_alloc(unsigned char *options, int optlen)
 		goto out_free;
 	}
 
-	state->sha1 = crypto_alloc_tfm("sha1", 0);
-	if (!state->sha1)
+	state->sha1 = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(state->sha1)) {
+		state->sha1 = NULL;
 		goto out_free;
+	}
 
-	digestsize = crypto_tfm_alg_digestsize(state->sha1);
+	digestsize = crypto_hash_digestsize(state->sha1);
 	if (digestsize < MPPE_MAX_KEY_LEN)
 		goto out_free;
 
@@ -233,7 +243,7 @@ static void *mppe_alloc(unsigned char *options, int optlen)
 	    if (state->sha1_digest)
 		kfree(state->sha1_digest);
 	    if (state->sha1)
-		crypto_free_tfm(state->sha1);
+		crypto_free_hash(state->sha1);
 	    if (state->arc4)
 		crypto_free_blkcipher(state->arc4);
 	    kfree(state);
@@ -251,7 +261,7 @@ static void mppe_free(void *arg)
 	    if (state->sha1_digest)
 		kfree(state->sha1_digest);
 	    if (state->sha1)
-		crypto_free_tfm(state->sha1);
+		crypto_free_hash(state->sha1);
 	    if (state->arc4)
 		crypto_free_blkcipher(state->arc4);
 	    kfree(state);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 06da7506363cb..e35d7e52fdebd 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,7 +33,7 @@
 *
 */
 
-
+#include <linux/err.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfs4.h>
@@ -87,34 +87,39 @@ int
 nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 {
 	struct xdr_netobj cksum;
-	struct crypto_tfm *tfm;
+	struct hash_desc desc;
 	struct scatterlist sg[1];
 	int status = nfserr_resource;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
-	tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL)
-		goto out;
-	cksum.len = crypto_tfm_alg_digestsize(tfm);
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm))
+		goto out_no_tfm;
+	cksum.len = crypto_hash_digestsize(desc.tfm);
 	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
 	if (cksum.data == NULL)
  		goto out;
-	crypto_digest_init(tfm);
 
 	sg[0].page = virt_to_page(clname->data);
 	sg[0].offset = offset_in_page(clname->data);
 	sg[0].length = clname->len;
 
-	crypto_digest_update(tfm, sg, 1);
-	crypto_digest_final(tfm, cksum.data);
+	if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
+		goto out;
 
 	md5_to_hex(dname, cksum.data);
 
-	kfree(cksum.data);
 	status = nfs_ok;
 out:
-	crypto_free_tfm(tfm);
+	/*
+	 * Reached on success and on kmalloc/digest failure alike, so the
+	 * checksum buffer is released on every path (kfree(NULL) is a no-op).
+	 */
+	kfree(cksum.data);
+	crypto_free_hash(desc.tfm);
+out_no_tfm:
 	return status;
 }
 
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index d60ce9b49b4fe..407a17495b61a 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -54,7 +54,7 @@ struct ieee80211_tkip_data {
 	int key_idx;
 
 	struct crypto_blkcipher *tfm_arc4;
-	struct crypto_tfm *tfm_michael;
+	struct crypto_hash *tfm_michael;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
 	u8 rx_hdr[16], tx_hdr[16];
@@ -95,10 +95,12 @@ static void *ieee80211_tkip_init(int key_idx)
 		goto fail;
 	}
 
-	priv->tfm_michael = crypto_alloc_tfm("michael_mic", 0);
-	if (priv->tfm_michael == NULL) {
+	priv->tfm_michael = crypto_alloc_hash("michael_mic", 0,
+					      CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm_michael)) {
 		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
 		       "crypto API michael_mic\n");
+		priv->tfm_michael = NULL;
 		goto fail;
 	}
 
@@ -107,7 +109,7 @@ static void *ieee80211_tkip_init(int key_idx)
       fail:
 	if (priv) {
 		if (priv->tfm_michael)
-			crypto_free_tfm(priv->tfm_michael);
+			crypto_free_hash(priv->tfm_michael);
 		if (priv->tfm_arc4)
 			crypto_free_blkcipher(priv->tfm_arc4);
 		kfree(priv);
@@ -120,7 +122,7 @@ static void ieee80211_tkip_deinit(void *priv)
 {
 	struct ieee80211_tkip_data *_priv = priv;
 	if (_priv && _priv->tfm_michael)
-		crypto_free_tfm(_priv->tfm_michael);
+		crypto_free_hash(_priv->tfm_michael);
 	if (_priv && _priv->tfm_arc4)
 		crypto_free_blkcipher(_priv->tfm_arc4);
 	kfree(priv);
@@ -485,6 +487,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr,
 		       u8 * data, size_t data_len, u8 * mic)
 {
+	struct hash_desc desc;
 	struct scatterlist sg[2];
 
 	if (tkey->tfm_michael == NULL) {
@@ -499,12 +502,12 @@ static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr,
 	sg[1].offset = offset_in_page(data);
 	sg[1].length = data_len;
 
-	crypto_digest_init(tkey->tfm_michael);
-	crypto_digest_setkey(tkey->tfm_michael, key, 8);
-	crypto_digest_update(tkey->tfm_michael, sg, 2);
-	crypto_digest_final(tkey->tfm_michael, mic);
+	if (crypto_hash_setkey(tkey->tfm_michael, key, 8))
+		return -1;
 
-	return 0;
+	desc.tfm = tkey->tfm_michael;
+	desc.flags = 0;
+	return crypto_hash_digest(&desc, sg, data_len + 16, mic);
 }
 
 static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
@@ -628,7 +631,7 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
 {
 	struct ieee80211_tkip_data *tkey = priv;
 	int keyidx;
-	struct crypto_tfm *tfm = tkey->tfm_michael;
+	struct crypto_hash *tfm = tkey->tfm_michael;
 	struct crypto_blkcipher *tfm2 = tkey->tfm_arc4;
 
 	keyidx = tkey->key_idx;
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 57192dfe30656..e11a40b25cce6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,6 +34,7 @@
  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  */
 
+#include <linux/err.h>
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
@@ -199,11 +200,9 @@ process_xdr_buf(struct xdr_buf *buf, int offset, int len,
 static int
 checksummer(struct scatterlist *sg, void *data)
 {
-	struct crypto_tfm *tfm = (struct crypto_tfm *)data;
+	struct hash_desc *desc = data;
 
-	crypto_digest_update(tfm, sg, 1);
-
-	return 0;
+	return crypto_hash_update(desc, sg, sg->length);
 }
 
 /* checksum the plaintext data and hdrlen bytes of the token header */
@@ -212,8 +211,9 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
 		   int body_offset, struct xdr_netobj *cksum)
 {
 	char                            *cksumname;
-	struct crypto_tfm               *tfm = NULL; /* XXX add to ctx? */
+	struct hash_desc                desc; /* XXX add to ctx? */
 	struct scatterlist              sg[1];
+	int err;
 
 	switch (cksumtype) {
 		case CKSUMTYPE_RSA_MD5:
@@ -224,18 +224,28 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
 				" unsupported checksum %d", cksumtype);
 			return GSS_S_FAILURE;
 	}
-	if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP)))
+	desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm))
 		return GSS_S_FAILURE;
-	cksum->len = crypto_tfm_alg_digestsize(tfm);
+	cksum->len = crypto_hash_digestsize(desc.tfm);
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	crypto_digest_init(tfm);
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out;
 	sg_set_buf(sg, header, hdrlen);
-	crypto_digest_update(tfm, sg, 1);
-	process_xdr_buf(body, body_offset, body->len - body_offset,
-			checksummer, tfm);
-	crypto_digest_final(tfm, cksum->data);
-	crypto_free_tfm(tfm);
-	return 0;
+	err = crypto_hash_update(&desc, sg, hdrlen);
+	if (err)
+		goto out;
+	err = process_xdr_buf(body, body_offset, body->len - body_offset,
+			      checksummer, &desc);
+	if (err)
+		goto out;
+	err = crypto_hash_final(&desc, cksum->data);
+
+out:
+	crypto_free_hash(desc.tfm);
+	return err ? GSS_S_FAILURE : 0;
 }
 
 EXPORT_SYMBOL(make_checksum);
diff --git a/security/seclvl.c b/security/seclvl.c
index c26dd7de04712..8f6291991fbcb 100644
--- a/security/seclvl.c
+++ b/security/seclvl.c
@@ -16,6 +16,7 @@
  *	(at your option) any later version.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kernel.h>
@@ -197,26 +198,27 @@ static unsigned char hashedPassword[SHA1_DIGEST_SIZE];
 static int
 plaintext_to_sha1(unsigned char *hash, const char *plaintext, unsigned int len)
 {
-	struct crypto_tfm *tfm;
+	struct hash_desc desc;
 	struct scatterlist sg;
+	int err;
+
 	if (len > PAGE_SIZE) {
 		seclvl_printk(0, KERN_ERR, "Plaintext password too large (%d "
 			      "characters).  Largest possible is %lu "
 			      "bytes.\n", len, PAGE_SIZE);
 		return -EINVAL;
 	}
-	tfm = crypto_alloc_tfm("sha1", CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL) {
+	desc.tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm)) {
 		seclvl_printk(0, KERN_ERR,
 			      "Failed to load transform for SHA1\n");
 		return -EINVAL;
 	}
 	sg_init_one(&sg, (u8 *)plaintext, len);
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &sg, 1);
-	crypto_digest_final(tfm, hash);
-	crypto_free_tfm(tfm);
-	return 0;
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = crypto_hash_digest(&desc, &sg, len, hash);
+	crypto_free_hash(desc.tfm);
+	return err;
 }
 
 /**
-- 
GitLab


From fce32d70ba834129b164c40c2d4260e5a7a7d850 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 26 Aug 2006 17:35:45 +1000
Subject: [PATCH 0334/1063] [CRYPTO] api: Add crypto_comp and crypto_has_*

This patch adds the crypto_comp type to complete the compile-time checking
conversion.  The functions crypto_has_alg and crypto_has_cipher, etc. are
also added to replace crypto_alg_available.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/api.c           | 14 +++++++
 include/linux/crypto.h | 90 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index edaa843d8e83b..2e84d4b547902 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -482,3 +482,17 @@ int crypto_alg_available(const char *name, u32 flags)
 EXPORT_SYMBOL_GPL(crypto_alloc_tfm);
 EXPORT_SYMBOL_GPL(crypto_free_tfm);
 EXPORT_SYMBOL_GPL(crypto_alg_available);
+
+int crypto_has_alg(const char *name, u32 type, u32 mask)
+{
+	int ret = 0;
+	struct crypto_alg *alg = crypto_alg_mod_lookup(name, type, mask);
+	
+	if (!IS_ERR(alg)) {
+		crypto_mod_put(alg);
+		ret = 1;
+	}
+	
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_has_alg);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 929fb9ad13147..cf91c4c0638bb 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -236,11 +236,17 @@ int crypto_unregister_alg(struct crypto_alg *alg);
  */
 #ifdef CONFIG_CRYPTO
 int crypto_alg_available(const char *name, u32 flags);
+int crypto_has_alg(const char *name, u32 type, u32 mask);
 #else
 static inline int crypto_alg_available(const char *name, u32 flags)
 {
 	return 0;
 }
+
+static inline int crypto_has_alg(const char *name, u32 type, u32 mask)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -329,6 +335,7 @@ struct crypto_tfm {
 };
 
 #define crypto_cipher crypto_tfm
+#define crypto_comp crypto_tfm
 
 struct crypto_blkcipher {
 	struct crypto_tfm base;
@@ -485,6 +492,15 @@ static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm)
 	crypto_free_tfm(crypto_blkcipher_tfm(tfm));
 }
 
+static inline int crypto_has_blkcipher(const char *alg_name, u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_BLKCIPHER;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return crypto_has_alg(alg_name, type, mask);
+}
+
 static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm)
 {
 	return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm));
@@ -620,6 +636,15 @@ static inline void crypto_free_cipher(struct crypto_cipher *tfm)
 	crypto_free_tfm(crypto_cipher_tfm(tfm));
 }
 
+static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_CIPHER;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return crypto_has_alg(alg_name, type, mask);
+}
+
 static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm)
 {
 	return &crypto_cipher_tfm(tfm)->crt_cipher;
@@ -718,6 +743,15 @@ static inline void crypto_free_hash(struct crypto_hash *tfm)
 	crypto_free_tfm(crypto_hash_tfm(tfm));
 }
 
+static inline int crypto_has_hash(const char *alg_name, u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_HASH;
+	mask |= CRYPTO_ALG_TYPE_HASH_MASK;
+
+	return crypto_has_alg(alg_name, type, mask);
+}
+
 static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm)
 {
 	return &crypto_hash_tfm(tfm)->crt_hash;
@@ -853,20 +887,64 @@ static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm,
 	memcpy(dst, tfm->crt_cipher.cit_iv, len);
 }
 
-static inline int crypto_comp_compress(struct crypto_tfm *tfm,
+static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm)
+{
+	return (struct crypto_comp *)tfm;
+}
+
+static inline struct crypto_comp *crypto_comp_cast(struct crypto_tfm *tfm)
+{
+	BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_COMPRESS) &
+	       CRYPTO_ALG_TYPE_MASK);
+	return __crypto_comp_cast(tfm);
+}
+
+static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name,
+						    u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_COMPRESS;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return __crypto_comp_cast(crypto_alloc_base(alg_name, type, mask));
+}
+
+static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm)
+{
+	return tfm;
+}
+
+static inline void crypto_free_comp(struct crypto_comp *tfm)
+{
+	crypto_free_tfm(crypto_comp_tfm(tfm));
+}
+
+static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask)
+{
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_COMPRESS;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return crypto_has_alg(alg_name, type, mask);
+}
+
+static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm)
+{
+	return &crypto_comp_tfm(tfm)->crt_compress;
+}
+
+static inline int crypto_comp_compress(struct crypto_comp *tfm,
                                        const u8 *src, unsigned int slen,
                                        u8 *dst, unsigned int *dlen)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS);
-	return tfm->crt_compress.cot_compress(tfm, src, slen, dst, dlen);
+	return crypto_comp_crt(tfm)->cot_compress(tfm, src, slen, dst, dlen);
 }
 
-static inline int crypto_comp_decompress(struct crypto_tfm *tfm,
+static inline int crypto_comp_decompress(struct crypto_comp *tfm,
                                          const u8 *src, unsigned int slen,
                                          u8 *dst, unsigned int *dlen)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS);
-	return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen);
+	return crypto_comp_crt(tfm)->cot_decompress(tfm, src, slen, dst, dlen);
 }
 
 #endif	/* _LINUX_CRYPTO_H */
-- 
GitLab


From e4d5b79c661c7cfca9d8d5afd040a295f128d3cb Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 26 Aug 2006 18:12:40 +1000
Subject: [PATCH 0335/1063] [CRYPTO] users: Use crypto_comp and crypto_has_*

This patch converts all users to use the new crypto_comp type and the
crypto_has_* functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c          |  8 ++++----
 drivers/crypto/padlock.c |  6 +++---
 drivers/net/ppp_mppe.c   |  4 ++--
 include/linux/crypto.h   |  5 +++++
 include/net/ipcomp.h     |  5 ++---
 net/ipv4/ipcomp.c        | 25 +++++++++++++------------
 net/ipv6/ipcomp6.c       | 25 +++++++++++++------------
 net/xfrm/xfrm_algo.c     | 27 ++++++++++++++++++---------
 8 files changed, 60 insertions(+), 45 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 840ab8be0b967..83307420d31c8 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -749,7 +749,7 @@ static void test_deflate(void)
 {
 	unsigned int i;
 	char result[COMP_BUF_SIZE];
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	struct comp_testvec *tv;
 	unsigned int tsize;
 
@@ -821,7 +821,7 @@ static void test_deflate(void)
 		       ilen, dlen);
 	}
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_comp(tfm);
 }
 
 static void test_available(void)
@@ -830,8 +830,8 @@ static void test_available(void)
 
 	while (*name) {
 		printk("alg %s ", *name);
-		printk((crypto_alg_available(*name, 0)) ?
-			"found\n" : "not found\n");
+		printk(crypto_has_alg(*name, 0, CRYPTO_ALG_ASYNC) ?
+		       "found\n" : "not found\n");
 		name++;
 	}
 }
diff --git a/drivers/crypto/padlock.c b/drivers/crypto/padlock.c
index ce581684f4b4a..d6d7dd5bb98c1 100644
--- a/drivers/crypto/padlock.c
+++ b/drivers/crypto/padlock.c
@@ -26,13 +26,13 @@ static int __init padlock_init(void)
 {
 	int success = 0;
 
-	if (crypto_alg_available("aes-padlock", 0))
+	if (crypto_has_cipher("aes-padlock", 0, 0))
 		success++;
 
-	if (crypto_alg_available("sha1-padlock", 0))
+	if (crypto_has_hash("sha1-padlock", 0, 0))
 		success++;
 
-	if (crypto_alg_available("sha256-padlock", 0))
+	if (crypto_has_hash("sha256-padlock", 0, 0))
 		success++;
 
 	if (!success) {
diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index e7a0eb4fca603..f3655fd772f5b 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -710,8 +710,8 @@ static struct compressor ppp_mppe = {
 static int __init ppp_mppe_init(void)
 {
 	int answer;
-	if (!(crypto_alg_available("ecb(arc4)", 0) &&
-	      crypto_alg_available("sha1", 0)))
+	if (!(crypto_has_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC) &&
+	      crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC)))
 		return -ENODEV;
 
 	sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index cf91c4c0638bb..d4f9948b64b11 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -928,6 +928,11 @@ static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask)
 	return crypto_has_alg(alg_name, type, mask);
 }
 
+static inline const char *crypto_comp_name(struct crypto_comp *tfm)
+{
+	return crypto_tfm_alg_name(crypto_comp_tfm(tfm));
+}
+
 static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm)
 {
 	return &crypto_comp_tfm(tfm)->crt_compress;
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index b94e3047b4d9b..87c1af3e5e824 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -1,15 +1,14 @@
 #ifndef _NET_IPCOMP_H
 #define _NET_IPCOMP_H
 
+#include <linux/crypto.h>
 #include <linux/types.h>
 
 #define IPCOMP_SCRATCH_SIZE     65400
 
-struct crypto_tfm;
-
 struct ipcomp_data {
 	u16 threshold;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 };
 
 #endif
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index a0c28b2b756e0..5bb9c9f03fb6d 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -32,7 +32,7 @@
 
 struct ipcomp_tfms {
 	struct list_head list;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int users;
 };
 
@@ -46,7 +46,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 	int err, plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 	
 	plen = skb->len;
@@ -107,7 +107,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph = skb->nh.iph;
 	struct ipcomp_data *ipcd = x->data;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 	
 	ihlen = iph->ihl * 4;
@@ -302,7 +302,7 @@ static void **ipcomp_alloc_scratches(void)
 	return scratches;
 }
 
-static void ipcomp_free_tfms(struct crypto_tfm **tfms)
+static void ipcomp_free_tfms(struct crypto_comp **tfms)
 {
 	struct ipcomp_tfms *pos;
 	int cpu;
@@ -324,28 +324,28 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms)
 		return;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_tfm(tfm);
+		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+		crypto_free_comp(tfm);
 	}
 	free_percpu(tfms);
 }
 
-static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
+static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
 {
 	struct ipcomp_tfms *pos;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int cpu;
 
 	/* This can be any valid CPU ID so we don't need locking. */
 	cpu = raw_smp_processor_id();
 
 	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
-		struct crypto_tfm *tfm;
+		struct crypto_comp *tfm;
 
 		tfms = pos->tfms;
 		tfm = *per_cpu_ptr(tfms, cpu);
 
-		if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
 			pos->users++;
 			return tfms;
 		}
@@ -359,12 +359,13 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
 	INIT_LIST_HEAD(&pos->list);
 	list_add(&pos->list, &ipcomp_tfms_list);
 
-	pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
 	if (!tfms)
 		goto error;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+							    CRYPTO_ALG_ASYNC);
 		if (!tfm)
 			goto error;
 		*per_cpu_ptr(tfms, cpu) = tfm;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7e4d1c17bfbc1..a81e9e9d93bd2 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -53,7 +53,7 @@
 
 struct ipcomp6_tfms {
 	struct list_head list;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int users;
 };
 
@@ -70,7 +70,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	int plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 
 	if (skb_linearize_cow(skb))
@@ -129,7 +129,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct ipcomp_data *ipcd = x->data;
 	int plen, dlen;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 
 	hdr_len = skb->h.raw - skb->data;
@@ -301,7 +301,7 @@ static void **ipcomp6_alloc_scratches(void)
 	return scratches;
 }
 
-static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
+static void ipcomp6_free_tfms(struct crypto_comp **tfms)
 {
 	struct ipcomp6_tfms *pos;
 	int cpu;
@@ -323,28 +323,28 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
 		return;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_tfm(tfm);
+		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+		crypto_free_comp(tfm);
 	}
 	free_percpu(tfms);
 }
 
-static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
+static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name)
 {
 	struct ipcomp6_tfms *pos;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int cpu;
 
 	/* This can be any valid CPU ID so we don't need locking. */
 	cpu = raw_smp_processor_id();
 
 	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		struct crypto_tfm *tfm;
+		struct crypto_comp *tfm;
 
 		tfms = pos->tfms;
 		tfm = *per_cpu_ptr(tfms, cpu);
 
-		if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
 			pos->users++;
 			return tfms;
 		}
@@ -358,12 +358,13 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
 	INIT_LIST_HEAD(&pos->list);
 	list_add(&pos->list, &ipcomp6_tfms_list);
 
-	pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
 	if (!tfms)
 		goto error;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+							    CRYPTO_ALG_ASYNC);
 		if (!tfm)
 			goto error;
 		*per_cpu_ptr(tfms, cpu) = tfm;
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 87918f281bb43..5a0dbeb6bbe81 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -363,8 +363,8 @@ struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
 EXPORT_SYMBOL_GPL(xfrm_calg_get_byid);
 
 static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
-					      int entries, char *name,
-					      int probe)
+					      int entries, u32 type, u32 mask,
+					      char *name, int probe)
 {
 	int i, status;
 
@@ -382,7 +382,7 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
 		if (!probe)
 			break;
 
-		status = crypto_alg_available(name, 0);
+		status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC);
 		if (!status)
 			break;
 
@@ -394,19 +394,25 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
 
 struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe)
 {
-	return xfrm_get_byname(aalg_list, aalg_entries(), name, probe);
+	return xfrm_get_byname(aalg_list, aalg_entries(),
+			       CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK,
+			       name, probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
 
 struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe)
 {
-	return xfrm_get_byname(ealg_list, ealg_entries(), name, probe);
+	return xfrm_get_byname(ealg_list, ealg_entries(),
+			       CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK,
+			       name, probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
 
 struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe)
 {
-	return xfrm_get_byname(calg_list, calg_entries(), name, probe);
+	return xfrm_get_byname(calg_list, calg_entries(),
+			       CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK,
+			       name, probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
 
@@ -441,19 +447,22 @@ void xfrm_probe_algs(void)
 	BUG_ON(in_softirq());
 
 	for (i = 0; i < aalg_entries(); i++) {
-		status = crypto_alg_available(aalg_list[i].name, 0);
+		status = crypto_has_hash(aalg_list[i].name, 0,
+					 CRYPTO_ALG_ASYNC);
 		if (aalg_list[i].available != status)
 			aalg_list[i].available = status;
 	}
 	
 	for (i = 0; i < ealg_entries(); i++) {
-		status = crypto_alg_available(ealg_list[i].name, 0);
+		status = crypto_has_blkcipher(ealg_list[i].name, 0,
+					      CRYPTO_ALG_ASYNC);
 		if (ealg_list[i].available != status)
 			ealg_list[i].available = status;
 	}
 	
 	for (i = 0; i < calg_entries(); i++) {
-		status = crypto_alg_available(calg_list[i].name, 0);
+		status = crypto_has_comp(calg_list[i].name, 0,
+					 CRYPTO_ALG_ASYNC);
 		if (calg_list[i].available != status)
 			calg_list[i].available = status;
 	}
-- 
GitLab


From 6010439f47e6b308c031dad7d99686030ef942dd Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 26 Aug 2006 18:34:10 +1000
Subject: [PATCH 0336/1063] [CRYPTO] padlock: Convert padlock-sha to use
 crypto_hash

This patch converts padlock-sha to use crypto_hash for its fallback.
It also changes the fallback selection to use selection by type instead
of name.  This is done through the new CRYPTO_ALG_NEED_FALLBACK bit,
which is set if and only if an algorithm needs a fallback of the same
type.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/padlock-sha.c | 91 ++++++++++++++----------------------
 include/linux/crypto.h       |  6 +++
 2 files changed, 41 insertions(+), 56 deletions(-)

diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index b028db61c3010..a781fd23b607d 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -12,10 +12,11 @@
  *
  */
 
+#include <crypto/algapi.h>
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <linux/cryptohash.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
@@ -30,28 +31,17 @@
 #define SHA256_DIGEST_SIZE      32
 #define SHA256_HMAC_BLOCK_SIZE  64
 
-static char *sha1_fallback = SHA1_DEFAULT_FALLBACK;
-static char *sha256_fallback = SHA256_DEFAULT_FALLBACK;
-
-module_param(sha1_fallback, charp, 0644);
-module_param(sha256_fallback, charp, 0644);
-
-MODULE_PARM_DESC(sha1_fallback, "Fallback driver for SHA1. Default is "
-		 SHA1_DEFAULT_FALLBACK);
-MODULE_PARM_DESC(sha256_fallback, "Fallback driver for SHA256. Default is "
-		 SHA256_DEFAULT_FALLBACK);
-
 struct padlock_sha_ctx {
 	char		*data;
 	size_t		used;
 	int		bypass;
 	void (*f_sha_padlock)(const char *in, char *out, int count);
-	struct crypto_tfm *fallback_tfm;
+	struct hash_desc fallback;
 };
 
 static inline struct padlock_sha_ctx *ctx(struct crypto_tfm *tfm)
 {
-	return (struct padlock_sha_ctx *)(crypto_tfm_ctx(tfm));
+	return crypto_tfm_ctx(tfm);
 }
 
 /* We'll need aligned address on the stack */
@@ -65,14 +55,12 @@ static void padlock_sha_bypass(struct crypto_tfm *tfm)
 	if (ctx(tfm)->bypass)
 		return;
 
-	BUG_ON(!ctx(tfm)->fallback_tfm);
-
-	crypto_digest_init(ctx(tfm)->fallback_tfm);
+	crypto_hash_init(&ctx(tfm)->fallback);
 	if (ctx(tfm)->data && ctx(tfm)->used) {
 		struct scatterlist sg;
 
 		sg_set_buf(&sg, ctx(tfm)->data, ctx(tfm)->used);
-		crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1);
+		crypto_hash_update(&ctx(tfm)->fallback, &sg, sg.length);
 	}
 
 	ctx(tfm)->used = 0;
@@ -95,9 +83,8 @@ static void padlock_sha_update(struct crypto_tfm *tfm,
 
 	if (unlikely(ctx(tfm)->bypass)) {
 		struct scatterlist sg;
-		BUG_ON(!ctx(tfm)->fallback_tfm);
 		sg_set_buf(&sg, (uint8_t *)data, length);
-		crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1);
+		crypto_hash_update(&ctx(tfm)->fallback, &sg, length);
 		return;
 	}
 
@@ -160,8 +147,7 @@ static void padlock_do_sha256(const char *in, char *out, int count)
 static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out)
 {
 	if (unlikely(ctx(tfm)->bypass)) {
-		BUG_ON(!ctx(tfm)->fallback_tfm);
-		crypto_digest_final(ctx(tfm)->fallback_tfm, out);
+		crypto_hash_final(&ctx(tfm)->fallback, out);
 		ctx(tfm)->bypass = 0;
 		return;
 	}
@@ -172,8 +158,11 @@ static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out)
 	ctx(tfm)->used = 0;
 }
 
-static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_name)
+static int padlock_cra_init(struct crypto_tfm *tfm)
 {
+	const char *fallback_driver_name = tfm->__crt_alg->cra_name;
+	struct crypto_hash *fallback_tfm;
+
 	/* For now we'll allocate one page. This
 	 * could eventually be configurable one day. */
 	ctx(tfm)->data = (char *)__get_free_page(GFP_KERNEL);
@@ -181,14 +170,17 @@ static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_
 		return -ENOMEM;
 
 	/* Allocate a fallback and abort if it failed. */
-	ctx(tfm)->fallback_tfm = crypto_alloc_tfm(fallback_driver_name, 0);
-	if (!ctx(tfm)->fallback_tfm) {
+	fallback_tfm = crypto_alloc_hash(fallback_driver_name, 0,
+					 CRYPTO_ALG_ASYNC |
+					 CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm)) {
 		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
 		       fallback_driver_name);
 		free_page((unsigned long)(ctx(tfm)->data));
-		return -ENOENT;
+		return PTR_ERR(fallback_tfm);
 	}
 
+	ctx(tfm)->fallback.tfm = fallback_tfm;
 	return 0;
 }
 
@@ -196,14 +188,14 @@ static int padlock_sha1_cra_init(struct crypto_tfm *tfm)
 {
 	ctx(tfm)->f_sha_padlock = padlock_do_sha1;
 
-	return padlock_cra_init(tfm, sha1_fallback);
+	return padlock_cra_init(tfm);
 }
 
 static int padlock_sha256_cra_init(struct crypto_tfm *tfm)
 {
 	ctx(tfm)->f_sha_padlock = padlock_do_sha256;
 
-	return padlock_cra_init(tfm, sha256_fallback);
+	return padlock_cra_init(tfm);
 }
 
 static void padlock_cra_exit(struct crypto_tfm *tfm)
@@ -213,16 +205,16 @@ static void padlock_cra_exit(struct crypto_tfm *tfm)
 		ctx(tfm)->data = NULL;
 	}
 
-	BUG_ON(!ctx(tfm)->fallback_tfm);
-	crypto_free_tfm(ctx(tfm)->fallback_tfm);
-	ctx(tfm)->fallback_tfm = NULL;
+	crypto_free_hash(ctx(tfm)->fallback.tfm);
+	ctx(tfm)->fallback.tfm = NULL;
 }
 
 static struct crypto_alg sha1_alg = {
 	.cra_name		=	"sha1",
 	.cra_driver_name	=	"sha1-padlock",
 	.cra_priority		=	PADLOCK_CRA_PRIORITY,
-	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST,
+	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST |
+					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	SHA1_HMAC_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
 	.cra_module		=	THIS_MODULE,
@@ -243,7 +235,8 @@ static struct crypto_alg sha256_alg = {
 	.cra_name		=	"sha256",
 	.cra_driver_name	=	"sha256-padlock",
 	.cra_priority		=	PADLOCK_CRA_PRIORITY,
-	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST,
+	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST |
+					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	SHA256_HMAC_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
 	.cra_module		=	THIS_MODULE,
@@ -262,29 +255,15 @@ static struct crypto_alg sha256_alg = {
 
 static void __init padlock_sha_check_fallbacks(void)
 {
-	struct crypto_tfm *tfm;
-
-	/* We'll try to allocate one TFM for each fallback
-	 * to test that the modules are available. */
-	tfm = crypto_alloc_tfm(sha1_fallback, 0);
-	if (!tfm) {
-		printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n",
-		       sha1_alg.cra_name, sha1_fallback);
-	} else {
-		printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha1_alg.cra_name,
-		       crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm));
-		crypto_free_tfm(tfm);
-	}
-
-	tfm = crypto_alloc_tfm(sha256_fallback, 0);
-	if (!tfm) {
-		printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n",
-		       sha256_alg.cra_name, sha256_fallback);
-	} else {
-		printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha256_alg.cra_name,
-		       crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm));
-		crypto_free_tfm(tfm);
-	}
+	if (!crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_NEED_FALLBACK))
+		printk(KERN_WARNING PFX
+		       "Couldn't load fallback module for sha1.\n");
+
+	if (!crypto_has_hash("sha256", 0, CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_NEED_FALLBACK))
+		printk(KERN_WARNING PFX
+		       "Couldn't load fallback module for sha256.\n");
 }
 
 static int __init padlock_init(void)
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index d4f9948b64b11..187c6ea919595 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -42,6 +42,12 @@
 #define CRYPTO_ALG_DYING		0x00000040
 #define CRYPTO_ALG_ASYNC		0x00000080
 
+/*
+ * Set this bit if and only if the algorithm requires another algorithm of
+ * the same type to handle corner cases.
+ */
+#define CRYPTO_ALG_NEED_FALLBACK	0x00000100
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
GitLab


From 3ad819c61f5f8347f39cdcbe652b3c60ec615888 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 26 Aug 2006 18:44:31 +1000
Subject: [PATCH 0337/1063] [CRYPTO] api: Deprecate crypto_digest_* and
 crypto_alg_available

This patch marks the crypto_digest_* functions and crypto_alg_available
as deprecated.  They've been replaced by crypto_hash_* and crypto_has_*
respectively.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 187c6ea919595..8f2ffa4caabfa 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -241,9 +241,12 @@ int crypto_unregister_alg(struct crypto_alg *alg);
  * Algorithm query interface.
  */
 #ifdef CONFIG_CRYPTO
-int crypto_alg_available(const char *name, u32 flags);
+int crypto_alg_available(const char *name, u32 flags)
+	__deprecated_for_modules;
 int crypto_has_alg(const char *name, u32 type, u32 mask);
 #else
+static int crypto_alg_available(const char *name, u32 flags)
+	__deprecated_for_modules;
 static inline int crypto_alg_available(const char *name, u32 flags)
 {
 	return 0;
@@ -704,12 +707,15 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm,
 						dst, src);
 }
 
-void crypto_digest_init(struct crypto_tfm *tfm);
+void crypto_digest_init(struct crypto_tfm *tfm) __deprecated_for_modules;
 void crypto_digest_update(struct crypto_tfm *tfm,
-			  struct scatterlist *sg, unsigned int nsg);
-void crypto_digest_final(struct crypto_tfm *tfm, u8 *out);
+			  struct scatterlist *sg, unsigned int nsg)
+	__deprecated_for_modules;
+void crypto_digest_final(struct crypto_tfm *tfm, u8 *out)
+	__deprecated_for_modules;
 void crypto_digest_digest(struct crypto_tfm *tfm,
-			  struct scatterlist *sg, unsigned int nsg, u8 *out);
+			  struct scatterlist *sg, unsigned int nsg, u8 *out)
+	__deprecated_for_modules;
 
 static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm)
 {
@@ -723,6 +729,8 @@ static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm)
 	return __crypto_hash_cast(tfm);
 }
 
+static int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key,
+				unsigned int keylen) __deprecated;
 static inline int crypto_digest_setkey(struct crypto_tfm *tfm,
                                        const u8 *key, unsigned int keylen)
 {
-- 
GitLab


From 3c164bd8153c4644a22dc2101b003c67cd2a0d0a Mon Sep 17 00:00:00 2001
From: Rik Snel <rsnel@cube.dyndns.org>
Date: Sat, 2 Sep 2006 18:17:33 +1000
Subject: [PATCH 0338/1063] [BLOCK] dm-crypt: trivial comment improvements

Just some minor comment nits.

- little-endian is better than low-endian
- and since it is called essiv everywhere it should also be essiv
  in the comments (and not ess_iv)

Signed-off-by: Rik Snel <rsnel@cube.dyndns.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/md/dm-crypt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 73f8be837a458..bdbd34993a80c 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -99,12 +99,12 @@ static kmem_cache_t *_crypt_io_pool;
 /*
  * Different IV generation algorithms:
  *
- * plain: the initial vector is the 32-bit low-endian version of the sector
+ * plain: the initial vector is the 32-bit little-endian version of the sector
  *        number, padded with zeros if neccessary.
  *
- * ess_iv: "encrypted sector|salt initial vector", the sector number is
- *         encrypted with the bulk cipher using a salt as key. The salt
- *         should be derived from the bulk cipher's key via hashing.
+ * essiv: "encrypted sector|salt initial vector", the sector number is
+ *        encrypted with the bulk cipher using a salt as key. The salt
+ *        should be derived from the bulk cipher's key via hashing.
  *
  * plumb: unimplemented, see:
  * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
-- 
GitLab


From 8ce90907ea534f10075a9eba5f83d6dd77b39cb6 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Thu, 21 Sep 2006 00:06:21 -0400
Subject: [PATCH 0339/1063] [netdrvr] lp486e: fix typo

inside #if 0'd code, but it bugged me.

Really, we should probably just delete the driver.

Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/lp486e.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c
index b783a6984abc0..393aba95cf127 100644
--- a/drivers/net/lp486e.c
+++ b/drivers/net/lp486e.c
@@ -442,16 +442,16 @@ init_rx_bufs(struct net_device *dev, int num) {
 		if (rbd) {
 			rbd->pad = 0;
 			rbd->count = 0;
-			rbd->skb = dev_alloc_skb(RX_SKB_SIZE);
+			rbd->skb = dev_alloc_skb(RX_SKBSIZE);
 			if (!rbd->skb) {
 				printk("dev_alloc_skb failed");
 			}
 			rbd->next = rfd->rbd;
 			if (i) {
 				rfd->rbd->prev = rbd;
-				rbd->size = RX_SKB_SIZE;
+				rbd->size = RX_SKBSIZE;
 			} else {
-				rbd->size = (RX_SKB_SIZE | RBD_EL);
+				rbd->size = (RX_SKBSIZE | RBD_EL);
 				lp->rbd_tail = rbd;
 			}
 
-- 
GitLab


From 54caf44da31995df1f51174468fd9e83ca5c67a2 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Thu, 21 Sep 2006 00:08:10 -0400
Subject: [PATCH 0340/1063] [netdrvr] mv643xx_eth: fix obvious typo, which
 caused build breakage

The last minute fix submitted by the author fixed a bug, but
broke the driver build.

Noticed by Al Viro, since I can't build on said platform.

Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/mv643xx_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index eeab1df5bef3f..59de3e74d2d76 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -385,7 +385,7 @@ static int mv643xx_eth_receive_queue(struct net_device *dev, int budget)
 	struct pkt_info pkt_info;
 
 	while (budget-- > 0 && eth_port_receive(mp, &pkt_info) == ETH_OK) {
-		dma_unmap_single(NULL, pkt_info.buf_ptr, RX_SKB_SIZE,
+		dma_unmap_single(NULL, pkt_info.buf_ptr, ETH_RX_SKB_SIZE,
 							DMA_FROM_DEVICE);
 		mp->rx_desc_count--;
 		received_packets++;
-- 
GitLab


From 2fe87f02a04ad6e7075023a87fe38eb458a4bb9d Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 21 Sep 2006 07:02:52 +0000
Subject: [PATCH 0341/1063] [CIFS] Support deep tree mounts (e.g. mounts to
 //server/share/path)

Samba bugzilla #4040

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES      |  7 ++++++-
 fs/cifs/cifs_fs_sb.h |  2 ++
 fs/cifs/cifsfs.h     |  2 +-
 fs/cifs/cifspdu.h    |  2 ++
 fs/cifs/connect.c    | 47 ++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/dir.c        | 23 ++++++++++++++--------
 fs/cifs/xattr.c      |  2 +-
 7 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 0feb3bd49cb85..1eb9a2ec0a3b2 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
+Version 1.46
+------------
+Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+
 Version 1.45
 ------------
 Do not time out lockw calls when using posix extensions. Do not
@@ -6,7 +10,8 @@ on requests on other threads.  Improve POSIX locking emulation,
 (lock cancel now works, and unlock of merged range works even
 to Windows servers now).  Fix oops on mount to lanman servers
 (win9x, os/2 etc.) when null password.  Do not send listxattr
-(SMB to query all EAs) if nouser_xattr specified.
+(SMB to query all EAs) if nouser_xattr specified.  Fix SE Linux
+problem (instantiate inodes/dentries in right order for readdir).
 
 Version 1.44
 ------------
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ad58eb0c4d6d8..fd1e52ebcee6c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -40,5 +40,7 @@ struct cifs_sb_info {
 	mode_t	mnt_file_mode;
 	mode_t	mnt_dir_mode;
 	int     mnt_cifs_flags;
+	int	prepathlen;
+	char *  prepath;
 };
 #endif				/* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 39ee8ef3bdeb0..bea875d9a46ac 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.45"
+#define CIFS_VERSION   "1.46"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 86239023545b2..81df2bf8e75a7 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1344,6 +1344,7 @@ struct smb_t2_rsp {
 #define SMB_QUERY_ATTR_FLAGS            0x206  /* append,immutable etc. */
 #define SMB_QUERY_POSIX_PERMISSION      0x207
 #define SMB_QUERY_POSIX_LOCK            0x208
+/* #define SMB_POSIX_OPEN  		0x209 */
 #define SMB_QUERY_FILE_INTERNAL_INFO    0x3ee
 #define SMB_QUERY_FILE_ACCESS_INFO      0x3f0
 #define SMB_QUERY_FILE_NAME_INFO2       0x3f1 /* 0x30 bytes */
@@ -1363,6 +1364,7 @@ struct smb_t2_rsp {
 #define SMB_SET_XATTR                   0x205
 #define SMB_SET_ATTR_FLAGS              0x206  /* append, immutable etc. */
 #define SMB_SET_POSIX_LOCK              0x208
+#define SMB_POSIX_OPEN                  0x209
 #define SMB_SET_FILE_BASIC_INFO2        0x3ec
 #define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */
 #define SMB_FILE_ALL_INFO2              0x3fa
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5d394c7268607..0e9ba0b9d71eb 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -89,6 +89,7 @@ struct smb_vol {
 	unsigned int wsize;
 	unsigned int sockopt;
 	unsigned short int port;
+	char * prepath;
 };
 
 static int ipv4_connect(struct sockaddr_in *psin_server, 
@@ -993,6 +994,28 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 				printk(KERN_WARNING "CIFS: domain name too long\n");
 				return 1;
 			}
+                } else if (strnicmp(data, "prefixpath", 10) == 0) {
+                        if (!value || !*value) {
+                                printk(KERN_WARNING
+                                       "CIFS: invalid path prefix\n");
+                                return 1;       /* needs_arg; */
+                        }
+                        if ((temp_len = strnlen(value, 1024)) < 1024) {
+				if(value[0] != '/')
+					temp_len++;  /* missing leading slash */
+                                vol->prepath = kmalloc(temp_len+1,GFP_KERNEL);
+                                if(vol->prepath == NULL)
+                                        return 1;
+				if(value[0] != '/') {
+					vol->prepath[0] = '/';
+	                                strcpy(vol->prepath+1,value);
+				} else
+					strcpy(vol->prepath,value);
+				cFYI(1,("prefix path %s",vol->prepath));
+                        } else {
+                                printk(KERN_WARNING "CIFS: prefix too long\n");
+                                return 1;
+                        }
 		} else if (strnicmp(data, "iocharset", 9) == 0) {
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: invalid iocharset specified\n");
@@ -1605,6 +1628,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1619,6 +1643,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
            locations such as env variables and files on disk */
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1639,6 +1664,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			/* we failed translating address */
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -EINVAL;
 		}
@@ -1651,6 +1677,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		cERROR(1,("Connecting to DFS root not implemented yet"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	} else /* which servers DFS root would we conect to */ {
@@ -1658,6 +1685,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		       ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1672,6 +1700,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset));
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -ELIBACC;
 		}
@@ -1688,6 +1717,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	else {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1710,6 +1740,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 				sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		}
@@ -1720,6 +1751,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		} else {
@@ -1744,6 +1776,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 				sock_release(csocket);
 				kfree(volume_info.UNC);
 				kfree(volume_info.password);
+				kfree(volume_info.prepath);
 				FreeXid(xid);
 				return rc;
 			}
@@ -1831,6 +1864,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			/* Windows ME may prefer this */
 			cFYI(1,("readsize set to minimum 2048"));
 		}
+		/* calculate prepath */
+		cifs_sb->prepath = volume_info.prepath;
+		if(cifs_sb->prepath) {
+			cifs_sb->prepathlen = strlen(cifs_sb->prepath);
+			cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
+			volume_info.prepath = NULL;
+		} else 
+			cifs_sb->prepathlen = 0;
 		cifs_sb->mnt_uid = volume_info.linux_uid;
 		cifs_sb->mnt_gid = volume_info.linux_gid;
 		cifs_sb->mnt_file_mode = volume_info.file_mode;
@@ -2008,6 +2049,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	the password ptr is put in the new session structure (in which case the
 	password will be freed at unmount time) */
 	kfree(volume_info.UNC);
+	kfree(volume_info.prepath);
 	FreeXid(xid);
 	return rc;
 }
@@ -3195,6 +3237,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	int xid;
 	struct cifsSesInfo *ses = NULL;
 	struct task_struct *cifsd_task;
+	char * tmp;
 
 	xid = GetXid();
 
@@ -3228,6 +3271,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	}
 	
 	cifs_sb->tcon = NULL;
+	tmp = cifs_sb->prepath;
+	cifs_sb->prepathlen = 0;
+	cifs_sb->prepath = NULL;
+	kfree(tmp);
 	if (ses)
 		schedule_timeout_interruptible(msecs_to_jiffies(500));
 	if (ses)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 914239d53634d..66b825ade3e19 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -46,7 +46,8 @@ char *
 build_path_from_dentry(struct dentry *direntry)
 {
 	struct dentry *temp;
-	int namelen = 0;
+	int namelen;
+	int pplen;
 	char *full_path;
 	char dirsep;
 
@@ -56,7 +57,9 @@ build_path_from_dentry(struct dentry *direntry)
 		when the server crashed */
 
 	dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
+	pplen = CIFS_SB(direntry->d_sb)->prepathlen;
 cifs_bp_rename_retry:
+	namelen = pplen; 
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen += (1 + temp->d_name.len);
 		temp = temp->d_parent;
@@ -70,7 +73,6 @@ build_path_from_dentry(struct dentry *direntry)
 	if(full_path == NULL)
 		return full_path;
 	full_path[namelen] = 0;	/* trailing null */
-
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen -= 1 + temp->d_name.len;
 		if (namelen < 0) {
@@ -79,7 +81,7 @@ build_path_from_dentry(struct dentry *direntry)
 			full_path[namelen] = dirsep;
 			strncpy(full_path + namelen + 1, temp->d_name.name,
 				temp->d_name.len);
-			cFYI(0, (" name: %s ", full_path + namelen));
+			cFYI(0, ("name: %s", full_path + namelen));
 		}
 		temp = temp->d_parent;
 		if(temp == NULL) {
@@ -88,18 +90,23 @@ build_path_from_dentry(struct dentry *direntry)
 			return NULL;
 		}
 	}
-	if (namelen != 0) {
+	if (namelen != pplen) {
 		cERROR(1,
-		       ("We did not end path lookup where we expected namelen is %d",
+		       ("did not end path lookup where expected namelen is %d",
 			namelen));
-		/* presumably this is only possible if we were racing with a rename 
+		/* presumably this is only possible if racing with a rename 
 		of one of the parent directories  (we can not lock the dentries
 		above us to prevent this, but retrying should be harmless) */
 		kfree(full_path);
-		namelen = 0;
 		goto cifs_bp_rename_retry;
 	}
-
+	/* DIR_SEP already set for byte  0 / vs \ but not for
+	   subsequent slashes in prepath which currently must
+	   be entered the right way - not sure if there is an alternative
+	   since the '\' is a valid posix character so we can not switch
+	   those safely to '/' if any are found in the middle of the prepath */
+	/* BB test paths to Windows with '/' in the midst of prepath */
+	strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
 	return full_path;
 }
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 067648b7179b5..18fcec190f8b5 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -269,7 +269,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
 				rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
 					ea_value, buf_size,
 					ACL_TYPE_ACCESS);
-				CIFSSMBClose(xid, pTcon, fid)
+				CIFSSMBClose(xid, pTcon, fid);
 			}
 		} */  /* BB enable after fixing up return data */
                   		
-- 
GitLab


From 6b70c9559bcf381a6521e38b0dd5d3d4d905868a Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 21 Sep 2006 07:35:29 +0000
Subject: [PATCH 0342/1063] [CIFS] New POSIX locking code not setting rc
 properly to zero on successful unlock in case where server does not support
 POSIX locks and nobrl is not specified.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e9c5ba9084fc6..ddb012a68023f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -752,6 +752,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 			int stored_rc = 0;
 			struct cifsLockInfo *li, *tmp;
 
+			rc = 0;
 			down(&fid->lock_sem);
 			list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
 				if (pfLock->fl_start <= li->offset &&
@@ -766,7 +767,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 					kfree(li);
 				}
 			}
-		up(&fid->lock_sem);
+			up(&fid->lock_sem);
 		}
 	}
 
-- 
GitLab


From 838fdb4d2d0e4730364220b51be28a42d04c665e Mon Sep 17 00:00:00 2001
From: Peter Bergner <bergner@vnet.ibm.com>
Date: Thu, 14 Sep 2006 14:18:38 -0500
Subject: [PATCH 0343/1063] [POWERPC] Add AT_PLATFORM value for Xilinx Virtex-4
 FX

Jakub noticed the cputable.c entry for Xilinx Virtex-4 FX was missing
a .platform value, so the AT_PLATFORM value wouldn't be set correctly.
This adds it.

Signed-off-by: Peter Bergner <bergner@vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/cputable.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index db65c9f6559ae..190a57e207654 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -949,6 +949,7 @@ struct cpu_spec	cpu_specs[] = {
 			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
+		.platform		= "ppc405",
 	},
 	{	/* 405EP */
 		.pvr_mask		= 0xffff0000,
-- 
GitLab


From 4dbefe6459555d6fb9d08743615fbaa53894beb2 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jdub@us.ibm.com>
Date: Fri, 15 Sep 2006 14:53:10 -0500
Subject: [PATCH 0344/1063] [POWERPC] PPC: Fix xmon stack frame address in
 backtrace

The stack frame address was being printed incorrectly in the backtrace
option of XMON on PPC.  This patch fixes it to print the actual stack
address instead of the address of the local variable that contains it.

Signed-off-by: Josh Boyer <jdub@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/ppc/xmon/xmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ppc/xmon/xmon.c b/arch/ppc/xmon/xmon.c
index 25d032b2aec7e..b1a91744fd2db 100644
--- a/arch/ppc/xmon/xmon.c
+++ b/arch/ppc/xmon/xmon.c
@@ -806,7 +806,7 @@ backtrace(struct pt_regs *excp)
 	for (; sp != 0; sp = stack[0]) {
 		if (mread(sp, stack, sizeof(stack)) != sizeof(stack))
 			break;
-		printf("[%.8lx] ", stack);
+		printf("[%.8lx] ", stack[0]);
 		xmon_print_symbol(stack[1], " ", "\n");
 		if (stack[1] == (unsigned) &ret_from_except
 		    || stack[1] == (unsigned) &ret_from_except_full
-- 
GitLab


From af525592187951a595c73de11b48969a13b5d5a3 Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linas@austin.ibm.com>
Date: Fri, 15 Sep 2006 18:55:34 -0500
Subject: [PATCH 0345/1063] [POWERPC] EEH: balance pcidev_get/put calls

This corrects a pci_dev get/put imbalance that can occur only in
highly unlikely situations (kmalloc failures, pci devices with
overlapping resource addresses).  No actual failures were seen; this
was spotted during code review.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/eeh_cache.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index c37a8497c60fc..b6b462d3c6046 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -157,6 +157,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 	if (!piar)
 		return NULL;
 
+	pci_dev_get(dev);
 	piar->addr_lo = alo;
 	piar->addr_hi = ahi;
 	piar->pcidev = dev;
@@ -178,7 +179,6 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
 	struct device_node *dn;
 	struct pci_dn *pdn;
 	int i;
-	int inserted = 0;
 
 	dn = pci_device_to_OF_node(dev);
 	if (!dn) {
@@ -197,9 +197,6 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
 		return;
 	}
 
-	/* The cache holds a reference to the device... */
-	pci_dev_get(dev);
-
 	/* Walk resources on this device, poke them into the tree */
 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 		unsigned long start = pci_resource_start(dev,i);
@@ -212,12 +209,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
 		if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
 			 continue;
 		pci_addr_cache_insert(dev, start, end, flags);
-		inserted = 1;
 	}
-
-	/* If there was nothing to add, the cache has no reference... */
-	if (!inserted)
-		pci_dev_put(dev);
 }
 
 /**
@@ -240,7 +232,6 @@ void pci_addr_cache_insert_device(struct pci_dev *dev)
 static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
 {
 	struct rb_node *n;
-	int removed = 0;
 
 restart:
 	n = rb_first(&pci_io_addr_cache_root.rb_root);
@@ -250,16 +241,12 @@ static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
 
 		if (piar->pcidev == dev) {
 			rb_erase(n, &pci_io_addr_cache_root.rb_root);
-			removed = 1;
+			pci_dev_put(piar->pcidev);
 			kfree(piar);
 			goto restart;
 		}
 		n = rb_next(n);
 	}
-
-	/* The cache no longer holds its reference to this device... */
-	if (removed)
-		pci_dev_put(dev);
 }
 
 /**
-- 
GitLab


From cb5b562444c27cf53f5d297bd7a89807ea614cf3 Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linas@austin.ibm.com>
Date: Fri, 15 Sep 2006 18:56:35 -0500
Subject: [PATCH 0346/1063] [POWERPC] EEH: code comment cleanup

Clean up subroutine documentation; mostly formatting changes, with
some new content.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/eeh.c        | 19 +++++++++++----
 arch/powerpc/platforms/pseries/eeh_driver.c | 27 ++++++++++++++++-----
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 5a23ce5e16ff1..fb91842fc8194 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -449,7 +449,11 @@ EXPORT_SYMBOL(eeh_check_failure);
 /* ------------------------------------------------------------- */
 /* The code below deals with error recovery */
 
-/** Return negative value if a permanent error, else return
+/**
+ * eeh_slot_availability - returns error status of slot
+ * @pdn pci device node
+ *
+ * Return negative value if a permanent error, else return
  * a number of milliseconds to wait until the PCI slot is
  * ready to be used.
  */
@@ -477,8 +481,10 @@ eeh_slot_availability(struct pci_dn *pdn)
 	return -1;
 }
 
-/** rtas_pci_slot_reset raises/lowers the pci #RST line
- *  state: 1/0 to raise/lower the #RST
+/**
+ * rtas_pci_slot_reset - raises/lowers the pci #RST line
+ * @pdn pci device node
+ * @state: 1/0 to raise/lower the #RST
  *
  * Clear the EEH-frozen condition on a slot.  This routine
  * asserts the PCI #RST line if the 'state' argument is '1',
@@ -518,8 +524,9 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 	}
 }
 
-/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
- *  dn -- device node to be reset.
+/**
+ * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
+ * @pdn: pci device node to be reset.
  *
  *  Return 0 if success, else a non-zero value.
  */
@@ -582,6 +589,8 @@ rtas_set_slot_reset(struct pci_dn *pdn)
 
 /**
  * __restore_bars - Restore the Base Address Registers
+ * @pdn: pci device node
+ *
  * Loads the PCI configuration space base address registers,
  * the expansion ROM base address, the latency timer, and etc.
  * from the saved values in the device node.
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 3269d2cd428bc..045cd7a373390 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -77,8 +77,12 @@ static int irq_in_use(unsigned int irq)
 }
 
 /* ------------------------------------------------------- */
-/** eeh_report_error - report an EEH error to each device,
- *  collect up and merge the device responses.
+/**
+ * eeh_report_error - report pci error to each device driver
+ * 
+ * Report an EEH error to each device driver, collect up and 
+ * merge the device driver responses. Cumulative response 
+ * passed back in "userdata".
  */
 
 static void eeh_report_error(struct pci_dev *dev, void *userdata)
@@ -108,8 +112,8 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata)
 	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 }
 
-/** eeh_report_reset -- tell this device that the pci slot
- *  has been reset.
+/**
+ * eeh_report_reset - tell device that slot has been reset
  */
 
 static void eeh_report_reset(struct pci_dev *dev, void *userdata)
@@ -132,6 +136,10 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata)
 	driver->err_handler->slot_reset(dev);
 }
 
+/**
+ * eeh_report_resume - tell device to resume normal operations
+ */
+
 static void eeh_report_resume(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
@@ -148,6 +156,13 @@ static void eeh_report_resume(struct pci_dev *dev, void *userdata)
 	driver->err_handler->resume(dev);
 }
 
+/**
+ * eeh_report_failure - tell device driver that device is dead.
+ *
+ * This informs the device driver that the device is permanently
+ * dead, and that no further recovery attempts will be made on it.
+ */
+
 static void eeh_report_failure(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
@@ -190,11 +205,11 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata)
 
 /**
  * eeh_reset_device() -- perform actual reset of a pci slot
- * Args: bus: pointer to the pci bus structure corresponding
+ * @bus: pointer to the pci bus structure corresponding
  *            to the isolated slot. A non-null value will
  *            cause all devices under the bus to be removed
  *            and then re-added.
- *     pe_dn: pointer to a "Partionable Endpoint" device node.
+ * @pe_dn: pointer to a "Partionable Endpoint" device node.
  *            This is the top-level structure on which pci
  *            bus resets can be performed.
  */
-- 
GitLab


From 47b5c838af92d3504e99633bf568578203b7305f Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linas@austin.ibm.com>
Date: Fri, 15 Sep 2006 18:57:42 -0500
Subject: [PATCH 0347/1063] [POWERPC] EEH: enable MMIO/DMA on frozen slot

Add wrapper around the rtas call to enable MMIO or DMA on a frozen pci
slot.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/eeh.c | 29 ++++++++++++++++++++++++++++
 include/asm-powerpc/ppc-pci.h        | 11 +++++++++++
 2 files changed, 40 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index fb91842fc8194..4534886e3b4e5 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -481,6 +481,35 @@ eeh_slot_availability(struct pci_dn *pdn)
 	return -1;
 }
 
+/**
+ * rtas_pci_enable - enable MMIO or DMA transfers for this slot
+ * @pdn pci device node
+ */
+
+int
+rtas_pci_enable(struct pci_dn *pdn, int function)
+{
+	int config_addr;
+	int rc;
+
+	/* Use PE configuration address, if present */
+	config_addr = pdn->eeh_config_addr;
+	if (pdn->eeh_pe_config_addr)
+		config_addr = pdn->eeh_pe_config_addr;
+
+	rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+	               config_addr,
+	               BUID_HI(pdn->phb->buid),
+	               BUID_LO(pdn->phb->buid),
+		            function);
+
+	if (rc)
+		printk(KERN_WARNING "EEH: Cannot enable function %d, err=%d dn=%s\n",
+		        function, rc, pdn->node->full_name);
+
+	return rc;
+}
+
 /**
  * rtas_pci_slot_reset - raises/lowers the pci #RST line
  * @pdn pci device node
diff --git a/include/asm-powerpc/ppc-pci.h b/include/asm-powerpc/ppc-pci.h
index cf79bc7ebb555..1115756c79f94 100644
--- a/include/asm-powerpc/ppc-pci.h
+++ b/include/asm-powerpc/ppc-pci.h
@@ -68,6 +68,17 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr);
  */
 void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
 
+/**
+ * rtas_pci_enableo - enable IO transfers for this slot
+ * @pdn:       pci device node
+ * @function:  either EEH_THAW_MMIO or EEH_THAW_DMA 
+ *
+ * Enable I/O transfers to this slot 
+ */
+#define EEH_THAW_MMIO 2
+#define EEH_THAW_DMA  3
+int rtas_pci_enable(struct pci_dn *pdn, int function);
+
 /**
  * rtas_set_slot_reset -- unfreeze a frozen slot
  *
-- 
GitLab


From 6a1ca373a16b0e170164ab8a2d6d01eab2a22f6e Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linas@austin.ibm.com>
Date: Fri, 15 Sep 2006 18:58:59 -0500
Subject: [PATCH 0348/1063] [POWERPC] EEH: support MMIO enable recovery step

Update to the PowerPC PCI error recovery code.

Add code to enable MMIO if a device driver reports that it is capable
of recovering on its own.  One anticipated use of this is having a
device driver enable MMIO so that it can take a register dump, which
might then be followed by the device driver requesting a full reset.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/eeh_driver.c | 81 ++++++++++++++++-----
 1 file changed, 64 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 045cd7a373390..c2bc9904f1cb9 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -100,14 +100,38 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata)
 		PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
 		disable_irq_nosync(dev->irq);
 	}
-	if (!driver->err_handler)
-		return;
-	if (!driver->err_handler->error_detected)
+	if (!driver->err_handler ||
+	    !driver->err_handler->error_detected)
 		return;
 
 	rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
-	if (*res == PCI_ERS_RESULT_NEED_RESET) return;
+	if (*res == PCI_ERS_RESULT_DISCONNECT &&
+	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+}
+
+/**
+ * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled
+ *
+ * Report an EEH error to each device driver, collect up and
+ * merge the device driver responses. Cumulative response
+ * passed back in "userdata".
+ */
+
+static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
+{
+	enum pci_ers_result rc, *res = userdata;
+	struct pci_driver *driver = dev->driver;
+
+	// dev->error_state = pci_channel_mmio_enabled;
+
+	if (!driver ||
+	    !driver->err_handler ||
+	    !driver->err_handler->mmio_enabled)
+		return;
+
+	rc = driver->err_handler->mmio_enabled (dev);
+	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 	if (*res == PCI_ERS_RESULT_DISCONNECT &&
 	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 }
@@ -118,6 +142,7 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata)
 
 static void eeh_report_reset(struct pci_dev *dev, void *userdata)
 {
+	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver = dev->driver;
 	struct device_node *dn = pci_device_to_OF_node(dev);
 
@@ -128,12 +153,14 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata)
 		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
 		enable_irq(dev->irq);
 	}
-	if (!driver->err_handler)
-		return;
-	if (!driver->err_handler->slot_reset)
+	if (!driver->err_handler ||
+	    !driver->err_handler->slot_reset)
 		return;
 
-	driver->err_handler->slot_reset(dev);
+	rc = driver->err_handler->slot_reset(dev);
+	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+	if (*res == PCI_ERS_RESULT_DISCONNECT &&
+	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 }
 
 /**
@@ -362,23 +389,43 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 			goto hard_fail;
 	}
 
-	/* If any device called out for a reset, then reset the slot */
-	if (result == PCI_ERS_RESULT_NEED_RESET) {
-		rc = eeh_reset_device(frozen_pdn, NULL);
-		if (rc)
-			goto hard_fail;
-		pci_walk_bus(frozen_bus, eeh_report_reset, NULL);
+	/* If all devices reported they can proceed, then re-enable MMIO */
+	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);
+
+		if (rc) {
+			result = PCI_ERS_RESULT_NEED_RESET;
+		} else {
+			result = PCI_ERS_RESULT_NONE;
+			pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
+		}
 	}
 
-	/* If all devices reported they can proceed, the re-enable PIO */
+	/* If all devices reported they can proceed, then re-enable DMA */
 	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		/* XXX Not supported; we brute-force reset the device */
+		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);
+
+		if (rc)
+			result = PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	/* If any device has a hard failure, then shut off everything. */
+	if (result == PCI_ERS_RESULT_DISCONNECT)
+		goto hard_fail;
+
+	/* If any device called out for a reset, then reset the slot */
+	if (result == PCI_ERS_RESULT_NEED_RESET) {
 		rc = eeh_reset_device(frozen_pdn, NULL);
 		if (rc)
 			goto hard_fail;
-		pci_walk_bus(frozen_bus, eeh_report_reset, NULL);
+		result = PCI_ERS_RESULT_NONE;
+		pci_walk_bus(frozen_bus, eeh_report_reset, &result);
 	}
 
+	/* All devices should claim they have recovered by now. */
+	if (result != PCI_ERS_RESULT_RECOVERED)
+		goto hard_fail;
+
 	/* Tell all device drivers that they can resume operations */
 	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
 
-- 
GitLab


From 8b9b5a77e3aeb9650b511a8be4c61632999537db Mon Sep 17 00:00:00 2001
From: Amy Fong <amy.fong@windriver.com>
Date: Mon, 18 Sep 2006 23:07:24 -0400
Subject: [PATCH 0349/1063] [POWERPC] Fix compile error in sbc8560

The following fixes compile errors in sbc8560.  It adds an <asm/irq.h>
include to sbc8560.h and the FCC1-FCC3 clock source defines to sbc85xx.h.

Signed-off-by: Amy Fong <amy.fong@windriver.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/ppc/platforms/85xx/sbc8560.h |  1 +
 arch/ppc/platforms/85xx/sbc85xx.h | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/arch/ppc/platforms/85xx/sbc8560.h b/arch/ppc/platforms/85xx/sbc8560.h
index c7d61cf3a449b..e5e156f601005 100644
--- a/arch/ppc/platforms/85xx/sbc8560.h
+++ b/arch/ppc/platforms/85xx/sbc8560.h
@@ -14,6 +14,7 @@
 #define __MACH_SBC8560_H__
  
 #include <platforms/85xx/sbc85xx.h>
+#include <asm/irq.h>
 
 #define CPM_MAP_ADDR    (CCSRBAR + MPC85xx_CPM_OFFSET)
  
diff --git a/arch/ppc/platforms/85xx/sbc85xx.h b/arch/ppc/platforms/85xx/sbc85xx.h
index 21ea7a55639ba..51df4dc04e22a 100644
--- a/arch/ppc/platforms/85xx/sbc85xx.h
+++ b/arch/ppc/platforms/85xx/sbc85xx.h
@@ -49,4 +49,22 @@ extern void sbc8560_init_IRQ(void) __init;
 
 #define MPC85XX_PCI1_IO_SIZE	0x01000000
 
+/* FCC1 Clock Source Configuration.  These can be
+ * redefined in the board specific file.
+ *    Can only choose from CLK9-12 */
+#define F1_RXCLK       12
+#define F1_TXCLK       11
+
+/* FCC2 Clock Source Configuration.  These can be
+ * redefined in the board specific file.
+ *    Can only choose from CLK13-16 */
+#define F2_RXCLK       13
+#define F2_TXCLK       14
+
+/* FCC3 Clock Source Configuration.  These can be
+ * redefined in the board specific file.
+ *    Can only choose from CLK13-16 */
+#define F3_RXCLK       15
+#define F3_TXCLK       16
+
 #endif /* __PLATFORMS_85XX_SBC85XX_H__ */
-- 
GitLab


From 7da8a2e5c1fd2ee513fdeac8d13c4f3623838fd0 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jdub@us.ibm.com>
Date: Wed, 20 Sep 2006 09:11:59 -0500
Subject: [PATCH 0350/1063] [POWERPC] 40x: Fix debug status register defines

This fixes some debug register defines on PPC 40x that were incorrect.

Signed-off-by: Josh Boyer <jdub@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-ppc/reg_booke.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/asm-ppc/reg_booke.h b/include/asm-ppc/reg_booke.h
index 4944c0fb8beaf..602fbadeaf480 100644
--- a/include/asm-ppc/reg_booke.h
+++ b/include/asm-ppc/reg_booke.h
@@ -300,14 +300,14 @@ do {						\
 #define DBSR_IC		0x80000000	/* Instruction Completion */
 #define DBSR_BT		0x40000000	/* Branch taken */
 #define DBSR_TIE	0x10000000	/* Trap Instruction debug Event */
-#define DBSR_IAC1	0x00800000	/* Instruction Address Compare 1 Event */
-#define DBSR_IAC2	0x00400000	/* Instruction Address Compare 2 Event */
-#define DBSR_IAC3	0x00200000	/* Instruction Address Compare 3 Event */
-#define DBSR_IAC4	0x00100000	/* Instruction Address Compare 4 Event */
-#define DBSR_DAC1R	0x00080000	/* Data Address Compare 1 Read Event */
-#define DBSR_DAC1W	0x00040000	/* Data Address Compare 1 Write Event */
-#define DBSR_DAC2R	0x00020000	/* Data Address Compare 2 Read Event */
-#define DBSR_DAC2W	0x00010000	/* Data Address Compare 2 Write Event */
+#define DBSR_IAC1	0x04000000	/* Instruction Address Compare 1 Event */
+#define DBSR_IAC2	0x02000000	/* Instruction Address Compare 2 Event */
+#define DBSR_IAC3	0x00080000	/* Instruction Address Compare 3 Event */
+#define DBSR_IAC4	0x00040000	/* Instruction Address Compare 4 Event */
+#define DBSR_DAC1R	0x01000000	/* Data Address Compare 1 Read Event */
+#define DBSR_DAC1W	0x00800000	/* Data Address Compare 1 Write Event */
+#define DBSR_DAC2R	0x00400000	/* Data Address Compare 2 Read Event */
+#define DBSR_DAC2W	0x00200000	/* Data Address Compare 2 Write Event */
 #endif
 
 /* Bit definitions related to the ESR. */
-- 
GitLab


From b8c06a2ab68661bf841e21003f4447f8d422aed3 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 22 Sep 2006 01:14:52 +0000
Subject: [PATCH 0351/1063] [CIFS] statfs for cifs unix extensions no longer
 experimental

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3cd750029be29..c3ef1c0d0e684 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -189,7 +189,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_files = 0;	/* undefined */
 	buf->f_ffree = 0;	/* unlimited */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* BB we could add a second check for a QFS Unix capability bit */
 /* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */
     if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS &
@@ -199,7 +198,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
     /* Only need to call the old QFSInfo if failed
     on newer one */
     if(rc)
-#endif /* CIFS_EXPERIMENTAL */
 	rc = CIFSSMBQFSInfo(xid, pTcon, buf);
 
 	/* Old Windows servers do not support level 103, retry with level 
-- 
GitLab


From caf81329c39b5c48f6cc0d78fa159b5a587e37f9 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 21 Sep 2006 18:00:00 +1000
Subject: [PATCH 0352/1063] [POWERPC] Merge iSeries i/o operations with the
 rest

This patch changes the io operations so that they are out of line if
CONFIG_PPC_ISERIES is set and includes a firmware feature check in
that case.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/io.c                 |  14 ++
 arch/powerpc/platforms/iseries/pci.c     | 280 ++++++++++++++++++-----
 include/asm-powerpc/io.h                 | 148 ++++++------
 include/asm-powerpc/iseries/iseries_io.h |  60 -----
 4 files changed, 319 insertions(+), 183 deletions(-)
 delete mode 100644 include/asm-powerpc/iseries/iseries_io.h

diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index 80a3209acef4b..e98180686b352 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -22,12 +22,16 @@
 #include <linux/module.h>
 
 #include <asm/io.h>
+#include <asm/firmware.h>
+#include <asm/bug.h>
 
 void _insb(volatile u8 __iomem *port, void *buf, long count)
 {
 	u8 *tbuf = buf;
 	u8 tmp;
 
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
 	if (unlikely(count <= 0))
 		return;
 	asm volatile("sync");
@@ -44,6 +48,8 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count)
 {
 	const u8 *tbuf = buf;
 
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
 	if (unlikely(count <= 0))
 		return;
 	asm volatile("sync");
@@ -59,6 +65,8 @@ void _insw_ns(volatile u16 __iomem *port, void *buf, long count)
 	u16 *tbuf = buf;
 	u16 tmp;
 
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
 	if (unlikely(count <= 0))
 		return;
 	asm volatile("sync");
@@ -75,6 +83,8 @@ void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
 {
 	const u16 *tbuf = buf;
 
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
 	if (unlikely(count <= 0))
 		return;
 	asm volatile("sync");
@@ -90,6 +100,8 @@ void _insl_ns(volatile u32 __iomem *port, void *buf, long count)
 	u32 *tbuf = buf;
 	u32 tmp;
 
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
 	if (unlikely(count <= 0))
 		return;
 	asm volatile("sync");
@@ -106,6 +118,8 @@ void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
 {
 	const u32 *tbuf = buf;
 
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
 	if (unlikely(count <= 0))
 		return;
 	asm volatile("sync");
diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c
index f4d427a7bb2d6..3eb12065df23e 100644
--- a/arch/powerpc/platforms/iseries/pci.c
+++ b/arch/powerpc/platforms/iseries/pci.c
@@ -34,6 +34,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/iommu.h>
 #include <asm/abs_addr.h>
+#include <asm/firmware.h>
 
 #include <asm/iseries/hv_call_xm.h>
 #include <asm/iseries/mf.h>
@@ -269,46 +270,6 @@ void pcibios_fixup_resources(struct pci_dev *pdev)
 {
 }
 
-/*
- * I/0 Memory copy MUST use mmio commands on iSeries
- * To do; For performance, include the hv call directly
- */
-void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count)
-{
-	u8 ByteValue = c;
-	long NumberOfBytes = Count;
-
-	while (NumberOfBytes > 0) {
-		iSeries_Write_Byte(ByteValue, dest++);
-		-- NumberOfBytes;
-	}
-}
-EXPORT_SYMBOL(iSeries_memset_io);
-
-void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count)
-{
-	char *src = source;
-	long NumberOfBytes = count;
-
-	while (NumberOfBytes > 0) {
-		iSeries_Write_Byte(*src++, dest++);
-		-- NumberOfBytes;
-	}
-}
-EXPORT_SYMBOL(iSeries_memcpy_toio);
-
-void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count)
-{
-	char *dst = dest;
-	long NumberOfBytes = count;
-
-	while (NumberOfBytes > 0) {
-		*dst++ = iSeries_Read_Byte(src++);
-		-- NumberOfBytes;
-	}
-}
-EXPORT_SYMBOL(iSeries_memcpy_fromio);
-
 /*
  * Look down the chain to find the matching Device Device
  */
@@ -491,7 +452,7 @@ static inline struct device_node *xlate_iomm_address(
  * iSeries_Read_Word = Read Word  (16 bit)
  * iSeries_Read_Long = Read Long  (32 bit)
  */
-u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
+static u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -518,9 +479,8 @@ u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
 
 	return (u8)ret.value;
 }
-EXPORT_SYMBOL(iSeries_Read_Byte);
 
-u16 iSeries_Read_Word(const volatile void __iomem *IoAddress)
+static u16 iSeries_Read_Word(const volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -548,9 +508,8 @@ u16 iSeries_Read_Word(const volatile void __iomem *IoAddress)
 
 	return swab16((u16)ret.value);
 }
-EXPORT_SYMBOL(iSeries_Read_Word);
 
-u32 iSeries_Read_Long(const volatile void __iomem *IoAddress)
+static u32 iSeries_Read_Long(const volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -578,7 +537,6 @@ u32 iSeries_Read_Long(const volatile void __iomem *IoAddress)
 
 	return swab32((u32)ret.value);
 }
-EXPORT_SYMBOL(iSeries_Read_Long);
 
 /*
  * Write MM I/O Instructions for the iSeries
@@ -587,7 +545,7 @@ EXPORT_SYMBOL(iSeries_Read_Long);
  * iSeries_Write_Word = Write Word(16 bit)
  * iSeries_Write_Long = Write Long(32 bit)
  */
-void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress)
+static void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -612,9 +570,8 @@ void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress)
 		rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0);
 	} while (CheckReturnCode("WWB", DevNode, &retry, rc) != 0);
 }
-EXPORT_SYMBOL(iSeries_Write_Byte);
 
-void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress)
+static void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -639,9 +596,8 @@ void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress)
 		rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0);
 	} while (CheckReturnCode("WWW", DevNode, &retry, rc) != 0);
 }
-EXPORT_SYMBOL(iSeries_Write_Word);
 
-void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress)
+static void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -666,4 +622,224 @@ void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress)
 		rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0);
 	} while (CheckReturnCode("WWL", DevNode, &retry, rc) != 0);
 }
-EXPORT_SYMBOL(iSeries_Write_Long);
+
+extern unsigned char __raw_readb(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return *(volatile unsigned char __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readb);
+
+extern unsigned short __raw_readw(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return *(volatile unsigned short __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readw);
+
+extern unsigned int __raw_readl(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return *(volatile unsigned int __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readl);
+
+extern unsigned long __raw_readq(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return *(volatile unsigned long __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readq);
+
+extern void __raw_writeb(unsigned char v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	*(volatile unsigned char __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writeb);
+
+extern void __raw_writew(unsigned short v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	*(volatile unsigned short __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writew);
+
+extern void __raw_writel(unsigned int v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	*(volatile unsigned int __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writel);
+
+extern void __raw_writeq(unsigned long v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	*(volatile unsigned long __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writeq);
+
+int in_8(const volatile unsigned char __iomem *addr)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		return iSeries_Read_Byte(addr);
+	return __in_8(addr);
+}
+EXPORT_SYMBOL(in_8);
+
+void out_8(volatile unsigned char __iomem *addr, int val)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		iSeries_Write_Byte(val, addr);
+	else
+		__out_8(addr, val);
+}
+EXPORT_SYMBOL(out_8);
+
+int in_le16(const volatile unsigned short __iomem *addr)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		return iSeries_Read_Word(addr);
+	return __in_le16(addr);
+}
+EXPORT_SYMBOL(in_le16);
+
+int in_be16(const volatile unsigned short __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return __in_be16(addr);
+}
+EXPORT_SYMBOL(in_be16);
+
+void out_le16(volatile unsigned short __iomem *addr, int val)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		iSeries_Write_Word(val, addr);
+	else
+		__out_le16(addr, val);
+}
+EXPORT_SYMBOL(out_le16);
+
+void out_be16(volatile unsigned short __iomem *addr, int val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	__out_be16(addr, val);
+}
+EXPORT_SYMBOL(out_be16);
+
+unsigned in_le32(const volatile unsigned __iomem *addr)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		return iSeries_Read_Long(addr);
+	return __in_le32(addr);
+}
+EXPORT_SYMBOL(in_le32);
+
+unsigned in_be32(const volatile unsigned __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return __in_be32(addr);
+}
+EXPORT_SYMBOL(in_be32);
+
+void out_le32(volatile unsigned __iomem *addr, int val)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		iSeries_Write_Long(val, addr);
+	else
+		__out_le32(addr, val);
+}
+EXPORT_SYMBOL(out_le32);
+
+void out_be32(volatile unsigned __iomem *addr, int val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	__out_be32(addr, val);
+}
+EXPORT_SYMBOL(out_be32);
+
+unsigned long in_le64(const volatile unsigned long __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return __in_le64(addr);
+}
+EXPORT_SYMBOL(in_le64);
+
+unsigned long in_be64(const volatile unsigned long __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	return __in_be64(addr);
+}
+EXPORT_SYMBOL(in_be64);
+
+void out_le64(volatile unsigned long __iomem *addr, unsigned long val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	__out_le64(addr, val);
+}
+EXPORT_SYMBOL(out_le64);
+
+void out_be64(volatile unsigned long __iomem *addr, unsigned long val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+
+	__out_be64(addr, val);
+}
+EXPORT_SYMBOL(out_be64);
+
+void memset_io(volatile void __iomem *addr, int c, unsigned long n)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
+		volatile char __iomem *d = addr;
+
+		while (n-- > 0) {
+			iSeries_Write_Byte(c, d++);
+		}
+	} else
+		eeh_memset_io(addr, c, n);
+}
+EXPORT_SYMBOL(memset_io);
+
+void memcpy_fromio(void *dest, const volatile void __iomem *src,
+                                 unsigned long n)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
+		char *d = dest;
+		const volatile char __iomem *s = src;
+
+		while (n-- > 0) {
+			*d++ = iSeries_Read_Byte(s++);
+		}
+	} else
+		eeh_memcpy_fromio(dest, src, n);
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+void memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n)
+{
+	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
+		const char *s = src;
+		volatile char __iomem *d = dest;
+
+		while (n-- > 0) {
+			iSeries_Write_Byte(*s++, d++);
+		}
+	} else
+		eeh_memcpy_toio(dest, src, n);
+}
+EXPORT_SYMBOL(memcpy_toio);
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 174fb89d5edaf..46bae1cf385b9 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -20,9 +20,6 @@ extern int check_legacy_ioport(unsigned long base_port);
 #include <asm/page.h>
 #include <asm/byteorder.h>
 #include <asm/paca.h>
-#ifdef CONFIG_PPC_ISERIES 
-#include <asm/iseries/iseries_io.h>
-#endif  
 #include <asm/synch.h>
 #include <asm/delay.h>
 
@@ -37,41 +34,53 @@ extern unsigned long isa_io_base;
 extern unsigned long pci_io_base;
 
 #ifdef CONFIG_PPC_ISERIES
-/* __raw_* accessors aren't supported on iSeries */
-#define __raw_readb(addr)	{ BUG(); 0; }
-#define __raw_readw(addr)       { BUG(); 0; }
-#define __raw_readl(addr)       { BUG(); 0; }
-#define __raw_readq(addr)       { BUG(); 0; }
-#define __raw_writeb(v, addr)   { BUG(); 0; }
-#define __raw_writew(v, addr)   { BUG(); 0; }
-#define __raw_writel(v, addr)   { BUG(); 0; }
-#define __raw_writeq(v, addr)   { BUG(); 0; }
-#define readb(addr)		iSeries_Read_Byte(addr)
-#define readw(addr)		iSeries_Read_Word(addr)
-#define readl(addr)		iSeries_Read_Long(addr)
-#define writeb(data, addr)	iSeries_Write_Byte((data),(addr))
-#define writew(data, addr)	iSeries_Write_Word((data),(addr))
-#define writel(data, addr)	iSeries_Write_Long((data),(addr))
-#define memset_io(a,b,c)	iSeries_memset_io((a),(b),(c))
-#define memcpy_fromio(a,b,c)	iSeries_memcpy_fromio((a), (b), (c))
-#define memcpy_toio(a,b,c)	iSeries_memcpy_toio((a), (b), (c))
-
-#define inb(addr)		readb(((void __iomem *)(long)(addr)))
-#define inw(addr)		readw(((void __iomem *)(long)(addr)))
-#define inl(addr)		readl(((void __iomem *)(long)(addr)))
-#define outb(data,addr)		writeb(data,((void __iomem *)(long)(addr)))
-#define outw(data,addr)		writew(data,((void __iomem *)(long)(addr)))
-#define outl(data,addr)		writel(data,((void __iomem *)(long)(addr)))
-/*
- * The *_ns versions below don't do byte-swapping.
- * Neither do the standard versions now, these are just here
- * for older code.
- */
-#define insb(port, buf, ns)	_insb((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insw(port, buf, ns)	_insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insl(port, buf, nl)	_insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
 
-#else
+extern int in_8(const volatile unsigned char __iomem *addr);
+extern void out_8(volatile unsigned char __iomem *addr, int val);
+extern int in_le16(const volatile unsigned short __iomem *addr);
+extern int in_be16(const volatile unsigned short __iomem *addr);
+extern void out_le16(volatile unsigned short __iomem *addr, int val);
+extern void out_be16(volatile unsigned short __iomem *addr, int val);
+extern unsigned in_le32(const volatile unsigned __iomem *addr);
+extern unsigned in_be32(const volatile unsigned __iomem *addr);
+extern void out_le32(volatile unsigned __iomem *addr, int val);
+extern void out_be32(volatile unsigned __iomem *addr, int val);
+extern unsigned long in_le64(const volatile unsigned long __iomem *addr);
+extern unsigned long in_be64(const volatile unsigned long __iomem *addr);
+extern void out_le64(volatile unsigned long __iomem *addr, unsigned long val);
+extern void out_be64(volatile unsigned long __iomem *addr, unsigned long val);
+
+extern unsigned char __raw_readb(const volatile void __iomem *addr);
+extern unsigned short __raw_readw(const volatile void __iomem *addr);
+extern unsigned int __raw_readl(const volatile void __iomem *addr);
+extern unsigned long __raw_readq(const volatile void __iomem *addr);
+extern void __raw_writeb(unsigned char v, volatile void __iomem *addr);
+extern void __raw_writew(unsigned short v, volatile void __iomem *addr);
+extern void __raw_writel(unsigned int v, volatile void __iomem *addr);
+extern void __raw_writeq(unsigned long v, volatile void __iomem *addr);
+
+extern void memset_io(volatile void __iomem *addr, int c, unsigned long n);
+extern void memcpy_fromio(void *dest, const volatile void __iomem *src,
+                                 unsigned long n);
+extern void memcpy_toio(volatile void __iomem *dest, const void *src,
+                                 unsigned long n);
+
+#else /* CONFIG_PPC_ISERIES */
+
+#define in_8(addr)		__in_8((addr))
+#define out_8(addr, val)	__out_8((addr), (val))
+#define in_le16(addr)		__in_le16((addr))
+#define in_be16(addr)		__in_be16((addr))
+#define out_le16(addr, val)	__out_le16((addr), (val))
+#define out_be16(addr, val)	__out_be16((addr), (val))
+#define in_le32(addr)		__in_le32((addr))
+#define in_be32(addr)		__in_be32((addr))
+#define out_le32(addr, val)	__out_le32((addr), (val))
+#define out_be32(addr, val)	__out_be32((addr), (val))
+#define in_le64(addr)		__in_le64((addr))
+#define in_be64(addr)		__in_be64((addr))
+#define out_le64(addr, val)	__out_le64((addr), (val))
+#define out_be64(addr, val)	__out_be64((addr), (val))
 
 static inline unsigned char __raw_readb(const volatile void __iomem *addr)
 {
@@ -105,23 +114,11 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
 {
 	*(volatile unsigned long __force *)addr = v;
 }
-#define readb(addr)		eeh_readb(addr)
-#define readw(addr)		eeh_readw(addr)
-#define readl(addr)		eeh_readl(addr)
-#define readq(addr)		eeh_readq(addr)
-#define writeb(data, addr)	eeh_writeb((data), (addr))
-#define writew(data, addr)	eeh_writew((data), (addr))
-#define writel(data, addr)	eeh_writel((data), (addr))
-#define writeq(data, addr)	eeh_writeq((data), (addr))
 #define memset_io(a,b,c)	eeh_memset_io((a),(b),(c))
 #define memcpy_fromio(a,b,c)	eeh_memcpy_fromio((a),(b),(c))
 #define memcpy_toio(a,b,c)	eeh_memcpy_toio((a),(b),(c))
-#define inb(port)		eeh_inb((unsigned long)port)
-#define outb(val, port)		eeh_outb(val, (unsigned long)port)
-#define inw(port)		eeh_inw((unsigned long)port)
-#define outw(val, port)		eeh_outw(val, (unsigned long)port)
-#define inl(port)		eeh_inl((unsigned long)port)
-#define outl(val, port)		eeh_outl(val, (unsigned long)port)
+
+#endif /* CONFIG_PPC_ISERIES */
 
 /*
  * The insw/outsw/insl/outsl macros don't do byte-swapping.
@@ -132,12 +129,25 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
 #define insw(port, buf, ns)	eeh_insw_ns((port), (buf), (ns))
 #define insl(port, buf, nl)	eeh_insl_ns((port), (buf), (nl))
 
-#endif
-
 #define outsb(port, buf, ns)  _outsb((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
 #define outsw(port, buf, ns)  _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
 #define outsl(port, buf, nl)  _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
 
+#define readb(addr)		eeh_readb(addr)
+#define readw(addr)		eeh_readw(addr)
+#define readl(addr)		eeh_readl(addr)
+#define readq(addr)		eeh_readq(addr)
+#define writeb(data, addr)	eeh_writeb((data), (addr))
+#define writew(data, addr)	eeh_writew((data), (addr))
+#define writel(data, addr)	eeh_writel((data), (addr))
+#define writeq(data, addr)	eeh_writeq((data), (addr))
+#define inb(port)		eeh_inb((unsigned long)port)
+#define outb(val, port)		eeh_outb(val, (unsigned long)port)
+#define inw(port)		eeh_inw((unsigned long)port)
+#define outw(val, port)		eeh_outw(val, (unsigned long)port)
+#define inl(port)		eeh_inl((unsigned long)port)
+#define outl(val, port)		eeh_outl(val, (unsigned long)port)
+
 #define readb_relaxed(addr) readb(addr)
 #define readw_relaxed(addr) readw(addr)
 #define readl_relaxed(addr) readl(addr)
@@ -258,7 +268,7 @@ static inline void iosync(void)
  * and should not be used directly by device drivers.  Use inb/readb
  * instead.
  */
-static inline int in_8(const volatile unsigned char __iomem *addr)
+static inline int __in_8(const volatile unsigned char __iomem *addr)
 {
 	int ret;
 
@@ -267,14 +277,14 @@ static inline int in_8(const volatile unsigned char __iomem *addr)
 	return ret;
 }
 
-static inline void out_8(volatile unsigned char __iomem *addr, int val)
+static inline void __out_8(volatile unsigned char __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; stb%U0%X0 %1,%0"
 			     : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-static inline int in_le16(const volatile unsigned short __iomem *addr)
+static inline int __in_le16(const volatile unsigned short __iomem *addr)
 {
 	int ret;
 
@@ -283,7 +293,7 @@ static inline int in_le16(const volatile unsigned short __iomem *addr)
 	return ret;
 }
 
-static inline int in_be16(const volatile unsigned short __iomem *addr)
+static inline int __in_be16(const volatile unsigned short __iomem *addr)
 {
 	int ret;
 
@@ -292,21 +302,21 @@ static inline int in_be16(const volatile unsigned short __iomem *addr)
 	return ret;
 }
 
-static inline void out_le16(volatile unsigned short __iomem *addr, int val)
+static inline void __out_le16(volatile unsigned short __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; sthbrx %1,0,%2"
 			     : "=m" (*addr) : "r" (val), "r" (addr));
 	get_paca()->io_sync = 1;
 }
 
-static inline void out_be16(volatile unsigned short __iomem *addr, int val)
+static inline void __out_be16(volatile unsigned short __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; sth%U0%X0 %1,%0"
 			     : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-static inline unsigned in_le32(const volatile unsigned __iomem *addr)
+static inline unsigned __in_le32(const volatile unsigned __iomem *addr)
 {
 	unsigned ret;
 
@@ -315,7 +325,7 @@ static inline unsigned in_le32(const volatile unsigned __iomem *addr)
 	return ret;
 }
 
-static inline unsigned in_be32(const volatile unsigned __iomem *addr)
+static inline unsigned __in_be32(const volatile unsigned __iomem *addr)
 {
 	unsigned ret;
 
@@ -324,21 +334,21 @@ static inline unsigned in_be32(const volatile unsigned __iomem *addr)
 	return ret;
 }
 
-static inline void out_le32(volatile unsigned __iomem *addr, int val)
+static inline void __out_le32(volatile unsigned __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr)
 			     : "r" (val), "r" (addr));
 	get_paca()->io_sync = 1;
 }
 
-static inline void out_be32(volatile unsigned __iomem *addr, int val)
+static inline void __out_be32(volatile unsigned __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; stw%U0%X0 %1,%0"
 			     : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-static inline unsigned long in_le64(const volatile unsigned long __iomem *addr)
+static inline unsigned long __in_le64(const volatile unsigned long __iomem *addr)
 {
 	unsigned long tmp, ret;
 
@@ -358,7 +368,7 @@ static inline unsigned long in_le64(const volatile unsigned long __iomem *addr)
 	return ret;
 }
 
-static inline unsigned long in_be64(const volatile unsigned long __iomem *addr)
+static inline unsigned long __in_be64(const volatile unsigned long __iomem *addr)
 {
 	unsigned long ret;
 
@@ -367,7 +377,7 @@ static inline unsigned long in_be64(const volatile unsigned long __iomem *addr)
 	return ret;
 }
 
-static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long val)
+static inline void __out_le64(volatile unsigned long __iomem *addr, unsigned long val)
 {
 	unsigned long tmp;
 
@@ -385,15 +395,13 @@ static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long
 	get_paca()->io_sync = 1;
 }
 
-static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val)
+static inline void __out_be64(volatile unsigned long __iomem *addr, unsigned long val)
 {
 	__asm__ __volatile__("sync; std%U0%X0 %1,%0" : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-#ifndef CONFIG_PPC_ISERIES 
 #include <asm/eeh.h>
-#endif
 
 /**
  *	check_signature		-	find BIOS signatures
@@ -409,7 +417,6 @@ static inline int check_signature(const volatile void __iomem * io_addr,
 	const unsigned char *signature, int length)
 {
 	int retval = 0;
-#ifndef CONFIG_PPC_ISERIES 
 	do {
 		if (readb(io_addr) != *signature)
 			goto out;
@@ -419,7 +426,6 @@ static inline int check_signature(const volatile void __iomem * io_addr,
 	} while (length);
 	retval = 1;
 out:
-#endif
 	return retval;
 }
 
diff --git a/include/asm-powerpc/iseries/iseries_io.h b/include/asm-powerpc/iseries/iseries_io.h
deleted file mode 100644
index f29009bd63c98..0000000000000
--- a/include/asm-powerpc/iseries/iseries_io.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _ASM_POWERPC_ISERIES_ISERIES_IO_H
-#define _ASM_POWERPC_ISERIES_ISERIES_IO_H
-
-
-#ifdef CONFIG_PPC_ISERIES
-#include <linux/types.h>
-/*
- * Created by Allan Trautman on Thu Dec 28 2000.
- *
- * Remaps the io.h for the iSeries Io
- * Copyright (C) 2000  Allan H Trautman, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the:
- * Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330,
- * Boston, MA  02111-1307  USA
- *
- * Change Activity:
- *   Created December 28, 2000
- * End Change Activity
- */
-
-#ifdef CONFIG_PCI
-extern u8   iSeries_Read_Byte(const volatile void __iomem * IoAddress);
-extern u16  iSeries_Read_Word(const volatile void __iomem * IoAddress);
-extern u32  iSeries_Read_Long(const volatile void __iomem * IoAddress);
-extern void iSeries_Write_Byte(u8  IoData, volatile void __iomem * IoAddress);
-extern void iSeries_Write_Word(u16 IoData, volatile void __iomem * IoAddress);
-extern void iSeries_Write_Long(u32 IoData, volatile void __iomem * IoAddress);
-
-extern void iSeries_memset_io(volatile void __iomem *dest, char x, size_t n);
-extern void iSeries_memcpy_toio(volatile void __iomem *dest, void *source,
-		size_t n);
-extern void iSeries_memcpy_fromio(void *dest,
-		const volatile void __iomem *source, size_t n);
-#else
-static inline u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
-{
-	return 0xff;
-}
-
-static inline void iSeries_Write_Byte(u8 IoData,
-		volatile void __iomem *IoAddress)
-{
-}
-#endif	/* CONFIG_PCI */
-
-#endif /* CONFIG_PPC_ISERIES */
-#endif /* _ASM_POWERPC_ISERIES_ISERIES_IO_H */
-- 
GitLab


From 2954da897c40de0f3abdd6a100f2978f30d04068 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 21 Sep 2006 18:21:35 +1000
Subject: [PATCH 0353/1063] [POWERPC] Remove DISCONTIGMEM cruft from page.h

This looks like cruft to me: these functions don't exist AFAICT,
and I can't see that it's possible to even enable DISCONTIGMEM on
powerpc anymore. CC'ing some folks who might know better, based on
the who-touched-it-last principle.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/page.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
index fb597b37c2a2f..b4d38b0b15f83 100644
--- a/include/asm-powerpc/page.h
+++ b/include/asm-powerpc/page.h
@@ -55,12 +55,6 @@
 #define PAGE_OFFSET     ASM_CONST(CONFIG_KERNEL_START)
 #define KERNELBASE      (PAGE_OFFSET + PHYSICAL_START)
 
-#ifdef CONFIG_DISCONTIGMEM
-#define page_to_pfn(page)	discontigmem_page_to_pfn(page)
-#define pfn_to_page(pfn)	discontigmem_pfn_to_page(pfn)
-#define pfn_valid(pfn)		discontigmem_pfn_valid(pfn)
-#endif
-
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif
-- 
GitLab


From 7d452c326c2ac879aced884411a0fe3ba75d9c87 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Date: Thu, 21 Sep 2006 12:29:51 +0200
Subject: [PATCH 0354/1063] [POWERPC] powerpc: fix building gdb against
 asm/ptrace.h

Ulrich Weigand found a bug in the current version of
asm-powerpc/ptrace.h that prevents building at least the
SPU target version of gdb, since some ptrace opcodes are
not defined.

The problem seems to have originated in the merging of the 32 and
64 bit versions of that file. Some opcodes are only valid on 64 bit
kernels but are also used by 32 bit programs, so they can't depend
on the __powerpc64__ symbol.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/ptrace.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h
index dc4cb9cc73a1b..4435efe85d0ed 100644
--- a/include/asm-powerpc/ptrace.h
+++ b/include/asm-powerpc/ptrace.h
@@ -215,12 +215,10 @@ do {									      \
 #define PTRACE_GETVRREGS	18
 #define PTRACE_SETVRREGS	19
 
-#ifndef __powerpc64__
 /* Get/set all the upper 32-bits of the SPE registers, accumulator, and
  * spefscr, in one go */
 #define PTRACE_GETEVRREGS	20
 #define PTRACE_SETEVRREGS	21
-#endif /* __powerpc64__ */
 
 /*
  * Get or set a debug register. The first 16 are DABR registers and the
@@ -235,7 +233,6 @@ do {									      \
 #define PPC_PTRACE_GETFPREGS	0x97	/* Get FPRs 0 - 31 */
 #define PPC_PTRACE_SETFPREGS	0x96	/* Set FPRs 0 - 31 */
 
-#ifdef __powerpc64__
 /* Calls to trace a 64bit program from a 32bit program */
 #define PPC_PTRACE_PEEKTEXT_3264 0x95
 #define PPC_PTRACE_PEEKDATA_3264 0x94
@@ -243,6 +240,5 @@ do {									      \
 #define PPC_PTRACE_POKEDATA_3264 0x92
 #define PPC_PTRACE_PEEKUSR_3264  0x91
 #define PPC_PTRACE_POKEUSR_3264  0x90
-#endif /* __powerpc64__ */
 
 #endif /* _ASM_POWERPC_PTRACE_H */
-- 
GitLab


From ed709d134deeaea7925a3d748b33ca7e58cc683d Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 21 Sep 2006 13:10:51 -0500
Subject: [PATCH 0355/1063] [POWERPC] Fix IPIC pending register assignments

This patch fixes the assignment of pending registers to IRQ numbers for
the IPIC; the code previously assigned all IRQs to the high pending word
regardless of which word the interrupt belonged to.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/sysdev/ipic.c | 42 +++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 70e707785d492..0251b7c68d0e9 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -210,7 +210,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 4,
 	},
 	[64] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -218,7 +218,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 0,
 	},
 	[65] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -226,7 +226,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 1,
 	},
 	[66] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -234,7 +234,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 2,
 	},
 	[67] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -242,7 +242,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 3,
 	},
 	[68] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -250,7 +250,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 0,
 	},
 	[69] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -258,7 +258,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 1,
 	},
 	[70] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -266,7 +266,7 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 2,
 	},
 	[71] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -274,91 +274,91 @@ static struct ipic_info ipic_info[] = {
 		.prio_mask = 3,
 	},
 	[72] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 8,
 	},
 	[73] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 9,
 	},
 	[74] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 10,
 	},
 	[75] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 11,
 	},
 	[76] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 12,
 	},
 	[77] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 13,
 	},
 	[78] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 14,
 	},
 	[79] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 15,
 	},
 	[80] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 16,
 	},
 	[84] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 20,
 	},
 	[85] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 21,
 	},
 	[90] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 26,
 	},
 	[91] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
-- 
GitLab


From d608df5c7da6ee968aa2ad43c596d5f8d4022299 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Thu, 21 Sep 2006 14:25:34 -0500
Subject: [PATCH 0356/1063] [POWERPC] Maple U3 HT - reject inappropriate config
 space access

When there is a PCI-X mode 2 capable device behind the HT<->PCI-X
bridge, the pci core decides that the device has the extended 4K
config space, even though the bus is not operating in mode 2.  This is
because the u3_ht pci ops silently accept offsets greater than 255 but
use only the 8 least significant bits, which means reading at offset
0x100 gets the data at offset 0x0, and causes confusion for lspci.

Reject accesses to configuration space offsets greater than 255.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/maple/pci.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index dc05af5156a99..ec5c14f5ba491 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -213,6 +213,9 @@ static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
 	if (hose == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
 	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
 	if (!addr)
 		return PCIBIOS_DEVICE_NOT_FOUND;
@@ -245,6 +248,9 @@ static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
 	if (hose == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
 	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
 	if (!addr)
 		return PCIBIOS_DEVICE_NOT_FOUND;
-- 
GitLab


From cc9881ce371dc7ff3ef5404feda59566fabaf521 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Thu, 21 Sep 2006 14:31:13 -0500
Subject: [PATCH 0357/1063] [POWERPC] Demacrofy
 arch/powerpc/platforms/maple/pci.c

Noticed that the U3_*CFA macros have some typos:

#define U3_HT_CFA0(devfn, off)		\
		((((unsigned long)devfn) << 8) | offset)

(refers to offset rather than off)

#define U3_AGP_CFA0(devfn, off)	\
	((1 << (unsigned long)PCI_SLOT(dev_fn)) \
	| (((unsigned long)PCI_FUNC(dev_fn)) << 8) \

(refers to dev_fn rather than devfn)

Things happen to work, but there doesn't seem to be any reason these
shouldn't be functions.  Overall behavior should be unchanged.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/maple/pci.c | 44 +++++++++++++++++-------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index ec5c14f5ba491..c3aa46b8e2b9d 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -81,16 +81,20 @@ static void __init fixup_bus_range(struct device_node *bridge)
 }
 
 
-#define U3_AGP_CFA0(devfn, off)	\
-	((1 << (unsigned long)PCI_SLOT(dev_fn)) \
-	| (((unsigned long)PCI_FUNC(dev_fn)) << 8) \
-	| (((unsigned long)(off)) & 0xFCUL))
+static unsigned long u3_agp_cfa0(u8 devfn, u8 off)
+{
+	return (1 << (unsigned long)PCI_SLOT(devfn)) |
+		((unsigned long)PCI_FUNC(devfn) << 8) |
+		((unsigned long)off & 0xFCUL);
+}
 
-#define U3_AGP_CFA1(bus, devfn, off)	\
-	((((unsigned long)(bus)) << 16) \
-	|(((unsigned long)(devfn)) << 8) \
-	|(((unsigned long)(off)) & 0xFCUL) \
-	|1UL)
+static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off)
+{
+	return ((unsigned long)bus << 16) |
+		((unsigned long)devfn << 8) |
+		((unsigned long)off & 0xFCUL) |
+		1UL;
+}
 
 static unsigned long u3_agp_cfg_access(struct pci_controller* hose,
 				       u8 bus, u8 dev_fn, u8 offset)
@@ -100,9 +104,9 @@ static unsigned long u3_agp_cfg_access(struct pci_controller* hose,
 	if (bus == hose->first_busno) {
 		if (dev_fn < (11 << 3))
 			return 0;
-		caddr = U3_AGP_CFA0(dev_fn, offset);
+		caddr = u3_agp_cfa0(dev_fn, offset);
 	} else
-		caddr = U3_AGP_CFA1(bus, dev_fn, offset);
+		caddr = u3_agp_cfa1(bus, dev_fn, offset);
 
 	/* Uninorth will return garbage if we don't read back the value ! */
 	do {
@@ -184,13 +188,15 @@ static struct pci_ops u3_agp_pci_ops =
 	u3_agp_write_config
 };
 
+static unsigned long u3_ht_cfa0(u8 devfn, u8 off)
+{
+	return (devfn << 8) | off;
+}
 
-#define U3_HT_CFA0(devfn, off)		\
-		((((unsigned long)devfn) << 8) | offset)
-#define U3_HT_CFA1(bus, devfn, off)	\
-		(U3_HT_CFA0(devfn, off) \
-		+ (((unsigned long)bus) << 16) \
-		+ 0x01000000UL)
+static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off)
+{
+	return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL;
+}
 
 static unsigned long u3_ht_cfg_access(struct pci_controller* hose,
 				      u8 bus, u8 devfn, u8 offset)
@@ -198,9 +204,9 @@ static unsigned long u3_ht_cfg_access(struct pci_controller* hose,
 	if (bus == hose->first_busno) {
 		if (PCI_SLOT(devfn) == 0)
 			return 0;
-		return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset);
+		return ((unsigned long)hose->cfg_data) + u3_ht_cfa0(devfn, offset);
 	} else
-		return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset);
+		return ((unsigned long)hose->cfg_data) + u3_ht_cfa1(bus, devfn, offset);
 }
 
 static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
-- 
GitLab


From 3d574abd59d49173ac3096a19575a2f7430505be Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 21 Sep 2006 13:11:52 -0500
Subject: [PATCH 0358/1063] [POWERPC] Include <asm/mmu.h> in
 arch/powerpc/sysdev/fsl_soc.h for phys_addr_t.

This patch causes fsl_soc.h to import the definition of phys_addr_t
itself, rather than relying on its includer to do so.

Signed-off-by: Scott Wood <scott@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/sysdev/fsl_soc.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index c433d3f39eddc..5a3dd480d2fd8 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -2,6 +2,8 @@
 #define __PPC_FSL_SOC_H
 #ifdef __KERNEL__
 
+#include <asm/mmu.h>
+
 extern phys_addr_t get_immrbase(void);
 
 #endif
-- 
GitLab


From e102926385b56e593b995ecc433f041b498a49e1 Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linas@austin.ibm.com>
Date: Thu, 21 Sep 2006 18:25:56 -0500
Subject: [PATCH 0359/1063] [POWERPC] EEH: Power4 systems sometimes need
 multiple resets.

On detection of an EEH error, some Power4 systems seem to occasionally
want to be reset twice before they report themselves as fully recovered.
This patch re-arranges the code to attempt additional resets if the first
one doesn't take.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/eeh.c | 36 ++++++++++++++++++----------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 4534886e3b4e5..84bc8f7e17ef7 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -478,7 +478,7 @@ eeh_slot_availability(struct pci_dn *pdn)
 
 	printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n",
 		rc, rets[0], rets[1], rets[2]);
-	return -1;
+	return -2;
 }
 
 /**
@@ -546,11 +546,10 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid),
 	               state);
-	if (rc) {
-		printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n", 
+	if (rc)
+		printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
+		        " (%d) #RST=%d dn=%s\n",
 		        rc, state, pdn->node->full_name);
-		return;
-	}
 }
 
 /**
@@ -560,11 +559,8 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
  *  Return 0 if success, else a non-zero value.
  */
 
-int
-rtas_set_slot_reset(struct pci_dn *pdn)
+static void __rtas_set_slot_reset(struct pci_dn *pdn)
 {
-	int i, rc;
-
 	rtas_pci_slot_reset (pdn, 1);
 
 	/* The PCI bus requires that the reset be held high for at least
@@ -585,17 +581,33 @@ rtas_set_slot_reset(struct pci_dn *pdn)
 	 * up traffic. */
 #define PCI_BUS_SETTLE_TIME_MSEC 1800
 	msleep (PCI_BUS_SETTLE_TIME_MSEC);
+}
+
+int rtas_set_slot_reset(struct pci_dn *pdn)
+{
+	int i, rc;
+
+	__rtas_set_slot_reset(pdn);
 
 	/* Now double check with the firmware to make sure the device is
 	 * ready to be used; if not, wait for recovery. */
 	for (i=0; i<10; i++) {
 		rc = eeh_slot_availability (pdn);
-		if (rc < 0)
-			printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", rc, pdn->node->full_name);
 		if (rc == 0)
 			return 0;
-		if (rc < 0)
+
+		if (rc == -2) {
+			printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n",
+			        i, pdn->node->full_name);
+			__rtas_set_slot_reset(pdn);
+			continue;
+		}
+
+		if (rc < 0) {
+			printk (KERN_ERR "EEH: unrecoverable slot failure %s\n",
+			        pdn->node->full_name);
 			return -1;
+		}
 
 		msleep (rc+100);
 	}
-- 
GitLab


From 69917c26c840e7de94522bf90fb190de63bf92bd Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 22 Sep 2006 12:56:30 +1000
Subject: [PATCH 0360/1063] [POWERPC] Fix ohare IDE irq workaround on old
 powermacs

Looks like the workaround for old bogus OF has bitrotted: the fallback
irq 13 now has to go through irq_create_mapping().  This fixes it and
hence fixes boot on some Performa machines.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/ide/ppc/pmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index fa46856e8068c..996c694341bc3 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1326,7 +1326,7 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match)
 	if (macio_irq_count(mdev) == 0) {
 		printk(KERN_WARNING "ide%d: no intrs for device %s, using 13\n",
 			i, mdev->ofdev.node->full_name);
-		irq = 13;
+		irq = irq_create_mapping(NULL, 13);
 	} else
 		irq = macio_irq(mdev, 0);
 
-- 
GitLab


From 187ef15268e638603dea55a91fdfa29feaed6d13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5vard=20Skinnemoen?= <hskinnemoen@atmel.com>
Date: Fri, 22 Sep 2006 10:07:08 +0100
Subject: [PATCH 0361/1063] [MTD] Unlock NOR flash automatically where
 necessary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce the MTD_STUPID_LOCK flag which indicates that the flash chip is
always locked after power-up, so all sectors need to be unlocked before it
is usable.

If this flag is set, and the chip provides an unlock() operation,
add_mtd_device() will unlock the whole MTD device if it's writeable.  This
means that non-writeable partitions will stay locked.

Set MTD_STUPID_LOCK in fixup_use_atmel_lock() so that these chips will work
as expected.

Signed-off-by: Håvard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/chips/cfi_cmdset_0002.c |  1 +
 drivers/mtd/mtdcore.c               | 10 ++++++++++
 include/mtd/mtd-abi.h               |  1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index a482e8922de10..702ae4cd8691c 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -212,6 +212,7 @@ static void fixup_use_atmel_lock(struct mtd_info *mtd, void *param)
 {
 	mtd->lock = cfi_atmel_lock;
 	mtd->unlock = cfi_atmel_unlock;
+	mtd->flags |= MTD_STUPID_LOCK;
 }
 
 static struct cfi_fixup cfi_fixup_table[] = {
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 168d3ba063c36..c4d26de74349e 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -57,6 +57,16 @@ int add_mtd_device(struct mtd_info *mtd)
 			mtd->index = i;
 			mtd->usecount = 0;
 
+			/* Some chips always power up locked. Unlock them now */
+			if ((mtd->flags & MTD_WRITEABLE)
+			    && (mtd->flags & MTD_STUPID_LOCK) && mtd->unlock) {
+				if (mtd->unlock(mtd, 0, mtd->size))
+					printk(KERN_WARNING
+					       "%s: unlock failed, "
+					       "writes may not work\n",
+					       mtd->name);
+			}
+
 			DEBUG(0, "mtd: Giving out device %d to %s\n",i, mtd->name);
 			/* No need to get a refcount on the module containing
 			   the notifier, since we hold the mtd_table_mutex */
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index 1da3f7fa79934..b0a67b7ffdcd2 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -34,6 +34,7 @@ struct mtd_oob_buf {
 #define MTD_WRITEABLE		0x400	/* Device is writeable */
 #define MTD_BIT_WRITEABLE	0x800	/* Single bits can be flipped */
 #define MTD_NO_ERASE		0x1000	/* No erase necessary */
+#define MTD_STUPID_LOCK		0x2000	/* Always locked after reset */
 
 // Some common devices / combinations of capabilities
 #define MTD_CAP_ROM		0
-- 
GitLab


From cbc88ba83ff772d8c47d95ba0fef38ad888d6fcf Mon Sep 17 00:00:00 2001
From: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Date: Fri, 22 Sep 2006 10:09:44 +0100
Subject: [PATCH 0362/1063] [MTD NAND] Remove old code in au1550nd.c

Signed-off-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/nand/au1550nd.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 31228334da124..09e421a968934 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -21,18 +21,7 @@
 #include <linux/version.h>
 #include <asm/io.h>
 
-/* fixme: this is ugly */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 0)
 #include <asm/mach-au1x00/au1xxx.h>
-#else
-#include <asm/au1000.h>
-#ifdef CONFIG_MIPS_PB1550
-#include <asm/pb1550.h>
-#endif
-#ifdef CONFIG_MIPS_DB1550
-#include <asm/db1x00.h>
-#endif
-#endif
 
 /*
  * MTD structure for NAND controller
-- 
GitLab


From f2dd117fa51dd8fc071b1352254c0d14d2399b0a Mon Sep 17 00:00:00 2001
From: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Date: Fri, 22 Sep 2006 10:13:46 +0100
Subject: [PATCH 0363/1063] [JFFS2] Remove unneeded ifdefs from jffs2_fs_i.h

We certainly don't need the check for Linux version > 2.5.2, and in fact
we can also live without the __ECOS check, since we can just add it back
in the eCos git tree which is automatically derived from the Linux fs/jffs2
subdirectory in the upstream git tree.

Signed-off-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 fs/jffs2/jffs2_fs_i.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 2e0cc8e00b858..3a566077ac955 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -41,11 +41,7 @@ struct jffs2_inode_info {
 
 	uint16_t flags;
 	uint8_t usercompr;
-#if !defined (__ECOS)
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
 	struct inode vfs_inode;
-#endif
-#endif
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 	struct posix_acl *i_acl_access;
 	struct posix_acl *i_acl_default;
-- 
GitLab


From f40a6f1cc7fe522e51e1ac4c7ab3035a434f2cef Mon Sep 17 00:00:00 2001
From: Brian Walsh <brian@walsh.ws>
Date: Fri, 22 Sep 2006 10:16:16 +0100
Subject: [PATCH 0364/1063] [MTD] Fix ixp4xx partition parsing.

If the amount of flash is not divisible by 2 then the mask in
parse_mtd_partitions would fail to work as designed.  Passing in the base
address corrects this problem.

Signed-off-by: Brian Walsh <brian@walsh.ws>
Cc: Deepak Sanexa <dsanexa@mvista.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/maps/ixp4xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c
index 986c586283907..7a828e3e6446d 100644
--- a/drivers/mtd/maps/ixp4xx.c
+++ b/drivers/mtd/maps/ixp4xx.c
@@ -253,7 +253,7 @@ static int ixp4xx_flash_probe(struct platform_device *dev)
 	/* Use the fast version */
 	info->map.write = ixp4xx_write16,
 
-	err = parse_mtd_partitions(info->mtd, probes, &info->partitions, 0);
+	err = parse_mtd_partitions(info->mtd, probes, &info->partitions, dev->resource->start);
 	if (err > 0) {
 		err = add_mtd_partitions(info->mtd, info->partitions, err);
 		if(err)
-- 
GitLab


From 668040fcd1e06fc3e68a92708cbdfa5a52c37d3c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Fri, 22 Sep 2006 10:17:28 +0100
Subject: [PATCH 0365/1063] [MTD] blkdev helper code: fix printk format warning

Fix printk format warning(s):
drivers/mtd/mtd_blkdevs.c:72: warning: long int format, different type arg (arg 2)

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/mtd_blkdevs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 458d3c8ae1eee..302bed5f08452 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -69,7 +69,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 		return 1;
 
 	default:
-		printk(KERN_NOTICE "Unknown request %ld\n", rq_data_dir(req));
+		printk(KERN_NOTICE "Unknown request %d\n", rq_data_dir(req));
 		return 0;
 	}
 }
-- 
GitLab


From dd8e9ed6ed544e2b924429d29cd2a6b55590109b Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 22 Sep 2006 10:19:20 +0100
Subject: [PATCH 0366/1063] [MTD] Switch to pci_get_device and do ref counting

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/devices/pmc551.c       | 11 ++++++++++-
 drivers/mtd/maps/amd76xrom.c       |  5 +++--
 drivers/mtd/maps/ichxrom.c         |  3 ++-
 drivers/mtd/maps/l440gx.c          | 12 ++++++++++--
 drivers/mtd/maps/scx200_docflash.c |  9 +++++++--
 5 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index 2c01497087398..a03a8a79e5c55 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -674,7 +674,7 @@ static int __init init_pmc551(void)
          */
         for( count = 0; count < MAX_MTD_DEVICES; count++ ) {
 
-                if ((PCI_Device = pci_find_device(PCI_VENDOR_ID_V3_SEMI,
+                if ((PCI_Device = pci_get_device(PCI_VENDOR_ID_V3_SEMI,
                                                   PCI_DEVICE_ID_V3_SEMI_V370PDC,
 						  PCI_Device ) ) == NULL) {
                         break;
@@ -783,6 +783,10 @@ static int __init init_pmc551(void)
                         kfree(mtd);
                         break;
                 }
+
+                /* Keep a reference as the add_mtd_device worked */
+                pci_dev_get(PCI_Device);
+
                 printk(KERN_NOTICE "Registered pmc551 memory device.\n");
                 printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n",
                        priv->asize>>20,
@@ -797,6 +801,10 @@ static int __init init_pmc551(void)
 		found++;
         }
 
+        /* Exited early, reference left over */
+        if (PCI_Device)
+        	pci_dev_put(PCI_Device);
+
         if( !pmc551list ) {
                 printk(KERN_NOTICE "pmc551: not detected\n");
                 return -ENODEV;
@@ -824,6 +832,7 @@ static void __exit cleanup_pmc551(void)
 				priv->asize>>20, priv->start);
 			iounmap (priv->start);
 		}
+		pci_dev_put(priv->dev);
 
 		kfree (mtd->priv);
 		del_mtd_device (mtd);
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 447955be18af7..797caffb20b13 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -57,6 +57,7 @@ static void amd76xrom_cleanup(struct amd76xrom_window *window)
 		/* Disable writes through the rom window */
 		pci_read_config_byte(window->pdev, 0x40, &byte);
 		pci_write_config_byte(window->pdev, 0x40, byte & ~1);
+		pci_dev_put(window->pdev);
 	}
 
 	/* Free all of the mtd devices */
@@ -91,7 +92,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
 	struct amd76xrom_map_info *map = NULL;
 	unsigned long map_top;
 
-	/* Remember the pci dev I find the window in */
+	/* Remember the pci dev I find the window in - already have a ref */
 	window->pdev = pdev;
 
 	/* Assume the rom window is properly setup, and find it's size */
@@ -302,7 +303,7 @@ static int __init init_amd76xrom(void)
 	struct pci_device_id *id;
 	pdev = NULL;
 	for(id = amd76xrom_pci_tbl; id->vendor; id++) {
-		pdev = pci_find_device(id->vendor, id->device, NULL);
+		pdev = pci_get_device(id->vendor, id->device, NULL);
 		if (pdev) {
 			break;
 		}
diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c
index db4b570d874ab..2bb3e63606e5d 100644
--- a/drivers/mtd/maps/ichxrom.c
+++ b/drivers/mtd/maps/ichxrom.c
@@ -61,6 +61,7 @@ static void ichxrom_cleanup(struct ichxrom_window *window)
 	/* Disable writes through the rom window */
 	pci_read_config_word(window->pdev, BIOS_CNTL, &word);
 	pci_write_config_word(window->pdev, BIOS_CNTL, word & ~1);
+	pci_dev_put(window->pdev);
 
 	/* Free all of the mtd devices */
 	list_for_each_entry_safe(map, scratch, &window->maps, list) {
@@ -355,7 +356,7 @@ static int __init init_ichxrom(void)
 
 	pdev = NULL;
 	for (id = ichxrom_pci_tbl; id->vendor; id++) {
-		pdev = pci_find_device(id->vendor, id->device, NULL);
+		pdev = pci_get_device(id->vendor, id->device, NULL);
 		if (pdev) {
 			break;
 		}
diff --git a/drivers/mtd/maps/l440gx.c b/drivers/mtd/maps/l440gx.c
index 6b784ef5ee702..67620adf48114 100644
--- a/drivers/mtd/maps/l440gx.c
+++ b/drivers/mtd/maps/l440gx.c
@@ -61,14 +61,17 @@ static int __init init_l440gx(void)
 	struct resource *pm_iobase;
 	__u16 word;
 
-	dev = pci_find_device(PCI_VENDOR_ID_INTEL,
+	dev = pci_get_device(PCI_VENDOR_ID_INTEL,
 		PCI_DEVICE_ID_INTEL_82371AB_0, NULL);
 
-	pm_dev = pci_find_device(PCI_VENDOR_ID_INTEL,
+	pm_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
 		PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
 
+	pci_dev_put(dev);
+
 	if (!dev || !pm_dev) {
 		printk(KERN_NOTICE "L440GX flash mapping: failed to find PIIX4 ISA bridge, cannot continue\n");
+		pci_dev_put(pm_dev);
 		return -ENODEV;
 	}
 
@@ -76,6 +79,7 @@ static int __init init_l440gx(void)
 
 	if (!l440gx_map.virt) {
 		printk(KERN_WARNING "Failed to ioremap L440GX flash region\n");
+		pci_dev_put(pm_dev);
 		return -ENOMEM;
 	}
 	simple_map_init(&l440gx_map);
@@ -99,8 +103,12 @@ static int __init init_l440gx(void)
 		pm_iobase->start += iobase & ~1;
 		pm_iobase->end += iobase & ~1;
 
+		pci_dev_put(pm_dev);
+
 		/* Allocate the resource region */
 		if (pci_assign_resource(pm_dev, PIIXE_IOBASE_RESOURCE) != 0) {
+			pci_dev_put(dev);
+			pci_dev_put(pm_dev);
 			printk(KERN_WARNING "Could not allocate pm iobase resource\n");
 			iounmap(l440gx_map.virt);
 			return -ENXIO;
diff --git a/drivers/mtd/maps/scx200_docflash.c b/drivers/mtd/maps/scx200_docflash.c
index 7391fd544e86f..5e2bce22f37ca 100644
--- a/drivers/mtd/maps/scx200_docflash.c
+++ b/drivers/mtd/maps/scx200_docflash.c
@@ -87,19 +87,23 @@ static int __init init_scx200_docflash(void)
 
 	printk(KERN_DEBUG NAME ": NatSemi SCx200 DOCCS Flash Driver\n");
 
-	if ((bridge = pci_find_device(PCI_VENDOR_ID_NS,
+	if ((bridge = pci_get_device(PCI_VENDOR_ID_NS,
 				      PCI_DEVICE_ID_NS_SCx200_BRIDGE,
 				      NULL)) == NULL)
 		return -ENODEV;
 
 	/* check that we have found the configuration block */
-	if (!scx200_cb_present())
+	if (!scx200_cb_present()) {
+		pci_dev_put(bridge);
 		return -ENODEV;
+	}
 
 	if (probe) {
 		/* Try to use the present flash mapping if any */
 		pci_read_config_dword(bridge, SCx200_DOCCS_BASE, &base);
 		pci_read_config_dword(bridge, SCx200_DOCCS_CTRL, &ctrl);
+		pci_dev_put(bridge);
+
 		pmr = inl(scx200_cb_base + SCx200_PMR);
 
 		if (base == 0
@@ -127,6 +131,7 @@ static int __init init_scx200_docflash(void)
 			return -ENOMEM;
 		}
 	} else {
+		pci_dev_put(bridge);
 		for (u = size; u > 1; u >>= 1)
 			;
 		if (u != 1) {
-- 
GitLab


From 25f0c659fe64832d8ee06aa623fffaad708dcf8b Mon Sep 17 00:00:00 2001
From: Amol Lad <amol@verismonetworks.com>
Date: Thu, 21 Sep 2006 18:12:43 +0530
Subject: [PATCH 0367/1063] ioremap balanced with iounmap for drivers/mtd
 subsystem

ioremap must be balanced by an iounmap and failing to do so can result
in a memory leak.

Tested (compilation only) with:
- allmodconfig
- Modifying drivers/mtd/maps/Kconfig and drivers/mtd/nand/Kconfig to
make sure that the changed file is compiling without warning

Signed-off-by: Amol Lad <amol@verismonetworks.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/maps/arctic-mtd.c     | 14 +++++++++++--
 drivers/mtd/maps/beech-mtd.c      | 14 +++++++++++--
 drivers/mtd/maps/cstm_mips_ixx.c  | 18 ++++++++++++++--
 drivers/mtd/maps/ebony.c          |  4 ++++
 drivers/mtd/maps/fortunet.c       |  3 +++
 drivers/mtd/maps/lasat.c          |  2 ++
 drivers/mtd/maps/nettel.c         | 34 ++++++++++++++++++++++++-------
 drivers/mtd/maps/ocotea.c         |  4 ++++
 drivers/mtd/maps/pcmciamtd.c      |  4 ++++
 drivers/mtd/maps/redwood.c        | 11 +++++++++-
 drivers/mtd/maps/sbc8240.c        | 11 +++++++++-
 drivers/mtd/maps/walnut.c         |  4 ++++
 drivers/mtd/nand/edb7312.c        |  3 +++
 drivers/mtd/nand/ppchameleonevb.c |  7 +++++++
 include/linux/utsrelease.h        |  1 +
 15 files changed, 119 insertions(+), 15 deletions(-)
 create mode 100644 include/linux/utsrelease.h

diff --git a/drivers/mtd/maps/arctic-mtd.c b/drivers/mtd/maps/arctic-mtd.c
index d95ae582fbe9a..642d96bc89198 100644
--- a/drivers/mtd/maps/arctic-mtd.c
+++ b/drivers/mtd/maps/arctic-mtd.c
@@ -96,6 +96,8 @@ static struct mtd_partition arctic_partitions[PARTITIONS] = {
 static int __init
 init_arctic_mtd(void)
 {
+	int err = 0;
+
 	printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR);
 
 	arctic_mtd_map.virt = ioremap(PADDR, SIZE);
@@ -109,12 +111,20 @@ init_arctic_mtd(void)
 	printk("%s: probing %d-bit flash bus\n", NAME, BUSWIDTH * 8);
 	arctic_mtd = do_map_probe("cfi_probe", &arctic_mtd_map);
 
-	if (!arctic_mtd)
+	if (!arctic_mtd) {
+		iounmap((void *) arctic_mtd_map.virt);
 		return -ENXIO;
+	}
 
 	arctic_mtd->owner = THIS_MODULE;
 
-	return add_mtd_partitions(arctic_mtd, arctic_partitions, PARTITIONS);
+	err = add_mtd_partitions(arctic_mtd, arctic_partitions, PARTITIONS);
+	if (err) {
+		printk("%s: add_mtd_partitions failed\n", NAME);
+		iounmap((void *) arctic_mtd_map.virt);
+	}
+
+	return err;
 }
 
 static void __exit
diff --git a/drivers/mtd/maps/beech-mtd.c b/drivers/mtd/maps/beech-mtd.c
index 5df7361d14079..a64b1a5ab3161 100644
--- a/drivers/mtd/maps/beech-mtd.c
+++ b/drivers/mtd/maps/beech-mtd.c
@@ -72,6 +72,8 @@ static struct mtd_partition beech_partitions[2] = {
 static int __init
 init_beech_mtd(void)
 {
+	int err = 0;
+
 	printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR);
 
 	beech_mtd_map.virt = ioremap(PADDR, SIZE);
@@ -86,12 +88,20 @@ init_beech_mtd(void)
 	printk("%s: probing %d-bit flash bus\n", NAME, BUSWIDTH * 8);
 	beech_mtd = do_map_probe("cfi_probe", &beech_mtd_map);
 
-	if (!beech_mtd)
+	if (!beech_mtd) {
+		iounmap((void *) beech_mtd_map.virt);
 		return -ENXIO;
+	}
 
 	beech_mtd->owner = THIS_MODULE;
 
-	return add_mtd_partitions(beech_mtd, beech_partitions, 2);
+	err = add_mtd_partitions(beech_mtd, beech_partitions, 2);
+	if (err) {
+		printk("%s: add_mtd_partitions failed\n", NAME);
+		iounmap((void *) beech_mtd_map.virt);
+	}
+
+	return err;
 }
 
 static void __exit
diff --git a/drivers/mtd/maps/cstm_mips_ixx.c b/drivers/mtd/maps/cstm_mips_ixx.c
index aa56defb94c80..d6bef100d69a2 100644
--- a/drivers/mtd/maps/cstm_mips_ixx.c
+++ b/drivers/mtd/maps/cstm_mips_ixx.c
@@ -171,7 +171,14 @@ int __init init_cstm_mips_ixx(void)
 		cstm_mips_ixx_map[i].phys = cstm_mips_ixx_board_desc[i].window_addr;
 		cstm_mips_ixx_map[i].virt = ioremap(cstm_mips_ixx_board_desc[i].window_addr, cstm_mips_ixx_board_desc[i].window_size);
 		if (!cstm_mips_ixx_map[i].virt) {
+			int j = 0;
 			printk(KERN_WARNING "Failed to ioremap\n");
+			for (j = 0; j < i; j++) {
+				if (cstm_mips_ixx_map[j].virt) {
+					iounmap((void *)cstm_mips_ixx_map[j].virt);
+					cstm_mips_ixx_map[j].virt = 0;
+				}
+			}
 			return -EIO;
 	        }
 		cstm_mips_ixx_map[i].name = cstm_mips_ixx_board_desc[i].name;
@@ -204,8 +211,15 @@ int __init init_cstm_mips_ixx(void)
 	                cstm_mips_ixx_map[i].map_priv_2 = (unsigned long)mymtd;
 		        add_mtd_partitions(mymtd, parts, cstm_mips_ixx_board_desc[i].num_partitions);
 		}
-		else
-	           return -ENXIO;
+		else {
+			for (i = 0; i < PHYSMAP_NUMBER; i++) {
+				if (cstm_mips_ixx_map[i].virt) {
+					iounmap((void *)cstm_mips_ixx_map[i].virt);
+					cstm_mips_ixx_map[i].virt = 0;
+				}
+			}
+			return -ENXIO;
+		}
 	}
 	return 0;
 }
diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c
index 641e1dd8479e4..1488bb92f26fa 100644
--- a/drivers/mtd/maps/ebony.c
+++ b/drivers/mtd/maps/ebony.c
@@ -108,6 +108,7 @@ int __init init_ebony(void)
 					ARRAY_SIZE(ebony_small_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ebony_small_map.virt);
 		return -ENXIO;
 	}
 
@@ -117,6 +118,7 @@ int __init init_ebony(void)
 
 	if (!ebony_large_map.virt) {
 		printk("Failed to ioremap flash\n");
+		iounmap(ebony_small_map.virt);
 		return -EIO;
 	}
 
@@ -129,6 +131,8 @@ int __init init_ebony(void)
 					ARRAY_SIZE(ebony_large_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ebony_small_map.virt);
+		iounmap(ebony_large_map.virt);
 		return -ENXIO;
 	}
 
diff --git a/drivers/mtd/maps/fortunet.c b/drivers/mtd/maps/fortunet.c
index c6bf4e1219ef1..7c50c271651c7 100644
--- a/drivers/mtd/maps/fortunet.c
+++ b/drivers/mtd/maps/fortunet.c
@@ -218,8 +218,11 @@ int __init init_fortunet(void)
 				map_regions[ix].map_info.size);
 			if(!map_regions[ix].map_info.virt)
 			{
+				int j = 0;
 				printk(MTD_FORTUNET_PK "%s flash failed to ioremap!\n",
 					map_regions[ix].map_info.name);
+				for (j = 0 ; j < ix; j++)
+					iounmap(map_regions[j].map_info.virt);
 				return -ENXIO;
 			}
 			simple_map_init(&map_regions[ix].map_info);
diff --git a/drivers/mtd/maps/lasat.c b/drivers/mtd/maps/lasat.c
index 1c13d2dc0cdff..e343763210503 100644
--- a/drivers/mtd/maps/lasat.c
+++ b/drivers/mtd/maps/lasat.c
@@ -79,6 +79,7 @@ static int __init init_lasat(void)
 		return 0;
 	}
 
+	iounmap(lasat_map.virt);
 	return -ENXIO;
 }
 
@@ -89,6 +90,7 @@ static void __exit cleanup_lasat(void)
 		map_destroy(lasat_mtd);
 	}
 	if (lasat_map.virt) {
+		iounmap(lasat_map.virt);
 		lasat_map.virt = 0;
 	}
 }
diff --git a/drivers/mtd/maps/nettel.c b/drivers/mtd/maps/nettel.c
index 0994b5b2e3313..198e840ff6db9 100644
--- a/drivers/mtd/maps/nettel.c
+++ b/drivers/mtd/maps/nettel.c
@@ -277,6 +277,7 @@ int __init nettel_init(void)
 	nettel_amd_map.virt = ioremap_nocache(amdaddr, maxsize);
 	if (!nettel_amd_map.virt) {
 		printk("SNAPGEAR: failed to ioremap() BOOTCS\n");
+		iounmap(nettel_mmcrp);
 		return(-EIO);
 	}
 	simple_map_init(&nettel_amd_map);
@@ -337,7 +338,8 @@ int __init nettel_init(void)
 		nettel_amd_map.virt = NULL;
 #else
 		/* Only AMD flash supported */
-		return(-ENXIO);
+		rc = -ENXIO;
+		goto out_unmap2;
 #endif
 	}
 
@@ -361,14 +363,15 @@ int __init nettel_init(void)
 	nettel_intel_map.virt = ioremap_nocache(intel0addr, maxsize);
 	if (!nettel_intel_map.virt) {
 		printk("SNAPGEAR: failed to ioremap() ROMCS1\n");
-		return(-EIO);
+		rc = -EIO;
+		goto out_unmap2;
 	}
 	simple_map_init(&nettel_intel_map);
 
 	intel_mtd = do_map_probe("cfi_probe", &nettel_intel_map);
 	if (!intel_mtd) {
-		iounmap(nettel_intel_map.virt);
-		return(-ENXIO);
+		rc = -ENXIO;
+		goto out_unmap1;
 	}
 
 	/* Set PAR to the detected size */
@@ -394,13 +397,14 @@ int __init nettel_init(void)
 	nettel_intel_map.virt = ioremap_nocache(intel0addr, maxsize);
 	if (!nettel_intel_map.virt) {
 		printk("SNAPGEAR: failed to ioremap() ROMCS1/2\n");
-		return(-EIO);
+		rc = -EIO;
+		goto out_unmap2;
 	}
 
 	intel_mtd = do_map_probe("cfi_probe", &nettel_intel_map);
 	if (! intel_mtd) {
-		iounmap((void *) nettel_intel_map.virt);
-		return(-ENXIO);
+		rc = -ENXIO;
+		goto out_unmap1;
 	}
 
 	intel1size = intel_mtd->size - intel0size;
@@ -456,6 +460,18 @@ int __init nettel_init(void)
 #endif
 
 	return(rc);
+
+#ifdef CONFIG_MTD_CFI_INTELEXT
+out_unmap1:
+	iounmap((void *) nettel_intel_map.virt);
+#endif
+
+out_unmap2:
+	iounmap(nettel_mmcrp);
+	iounmap(nettel_amd_map.virt);
+
+	return(rc);
+		
 }
 
 /****************************************************************************/
@@ -469,6 +485,10 @@ void __exit nettel_cleanup(void)
 		del_mtd_partitions(amd_mtd);
 		map_destroy(amd_mtd);
 	}
+	if (nettel_mmcrp) {
+		iounmap(nettel_mmcrp);
+		nettel_mmcrp = NULL;
+	}
 	if (nettel_amd_map.virt) {
 		iounmap(nettel_amd_map.virt);
 		nettel_amd_map.virt = NULL;
diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c
index 2f07602ba9408..5522eac8c9808 100644
--- a/drivers/mtd/maps/ocotea.c
+++ b/drivers/mtd/maps/ocotea.c
@@ -97,6 +97,7 @@ int __init init_ocotea(void)
 					ARRAY_SIZE(ocotea_small_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ocotea_small_map.virt);
 		return -ENXIO;
 	}
 
@@ -106,6 +107,7 @@ int __init init_ocotea(void)
 
 	if (!ocotea_large_map.virt) {
 		printk("Failed to ioremap flash\n");
+		iounmap(ocotea_small_map.virt);
 		return -EIO;
 	}
 
@@ -118,6 +120,8 @@ int __init init_ocotea(void)
 					ARRAY_SIZE(ocotea_large_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ocotea_small_map.virt);
+		iounmap(ocotea_large_map.virt);
 		return -ENXIO;
 	}
 
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index c861134cbc48d..995347b1beba5 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -602,6 +602,10 @@ static int pcmciamtd_config(struct pcmcia_device *link)
 	ret = pcmcia_request_configuration(link, &link->conf);
 	if(ret != CS_SUCCESS) {
 		cs_error(link, RequestConfiguration, ret);
+		if (dev->win_base) {
+			iounmap(dev->win_base);
+			dev->win_base = NULL;
+		}
 		return -ENODEV;
 	}
 
diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c
index ec8fdae1dd99c..2257d2b500c00 100644
--- a/drivers/mtd/maps/redwood.c
+++ b/drivers/mtd/maps/redwood.c
@@ -126,6 +126,8 @@ static struct mtd_info *redwood_mtd;
 
 int __init init_redwood_flash(void)
 {
+	int err = 0;
+
 	printk(KERN_NOTICE "redwood: flash mapping: %x at %x\n",
 			WINDOW_SIZE, WINDOW_ADDR);
 
@@ -141,11 +143,18 @@ int __init init_redwood_flash(void)
 
 	if (redwood_mtd) {
 		redwood_mtd->owner = THIS_MODULE;
-		return add_mtd_partitions(redwood_mtd,
+		err = add_mtd_partitions(redwood_mtd,
 				redwood_flash_partitions,
 				NUM_REDWOOD_FLASH_PARTITIONS);
+		if (err) {
+			printk("init_redwood_flash: add_mtd_partitions failed\n");
+			iounmap(redwood_flash_map.virt);
+		}
+		return err;
+
 	}
 
+	iounmap(redwood_flash_map.virt);
 	return -ENXIO;
 }
 
diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c
index 7d0fcf8f4f338..b8c1331b7a04e 100644
--- a/drivers/mtd/maps/sbc8240.c
+++ b/drivers/mtd/maps/sbc8240.c
@@ -156,7 +156,7 @@ int __init init_sbc8240_mtd (void)
 	};
 
 	int devicesfound = 0;
-	int i;
+	int i,j;
 
 	for (i = 0; i < NUM_FLASH_BANKS; i++) {
 		printk (KERN_NOTICE MSG_PREFIX
@@ -166,6 +166,10 @@ int __init init_sbc8240_mtd (void)
 			(unsigned long) ioremap (pt[i].addr, pt[i].size);
 		if (!sbc8240_map[i].map_priv_1) {
 			printk (MSG_PREFIX "failed to ioremap\n");
+			for (j = 0; j < i; j++) {
+				iounmap((void *) sbc8240_map[j].map_priv_1);
+				sbc8240_map[j].map_priv_1 = 0;
+			}
 			return -EIO;
 		}
 		simple_map_init(&sbc8240_mtd[i]);
@@ -175,6 +179,11 @@ int __init init_sbc8240_mtd (void)
 		if (sbc8240_mtd[i]) {
 			sbc8240_mtd[i]->module = THIS_MODULE;
 			devicesfound++;
+		} else {
+			if (sbc8240_map[i].map_priv_1) {
+				iounmap((void *) sbc8240_map[i].map_priv_1);
+				sbc8240_map[i].map_priv_1 = 0;
+			}
 		}
 	}
 
diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c
index ec80eec376bfc..ca932122fb640 100644
--- a/drivers/mtd/maps/walnut.c
+++ b/drivers/mtd/maps/walnut.c
@@ -68,6 +68,7 @@ int __init init_walnut(void)
 
 	if (WALNUT_FLASH_ONBD_N(fpga_brds1)) {
 		printk("The on-board flash is disabled (U79 sw 5)!");
+		iounmap(fpga_status_adr);
 		return -EIO;
 	}
 	if (WALNUT_FLASH_SRAM_SEL(fpga_brds1))
@@ -81,6 +82,7 @@ int __init init_walnut(void)
 
 	if (!walnut_map.virt) {
 		printk("Failed to ioremap flash.\n");
+		iounmap(fpga_status_adr);
 		return -EIO;
 	}
 
@@ -93,9 +95,11 @@ int __init init_walnut(void)
 					ARRAY_SIZE(walnut_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(fpga_status_adr);
 		return -ENXIO;
 	}
 
+	iounmap(fpga_status_adr);
 	return 0;
 }
 
diff --git a/drivers/mtd/nand/edb7312.c b/drivers/mtd/nand/edb7312.c
index 516c0e5e564cb..12017f3c6bd68 100644
--- a/drivers/mtd/nand/edb7312.c
+++ b/drivers/mtd/nand/edb7312.c
@@ -198,6 +198,9 @@ static void __exit ep7312_cleanup(void)
 	/* Release resources, unregister device */
 	nand_release(ap7312_mtd);
 
+	/* Release io resource */
+	iounmap((void *)this->IO_ADDR_R);
+
 	/* Free the MTD device structure */
 	kfree(ep7312_mtd);
 }
diff --git a/drivers/mtd/nand/ppchameleonevb.c b/drivers/mtd/nand/ppchameleonevb.c
index 22fa65c12ab9f..eb7d4d443deb1 100644
--- a/drivers/mtd/nand/ppchameleonevb.c
+++ b/drivers/mtd/nand/ppchameleonevb.c
@@ -276,6 +276,7 @@ static int __init ppchameleonevb_init(void)
 	/* Scan to find existence of the device (it could not be mounted) */
 	if (nand_scan(ppchameleon_mtd, 1)) {
 		iounmap((void *)ppchameleon_fio_base);
+		ppchameleon_fio_base = NULL;
 		kfree(ppchameleon_mtd);
 		goto nand_evb_init;
 	}
@@ -314,6 +315,8 @@ static int __init ppchameleonevb_init(void)
 	ppchameleonevb_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
 	if (!ppchameleonevb_mtd) {
 		printk("Unable to allocate PPChameleonEVB NAND MTD device structure.\n");
+		if (ppchameleon_fio_base)
+			iounmap(ppchameleon_fio_base);
 		return -ENOMEM;
 	}
 
@@ -322,6 +325,8 @@ static int __init ppchameleonevb_init(void)
 	if (!ppchameleonevb_fio_base) {
 		printk("ioremap PPChameleonEVB NAND flash failed\n");
 		kfree(ppchameleonevb_mtd);
+		if (ppchameleon_fio_base)
+			iounmap(ppchameleon_fio_base);
 		return -EIO;
 	}
 
@@ -378,6 +383,8 @@ static int __init ppchameleonevb_init(void)
 	if (nand_scan(ppchameleonevb_mtd, 1)) {
 		iounmap((void *)ppchameleonevb_fio_base);
 		kfree(ppchameleonevb_mtd);
+		if (ppchameleon_fio_base)
+			iounmap(ppchameleon_fio_base);
 		return -ENXIO;
 	}
 #ifdef CONFIG_MTD_PARTITIONS
diff --git a/include/linux/utsrelease.h b/include/linux/utsrelease.h
new file mode 100644
index 0000000000000..7fe1de0ad4c17
--- /dev/null
+++ b/include/linux/utsrelease.h
@@ -0,0 +1 @@
+#define UTS_RELEASE "2.6.18"
-- 
GitLab


From 17c2dae3aaff9b1e5d83996a5f098ad693f3aeca Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 21 Sep 2006 23:16:48 +0200
Subject: [PATCH 0368/1063] [MTD] physmap: add power management support

Implement PM handling for physmap.  Idea from Steven Scholz, patch
by David Anders.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/maps/physmap.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 7799a25a7f2ae..bc7cc71788bc2 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -158,9 +158,42 @@ static int physmap_flash_probe(struct platform_device *dev)
 	return err;
 }
 
+#ifdef CONFIG_PM
+static int physmap_flash_suspend(struct platform_device *dev, pm_message_t state)
+{
+	struct physmap_flash_info *info = platform_get_drvdata(dev);
+	int ret = 0;
+
+	if (info)
+		ret = info->mtd->suspend(info->mtd);
+
+	return ret;
+}
+
+static int physmap_flash_resume(struct platform_device *dev)
+{
+	struct physmap_flash_info *info = platform_get_drvdata(dev);
+	if (info)
+		info->mtd->resume(info->mtd);
+	return 0;
+}
+
+static void physmap_flash_shutdown(struct platform_device *dev)
+{
+	struct physmap_flash_info *info = platform_get_drvdata(dev);
+	if (info && info->mtd->suspend(info->mtd) == 0)
+		info->mtd->resume(info->mtd);
+}
+#endif
+
 static struct platform_driver physmap_flash_driver = {
 	.probe		= physmap_flash_probe,
 	.remove		= physmap_flash_remove,
+#ifdef CONFIG_PM
+	.suspend	= physmap_flash_suspend,
+	.resume		= physmap_flash_resume,
+	.shutdown	= physmap_flash_shutdown,
+#endif
 	.driver		= {
 		.name	= "physmap-flash",
 	},
-- 
GitLab


From 6a545a0d6021a4d759ba6d0c1082d1abf8d64c84 Mon Sep 17 00:00:00 2001
From: Frank Haverkamp <haver@vnet.ibm.com>
Date: Wed, 20 Sep 2006 17:24:52 +0200
Subject: [PATCH 0369/1063] [MTD NAND] Fix typo in ndfc.c causing wrong ECC
 layout

Due to this typo, a wrong ECC layout table is chosen.

Signed-off-by: Frank Haverkamp <haver@vnet.ibm.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/nand/ndfc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c
index e5bd88f2d560a..039c759cfbfc5 100644
--- a/drivers/mtd/nand/ndfc.c
+++ b/drivers/mtd/nand/ndfc.c
@@ -168,7 +168,7 @@ static void ndfc_chip_init(struct ndfc_nand_mtd *mtd)
 	chip->ecc.mode = NAND_ECC_HW;
 	chip->ecc.size = 256;
 	chip->ecc.bytes = 3;
-	chip->ecclayout = mtd->pl_chip->ecclayout;
+	chip->ecclayout = chip->ecc.layout = mtd->pl_chip->ecclayout;
 	mtd->mtd.priv = chip;
 	mtd->mtd.owner = THIS_MODULE;
 }
-- 
GitLab


From e417fcfb857b809e5dabc9b252ad70f090d553d1 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 21 Sep 2006 03:47:48 +0200
Subject: [PATCH 0370/1063] [MTD] Remove iq80310 map driver

The iq80310 mtd map driver depends on ARCH_IQ80310, which isn't
defined anywhere in the tree (as we don't have 80310 support), and
furthermore, everything the driver does can be done with physmap
instead.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/maps/Kconfig   |   8 ---
 drivers/mtd/maps/Makefile  |   1 -
 drivers/mtd/maps/iq80310.c | 118 -------------------------------------
 3 files changed, 127 deletions(-)
 delete mode 100644 drivers/mtd/maps/iq80310.c

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 64d1b6a6c920b..24747bdc3e19f 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -447,14 +447,6 @@ config MTD_DC21285
 	  21285 bridge used with Intel's StrongARM processors. More info at
 	  <http://www.intel.com/design/bridge/docs/21285_documentation.htm>.
 
-config MTD_IQ80310
-	tristate "CFI Flash device mapped on the XScale IQ80310 board"
-	depends on MTD_CFI && ARCH_IQ80310
-	help
-	  This enables access routines for the flash chips on the Intel XScale
-	  IQ80310 evaluation board. If you have one of these boards and would
-	  like to use the flash chips on it, say 'Y'.
-
 config MTD_IXP4XX
 	tristate "CFI Flash device mapped on Intel IXP4xx based systems"
 	depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index ab71f172eb770..191c1928bbeca 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_MTD_CFI_FLAGADM)	+= cfi_flagadm.o
 obj-$(CONFIG_MTD_CSTM_MIPS_IXX)	+= cstm_mips_ixx.o
 obj-$(CONFIG_MTD_DC21285)	+= dc21285.o
 obj-$(CONFIG_MTD_DILNETPC)	+= dilnetpc.o
-obj-$(CONFIG_MTD_IQ80310)	+= iq80310.o
 obj-$(CONFIG_MTD_L440GX)	+= l440gx.o
 obj-$(CONFIG_MTD_AMD76XROM)	+= amd76xrom.o
 obj-$(CONFIG_MTD_ICHXROM)	+= ichxrom.o
diff --git a/drivers/mtd/maps/iq80310.c b/drivers/mtd/maps/iq80310.c
deleted file mode 100644
index 62d9e87d84e28..0000000000000
--- a/drivers/mtd/maps/iq80310.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * $Id: iq80310.c,v 1.21 2005/11/07 11:14:27 gleixner Exp $
- *
- * Mapping for the Intel XScale IQ80310 evaluation board
- *
- * Author:	Nicolas Pitre
- * Copyright:	(C) 2001 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-
-#define WINDOW_ADDR 	0
-#define WINDOW_SIZE 	8*1024*1024
-#define BUSWIDTH 	1
-
-static struct mtd_info *mymtd;
-
-static struct map_info iq80310_map = {
-	.name = "IQ80310 flash",
-	.size = WINDOW_SIZE,
-	.bankwidth = BUSWIDTH,
-	.phys = WINDOW_ADDR
-};
-
-static struct mtd_partition iq80310_partitions[4] = {
-	{
-		.name =		"Firmware",
-		.size =		0x00080000,
-		.offset =	0,
-		.mask_flags =	MTD_WRITEABLE  /* force read-only */
-	},{
-		.name =		"Kernel",
-		.size =		0x000a0000,
-		.offset =	0x00080000,
-	},{
-		.name =		"Filesystem",
-		.size =		0x00600000,
-		.offset =	0x00120000
-	},{
-		.name =		"RedBoot",
-		.size =		0x000e0000,
-		.offset =	0x00720000,
-		.mask_flags =	MTD_WRITEABLE
-	}
-};
-
-static struct mtd_info *mymtd;
-static struct mtd_partition *parsed_parts;
-static const char *probes[] = { "RedBoot", "cmdlinepart", NULL };
-
-static int __init init_iq80310(void)
-{
-	struct mtd_partition *parts;
-	int nb_parts = 0;
-	int parsed_nr_parts = 0;
-	int ret;
-
-	iq80310_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE);
-	if (!iq80310_map.virt) {
-		printk("Failed to ioremap\n");
-		return -EIO;
-	}
-	simple_map_init(&iq80310_map);
-
-	mymtd = do_map_probe("cfi_probe", &iq80310_map);
-	if (!mymtd) {
-		iounmap((void *)iq80310_map.virt);
-		return -ENXIO;
-	}
-	mymtd->owner = THIS_MODULE;
-
-	ret = parse_mtd_partitions(mymtd, probes, &parsed_parts, 0);
-
-	if (ret > 0)
-		parsed_nr_parts = ret;
-
-	if (parsed_nr_parts > 0) {
-		parts = parsed_parts;
-		nb_parts = parsed_nr_parts;
-	} else {
-		parts = iq80310_partitions;
-		nb_parts = ARRAY_SIZE(iq80310_partitions);
-	}
-	add_mtd_partitions(mymtd, parts, nb_parts);
-	return 0;
-}
-
-static void __exit cleanup_iq80310(void)
-{
-	if (mymtd) {
-		del_mtd_partitions(mymtd);
-		map_destroy(mymtd);
-		kfree(parsed_parts);
-	}
-	if (iq80310_map.virt)
-		iounmap((void *)iq80310_map.virt);
-}
-
-module_init(init_iq80310);
-module_exit(cleanup_iq80310);
-
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Nicolas Pitre <nico@cam.org>");
-MODULE_DESCRIPTION("MTD map driver for Intel XScale IQ80310 evaluation board");
-- 
GitLab


From cdf0a7d16980858e72f5d26bfe48abf01112fab5 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Tue, 19 Sep 2006 21:55:06 +0200
Subject: [PATCH 0371/1063] [MTD] pmc551 whitespace cleanup

Spaces were used for indentation, and some lines exceeded 80 columns. Get
rid of that stuff.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/devices/pmc551.c | 1163 +++++++++++++++++-----------------
 1 file changed, 596 insertions(+), 567 deletions(-)

diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index a03a8a79e5c55..4d4023601d22a 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -4,82 +4,82 @@
  * PMC551 PCI Mezzanine Ram Device
  *
  * Author:
- *       Mark Ferrell <mferrell@mvista.com>
- *       Copyright 1999,2000 Nortel Networks
+ *	Mark Ferrell <mferrell@mvista.com>
+ *	Copyright 1999,2000 Nortel Networks
  *
  * License:
- *	 As part of this driver was derived from the slram.c driver it
- *	 falls under the same license, which is GNU General Public
- *	 License v2
+ *	As part of this driver was derived from the slram.c driver it
+ *	falls under the same license, which is GNU General Public
+ *	License v2
  *
  * Description:
- *	 This driver is intended to support the PMC551 PCI Ram device
- *	 from Ramix Inc.  The PMC551 is a PMC Mezzanine module for
- *	 cPCI embedded systems.  The device contains a single SROM
- *	 that initially programs the V370PDC chipset onboard the
- *	 device, and various banks of DRAM/SDRAM onboard.  This driver
- *	 implements this PCI Ram device as an MTD (Memory Technology
- *	 Device) so that it can be used to hold a file system, or for
- *	 added swap space in embedded systems.  Since the memory on
- *	 this board isn't as fast as main memory we do not try to hook
- *	 it into main memory as that would simply reduce performance
- *	 on the system.  Using it as a block device allows us to use
- *	 it as high speed swap or for a high speed disk device of some
- *	 sort.  Which becomes very useful on diskless systems in the
- *	 embedded market I might add.
+ *	This driver is intended to support the PMC551 PCI Ram device
+ *	from Ramix Inc.  The PMC551 is a PMC Mezzanine module for
+ *	cPCI embedded systems.  The device contains a single SROM
+ *	that initially programs the V370PDC chipset onboard the
+ *	device, and various banks of DRAM/SDRAM onboard.  This driver
+ *	implements this PCI Ram device as an MTD (Memory Technology
+ *	Device) so that it can be used to hold a file system, or for
+ *	added swap space in embedded systems.  Since the memory on
+ *	this board isn't as fast as main memory we do not try to hook
+ *	it into main memory as that would simply reduce performance
+ *	on the system.  Using it as a block device allows us to use
+ *	it as high speed swap or for a high speed disk device of some
+ *	sort.  Which becomes very useful on diskless systems in the
+ *	embedded market I might add.
  *
  * Notes:
- *	 Due to what I assume is more buggy SROM, the 64M PMC551 I
- *	 have available claims that all 4 of it's DRAM banks have 64M
- *	 of ram configured (making a grand total of 256M onboard).
- *	 This is slightly annoying since the BAR0 size reflects the
- *	 aperture size, not the dram size, and the V370PDC supplies no
- *	 other method for memory size discovery.  This problem is
- *	 mostly only relevant when compiled as a module, as the
- *	 unloading of the module with an aperture size smaller then
- *	 the ram will cause the driver to detect the onboard memory
- *	 size to be equal to the aperture size when the module is
- *	 reloaded.  Soooo, to help, the module supports an msize
- *	 option to allow the specification of the onboard memory, and
- *	 an asize option, to allow the specification of the aperture
- *	 size.  The aperture must be equal to or less then the memory
- *	 size, the driver will correct this if you screw it up.  This
- *	 problem is not relevant for compiled in drivers as compiled
- *	 in drivers only init once.
+ *	Due to what I assume is more buggy SROM, the 64M PMC551 I
+ *	have available claims that all 4 of it's DRAM banks have 64M
+ *	of ram configured (making a grand total of 256M onboard).
+ *	This is slightly annoying since the BAR0 size reflects the
+ *	aperture size, not the dram size, and the V370PDC supplies no
+ *	other method for memory size discovery.  This problem is
+ *	mostly only relevant when compiled as a module, as the
+ *	unloading of the module with an aperture size smaller then
+ *	the ram will cause the driver to detect the onboard memory
+ *	size to be equal to the aperture size when the module is
+ *	reloaded.  Soooo, to help, the module supports an msize
+ *	option to allow the specification of the onboard memory, and
+ *	an asize option, to allow the specification of the aperture
+ *	size.  The aperture must be equal to or less then the memory
+ *	size, the driver will correct this if you screw it up.  This
+ *	problem is not relevant for compiled in drivers as compiled
+ *	in drivers only init once.
  *
  * Credits:
- *       Saeed Karamooz <saeed@ramix.com> of Ramix INC. for the
- *       initial example code of how to initialize this device and for
- *       help with questions I had concerning operation of the device.
+ *	Saeed Karamooz <saeed@ramix.com> of Ramix INC. for the
+ *	initial example code of how to initialize this device and for
+ *	help with questions I had concerning operation of the device.
  *
- *       Most of the MTD code for this driver was originally written
- *       for the slram.o module in the MTD drivers package which
- *       allows the mapping of system memory into an MTD device.
- *       Since the PMC551 memory module is accessed in the same
- *       fashion as system memory, the slram.c code became a very nice
- *       fit to the needs of this driver.  All we added was PCI
- *       detection/initialization to the driver and automatically figure
- *       out the size via the PCI detection.o, later changes by Corey
- *       Minyard set up the card to utilize a 1M sliding apature.
+ *	Most of the MTD code for this driver was originally written
+ *	for the slram.o module in the MTD drivers package which
+ *	allows the mapping of system memory into an MTD device.
+ *	Since the PMC551 memory module is accessed in the same
+ *	fashion as system memory, the slram.c code became a very nice
+ *	fit to the needs of this driver.  All we added was PCI
+ *	detection/initialization to the driver and automatically figure
+ *	out the size via the PCI detection.o, later changes by Corey
+ *	Minyard set up the card to utilize a 1M sliding apature.
  *
- *	 Corey Minyard <minyard@nortelnetworks.com>
- *       * Modified driver to utilize a sliding aperture instead of
- *         mapping all memory into kernel space which turned out to
- *         be very wasteful.
- *       * Located a bug in the SROM's initialization sequence that
- *         made the memory unusable, added a fix to code to touch up
- *         the DRAM some.
+ *	Corey Minyard <minyard@nortelnetworks.com>
+ *	* Modified driver to utilize a sliding aperture instead of
+ *	 mapping all memory into kernel space which turned out to
+ *	 be very wasteful.
+ *	* Located a bug in the SROM's initialization sequence that
+ *	 made the memory unusable, added a fix to code to touch up
+ *	 the DRAM some.
  *
  * Bugs/FIXME's:
- *       * MUST fix the init function to not spin on a register
- *       waiting for it to set .. this does not safely handle busted
- *       devices that never reset the register correctly which will
- *       cause the system to hang w/ a reboot being the only chance at
- *       recover. [sort of fixed, could be better]
- *       * Add I2C handling of the SROM so we can read the SROM's information
- *       about the aperture size.  This should always accurately reflect the
- *       onboard memory size.
- *       * Comb the init routine.  It's still a bit cludgy on a few things.
+ *	* MUST fix the init function to not spin on a register
+ *	waiting for it to set .. this does not safely handle busted
+ *	devices that never reset the register correctly which will
+ *	cause the system to hang w/ a reboot being the only chance at
+ *	recover. [sort of fixed, could be better]
+ *	* Add I2C handling of the SROM so we can read the SROM's information
+ *	about the aperture size.  This should always accurately reflect the
+ *	onboard memory size.
+ *	* Comb the init routine.  It's still a bit cludgy on a few things.
  */
 
 #include <linux/kernel.h>
@@ -105,74 +105,77 @@
 
 static struct mtd_info *pmc551list;
 
-static int pmc551_erase (struct mtd_info *mtd, struct erase_info *instr)
+static int pmc551_erase(struct mtd_info *mtd, struct erase_info *instr)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi, soff_lo; /* start address offset hi/lo */
-        u32 eoff_hi, eoff_lo; /* end address offset hi/lo */
-        unsigned long end;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi, soff_lo;	/* start address offset hi/lo */
+	u32 eoff_hi, eoff_lo;	/* end address offset hi/lo */
+	unsigned long end;
 	u_char *ptr;
 	size_t retlen;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_erase(pos:%ld, len:%ld)\n", (long)instr->addr, (long)instr->len);
+	printk(KERN_DEBUG "pmc551_erase(pos:%ld, len:%ld)\n", (long)instr->addr,
+		(long)instr->len);
 #endif
 
-        end = instr->addr + instr->len - 1;
+	end = instr->addr + instr->len - 1;
 
-        /* Is it past the end? */
-        if ( end > mtd->size ) {
+	/* Is it past the end? */
+	if (end > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_erase() out of bounds (%ld > %ld)\n", (long)end, (long)mtd->size);
+		printk(KERN_DEBUG "pmc551_erase() out of bounds (%ld > %ld)\n",
+			(long)end, (long)mtd->size);
 #endif
-                return -EINVAL;
-        }
-
-        eoff_hi = end & ~(priv->asize - 1);
-        soff_hi = instr->addr & ~(priv->asize - 1);
-        eoff_lo = end & (priv->asize - 1);
-        soff_lo = instr->addr & (priv->asize - 1);
-
-	pmc551_point (mtd, instr->addr, instr->len, &retlen, &ptr);
-
-        if ( soff_hi == eoff_hi || mtd->size == priv->asize) {
-                /* The whole thing fits within one access, so just one shot
-                   will do it. */
-                memset(ptr, 0xff, instr->len);
-        } else {
-                /* We have to do multiple writes to get all the data
-                   written. */
-                while (soff_hi != eoff_hi) {
+		return -EINVAL;
+	}
+
+	eoff_hi = end & ~(priv->asize - 1);
+	soff_hi = instr->addr & ~(priv->asize - 1);
+	eoff_lo = end & (priv->asize - 1);
+	soff_lo = instr->addr & (priv->asize - 1);
+
+	pmc551_point(mtd, instr->addr, instr->len, &retlen, &ptr);
+
+	if (soff_hi == eoff_hi || mtd->size == priv->asize) {
+		/* The whole thing fits within one access, so just one shot
+		   will do it. */
+		memset(ptr, 0xff, instr->len);
+	} else {
+		/* We have to do multiple writes to get all the data
+		   written. */
+		while (soff_hi != eoff_hi) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-			printk( KERN_DEBUG "pmc551_erase() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
+			printk(KERN_DEBUG "pmc551_erase() soff_hi: %ld, "
+				"eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
 #endif
-                        memset(ptr, 0xff, priv->asize);
-                        if (soff_hi + priv->asize >= mtd->size) {
-                                goto out;
-                        }
-                        soff_hi += priv->asize;
-			pmc551_point (mtd,(priv->base_map0|soff_hi),
-				      priv->asize, &retlen, &ptr);
-                }
-                memset (ptr, 0xff, eoff_lo);
-        }
-
-out:
+			memset(ptr, 0xff, priv->asize);
+			if (soff_hi + priv->asize >= mtd->size) {
+				goto out;
+			}
+			soff_hi += priv->asize;
+			pmc551_point(mtd, (priv->base_map0 | soff_hi),
+				     priv->asize, &retlen, &ptr);
+		}
+		memset(ptr, 0xff, eoff_lo);
+	}
+
+      out:
 	instr->state = MTD_ERASE_DONE;
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_erase() done\n");
 #endif
 
-        mtd_erase_callback(instr);
-        return 0;
+	mtd_erase_callback(instr);
+	return 0;
 }
 
-
-static int pmc551_point (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char **mtdbuf)
+static int pmc551_point(struct mtd_info *mtd, loff_t from, size_t len,
+			size_t * retlen, u_char ** mtdbuf)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi;
-        u32 soff_lo;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi;
+	u32 soff_lo;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_point(%ld, %ld)\n", (long)from, (long)len);
@@ -180,18 +183,19 @@ static int pmc551_point (struct mtd_info *mtd, loff_t from, size_t len, size_t *
 
 	if (from + len > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-		printk(KERN_DEBUG "pmc551_point() out of bounds (%ld > %ld)\n", (long)from+len, (long)mtd->size);
+		printk(KERN_DEBUG "pmc551_point() out of bounds (%ld > %ld)\n",
+			(long)from + len, (long)mtd->size);
 #endif
 		return -EINVAL;
 	}
 
-        soff_hi = from & ~(priv->asize - 1);
-        soff_lo = from & (priv->asize - 1);
+	soff_hi = from & ~(priv->asize - 1);
+	soff_lo = from & (priv->asize - 1);
 
 	/* Cheap hack optimization */
-	if( priv->curr_map0 != from ) {
-        	pci_write_config_dword ( priv->dev, PMC551_PCI_MEM_MAP0,
-                                 	(priv->base_map0 | soff_hi) );
+	if (priv->curr_map0 != from) {
+		pci_write_config_dword(priv->dev, PMC551_PCI_MEM_MAP0,
+					(priv->base_map0 | soff_hi));
 		priv->curr_map0 = soff_hi;
 	}
 
@@ -200,137 +204,144 @@ static int pmc551_point (struct mtd_info *mtd, loff_t from, size_t len, size_t *
 	return 0;
 }
 
-
-static void pmc551_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from, size_t len)
+static void pmc551_unpoint(struct mtd_info *mtd, u_char * addr, loff_t from,
+			   size_t len)
 {
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_unpoint()\n");
 #endif
 }
 
-
-static int pmc551_read (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf)
+static int pmc551_read(struct mtd_info *mtd, loff_t from, size_t len,
+			size_t * retlen, u_char * buf)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi, soff_lo; /* start address offset hi/lo */
-        u32 eoff_hi, eoff_lo; /* end address offset hi/lo */
-        unsigned long end;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi, soff_lo;	/* start address offset hi/lo */
+	u32 eoff_hi, eoff_lo;	/* end address offset hi/lo */
+	unsigned long end;
 	u_char *ptr;
-        u_char *copyto = buf;
+	u_char *copyto = buf;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_read(pos:%ld, len:%ld) asize: %ld\n", (long)from, (long)len, (long)priv->asize);
+	printk(KERN_DEBUG "pmc551_read(pos:%ld, len:%ld) asize: %ld\n",
+		(long)from, (long)len, (long)priv->asize);
 #endif
 
-        end = from + len - 1;
+	end = from + len - 1;
 
-        /* Is it past the end? */
-        if (end > mtd->size) {
+	/* Is it past the end? */
+	if (end > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_read() out of bounds (%ld > %ld)\n", (long) end, (long)mtd->size);
+		printk(KERN_DEBUG "pmc551_read() out of bounds (%ld > %ld)\n",
+			(long)end, (long)mtd->size);
 #endif
-                return -EINVAL;
-        }
-
-        soff_hi = from & ~(priv->asize - 1);
-        eoff_hi = end & ~(priv->asize - 1);
-        soff_lo = from & (priv->asize - 1);
-        eoff_lo = end & (priv->asize - 1);
-
-	pmc551_point (mtd, from, len, retlen, &ptr);
-
-        if (soff_hi == eoff_hi) {
-                /* The whole thing fits within one access, so just one shot
-                   will do it. */
-                memcpy(copyto, ptr, len);
-                copyto += len;
-        } else {
-                /* We have to do multiple writes to get all the data
-                   written. */
-                while (soff_hi != eoff_hi) {
+		return -EINVAL;
+	}
+
+	soff_hi = from & ~(priv->asize - 1);
+	eoff_hi = end & ~(priv->asize - 1);
+	soff_lo = from & (priv->asize - 1);
+	eoff_lo = end & (priv->asize - 1);
+
+	pmc551_point(mtd, from, len, retlen, &ptr);
+
+	if (soff_hi == eoff_hi) {
+		/* The whole thing fits within one access, so just one shot
+		   will do it. */
+		memcpy(copyto, ptr, len);
+		copyto += len;
+	} else {
+		/* We have to do multiple writes to get all the data
+		   written. */
+		while (soff_hi != eoff_hi) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-			printk( KERN_DEBUG "pmc551_read() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
+			printk(KERN_DEBUG "pmc551_read() soff_hi: %ld, "
+				"eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
 #endif
-                        memcpy(copyto, ptr, priv->asize);
-                        copyto += priv->asize;
-                        if (soff_hi + priv->asize >= mtd->size) {
-                                goto out;
-                        }
-                        soff_hi += priv->asize;
-			pmc551_point (mtd, soff_hi, priv->asize, retlen, &ptr);
-                }
-                memcpy(copyto, ptr, eoff_lo);
-                copyto += eoff_lo;
-        }
-
-out:
+			memcpy(copyto, ptr, priv->asize);
+			copyto += priv->asize;
+			if (soff_hi + priv->asize >= mtd->size) {
+				goto out;
+			}
+			soff_hi += priv->asize;
+			pmc551_point(mtd, soff_hi, priv->asize, retlen, &ptr);
+		}
+		memcpy(copyto, ptr, eoff_lo);
+		copyto += eoff_lo;
+	}
+
+      out:
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_read() done\n");
 #endif
-        *retlen = copyto - buf;
-        return 0;
+	*retlen = copyto - buf;
+	return 0;
 }
 
-static int pmc551_write (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf)
+static int pmc551_write(struct mtd_info *mtd, loff_t to, size_t len,
+			size_t * retlen, const u_char * buf)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi, soff_lo; /* start address offset hi/lo */
-        u32 eoff_hi, eoff_lo; /* end address offset hi/lo */
-        unsigned long end;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi, soff_lo;	/* start address offset hi/lo */
+	u32 eoff_hi, eoff_lo;	/* end address offset hi/lo */
+	unsigned long end;
 	u_char *ptr;
-        const u_char *copyfrom = buf;
-
+	const u_char *copyfrom = buf;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_write(pos:%ld, len:%ld) asize:%ld\n", (long)to, (long)len, (long)priv->asize);
+	printk(KERN_DEBUG "pmc551_write(pos:%ld, len:%ld) asize:%ld\n",
+		(long)to, (long)len, (long)priv->asize);
 #endif
 
-        end = to + len - 1;
-        /* Is it past the end?  or did the u32 wrap? */
-        if (end > mtd->size ) {
+	end = to + len - 1;
+	/* Is it past the end?  or did the u32 wrap? */
+	if (end > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_write() out of bounds (end: %ld, size: %ld, to: %ld)\n", (long) end, (long)mtd->size, (long)to);
+		printk(KERN_DEBUG "pmc551_write() out of bounds (end: %ld, "
+			"size: %ld, to: %ld)\n", (long)end, (long)mtd->size,
+			(long)to);
 #endif
-                return -EINVAL;
-        }
-
-        soff_hi = to & ~(priv->asize - 1);
-        eoff_hi = end & ~(priv->asize - 1);
-        soff_lo = to & (priv->asize - 1);
-        eoff_lo = end & (priv->asize - 1);
-
-	pmc551_point (mtd, to, len, retlen, &ptr);
-
-        if (soff_hi == eoff_hi) {
-                /* The whole thing fits within one access, so just one shot
-                   will do it. */
-                memcpy(ptr, copyfrom, len);
-                copyfrom += len;
-        } else {
-                /* We have to do multiple writes to get all the data
-                   written. */
-                while (soff_hi != eoff_hi) {
+		return -EINVAL;
+	}
+
+	soff_hi = to & ~(priv->asize - 1);
+	eoff_hi = end & ~(priv->asize - 1);
+	soff_lo = to & (priv->asize - 1);
+	eoff_lo = end & (priv->asize - 1);
+
+	pmc551_point(mtd, to, len, retlen, &ptr);
+
+	if (soff_hi == eoff_hi) {
+		/* The whole thing fits within one access, so just one shot
+		   will do it. */
+		memcpy(ptr, copyfrom, len);
+		copyfrom += len;
+	} else {
+		/* We have to do multiple writes to get all the data
+		   written. */
+		while (soff_hi != eoff_hi) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-			printk( KERN_DEBUG "pmc551_write() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
+			printk(KERN_DEBUG "pmc551_write() soff_hi: %ld, "
+				"eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
 #endif
-                	memcpy(ptr, copyfrom, priv->asize);
-                	copyfrom += priv->asize;
-                        if (soff_hi >= mtd->size) {
-                                goto out;
-                        }
-                        soff_hi += priv->asize;
-			pmc551_point (mtd, soff_hi, priv->asize, retlen, &ptr);
-                }
-                memcpy(ptr, copyfrom, eoff_lo);
-                copyfrom += eoff_lo;
-        }
-
-out:
+			memcpy(ptr, copyfrom, priv->asize);
+			copyfrom += priv->asize;
+			if (soff_hi >= mtd->size) {
+				goto out;
+			}
+			soff_hi += priv->asize;
+			pmc551_point(mtd, soff_hi, priv->asize, retlen, &ptr);
+		}
+		memcpy(ptr, copyfrom, eoff_lo);
+		copyfrom += eoff_lo;
+	}
+
+      out:
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_write() done\n");
 #endif
-        *retlen = copyfrom - buf;
-        return 0;
+	*retlen = copyfrom - buf;
+	return 0;
 }
 
 /*
@@ -345,58 +356,58 @@ static int pmc551_write (struct mtd_info *mtd, loff_t to, size_t len, size_t *re
  * mechanism
  * returns the size of the memory region found.
  */
-static u32 fixup_pmc551 (struct pci_dev *dev)
+static u32 fixup_pmc551(struct pci_dev *dev)
 {
 #ifdef CONFIG_MTD_PMC551_BUGFIX
-        u32 dram_data;
+	u32 dram_data;
 #endif
-        u32 size, dcmd, cfg, dtmp;
-        u16 cmd, tmp, i;
+	u32 size, dcmd, cfg, dtmp;
+	u16 cmd, tmp, i;
 	u8 bcmd, counter;
 
-        /* Sanity Check */
-        if(!dev) {
-                return -ENODEV;
-        }
+	/* Sanity Check */
+	if (!dev) {
+		return -ENODEV;
+	}
 
 	/*
 	 * Attempt to reset the card
 	 * FIXME: Stop Spinning registers
 	 */
-	counter=0;
+	counter = 0;
 	/* unlock registers */
-	pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, 0xA5 );
+	pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, 0xA5);
 	/* read in old data */
-	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd );
+	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd);
 	/* bang the reset line up and down for a few */
-	for(i=0;i<10;i++) {
-		counter=0;
+	for (i = 0; i < 10; i++) {
+		counter = 0;
 		bcmd &= ~0x80;
-		while(counter++ < 100) {
+		while (counter++ < 100) {
 			pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd);
 		}
-		counter=0;
+		counter = 0;
 		bcmd |= 0x80;
-		while(counter++ < 100) {
+		while (counter++ < 100) {
 			pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd);
 		}
 	}
-	bcmd |= (0x40|0x20);
+	bcmd |= (0x40 | 0x20);
 	pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd);
 
-        /*
+	/*
 	 * Take care and turn off the memory on the device while we
 	 * tweak the configurations
 	 */
-        pci_read_config_word(dev, PCI_COMMAND, &cmd);
-        tmp = cmd & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY);
-        pci_write_config_word(dev, PCI_COMMAND, tmp);
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	tmp = cmd & ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	pci_write_config_word(dev, PCI_COMMAND, tmp);
 
 	/*
 	 * Disable existing aperture before probing memory size
 	 */
 	pci_read_config_dword(dev, PMC551_PCI_MEM_MAP0, &dcmd);
-        dtmp=(dcmd|PMC551_PCI_MEM_MAP_ENABLE|PMC551_PCI_MEM_MAP_REG_EN);
+	dtmp = (dcmd | PMC551_PCI_MEM_MAP_ENABLE | PMC551_PCI_MEM_MAP_REG_EN);
 	pci_write_config_dword(dev, PMC551_PCI_MEM_MAP0, dtmp);
 	/*
 	 * Grab old BAR0 config so that we can figure out memory size
@@ -407,220 +418,231 @@ static u32 fixup_pmc551 (struct pci_dev *dev)
 	 * then write all 1's to the memory space, read back the result into
 	 * "size", and then write back all the old config.
 	 */
-	pci_read_config_dword( dev, PCI_BASE_ADDRESS_0, &cfg );
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &cfg);
 #ifndef CONFIG_MTD_PMC551_BUGFIX
-	pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, ~0 );
-	pci_read_config_dword( dev, PCI_BASE_ADDRESS_0, &size );
-	size = (size&PCI_BASE_ADDRESS_MEM_MASK);
-	size &= ~(size-1);
-	pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg );
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, ~0);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &size);
+	size = (size & PCI_BASE_ADDRESS_MEM_MASK);
+	size &= ~(size - 1);
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, cfg);
 #else
-        /*
-         * Get the size of the memory by reading all the DRAM size values
-         * and adding them up.
-         *
-         * KLUDGE ALERT: the boards we are using have invalid column and
-         * row mux values.  We fix them here, but this will break other
-         * memory configurations.
-         */
-        pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dram_data);
-        size = PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK0, dram_data);
-
-        pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dram_data);
-        size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK1, dram_data);
-
-        pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dram_data);
-        size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK2, dram_data);
-
-        pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dram_data);
-        size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK3, dram_data);
-
-        /*
-         * Oops .. something went wrong
-         */
-        if( (size &= PCI_BASE_ADDRESS_MEM_MASK) == 0) {
-                return -ENODEV;
-        }
-#endif /* CONFIG_MTD_PMC551_BUGFIX */
-
-	if ((cfg&PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) {
-                return -ENODEV;
+	/*
+	 * Get the size of the memory by reading all the DRAM size values
+	 * and adding them up.
+	 *
+	 * KLUDGE ALERT: the boards we are using have invalid column and
+	 * row mux values.  We fix them here, but this will break other
+	 * memory configurations.
+	 */
+	pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dram_data);
+	size = PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK0, dram_data);
+
+	pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dram_data);
+	size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK1, dram_data);
+
+	pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dram_data);
+	size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK2, dram_data);
+
+	pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dram_data);
+	size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK3, dram_data);
+
+	/*
+	 * Oops .. something went wrong
+	 */
+	if ((size &= PCI_BASE_ADDRESS_MEM_MASK) == 0) {
+		return -ENODEV;
 	}
+#endif				/* CONFIG_MTD_PMC551_BUGFIX */
 
-        /*
-         * Precharge Dram
-         */
-        pci_write_config_word( dev, PMC551_SDRAM_MA, 0x0400 );
-        pci_write_config_word( dev, PMC551_SDRAM_CMD, 0x00bf );
-
-        /*
-         * Wait until command has gone through
-         * FIXME: register spinning issue
-         */
-        do {	pci_read_config_word( dev, PMC551_SDRAM_CMD, &cmd );
-		if(counter++ > 100)break;
-        } while ( (PCI_COMMAND_IO) & cmd );
-
-        /*
+	if ((cfg & PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) {
+		return -ENODEV;
+	}
+
+	/*
+	 * Precharge Dram
+	 */
+	pci_write_config_word(dev, PMC551_SDRAM_MA, 0x0400);
+	pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x00bf);
+
+	/*
+	 * Wait until command has gone through
+	 * FIXME: register spinning issue
+	 */
+	do {
+		pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
+		if (counter++ > 100)
+			break;
+	} while ((PCI_COMMAND_IO) & cmd);
+
+	/*
 	 * Turn on auto refresh
 	 * The loop is taken directly from Ramix's example code.  I assume that
 	 * this must be held high for some duration of time, but I can find no
 	 * documentation refrencing the reasons why.
-         */
-        for ( i = 1; i<=8 ; i++) {
-                pci_write_config_word (dev, PMC551_SDRAM_CMD, 0x0df);
-
-                /*
-                 * Make certain command has gone through
-                 * FIXME: register spinning issue
-                 */
-		counter=0;
-                do {	pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
-			if(counter++ > 100)break;
-                } while ( (PCI_COMMAND_IO) & cmd );
-        }
-
-        pci_write_config_word ( dev, PMC551_SDRAM_MA, 0x0020);
-        pci_write_config_word ( dev, PMC551_SDRAM_CMD, 0x0ff);
-
-        /*
-         * Wait until command completes
-         * FIXME: register spinning issue
-         */
-	counter=0;
-        do {	pci_read_config_word ( dev, PMC551_SDRAM_CMD, &cmd);
-		if(counter++ > 100)break;
-        } while ( (PCI_COMMAND_IO) & cmd );
-
-        pci_read_config_dword ( dev, PMC551_DRAM_CFG, &dcmd);
-        dcmd |= 0x02000000;
-        pci_write_config_dword ( dev, PMC551_DRAM_CFG, dcmd);
-
-        /*
-         * Check to make certain fast back-to-back, if not
-         * then set it so
-         */
-        pci_read_config_word( dev, PCI_STATUS, &cmd);
-        if((cmd&PCI_COMMAND_FAST_BACK) == 0) {
-                cmd |= PCI_COMMAND_FAST_BACK;
-                pci_write_config_word( dev, PCI_STATUS, cmd);
-        }
-
-        /*
-         * Check to make certain the DEVSEL is set correctly, this device
-         * has a tendancy to assert DEVSEL and TRDY when a write is performed
-         * to the memory when memory is read-only
-         */
-        if((cmd&PCI_STATUS_DEVSEL_MASK) != 0x0) {
-                cmd &= ~PCI_STATUS_DEVSEL_MASK;
-                pci_write_config_word( dev, PCI_STATUS, cmd );
-        }
-        /*
-         * Set to be prefetchable and put everything back based on old cfg.
+	 */
+	for (i = 1; i <= 8; i++) {
+		pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x0df);
+
+		/*
+		 * Make certain command has gone through
+		 * FIXME: register spinning issue
+		 */
+		counter = 0;
+		do {
+			pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
+			if (counter++ > 100)
+				break;
+		} while ((PCI_COMMAND_IO) & cmd);
+	}
+
+	pci_write_config_word(dev, PMC551_SDRAM_MA, 0x0020);
+	pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x0ff);
+
+	/*
+	 * Wait until command completes
+	 * FIXME: register spinning issue
+	 */
+	counter = 0;
+	do {
+		pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
+		if (counter++ > 100)
+			break;
+	} while ((PCI_COMMAND_IO) & cmd);
+
+	pci_read_config_dword(dev, PMC551_DRAM_CFG, &dcmd);
+	dcmd |= 0x02000000;
+	pci_write_config_dword(dev, PMC551_DRAM_CFG, dcmd);
+
+	/*
+	 * Check to make certain fast back-to-back, if not
+	 * then set it so
+	 */
+	pci_read_config_word(dev, PCI_STATUS, &cmd);
+	if ((cmd & PCI_COMMAND_FAST_BACK) == 0) {
+		cmd |= PCI_COMMAND_FAST_BACK;
+		pci_write_config_word(dev, PCI_STATUS, cmd);
+	}
+
+	/*
+	 * Check to make certain the DEVSEL is set correctly, this device
+	 * has a tendancy to assert DEVSEL and TRDY when a write is performed
+	 * to the memory when memory is read-only
+	 */
+	if ((cmd & PCI_STATUS_DEVSEL_MASK) != 0x0) {
+		cmd &= ~PCI_STATUS_DEVSEL_MASK;
+		pci_write_config_word(dev, PCI_STATUS, cmd);
+	}
+	/*
+	 * Set to be prefetchable and put everything back based on old cfg.
 	 * it's possible that the reset of the V370PDC nuked the original
 	 * setup
-         */
+	 */
 	/*
-        cfg |= PCI_BASE_ADDRESS_MEM_PREFETCH;
-	pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg );
-	*/
-
-        /*
-         * Turn PCI memory and I/O bus access back on
-         */
-        pci_write_config_word( dev, PCI_COMMAND,
-                               PCI_COMMAND_MEMORY | PCI_COMMAND_IO );
+	   cfg |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+	   pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg );
+	 */
+
+	/*
+	 * Turn PCI memory and I/O bus access back on
+	 */
+	pci_write_config_word(dev, PCI_COMMAND,
+			      PCI_COMMAND_MEMORY | PCI_COMMAND_IO);
 #ifdef CONFIG_MTD_PMC551_DEBUG
-        /*
-         * Some screen fun
-         */
-        printk(KERN_DEBUG "pmc551: %d%c (0x%x) of %sprefetchable memory at 0x%llx\n",
-	       (size<1024)?size:(size<1048576)?size>>10:size>>20,
-               (size<1024)?'B':(size<1048576)?'K':'M',
-	       size, ((dcmd&(0x1<<3)) == 0)?"non-":"",
-               (unsigned long long)((dev->resource[0].start)&PCI_BASE_ADDRESS_MEM_MASK));
-
-        /*
-         * Check to see the state of the memory
-         */
-        pci_read_config_dword( dev, PMC551_DRAM_BLK0, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK0 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK0 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK0 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
-
-        pci_read_config_dword( dev, PMC551_DRAM_BLK1, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK1 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK1 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK1 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
-
-        pci_read_config_dword( dev, PMC551_DRAM_BLK2, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK2 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK2 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK2 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
-
-        pci_read_config_dword( dev, PMC551_DRAM_BLK3, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK3 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK3 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK3 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
-
-        pci_read_config_word( dev, PCI_COMMAND, &cmd );
-        printk( KERN_DEBUG "pmc551: Memory Access %s\n",
-                (((0x1<<1)&cmd) == 0)?"off":"on" );
-        printk( KERN_DEBUG "pmc551: I/O Access %s\n",
-                (((0x1<<0)&cmd) == 0)?"off":"on" );
-
-        pci_read_config_word( dev, PCI_STATUS, &cmd );
-        printk( KERN_DEBUG "pmc551: Devsel %s\n",
-                ((PCI_STATUS_DEVSEL_MASK&cmd)==0x000)?"Fast":
-                ((PCI_STATUS_DEVSEL_MASK&cmd)==0x200)?"Medium":
-                ((PCI_STATUS_DEVSEL_MASK&cmd)==0x400)?"Slow":"Invalid" );
-
-        printk( KERN_DEBUG "pmc551: %sFast Back-to-Back\n",
-                ((PCI_COMMAND_FAST_BACK&cmd) == 0)?"Not ":"" );
-
-	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd );
-	printk( KERN_DEBUG "pmc551: EEPROM is under %s control\n"
-			   "pmc551: System Control Register is %slocked to PCI access\n"
-			   "pmc551: System Control Register is %slocked to EEPROM access\n",
-		(bcmd&0x1)?"software":"hardware",
-		(bcmd&0x20)?"":"un", (bcmd&0x40)?"":"un");
+	/*
+	 * Some screen fun
+	 */
+	printk(KERN_DEBUG "pmc551: %d%c (0x%x) of %sprefetchable memory at "
+		"0x%llx\n", (size < 1024) ? size : (size < 1048576) ?
+		size >> 10 : size >> 20,
+		(size < 1024) ? 'B' : (size < 1048576) ? 'K' : 'M', size,
+		((dcmd & (0x1 << 3)) == 0) ? "non-" : "",
+		(unsigned long long)((dev->resource[0].start) &
+				    PCI_BASE_ADDRESS_MEM_MASK));
+
+	/*
+	 * Check to see the state of the memory
+	 */
+	pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK0 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK0 Size: %d at %d\n"
+		"pmc551: DRAM_BLK0 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
+
+	pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK1 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK1 Size: %d at %d\n"
+		"pmc551: DRAM_BLK1 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
+
+	pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK2 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK2 Size: %d at %d\n"
+		"pmc551: DRAM_BLK2 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
+
+	pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK3 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK3 Size: %d at %d\n"
+		"pmc551: DRAM_BLK3 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	printk(KERN_DEBUG "pmc551: Memory Access %s\n",
+		(((0x1 << 1) & cmd) == 0) ? "off" : "on");
+	printk(KERN_DEBUG "pmc551: I/O Access %s\n",
+		(((0x1 << 0) & cmd) == 0) ? "off" : "on");
+
+	pci_read_config_word(dev, PCI_STATUS, &cmd);
+	printk(KERN_DEBUG "pmc551: Devsel %s\n",
+		((PCI_STATUS_DEVSEL_MASK & cmd) == 0x000) ? "Fast" :
+		((PCI_STATUS_DEVSEL_MASK & cmd) == 0x200) ? "Medium" :
+		((PCI_STATUS_DEVSEL_MASK & cmd) == 0x400) ? "Slow" : "Invalid");
+
+	printk(KERN_DEBUG "pmc551: %sFast Back-to-Back\n",
+		((PCI_COMMAND_FAST_BACK & cmd) == 0) ? "Not " : "");
+
+	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd);
+	printk(KERN_DEBUG "pmc551: EEPROM is under %s control\n"
+		"pmc551: System Control Register is %slocked to PCI access\n"
+		"pmc551: System Control Register is %slocked to EEPROM access\n",
+		(bcmd & 0x1) ? "software" : "hardware",
+		(bcmd & 0x20) ? "" : "un", (bcmd & 0x40) ? "" : "un");
 #endif
-        return size;
+	return size;
 }
 
 /*
  * Kernel version specific module stuffages
  */
 
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Mark Ferrell <mferrell@mvista.com>");
 MODULE_DESCRIPTION(PMC551_VERSION);
@@ -628,11 +650,11 @@ MODULE_DESCRIPTION(PMC551_VERSION);
 /*
  * Stuff these outside the ifdef so as to not bust compiled in driver support
  */
-static int msize=0;
+static int msize = 0;
 #if defined(CONFIG_MTD_PMC551_APERTURE_SIZE)
-static int asize=CONFIG_MTD_PMC551_APERTURE_SIZE
+static int asize = CONFIG_MTD_PMC551_APERTURE_SIZE
 #else
-static int asize=0;
+static int asize = 0;
 #endif
 
 module_param(msize, int, 0);
@@ -645,172 +667,179 @@ MODULE_PARM_DESC(asize, "aperture size, must be <= memsize [1-1024]");
  */
 static int __init init_pmc551(void)
 {
-        struct pci_dev *PCI_Device = NULL;
-        struct mypriv *priv;
-        int count, found=0;
-        struct mtd_info *mtd;
-        u32 length = 0;
-
-	if(msize) {
-		msize = (1 << (ffs(msize) - 1))<<20;
-		if (msize > (1<<30)) {
-			printk(KERN_NOTICE "pmc551: Invalid memory size [%d]\n", msize);
+	struct pci_dev *PCI_Device = NULL;
+	struct mypriv *priv;
+	int count, found = 0;
+	struct mtd_info *mtd;
+	u32 length = 0;
+
+	if (msize) {
+		msize = (1 << (ffs(msize) - 1)) << 20;
+		if (msize > (1 << 30)) {
+			printk(KERN_NOTICE "pmc551: Invalid memory size [%d]\n",
+				msize);
 			return -EINVAL;
 		}
 	}
 
-	if(asize) {
-		asize = (1 << (ffs(asize) - 1))<<20;
-		if (asize > (1<<30) ) {
-			printk(KERN_NOTICE "pmc551: Invalid aperture size [%d]\n", asize);
+	if (asize) {
+		asize = (1 << (ffs(asize) - 1)) << 20;
+		if (asize > (1 << 30)) {
+			printk(KERN_NOTICE "pmc551: Invalid aperture size "
+				"[%d]\n", asize);
 			return -EINVAL;
 		}
 	}
 
-        printk(KERN_INFO PMC551_VERSION);
-
-        /*
-         * PCU-bus chipset probe.
-         */
-        for( count = 0; count < MAX_MTD_DEVICES; count++ ) {
-
-                if ((PCI_Device = pci_get_device(PCI_VENDOR_ID_V3_SEMI,
-                                                  PCI_DEVICE_ID_V3_SEMI_V370PDC,
-						  PCI_Device ) ) == NULL) {
-                        break;
-                }
-
-                printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n",
-				    (unsigned long long)PCI_Device->resource[0].start);
-
-                /*
-                 * The PMC551 device acts VERY weird if you don't init it
-                 * first.  i.e. it will not correctly report devsel.  If for
-                 * some reason the sdram is in a wrote-protected state the
-                 * device will DEVSEL when it is written to causing problems
-                 * with the oldproc.c driver in
-                 * some kernels (2.2.*)
-                 */
-                if((length = fixup_pmc551(PCI_Device)) <= 0) {
-                        printk(KERN_NOTICE "pmc551: Cannot init SDRAM\n");
-                        break;
-                }
+	printk(KERN_INFO PMC551_VERSION);
+
+	/*
+	 * PCU-bus chipset probe.
+	 */
+	for (count = 0; count < MAX_MTD_DEVICES; count++) {
+
+		if ((PCI_Device = pci_get_device(PCI_VENDOR_ID_V3_SEMI,
+						  PCI_DEVICE_ID_V3_SEMI_V370PDC,
+						  PCI_Device)) == NULL) {
+			break;
+		}
+
+		printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n",
+			(unsigned long long)PCI_Device->resource[0].start);
+
+		/*
+		 * The PMC551 device acts VERY weird if you don't init it
+		 * first.  i.e. it will not correctly report devsel.  If for
+		 * some reason the sdram is in a wrote-protected state the
+		 * device will DEVSEL when it is written to causing problems
+		 * with the oldproc.c driver in
+		 * some kernels (2.2.*)
+		 */
+		if ((length = fixup_pmc551(PCI_Device)) <= 0) {
+			printk(KERN_NOTICE "pmc551: Cannot init SDRAM\n");
+			break;
+		}
 
 		/*
 		 * This is needed until the driver is capable of reading the
 		 * onboard I2C SROM to discover the "real" memory size.
 		 */
-		if(msize) {
+		if (msize) {
 			length = msize;
-			printk(KERN_NOTICE "pmc551: Using specified memory size 0x%x\n", length);
+			printk(KERN_NOTICE "pmc551: Using specified memory "
+				"size 0x%x\n", length);
 		} else {
 			msize = length;
 		}
 
-                mtd = kmalloc(sizeof(struct mtd_info), GFP_KERNEL);
-                if (!mtd) {
-                        printk(KERN_NOTICE "pmc551: Cannot allocate new MTD device.\n");
-                        break;
-                }
-
-                memset(mtd, 0, sizeof(struct mtd_info));
-
-                priv = kmalloc (sizeof(struct mypriv), GFP_KERNEL);
-                if (!priv) {
-                        printk(KERN_NOTICE "pmc551: Cannot allocate new MTD device.\n");
-                        kfree(mtd);
-                        break;
-                }
-                memset(priv, 0, sizeof(*priv));
-                mtd->priv = priv;
-                priv->dev = PCI_Device;
-
-		if(asize > length) {
-			printk(KERN_NOTICE "pmc551: reducing aperture size to fit %dM\n",length>>20);
+		mtd = kmalloc(sizeof(struct mtd_info), GFP_KERNEL);
+		if (!mtd) {
+			printk(KERN_NOTICE "pmc551: Cannot allocate new MTD "
+				"device.\n");
+			break;
+		}
+
+		memset(mtd, 0, sizeof(struct mtd_info));
+
+		priv = kmalloc(sizeof(struct mypriv), GFP_KERNEL);
+		if (!priv) {
+			printk(KERN_NOTICE "pmc551: Cannot allocate new MTD "
+				"device.\n");
+			kfree(mtd);
+			break;
+		}
+		memset(priv, 0, sizeof(*priv));
+		mtd->priv = priv;
+		priv->dev = PCI_Device;
+
+		if (asize > length) {
+			printk(KERN_NOTICE "pmc551: reducing aperture size to "
+				"fit %dM\n", length >> 20);
 			priv->asize = asize = length;
 		} else if (asize == 0 || asize == length) {
-			printk(KERN_NOTICE "pmc551: Using existing aperture size %dM\n", length>>20);
+			printk(KERN_NOTICE "pmc551: Using existing aperture "
+				"size %dM\n", length >> 20);
 			priv->asize = asize = length;
 		} else {
-			printk(KERN_NOTICE "pmc551: Using specified aperture size %dM\n", asize>>20);
+			printk(KERN_NOTICE "pmc551: Using specified aperture "
+				"size %dM\n", asize >> 20);
 			priv->asize = asize;
 		}
-                priv->start = ioremap(((PCI_Device->resource[0].start)
-                                       & PCI_BASE_ADDRESS_MEM_MASK),
-                                      priv->asize);
+		priv->start = ioremap(((PCI_Device->resource[0].start)
+					& PCI_BASE_ADDRESS_MEM_MASK),
+				      priv->asize);
 
 		if (!priv->start) {
 			printk(KERN_NOTICE "pmc551: Unable to map IO space\n");
-                        kfree(mtd->priv);
-                        kfree(mtd);
+			kfree(mtd->priv);
+			kfree(mtd);
 			break;
 		}
-
 #ifdef CONFIG_MTD_PMC551_DEBUG
-		printk( KERN_DEBUG "pmc551: setting aperture to %d\n",
-			ffs(priv->asize>>20)-1);
+		printk(KERN_DEBUG "pmc551: setting aperture to %d\n",
+			ffs(priv->asize >> 20) - 1);
 #endif
 
-                priv->base_map0 = ( PMC551_PCI_MEM_MAP_REG_EN
-				  | PMC551_PCI_MEM_MAP_ENABLE
-				  | (ffs(priv->asize>>20)-1)<<4 );
-                priv->curr_map0 = priv->base_map0;
-                pci_write_config_dword ( priv->dev, PMC551_PCI_MEM_MAP0,
-                                         priv->curr_map0 );
+		priv->base_map0 = (PMC551_PCI_MEM_MAP_REG_EN
+				   | PMC551_PCI_MEM_MAP_ENABLE
+				   | (ffs(priv->asize >> 20) - 1) << 4);
+		priv->curr_map0 = priv->base_map0;
+		pci_write_config_dword(priv->dev, PMC551_PCI_MEM_MAP0,
+					priv->curr_map0);
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-		printk( KERN_DEBUG "pmc551: aperture set to %d\n",
-			(priv->base_map0 & 0xF0)>>4 );
+		printk(KERN_DEBUG "pmc551: aperture set to %d\n",
+			(priv->base_map0 & 0xF0) >> 4);
 #endif
 
-                mtd->size 	= msize;
-                mtd->flags 	= MTD_CAP_RAM;
-                mtd->erase 	= pmc551_erase;
-                mtd->read 	= pmc551_read;
-                mtd->write 	= pmc551_write;
-                mtd->point 	= pmc551_point;
-                mtd->unpoint 	= pmc551_unpoint;
-                mtd->type 	= MTD_RAM;
-                mtd->name 	= "PMC551 RAM board";
-                mtd->erasesize 	= 0x10000;
-                mtd->writesize  = 1;
-                mtd->owner = THIS_MODULE;
-
-                if (add_mtd_device(mtd)) {
-                        printk(KERN_NOTICE "pmc551: Failed to register new device\n");
+		mtd->size = msize;
+		mtd->flags = MTD_CAP_RAM;
+		mtd->erase = pmc551_erase;
+		mtd->read = pmc551_read;
+		mtd->write = pmc551_write;
+		mtd->point = pmc551_point;
+		mtd->unpoint = pmc551_unpoint;
+		mtd->type = MTD_RAM;
+		mtd->name = "PMC551 RAM board";
+		mtd->erasesize = 0x10000;
+		mtd->writesize = 1;
+		mtd->owner = THIS_MODULE;
+
+		if (add_mtd_device(mtd)) {
+			printk(KERN_NOTICE "pmc551: Failed to register new "
+				"device\n");
 			iounmap(priv->start);
-                        kfree(mtd->priv);
-                        kfree(mtd);
-                        break;
-                }
-
-                /* Keep a reference as the add_mtd_device worked */
-                pci_dev_get(PCI_Device);
-
-                printk(KERN_NOTICE "Registered pmc551 memory device.\n");
-                printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n",
-                       priv->asize>>20,
-                       priv->start,
-                       priv->start + priv->asize);
-                printk(KERN_NOTICE "Total memory is %d%c\n",
-	       		(length<1024)?length:
-				(length<1048576)?length>>10:length>>20,
-               		(length<1024)?'B':(length<1048576)?'K':'M');
+			kfree(mtd->priv);
+			kfree(mtd);
+			break;
+		}
+
+		/* Keep a reference as the add_mtd_device worked */
+		pci_dev_get(PCI_Device);
+
+		printk(KERN_NOTICE "Registered pmc551 memory device.\n");
+		printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n",
+			priv->asize >> 20,
+			priv->start, priv->start + priv->asize);
+		printk(KERN_NOTICE "Total memory is %d%c\n",
+			(length < 1024) ? length :
+			(length < 1048576) ? length >> 10 : length >> 20,
+			(length < 1024) ? 'B' : (length < 1048576) ? 'K' : 'M');
 		priv->nextpmc551 = pmc551list;
 		pmc551list = mtd;
 		found++;
-        }
+	}
 
-        /* Exited early, reference left over */
-        if (PCI_Device)
-        	pci_dev_put(PCI_Device);
+	/* Exited early, reference left over */
+	if (PCI_Device)
+		pci_dev_put(PCI_Device);
 
-        if( !pmc551list ) {
-                printk(KERN_NOTICE "pmc551: not detected\n");
-                return -ENODEV;
-        } else {
+	if (!pmc551list) {
+		printk(KERN_NOTICE "pmc551: not detected\n");
+		return -ENODEV;
+	} else {
 		printk(KERN_NOTICE "pmc551: %d pmc551 devices loaded\n", found);
-                return 0;
+		return 0;
 	}
 }
 
@@ -819,24 +848,24 @@ static int __init init_pmc551(void)
  */
 static void __exit cleanup_pmc551(void)
 {
-        int found=0;
-        struct mtd_info *mtd;
+	int found = 0;
+	struct mtd_info *mtd;
 	struct mypriv *priv;
 
-	while((mtd=pmc551list)) {
+	while ((mtd = pmc551list)) {
 		priv = mtd->priv;
 		pmc551list = priv->nextpmc551;
 
-		if(priv->start) {
-			printk (KERN_DEBUG "pmc551: unmapping %dM starting at 0x%p\n",
-				priv->asize>>20, priv->start);
-			iounmap (priv->start);
+		if (priv->start) {
+			printk(KERN_DEBUG "pmc551: unmapping %dM starting at "
+				"0x%p\n", priv->asize >> 20, priv->start);
+			iounmap(priv->start);
 		}
 		pci_dev_put(priv->dev);
 
-		kfree (mtd->priv);
-		del_mtd_device (mtd);
-		kfree (mtd);
+		kfree(mtd->priv);
+		del_mtd_device(mtd);
+		kfree(mtd);
 		found++;
 	}
 
-- 
GitLab


From 7fefb924d7aed7116fe2a68cdbfc9e36318e7300 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Tue, 19 Sep 2006 21:55:18 +0200
Subject: [PATCH 0372/1063] [MTD] pmc551 use kzalloc

Use kzalloc instead of kmalloc+memset(0).

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/devices/pmc551.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index 4d4023601d22a..62a918895df4c 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -732,23 +732,20 @@ static int __init init_pmc551(void)
 			msize = length;
 		}
 
-		mtd = kmalloc(sizeof(struct mtd_info), GFP_KERNEL);
+		mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL);
 		if (!mtd) {
 			printk(KERN_NOTICE "pmc551: Cannot allocate new MTD "
 				"device.\n");
 			break;
 		}
 
-		memset(mtd, 0, sizeof(struct mtd_info));
-
-		priv = kmalloc(sizeof(struct mypriv), GFP_KERNEL);
+		priv = kzalloc(sizeof(struct mypriv), GFP_KERNEL);
 		if (!priv) {
 			printk(KERN_NOTICE "pmc551: Cannot allocate new MTD "
 				"device.\n");
 			kfree(mtd);
 			break;
 		}
-		memset(priv, 0, sizeof(*priv));
 		mtd->priv = priv;
 		priv->dev = PCI_Device;
 
-- 
GitLab


From 98aacdfde05ccf512d4395eed0d4894eea2d163c Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Tue, 19 Sep 2006 21:55:28 +0200
Subject: [PATCH 0373/1063] [MTD] pmc551 pci cleanup

Use pci_resource_start to get the start of regions and pci_iomap to map them,
instead of accessing dev->resource directly. (Thanks to Rolf Eike Beer)

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/devices/pmc551.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index 62a918895df4c..354e1657cc26a 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -568,8 +568,7 @@ static u32 fixup_pmc551(struct pci_dev *dev)
 		size >> 10 : size >> 20,
 		(size < 1024) ? 'B' : (size < 1048576) ? 'K' : 'M', size,
 		((dcmd & (0x1 << 3)) == 0) ? "non-" : "",
-		(unsigned long long)((dev->resource[0].start) &
-				    PCI_BASE_ADDRESS_MEM_MASK));
+		(unsigned long long)pci_resource_start(dev, 0));
 
 	/*
 	 * Check to see the state of the memory
@@ -705,7 +704,7 @@ static int __init init_pmc551(void)
 		}
 
 		printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n",
-			(unsigned long long)PCI_Device->resource[0].start);
+			(unsigned long long)pci_resource_start(PCI_Device, 0));
 
 		/*
 		 * The PMC551 device acts VERY weird if you don't init it
@@ -762,9 +761,7 @@ static int __init init_pmc551(void)
 				"size %dM\n", asize >> 20);
 			priv->asize = asize;
 		}
-		priv->start = ioremap(((PCI_Device->resource[0].start)
-					& PCI_BASE_ADDRESS_MEM_MASK),
-				      priv->asize);
+		priv->start = pci_iomap(PCI_Device, 0, priv->asize);
 
 		if (!priv->start) {
 			printk(KERN_NOTICE "pmc551: Unable to map IO space\n");
@@ -805,7 +802,7 @@ static int __init init_pmc551(void)
 		if (add_mtd_device(mtd)) {
 			printk(KERN_NOTICE "pmc551: Failed to register new "
 				"device\n");
-			iounmap(priv->start);
+			pci_iounmap(PCI_Device, priv->start);
 			kfree(mtd->priv);
 			kfree(mtd);
 			break;
@@ -856,7 +853,7 @@ static void __exit cleanup_pmc551(void)
 		if (priv->start) {
 			printk(KERN_DEBUG "pmc551: unmapping %dM starting at "
 				"0x%p\n", priv->asize >> 20, priv->start);
-			iounmap(priv->start);
+			pci_iounmap(priv->dev, priv->start);
 		}
 		pci_dev_put(priv->dev);
 
-- 
GitLab


From 51197abf29657373bcf9803d87da3c3d8fc3a37e Mon Sep 17 00:00:00 2001
From: Claudio Lanconelli <lanconelli.claudio@eptar.com>
Date: Fri, 22 Sep 2006 11:01:37 +0100
Subject: [PATCH 0374/1063] [MTD] Add SSFDC (SmartMedia) read-only translation
 layer

Signed-off-by: Claudio Lanconelli <lanconelli.claudio@eptar.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/Kconfig  |   8 +
 drivers/mtd/Makefile |   1 +
 drivers/mtd/ssfdc.c  | 468 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 477 insertions(+)
 create mode 100644 drivers/mtd/ssfdc.c

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 1344ad7a4b14a..717e90448fc6b 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -263,6 +263,14 @@ config RFD_FTL
 
 		http://www.gensw.com/pages/prod/bios/rfd.htm
 
+config SSFDC
+	bool "NAND SSFDC (SmartMedia) read only translation layer"
+	depends on MTD
+	default n
+	help
+	  This enables read only access to SmartMedia formatted NAND
+	  flash. You can mount it with FAT file system.
+
 source "drivers/mtd/chips/Kconfig"
 
 source "drivers/mtd/maps/Kconfig"
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index fc9374407c2bc..1e36b9aed98b1 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_FTL)		+= ftl.o mtd_blkdevs.o
 obj-$(CONFIG_NFTL)		+= nftl.o mtd_blkdevs.o
 obj-$(CONFIG_INFTL)		+= inftl.o mtd_blkdevs.o
 obj-$(CONFIG_RFD_FTL)		+= rfd_ftl.o mtd_blkdevs.o
+obj-$(CONFIG_SSFDC)		+= ssfdc.o mtd_blkdevs.o
 
 nftl-objs		:= nftlcore.o nftlmount.o
 inftl-objs		:= inftlcore.o inftlmount.o
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
new file mode 100644
index 0000000000000..ddbf015f4119d
--- /dev/null
+++ b/drivers/mtd/ssfdc.c
@@ -0,0 +1,468 @@
+/*
+ * Linux driver for SSFDC Flash Translation Layer (Read only)
+ * (c) 2005 Eptar srl
+ * Author: Claudio Lanconelli <lanconelli.claudio@eptar.com>
+ *
+ * Based on NTFL and MTDBLOCK_RO drivers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/hdreg.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/blktrans.h>
+
+struct ssfdcr_record {
+	struct mtd_blktrans_dev mbd;
+	int usecount;
+	unsigned char heads;
+	unsigned char sectors;
+	unsigned short cylinders;
+	int cis_block;			/* block n. containing CIS/IDI */
+	int erase_size;			/* phys_block_size */
+	unsigned short *logic_block_map; /* all zones (max 8192 phys blocks on
+					    the 128MB) */
+	int map_len;			/* n. phys_blocks on the card */
+};
+
+#define SSFDCR_MAJOR		257
+#define SSFDCR_PARTN_BITS	3
+
+#define SECTOR_SIZE		512
+#define SECTOR_SHIFT		9
+#define OOB_SIZE		16
+
+#define MAX_LOGIC_BLK_PER_ZONE	1000
+#define MAX_PHYS_BLK_PER_ZONE	1024
+
+#define KB(x)	( (x) * 1024L )
+#define MB(x)	( KB(x) * 1024L )
+
+/** CHS Table
+		1MB	2MB	4MB	8MB	16MB	32MB	64MB	128MB
+NCylinder	125	125	250	250	500	500	500	500
+NHead		4	4	4	4	4	8	8	16
+NSector		4	8	8	16	16	16	32	32
+SumSector	2,000	4,000	8,000	16,000	32,000	64,000	128,000	256,000
+SectorSize	512	512	512	512	512	512	512	512
+**/
+
+typedef struct {
+	unsigned long size;
+	unsigned short cyl;
+	unsigned char head;
+	unsigned char sec;
+} chs_entry_t;
+
+/* Must be ordered by size */
+static const chs_entry_t chs_table[] = {
+	{ MB(  1), 125,  4,  4 },
+	{ MB(  2), 125,  4,  8 },
+	{ MB(  4), 250,  4,  8 },
+	{ MB(  8), 250,  4, 16 },
+	{ MB( 16), 500,  4, 16 },
+	{ MB( 32), 500,  8, 16 },
+	{ MB( 64), 500,  8, 32 },
+	{ MB(128), 500, 16, 32 },
+	{ 0 },
+};
+
+static int get_chs(unsigned long size, unsigned short *cyl, unsigned char *head,
+			unsigned char *sec)
+{
+	int k;
+	int found = 0;
+
+	k = 0;
+	while (chs_table[k].size > 0 && size > chs_table[k].size)
+		k++;
+
+	if (chs_table[k].size > 0) {
+		if (cyl)
+			*cyl = chs_table[k].cyl;
+		if (head)
+			*head = chs_table[k].head;
+		if (sec)
+			*sec = chs_table[k].sec;
+		found = 1;
+	}
+
+	return found;
+}
+
+/* These bytes are the signature for the CIS/IDI sector */
+static const uint8_t cis_numbers[] = {
+	0x01, 0x03, 0xD9, 0x01, 0xFF, 0x18, 0x02, 0xDF, 0x01, 0x20
+};
+
+/* Read and check for a valid CIS sector */
+static int get_valid_cis_sector(struct mtd_info *mtd)
+{
+	int ret, k, cis_sector;
+	size_t retlen;
+	loff_t offset;
+	uint8_t sect_buf[SECTOR_SIZE];
+
+	/*
+	 * Look for CIS/IDI sector on the first GOOD block (give up after 4 bad
+	 * blocks). If the first good block doesn't contain CIS number the flash
+	 * is not SSFDC formatted
+	 */
+	cis_sector = -1;
+	for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) {
+		if (!mtd->block_isbad(mtd, offset)) {
+			ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen,
+				sect_buf);
+
+			/* CIS pattern match on the sector buffer */
+			if ( ret < 0 || retlen != SECTOR_SIZE ) {
+				printk(KERN_WARNING
+					"SSFDC_RO:can't read CIS/IDI sector\n");
+			} else if ( !memcmp(sect_buf, cis_numbers,
+					sizeof(cis_numbers)) ) {
+				/* Found */
+				cis_sector = (int)(offset >> SECTOR_SHIFT);
+			} else {
+				DEBUG(MTD_DEBUG_LEVEL1,
+					"SSFDC_RO: CIS/IDI sector not found"
+					" on %s (mtd%d)\n", mtd->name,
+					mtd->index);
+			}
+			break;
+		}
+	}
+
+	return cis_sector;
+}
+
+/* Read physical sector (wrapper to MTD_READ) */
+static int read_physical_sector(struct mtd_info *mtd, uint8_t *sect_buf,
+				int sect_no)
+{
+	int ret;
+	size_t retlen;
+	loff_t offset = (loff_t)sect_no << SECTOR_SHIFT;
+
+	ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf);
+	if (ret < 0 || retlen != SECTOR_SIZE)
+		return -1;
+
+	return 0;
+}
+
+/* Read redundancy area (wrapper to MTD_READ_OOB */
+static int read_raw_oob(struct mtd_info *mtd, loff_t offs, uint8_t *buf)
+{
+	struct mtd_oob_ops ops;
+	int ret;
+
+	ops.mode = MTD_OOB_RAW;
+	ops.ooboffs = 0;
+	ops.ooblen = mtd->oobsize;
+	ops.len = OOB_SIZE;
+	ops.oobbuf = buf;
+	ops.datbuf = NULL;
+
+	ret = mtd->read_oob(mtd, offs, &ops);
+	if (ret < 0 || ops.retlen != OOB_SIZE)
+		return -1;
+
+	return 0;
+}
+
+/* Parity calculator on a word of n bit size */
+static int get_parity(int number, int size)
+{
+ 	int k;
+	int parity;
+
+	parity = 1;
+	for (k = 0; k < size; k++) {
+		parity += (number >> k);
+		parity &= 1;
+	}
+	return parity;
+}
+
+/* Read and validate the logical block address field stored in the OOB */
+static int get_logical_address(uint8_t *oob_buf)
+{
+	int block_address, parity;
+	int offset[2] = {6, 11}; /* offset of the 2 address fields within OOB */
+	int j;
+	int ok = 0;
+
+	/*
+	 * Look for the first valid logical address
+	 * Valid address has fixed pattern on most significant bits and
+	 * parity check
+	 */
+	for (j = 0; j < ARRAY_SIZE(offset); j++) {
+		block_address = ((int)oob_buf[offset[j]] << 8) |
+			oob_buf[offset[j]+1];
+
+		/* Check for the signature bits in the address field (MSBits) */
+		if ((block_address & ~0x7FF) == 0x1000) {
+			parity = block_address & 0x01;
+			block_address &= 0x7FF;
+			block_address >>= 1;
+
+			if (get_parity(block_address, 10) != parity) {
+				DEBUG(MTD_DEBUG_LEVEL0,
+					"SSFDC_RO: logical address field%d"
+					"parity error(0x%04X)\n", j+1,
+					block_address);
+			} else {
+				ok = 1;
+				break;
+			}
+		}
+	}
+
+	if ( !ok )
+		block_address = -2;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "SSFDC_RO: get_logical_address() %d\n",
+		block_address);
+
+	return block_address;
+}
+
+/* Build the logic block map */
+static int build_logical_block_map(struct ssfdcr_record *ssfdc)
+{
+	unsigned long offset;
+	uint8_t oob_buf[OOB_SIZE];
+	int ret, block_address, phys_block;
+	struct mtd_info *mtd = ssfdc->mbd.mtd;
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: build_block_map() nblks=%d (%luK)\n",
+		ssfdc->map_len, (unsigned long)ssfdc->map_len *
+		ssfdc->erase_size / 1024 );
+
+	/* Scan every physical block, skip CIS block */
+	for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len;
+			phys_block++) {
+		offset = (unsigned long)phys_block * ssfdc->erase_size;
+		if (mtd->block_isbad(mtd, offset))
+			continue;	/* skip bad blocks */
+
+		ret = read_raw_oob(mtd, offset, oob_buf);
+		if (ret < 0) {
+			DEBUG(MTD_DEBUG_LEVEL0,
+				"SSFDC_RO: mtd read_oob() failed at %lu\n",
+				offset);
+			return -1;
+		}
+		block_address = get_logical_address(oob_buf);
+
+		/* Skip invalid addresses */
+		if (block_address >= 0 &&
+				block_address < MAX_LOGIC_BLK_PER_ZONE) {
+			int zone_index;
+
+			zone_index = phys_block / MAX_PHYS_BLK_PER_ZONE;
+			block_address += zone_index * MAX_LOGIC_BLK_PER_ZONE;
+			ssfdc->logic_block_map[block_address] =
+				(unsigned short)phys_block;
+
+			DEBUG(MTD_DEBUG_LEVEL2,
+				"SSFDC_RO: build_block_map() phys_block=%d,"
+				"logic_block_addr=%d, zone=%d\n",
+				phys_block, block_address, zone_index);
+		}
+	}
+	return 0;
+}
+
+static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
+{
+	struct ssfdcr_record *ssfdc;
+	int cis_sector;
+
+	/* Check for small page NAND flash */
+	if (mtd->type != MTD_NANDFLASH || mtd->oobsize != OOB_SIZE)
+		return;
+
+	/* Check for SSDFC format by reading CIS/IDI sector */
+	cis_sector = get_valid_cis_sector(mtd);
+	if (cis_sector == -1)
+		return;
+
+	ssfdc = kzalloc(sizeof(struct ssfdcr_record), GFP_KERNEL);
+	if (!ssfdc) {
+		printk(KERN_WARNING
+			"SSFDC_RO: out of memory for data structures\n");
+		return;
+	}
+
+	ssfdc->mbd.mtd = mtd;
+	ssfdc->mbd.devnum = -1;
+	ssfdc->mbd.blksize = SECTOR_SIZE;
+	ssfdc->mbd.tr = tr;
+	ssfdc->mbd.readonly = 1;
+
+	ssfdc->cis_block = cis_sector / (mtd->erasesize >> SECTOR_SHIFT);
+	ssfdc->erase_size = mtd->erasesize;
+	ssfdc->map_len = mtd->size / mtd->erasesize;
+
+	DEBUG(MTD_DEBUG_LEVEL1,
+		"SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
+		ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len,
+		(ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) /
+		MAX_PHYS_BLK_PER_ZONE);
+
+	/* Set geometry */
+	ssfdc->heads = 16;
+	ssfdc->sectors = 32;
+	get_chs( mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors);
+	ssfdc->cylinders = (unsigned short)((mtd->size >> SECTOR_SHIFT) /
+			((long)ssfdc->sectors * (long)ssfdc->heads));
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: using C:%d H:%d S:%d == %ld sects\n",
+		ssfdc->cylinders, ssfdc->heads , ssfdc->sectors,
+		(long)ssfdc->cylinders * (long)ssfdc->heads *
+		(long)ssfdc->sectors );
+
+	ssfdc->mbd.size = (long)ssfdc->heads * (long)ssfdc->cylinders *
+				(long)ssfdc->sectors;
+
+	/* Allocate logical block map */
+	ssfdc->logic_block_map = kmalloc( sizeof(ssfdc->logic_block_map[0]) *
+						ssfdc->map_len, GFP_KERNEL);
+	if (!ssfdc->logic_block_map) {
+		printk(KERN_WARNING
+			"SSFDC_RO: out of memory for data structures\n");
+		goto out_err;
+	}
+	memset(ssfdc->logic_block_map, 0xff, sizeof(ssfdc->logic_block_map[0]) *
+		ssfdc->map_len);
+
+	/* Build logical block map */
+	if (build_logical_block_map(ssfdc) < 0)
+		goto out_err;
+
+	/* Register device + partitions */
+	if (add_mtd_blktrans_dev(&ssfdc->mbd))
+		goto out_err;
+
+	printk(KERN_INFO "SSFDC_RO: Found ssfdc%c on mtd%d (%s)\n",
+		ssfdc->mbd.devnum + 'a', mtd->index, mtd->name);
+	return;
+
+out_err:
+	kfree(ssfdc->logic_block_map);
+        kfree(ssfdc);
+}
+
+static void ssfdcr_remove_dev(struct mtd_blktrans_dev *dev)
+{
+	struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev;
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: remove_dev (i=%d)\n", dev->devnum);
+
+	del_mtd_blktrans_dev(dev);
+	kfree(ssfdc->logic_block_map);
+	kfree(ssfdc);
+}
+
+static int ssfdcr_readsect(struct mtd_blktrans_dev *dev,
+				unsigned long logic_sect_no, char *buf)
+{
+	struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev;
+	int sectors_per_block, offset, block_address;
+
+	sectors_per_block = ssfdc->erase_size >> SECTOR_SHIFT;
+	offset = (int)(logic_sect_no % sectors_per_block);
+	block_address = (int)(logic_sect_no / sectors_per_block);
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+		"SSFDC_RO: ssfdcr_readsect(%lu) sec_per_blk=%d, ofst=%d,"
+		" block_addr=%d\n", logic_sect_no, sectors_per_block, offset,
+		block_address);
+
+	if (block_address >= ssfdc->map_len)
+		BUG();
+
+	block_address = ssfdc->logic_block_map[block_address];
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+		"SSFDC_RO: ssfdcr_readsect() phys_block_addr=%d\n",
+		block_address);
+
+	if (block_address < 0xffff) {
+		unsigned long sect_no;
+
+		sect_no = (unsigned long)block_address * sectors_per_block +
+				offset;
+
+		DEBUG(MTD_DEBUG_LEVEL3,
+			"SSFDC_RO: ssfdcr_readsect() phys_sect_no=%lu\n",
+			sect_no);
+
+		if (read_physical_sector( ssfdc->mbd.mtd, buf, sect_no ) < 0)
+			return -EIO;
+	} else {
+		memset(buf, 0xff, SECTOR_SIZE);
+	}
+
+	return 0;
+}
+
+static int ssfdcr_getgeo(struct mtd_blktrans_dev *dev,  struct hd_geometry *geo)
+{
+	struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev;
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: ssfdcr_getgeo() C=%d, H=%d, S=%d\n",
+			ssfdc->cylinders, ssfdc->heads, ssfdc->sectors);
+
+	geo->heads = ssfdc->heads;
+	geo->sectors = ssfdc->sectors;
+	geo->cylinders = ssfdc->cylinders;
+
+	return 0;
+}
+
+/****************************************************************************
+ *
+ * Module stuff
+ *
+ ****************************************************************************/
+
+static struct mtd_blktrans_ops ssfdcr_tr = {
+	.name		= "ssfdc",
+	.major		= SSFDCR_MAJOR,
+	.part_bits	= SSFDCR_PARTN_BITS,
+	.getgeo		= ssfdcr_getgeo,
+	.readsect	= ssfdcr_readsect,
+	.add_mtd	= ssfdcr_add_mtd,
+	.remove_dev	= ssfdcr_remove_dev,
+	.owner		= THIS_MODULE,
+};
+
+static int __init init_ssfdcr(void)
+{
+	printk(KERN_INFO "SSFDC read-only Flash Translation layer\n");
+
+	return register_mtd_blktrans(&ssfdcr_tr);
+}
+
+static void __exit cleanup_ssfdcr(void)
+{
+	deregister_mtd_blktrans(&ssfdcr_tr);
+}
+
+module_init(init_ssfdcr);
+module_exit(cleanup_ssfdcr);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Claudio Lanconelli <lanconelli.claudio@eptar.com>");
+MODULE_DESCRIPTION("Flash Translation Layer for read-only SSFDC SmartMedia card");
-- 
GitLab


From e4e3295f0c0e1a09a46522359e24c0569e2e7b47 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Fri, 22 Sep 2006 11:05:42 +0100
Subject: [PATCH 0375/1063] Revert "[MTD] blkdev helper code: fix printk format
 warning"

This reverts commit 668040fcd1e06fc3e68a92708cbdfa5a52c37d3c.

The 'flags' field of the struct request is 'unsigned long'. Quite
how Randy came to see 'long int format, different type arg' I don't
know, but it doesn't seem to be the case any more.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/mtd_blkdevs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 302bed5f08452..458d3c8ae1eee 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -69,7 +69,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 		return 1;
 
 	default:
-		printk(KERN_NOTICE "Unknown request %d\n", rq_data_dir(req));
+		printk(KERN_NOTICE "Unknown request %ld\n", rq_data_dir(req));
 		return 0;
 	}
 }
-- 
GitLab


From 734a56285dbeedc6cc10aef6f700eeab7c65ea9f Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Fri, 22 Sep 2006 18:41:20 +0100
Subject: [PATCH 0376/1063] Remove accidentally-added
 include/linux/utsrelease.h

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/utsrelease.h | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 include/linux/utsrelease.h

diff --git a/include/linux/utsrelease.h b/include/linux/utsrelease.h
deleted file mode 100644
index 7fe1de0ad4c17..0000000000000
--- a/include/linux/utsrelease.h
+++ /dev/null
@@ -1 +0,0 @@
-#define UTS_RELEASE "2.6.18"
-- 
GitLab


From 51bd39860ff829475aef611a3234309e37e090d9 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Mon, 24 Jul 2006 23:26:30 -0700
Subject: [PATCH 0377/1063] [MLSXFRM]: Granular IPSec associations for use in
 MLS environments

The current approach to labeling Security Associations for SELinux
purposes uses a one-to-one mapping between xfrm policy rules and
security associations.

This doesn't address the needs of real world MLS (Multi-level System,
traditional Bell-LaPadula) environments where a single xfrm policy
rule (pertaining to a range, classified to secret for example) might
need to map to multiple Security Associations (one each for
classified, secret, top secret and all the compartments applicable to
these security levels).

This patch set addresses the above problem by allowing for the mapping
of a single xfrm policy rule to multiple security associations, with
each association used in the security context it is defined for. It
also includes the security context to be used in IKE negotiation in
the acquire messages sent to the IKE daemon so that a unique SA can be
negotiated for each unique security context. A couple of bug fixes are
also included; checks to make sure the SAs used by a packet match
policy (security context-wise) on the inbound and also that the bundle
used for the outbound matches the security context of the flow. This
patch set also makes the use of the SELinux sid in flow cache lookups
seamless by including the sid in the flow key itself. Also, open
requests as well as connection-oriented child sockets are labeled
automatically to be at the same level as the peer to allow for use of
appropriately labeled IPSec associations.

Description of changes:

A "sid" member has been added to the flow cache key resulting in the
sid being available at all needed locations and the flow cache lookups
automatically using the sid. The flow sid is derived from the socket
on the outbound and the SAs (unlabeled where an SA was not used) on
the inbound.

Outbound case:
1. Find policy for the socket.

2. OLD: Find an SA that matches the policy.
 NEW: Find an SA that matches BOTH the policy and the flow/socket.
   This is necessary since not every SA that matches the policy
   can be used for the flow/socket. Consider policy range Secret-TS,
   and SAs each for Secret and TS. We don't want a TS socket to
   use the Secret SA. Hence the additional check for the SA Vs. flow/socket.

3. NEW: When looking thru bundles for a policy, make sure the
        flow/socket can use the bundle. If a bundle is not found,
        create one, calling for IKE if necessary. If using IKE,
        include the security context in the acquire message to the IKE
        daemon.

Inbound case:
1. OLD: Find policy for the socket.
 NEW: Find policy for the incoming packet based on the sid of the
      SA(s) it used or the unlabeled sid if no SAs were
      used. (Consider a case where a socket is "authorized" for two
      policies (unclassified-confidential, secret-top_secret). If the
      packet has come in using a secret SA, we really ought to be
      using the latter policy (secret-top_secret).)

2. OLD: BUG: No check to see if the SAs used by the packet agree with
             the policy sec_ctx-wise.

             (It was indicated in selinux_xfrm_sock_rcv_skb() that
              this was being accomplished by
              (x->id.spi == tmpl->id.spi || !tmpl->id.spi) in xfrm_state_ok,
              but it turns out tmpl->id.spi
              would normally be zero (unless xfrm policy rules specify one
              at the template level, which they usually don't).)
 NEW: The socket is checked for access to the SAs used (based on the
      sid of the SAs) in selinux_xfrm_sock_rcv_skb().

Forward case:
 This would be Step 1 from the Inbound case, followed by Steps 2 and 3
from the Outbound case.

Outstanding items/issues:

- Timewait acknowledgements and such are generated in the
  current/upstream implementation using a NULL socket resulting in the
  any_socket sid (SYSTEM_HIGH) being used. This problem is not addressed
  by this patch set.

This patch: Add new flask definitions to SELinux

Adds a new avperm "polmatch" to arbitrate flow/state access to a xfrm
policy rule.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/include/av_perm_to_string.h | 1 +
 security/selinux/include/av_permissions.h    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h
index 7c9b583808337..09fc8a2345eb2 100644
--- a/security/selinux/include/av_perm_to_string.h
+++ b/security/selinux/include/av_perm_to_string.h
@@ -241,6 +241,7 @@
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, "sendto")
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, "recvfrom")
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, "setcontext")
+   S_(SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, "polmatch")
    S_(SECCLASS_PACKET, PACKET__SEND, "send")
    S_(SECCLASS_PACKET, PACKET__RECV, "recv")
    S_(SECCLASS_PACKET, PACKET__RELABELTO, "relabelto")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h
index 69fd4b48202ce..81f4f526c8b1a 100644
--- a/security/selinux/include/av_permissions.h
+++ b/security/selinux/include/av_permissions.h
@@ -911,6 +911,7 @@
 #define ASSOCIATION__SENDTO                       0x00000001UL
 #define ASSOCIATION__RECVFROM                     0x00000002UL
 #define ASSOCIATION__SETCONTEXT                   0x00000004UL
+#define ASSOCIATION__POLMATCH                     0x00000008UL
 
 #define NETLINK_KOBJECT_UEVENT_SOCKET__IOCTL      0x00000001UL
 #define NETLINK_KOBJECT_UEVENT_SOCKET__READ       0x00000002UL
-- 
GitLab


From 08554d6b33e60aa8ee40bbef94505941c0eefef2 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Mon, 24 Jul 2006 23:27:16 -0700
Subject: [PATCH 0378/1063] [MLSXFRM]: Define new SELinux service routine

This defines a routine that combines the Type Enforcement portion of
one sid with the MLS portion from the other sid to arrive at a new
sid. This would be used to define a sid for a security association
that is to be negotiated by IKE as well as for determining the sid for
open requests and connection-oriented child sockets.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/include/security.h |  2 +
 security/selinux/ss/mls.c           | 20 ---------
 security/selinux/ss/mls.h           | 20 +++++++++
 security/selinux/ss/services.c      | 69 +++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 063af47bb2317..911954a692fac 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -78,6 +78,8 @@ int security_node_sid(u16 domain, void *addr, u32 addrlen,
 int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid,
                                  u16 tclass);
 
+int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid);
+
 #define SECURITY_FS_USE_XATTR		1 /* use xattr */
 #define SECURITY_FS_USE_TRANS		2 /* use transition SIDs, e.g. devpts/tmpfs */
 #define SECURITY_FS_USE_TASK		3 /* use task SIDs, e.g. pipefs/sockfs */
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 7bc5b6440f70b..e15f7e0399b8e 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -211,26 +211,6 @@ int mls_context_isvalid(struct policydb *p, struct context *c)
 	return 1;
 }
 
-/*
- * Copies the MLS range from `src' into `dst'.
- */
-static inline int mls_copy_context(struct context *dst,
-				   struct context *src)
-{
-	int l, rc = 0;
-
-	/* Copy the MLS range from the source context */
-	for (l = 0; l < 2; l++) {
-		dst->range.level[l].sens = src->range.level[l].sens;
-		rc = ebitmap_cpy(&dst->range.level[l].cat,
-				 &src->range.level[l].cat);
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
-
 /*
  * Set the MLS fields in the security context structure
  * `context' based on the string representation in
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index fbb42f07dd7c0..90c5e88987fa8 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -17,6 +17,26 @@
 #include "context.h"
 #include "policydb.h"
 
+/*
+ * Copies the MLS range from `src' into `dst'.
+ */
+static inline int mls_copy_context(struct context *dst,
+				   struct context *src)
+{
+	int l, rc = 0;
+
+	/* Copy the MLS range from the source context */
+	for (l = 0; l < 2; l++) {
+		dst->range.level[l].sens = src->range.level[l].sens;
+		rc = ebitmap_cpy(&dst->range.level[l].cat,
+				 &src->range.level[l].cat);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
+
 int mls_compute_context_len(struct context *context);
 void mls_sid_to_context(struct context *context, char **scontext);
 int mls_context_isvalid(struct policydb *p, struct context *c);
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 85e4298843936..b00ec69f0ffd5 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1817,6 +1817,75 @@ int security_get_bool_value(int bool)
 	return rc;
 }
 
+/*
+ * security_sid_mls_copy() - computes a new sid based on the given
+ * sid and the mls portion of mls_sid.
+ */
+int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid)
+{
+	struct context *context1;
+	struct context *context2;
+	struct context newcon;
+	char *s;
+	u32 len;
+	int rc = 0;
+
+	if (!ss_initialized) {
+		*new_sid = sid;
+		goto out;
+	}
+
+	context_init(&newcon);
+
+	POLICY_RDLOCK;
+	context1 = sidtab_search(&sidtab, sid);
+	if (!context1) {
+		printk(KERN_ERR "security_sid_mls_copy:  unrecognized SID "
+		       "%d\n", sid);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	context2 = sidtab_search(&sidtab, mls_sid);
+	if (!context2) {
+		printk(KERN_ERR "security_sid_mls_copy:  unrecognized SID "
+		       "%d\n", mls_sid);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	newcon.user = context1->user;
+	newcon.role = context1->role;
+	newcon.type = context1->type;
+	rc = mls_copy_context(&newcon, context2);
+	if (rc)
+		goto out_unlock;
+
+
+	/* Check the validity of the new context. */
+	if (!policydb_context_isvalid(&policydb, &newcon)) {
+		rc = convert_context_handle_invalid_context(&newcon);
+		if (rc)
+			goto bad;
+	}
+
+	rc = sidtab_context_to_sid(&sidtab, &newcon, new_sid);
+	goto out_unlock;
+
+bad:
+	if (!context_struct_to_string(&newcon, &s, &len)) {
+		audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+			  "security_sid_mls_copy: invalid context %s", s);
+		kfree(s);
+	}
+
+out_unlock:
+	POLICY_RDUNLOCK;
+	context_destroy(&newcon);
+out:
+	return rc;
+}
+
 struct selinux_audit_rule {
 	u32 au_seqno;
 	struct context au_ctxt;
-- 
GitLab


From 892c141e62982272b9c738b5520ad0e5e1ad7b42 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Fri, 4 Aug 2006 23:08:56 -0700
Subject: [PATCH 0379/1063] [MLSXFRM]: Add security sid to sock

This adds security for IP sockets at the sock level. Security at the
sock level is needed to enforce the SELinux security policy for
security associations even when a sock is orphaned (such as in the TCP
LAST_ACK state).

This will also be used to enforce SELinux controls over data arriving
at or leaving a child socket while it's still waiting to be accepted.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h          | 12 ++++++++++
 include/net/sock.h                | 13 +++++++++++
 net/core/sock.c                   |  2 +-
 security/dummy.c                  |  5 ++++
 security/selinux/hooks.c          | 38 +++++++++++++++++--------------
 security/selinux/include/objsec.h |  1 +
 6 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 6bc2aad494ffc..4d7fb59996b03 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -812,6 +812,8 @@ struct swap_info_struct;
  *      which is used to copy security attributes between local stream sockets.
  * @sk_free_security:
  *	Deallocate security structure.
+ * @sk_clone_security:
+ *	Clone/copy security structure.
  * @sk_getsid:
  *	Retrieve the LSM-specific sid for the sock to enable caching of network
  *	authorizations.
@@ -1332,6 +1334,7 @@ struct security_operations {
 	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
 	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
 	void (*sk_free_security) (struct sock *sk);
+	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
 	unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
@@ -2885,6 +2888,11 @@ static inline void security_sk_free(struct sock *sk)
 	return security_ops->sk_free_security(sk);
 }
 
+static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
+{
+	return security_ops->sk_clone_security(sk, newsk);
+}
+
 static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
 {
 	return security_ops->sk_getsid(sk, fl, dir);
@@ -3011,6 +3019,10 @@ static inline void security_sk_free(struct sock *sk)
 {
 }
 
+static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+
 static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
 {
 	return 0;
diff --git a/include/net/sock.h b/include/net/sock.h
index 324b3ea233d60..91cdceb3c0284 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -972,6 +972,19 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
+static inline void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+	void *sptr = nsk->sk_security;
+#endif
+
+	memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+	nsk->sk_security = sptr;
+	security_sk_clone(osk, nsk);
+#endif
+}
+
 extern int sock_i_uid(struct sock *sk);
 extern unsigned long sock_i_ino(struct sock *sk);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 51fcfbc041a73..b67d868649cdb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -911,7 +911,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 	if (newsk != NULL) {
 		struct sk_filter *filter;
 
-		memcpy(newsk, sk, sk->sk_prot->obj_size);
+		sock_copy(newsk, sk);
 
 		/* SANITY */
 		sk_node_init(&newsk->sk_node);
diff --git a/security/dummy.c b/security/dummy.c
index 58c6d399c844b..bd3bc5faa9a83 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -805,6 +805,10 @@ static inline void dummy_sk_free_security (struct sock *sk)
 {
 }
 
+static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *newsk)
+{
+}
+
 static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir)
 {
 	return 0;
@@ -1060,6 +1064,7 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, socket_getpeersec_dgram);
 	set_to_dummy_if_null(ops, sk_alloc_security);
 	set_to_dummy_if_null(ops, sk_free_security);
+	set_to_dummy_if_null(ops, sk_clone_security);
 	set_to_dummy_if_null(ops, sk_getsid);
  #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef  CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5d1b8c733199e..d67abf77584ae 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -269,15 +269,13 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority)
 {
 	struct sk_security_struct *ssec;
 
-	if (family != PF_UNIX)
-		return 0;
-
 	ssec = kzalloc(sizeof(*ssec), priority);
 	if (!ssec)
 		return -ENOMEM;
 
 	ssec->sk = sk;
 	ssec->peer_sid = SECINITSID_UNLABELED;
+	ssec->sid = SECINITSID_UNLABELED;
 	sk->sk_security = ssec;
 
 	return 0;
@@ -287,9 +285,6 @@ static void sk_free_security(struct sock *sk)
 {
 	struct sk_security_struct *ssec = sk->sk_security;
 
-	if (sk->sk_family != PF_UNIX)
-		return;
-
 	sk->sk_security = NULL;
 	kfree(ssec);
 }
@@ -3068,6 +3063,7 @@ static void selinux_socket_post_create(struct socket *sock, int family,
 {
 	struct inode_security_struct *isec;
 	struct task_security_struct *tsec;
+	struct sk_security_struct *sksec;
 	u32 newsid;
 
 	isec = SOCK_INODE(sock)->i_security;
@@ -3078,6 +3074,11 @@ static void selinux_socket_post_create(struct socket *sock, int family,
 	isec->sid = kern ? SECINITSID_KERNEL : newsid;
 	isec->initialized = 1;
 
+	if (sock->sk) {
+		sksec = sock->sk->sk_security;
+		sksec->sid = isec->sid;
+	}
+
 	return;
 }
 
@@ -3551,22 +3552,24 @@ static void selinux_sk_free_security(struct sock *sk)
 	sk_free_security(sk);
 }
 
-static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir)
+static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
 {
-	struct inode_security_struct *isec;
-	u32 sock_sid = SECINITSID_ANY_SOCKET;
+	struct sk_security_struct *ssec = sk->sk_security;
+	struct sk_security_struct *newssec = newsk->sk_security;
 
+	newssec->sid = ssec->sid;
+	newssec->peer_sid = ssec->peer_sid;
+}
+
+static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir)
+{
 	if (!sk)
 		return selinux_no_sk_sid(fl);
+	else {
+		struct sk_security_struct *sksec = sk->sk_security;
 
-	read_lock_bh(&sk->sk_callback_lock);
-	isec = get_sock_isec(sk);
-
-	if (isec)
-		sock_sid = isec->sid;
-
-	read_unlock_bh(&sk->sk_callback_lock);
-	return sock_sid;
+		return sksec->sid;
+	}
 }
 
 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
@@ -4618,6 +4621,7 @@ static struct security_operations selinux_ops = {
 	.socket_getpeersec_dgram =	selinux_socket_getpeersec_dgram,
 	.sk_alloc_security =		selinux_sk_alloc_security,
 	.sk_free_security =		selinux_sk_free_security,
+	.sk_clone_security =		selinux_sk_clone_security,
 	.sk_getsid = 			selinux_sk_getsid_security,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 940178865fc78..79b9e0af19a0c 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -99,6 +99,7 @@ struct netif_security_struct {
 
 struct sk_security_struct {
 	struct sock *sk;		/* back pointer to sk object */
+	u32 sid;			/* SID of this object */
 	u32 peer_sid;			/* SID of peer */
 };
 
-- 
GitLab


From b6340fcd761acf9249b3acbc95c4dc555d9beb07 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Mon, 24 Jul 2006 23:28:37 -0700
Subject: [PATCH 0380/1063] [MLSXFRM]: Add security sid to flowi

This adds a security ID (secid) to the flow key, both to label flows and
to make flow cache lookups based on the security label seamless.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/secid.txt | 14 ++++++++++++++
 include/net/flow.h                 |  1 +
 2 files changed, 15 insertions(+)
 create mode 100644 Documentation/networking/secid.txt

diff --git a/Documentation/networking/secid.txt b/Documentation/networking/secid.txt
new file mode 100644
index 0000000000000..95ea067843336
--- /dev/null
+++ b/Documentation/networking/secid.txt
@@ -0,0 +1,14 @@
+flowi structure:
+
+The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate
+the label of the flow. This label of the flow is currently used in selecting
+matching labeled xfrm(s).
+
+If this is an outbound flow, the label is derived from the socket, if any, or
+the incoming packet this flow is being generated as a response to (e.g. tcp
+resets, timewait ack, etc.). It is also conceivable that the label could be
+derived from other sources such as process context, device, etc., in special
+cases, as may be appropriate.
+
+If this is an inbound flow, the label is derived from the IPSec security
+associations, if any, used by the packet.
diff --git a/include/net/flow.h b/include/net/flow.h
index 04d89f7634516..1cee5a83433ab 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -78,6 +78,7 @@ struct flowi {
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+	__u32           secid;	/* used by xfrm; see secid.txt */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 #define FLOW_DIR_IN	0
-- 
GitLab


From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Mon, 24 Jul 2006 23:29:07 -0700
Subject: [PATCH 0381/1063] [MLSXFRM]: Flow based matching of xfrm policy and
 state

This implements a seamless mechanism for xfrm policy selection and
state matching based on the flow sid. This also includes the necessary
SELinux enforcement pieces.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h        | 106 ++++++++++++++---
 include/net/flow.h              |   4 +-
 net/core/flow.c                 |   7 +-
 net/xfrm/xfrm_policy.c          |  28 ++---
 net/xfrm/xfrm_state.c           |  12 +-
 security/dummy.c                |  23 +++-
 security/selinux/hooks.c        |   7 +-
 security/selinux/include/xfrm.h |  23 +++-
 security/selinux/xfrm.c         | 199 ++++++++++++++++++++++++++------
 9 files changed, 329 insertions(+), 80 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 4d7fb59996b03..2c4921d79d19b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -31,6 +31,7 @@
 #include <linux/msg.h>
 #include <linux/sched.h>
 #include <linux/key.h>
+#include <linux/xfrm.h>
 
 struct ctl_table;
 
@@ -825,9 +826,8 @@ struct swap_info_struct;
  *	used by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level policy update program (e.g., setkey).
- *	Allocate a security structure to the xp->security field.
- *	The security field is initialized to NULL when the xfrm_policy is
- *	allocated.
+ *	Allocate a security structure to the xp->security field; the security
+ *	field is initialized to NULL when the xfrm_policy is allocated.
  *	Return 0 if operation was successful (memory to allocate, legal context)
  * @xfrm_policy_clone_security:
  *	@old contains an existing xfrm_policy in the SPD.
@@ -846,9 +846,14 @@ struct swap_info_struct;
  *	Database by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level SA generation program (e.g., setkey or racoon).
- *	Allocate a security structure to the x->security field.  The
- *	security field is initialized to NULL when the xfrm_state is
- *	allocated.
+ *	@polsec contains the security context information associated with a xfrm
+ *	policy rule from which to take the base context. polsec must be NULL
+ *	when sec_ctx is specified.
+ *	@secid contains the secid from which to take the mls portion of the context.
+ *	Allocate a security structure to the x->security field; the security
+ *	field is initialized to NULL when the xfrm_state is allocated. Set the
+ *	context to correspond to either sec_ctx or polsec, with the mls portion
+ *	taken from secid in the latter case.
  *	Return 0 if operation was successful (memory to allocate, legal context).
  * @xfrm_state_free_security:
  *	@x contains the xfrm_state.
@@ -859,13 +864,26 @@ struct swap_info_struct;
  * @xfrm_policy_lookup:
  *	@xp contains the xfrm_policy for which the access control is being
  *	checked.
- *	@sk_sid contains the sock security label that is used to authorize
+ *	@fl_secid contains the flow security label that is used to authorize
  *	access to the policy xp.
  *	@dir contains the direction of the flow (input or output).
- *	Check permission when a sock selects a xfrm_policy for processing
+ *	Check permission when a flow selects a xfrm_policy for processing
  *	XFRMs on a packet.  The hook is called when selecting either a
  *	per-socket policy or a generic xfrm policy.
  *	Return 0 if permission is granted.
+ * @xfrm_state_pol_flow_match:
+ *	@x contains the state to match.
+ *	@xp contains the policy to check for a match.
+ *	@fl contains the flow to check for a match.
+ *	Return 1 if there is a match.
+ * @xfrm_flow_state_match:
+ *	@fl contains the flow key to match.
+ *	@xfrm points to the xfrm_state to match.
+ *	Return 1 if there is a match.
+ * @xfrm_decode_session:
+ *	@skb points to skb to decode.
+ *	@fl points to the flow key to set.
+ *	Return 0 if successful decoding.
  *
  * Security hooks affecting all Key Management operations
  *
@@ -1343,10 +1361,16 @@ struct security_operations {
 	int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new);
 	void (*xfrm_policy_free_security) (struct xfrm_policy *xp);
 	int (*xfrm_policy_delete_security) (struct xfrm_policy *xp);
-	int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_state_alloc_security) (struct xfrm_state *x,
+		struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec,
+		u32 secid);
 	void (*xfrm_state_free_security) (struct xfrm_state *x);
 	int (*xfrm_state_delete_security) (struct xfrm_state *x);
-	int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir);
+	int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
+	int (*xfrm_state_pol_flow_match)(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl);
+	int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm);
+	int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl);
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 	/* key management security hooks */
@@ -3050,9 +3074,18 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp)
 	return security_ops->xfrm_policy_delete_security(xp);
 }
 
-static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static inline int security_xfrm_state_alloc(struct xfrm_state *x,
+			struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0);
+}
+
+static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
+				struct xfrm_sec_ctx *polsec, u32 secid)
 {
-	return security_ops->xfrm_state_alloc_security(x, sec_ctx);
+	if (!polsec)
+		return 0;
+	return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid);
 }
 
 static inline int security_xfrm_state_delete(struct xfrm_state *x)
@@ -3065,9 +3098,25 @@ static inline void security_xfrm_state_free(struct xfrm_state *x)
 	security_ops->xfrm_state_free_security(x);
 }
 
-static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
+{
+	return security_ops->xfrm_policy_lookup(xp, fl_secid, dir);
+}
+
+static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl)
+{
+	return security_ops->xfrm_state_pol_flow_match(x, xp, fl);
+}
+
+static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	return security_ops->xfrm_flow_state_match(fl, xfrm);
+}
+
+static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
 {
-	return security_ops->xfrm_policy_lookup(xp, sk_sid, dir);
+	return security_ops->xfrm_decode_session(skb, fl);
 }
 #else	/* CONFIG_SECURITY_NETWORK_XFRM */
 static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
@@ -3089,7 +3138,14 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp)
 	return 0;
 }
 
-static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static inline int security_xfrm_state_alloc(struct xfrm_state *x,
+					struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return 0;
+}
+
+static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
+					struct xfrm_sec_ctx *polsec, u32 secid)
 {
 	return 0;
 }
@@ -3103,10 +3159,28 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x)
 	return 0;
 }
 
-static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
 {
 	return 0;
 }
+
+static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl)
+{
+	return 1;
+}
+
+static inline int security_xfrm_flow_state_match(struct flowi *fl,
+                                struct xfrm_state *xfrm)
+{
+	return 1;
+}
+
+static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	return 0;
+}
+
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 #ifdef CONFIG_KEYS
diff --git a/include/net/flow.h b/include/net/flow.h
index 1cee5a83433ab..21d988b2058ab 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -86,10 +86,10 @@ struct flowi {
 #define FLOW_DIR_FWD	2
 
 struct sock;
-typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp);
 
-extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 	 		       flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
diff --git a/net/core/flow.c b/net/core/flow.c
index 2191af5f26acb..645241165e6cf 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -32,7 +32,6 @@ struct flow_cache_entry {
 	u8			dir;
 	struct flowi		key;
 	u32			genid;
-	u32			sk_sid;
 	void			*object;
 	atomic_t		*object_ref;
 };
@@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
 	struct flow_cache_entry *fle, **head;
@@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
-		    fle->sk_sid == sk_sid &&
 		    flow_key_compare(key, &fle->key) == 0) {
 			if (fle->genid == atomic_read(&flow_cache_genid)) {
 				void *ret = fle->object;
@@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
-			fle->sk_sid = sk_sid;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
 			flow_count(cpu)++;
@@ -226,7 +223,7 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 		void *obj;
 		atomic_t *obj_ref;
 
-		resolver(key, sk_sid, family, dir, &obj, &obj_ref);
+		resolver(key, family, dir, &obj, &obj_ref);
 
 		if (fle) {
 			fle->genid = atomic_read(&flow_cache_genid);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 3da67ca2c3ce9..79405daadc52b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -597,7 +597,7 @@ EXPORT_SYMBOL(xfrm_policy_walk);
 
 /* Find policy to apply to this flow. */
 
-static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp)
 {
 	struct xfrm_policy *pol;
@@ -613,7 +613,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
 		match = xfrm_selector_match(sel, fl, family);
 
 		if (match) {
- 			if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
+ 			if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) {
 				xfrm_pol_hold(pol);
 				break;
 			}
@@ -641,7 +641,7 @@ static inline int policy_to_flow_dir(int dir)
 	};
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
 {
 	struct xfrm_policy *pol;
 
@@ -652,7 +652,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
  		int err = 0;
 
 		if (match)
-		  err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
+		  err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir));
 
  		if (match && !err)
 			xfrm_pol_hold(pol);
@@ -862,19 +862,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 	u32 genid;
 	u16 family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
-	u32 sk_sid = security_sk_sid(sk, fl, dir);
+
+	fl->secid = security_sk_sid(sk, fl, dir);
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
 	if (sk && sk->sk_policy[1])
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
 
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
 			return 0;
 
-		policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
+		policy = flow_cache_lookup(fl, dst_orig->ops->family,
 					   dir, xfrm_policy_lookup);
 	}
 
@@ -1032,13 +1033,15 @@ int
 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int err;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl);
+	err = security_xfrm_decode_session(skb, fl);
 	xfrm_policy_put_afinfo(afinfo);
-	return 0;
+	return err;
 }
 EXPORT_SYMBOL(xfrm_decode_session);
 
@@ -1058,14 +1061,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	struct xfrm_policy *pol;
 	struct flowi fl;
 	u8 fl_dir = policy_to_flow_dir(dir);
-	u32 sk_sid;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
 	nf_nat_decode_session(skb, &fl, family);
 
-	sk_sid = security_sk_sid(sk, &fl, fl_dir);
-
 	/* First, check used SA against their selectors. */
 	if (skb->sp) {
 		int i;
@@ -1079,10 +1079,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 	pol = NULL;
 	if (sk && sk->sk_policy[dir])
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
 
 	if (!pol)
-		pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
+		pol = flow_cache_lookup(&fl, family, fl_dir,
 					xfrm_policy_lookup);
 
 	if (!pol)
@@ -1298,6 +1298,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
 
 		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
 			return 0;
+		if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm))
+			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad5db43c..be02bd981d12f 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -367,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			 */
 			if (x->km.state == XFRM_STATE_VALID) {
 				if (!xfrm_selector_match(&x->sel, fl, family) ||
-				    !xfrm_sec_ctx_match(pol->security, x->security))
+				    !security_xfrm_state_pol_flow_match(x, pol, fl))
 					continue;
 				if (!best ||
 				    best->km.dying > x->km.dying ||
@@ -379,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			} else if (x->km.state == XFRM_STATE_ERROR ||
 				   x->km.state == XFRM_STATE_EXPIRED) {
  				if (xfrm_selector_match(&x->sel, fl, family) &&
-				    xfrm_sec_ctx_match(pol->security, x->security))
+				    security_xfrm_state_pol_flow_match(x, pol, fl))
 					error = -ESRCH;
 			}
 		}
@@ -403,6 +403,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		 * to current session. */
 		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
 
+		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
+		if (error) {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			goto out;
+		}
+
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
 			list_add_tail(&x->bydst, xfrm_state_bydst+h);
diff --git a/security/dummy.c b/security/dummy.c
index bd3bc5faa9a83..c1f10654871e8 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -835,7 +835,8 @@ static int dummy_xfrm_policy_delete_security(struct xfrm_policy *xp)
 	return 0;
 }
 
-static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static int dummy_xfrm_state_alloc_security(struct xfrm_state *x,
+	struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid)
 {
 	return 0;
 }
@@ -853,6 +854,23 @@ static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
 {
 	return 0;
 }
+
+static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x,
+				struct xfrm_policy *xp, struct flowi *fl)
+{
+	return 1;
+}
+
+static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	return 1;
+}
+
+static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SECURITY_NETWORK_XFRM */
 static int dummy_register_security (const char *name, struct security_operations *ops)
 {
@@ -1076,6 +1094,9 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, xfrm_state_free_security);
 	set_to_dummy_if_null(ops, xfrm_state_delete_security);
 	set_to_dummy_if_null(ops, xfrm_policy_lookup);
+	set_to_dummy_if_null(ops, xfrm_state_pol_flow_match);
+	set_to_dummy_if_null(ops, xfrm_flow_state_match);
+	set_to_dummy_if_null(ops, xfrm_decode_session);
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 #ifdef CONFIG_KEYS
 	set_to_dummy_if_null(ops, key_alloc);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d67abf77584ae..5c189da07bc9a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3468,7 +3468,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_sock_rcv_skb(sock_sid, skb);
+	err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad);
 out:	
 	return err;
 }
@@ -3720,7 +3720,7 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum,
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_postroute_last(isec->sid, skb);
+	err = selinux_xfrm_postroute_last(isec->sid, skb, &ad);
 out:
 	return err ? NF_DROP : NF_ACCEPT;
 }
@@ -4633,6 +4633,9 @@ static struct security_operations selinux_ops = {
 	.xfrm_state_free_security =	selinux_xfrm_state_free,
 	.xfrm_state_delete_security =	selinux_xfrm_state_delete,
 	.xfrm_policy_lookup = 		selinux_xfrm_policy_lookup,
+	.xfrm_state_pol_flow_match =	selinux_xfrm_state_pol_flow_match,
+	.xfrm_flow_state_match =	selinux_xfrm_flow_state_match,
+	.xfrm_decode_session =		selinux_xfrm_decode_session,
 #endif
 
 #ifdef CONFIG_KEYS
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index c96498a10eb8b..f51a3e84bd9b2 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -2,6 +2,7 @@
  * SELinux support for the XFRM LSM hooks
  *
  * Author : Trent Jaeger, <jaegert@us.ibm.com>
+ * Updated : Venkat Yekkirala, <vyekkirala@TrustedCS.com>
  */
 #ifndef _SELINUX_XFRM_H_
 #define _SELINUX_XFRM_H_
@@ -10,10 +11,16 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *
 int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new);
 void selinux_xfrm_policy_free(struct xfrm_policy *xp);
 int selinux_xfrm_policy_delete(struct xfrm_policy *xp);
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
+int selinux_xfrm_state_alloc(struct xfrm_state *x,
+	struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid);
 void selinux_xfrm_state_free(struct xfrm_state *x);
 int selinux_xfrm_state_delete(struct xfrm_state *x);
-int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir);
+int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl);
+int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm);
+int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl);
+
 
 /*
  * Extract the security blob from the sock (it's actually on the socket)
@@ -39,17 +46,21 @@ static inline u32 selinux_no_sk_sid(struct flowi *fl)
 }
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb);
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb);
+int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
+			struct avc_audit_data *ad);
+int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad);
 u32 selinux_socket_getpeer_stream(struct sock *sk);
 u32 selinux_socket_getpeer_dgram(struct sk_buff *skb);
 #else
-static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb)
+static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad)
 {
 	return 0;
 }
 
-static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb)
+static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad)
 {
 	return 0;
 }
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 6c985ced81027..a502b0540e3d7 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -6,7 +6,12 @@
  *  Authors:  Serge Hallyn <sergeh@us.ibm.com>
  *	      Trent Jaeger <jaegert@us.ibm.com>
  *
+ *  Updated: Venkat Yekkirala <vyekkirala@TrustedCS.com>
+ *
+ *           Granular IPSec Associations for use in MLS environments.
+ *
  *  Copyright (C) 2005 International Business Machines Corporation
+ *  Copyright (C) 2006 Trusted Computer Solutions, Inc.
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2,
@@ -67,10 +72,10 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
 }
 
 /*
- * LSM hook implementation that authorizes that a socket can be used
- * with the corresponding xfrm_sec_ctx and direction.
+ * LSM hook implementation that authorizes that a flow can use
+ * a xfrm policy rule.
  */
-int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
 {
 	int rc = 0;
 	u32 sel_sid = SECINITSID_UNLABELED;
@@ -84,27 +89,129 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
 		sel_sid = ctx->ctx_sid;
 	}
 
-	rc = avc_has_perm(sk_sid, sel_sid, SECCLASS_ASSOCIATION,
-			  ((dir == FLOW_DIR_IN) ? ASSOCIATION__RECVFROM :
-			   ((dir == FLOW_DIR_OUT) ?  ASSOCIATION__SENDTO :
-			    (ASSOCIATION__SENDTO | ASSOCIATION__RECVFROM))),
+	rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__POLMATCH,
 			  NULL);
 
 	return rc;
 }
 
+/*
+ * LSM hook implementation that authorizes that a state matches
+ * the given policy, flow combo.
+ */
+
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp,
+			struct flowi *fl)
+{
+	u32 state_sid;
+	u32 pol_sid;
+	int err;
+
+	if (x->security)
+		state_sid = x->security->ctx_sid;
+	else
+		state_sid = SECINITSID_UNLABELED;
+
+	if (xp->security)
+		pol_sid = xp->security->ctx_sid;
+	else
+		pol_sid = SECINITSID_UNLABELED;
+
+	err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__POLMATCH,
+			  NULL);
+
+	if (err)
+		return 0;
+
+	return selinux_xfrm_flow_state_match(fl, x);
+}
+
+/*
+ * LSM hook implementation that authorizes that a particular outgoing flow
+ * can use a given security association.
+ */
+
+int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	int rc = 0;
+	u32 sel_sid = SECINITSID_UNLABELED;
+	struct xfrm_sec_ctx *ctx;
+
+	/* Context sid is either set to label or ANY_ASSOC */
+	if ((ctx = xfrm->security)) {
+		if (!selinux_authorizable_ctx(ctx))
+			return 0;
+
+		sel_sid = ctx->ctx_sid;
+	}
+
+	rc = avc_has_perm(fl->secid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__SENDTO,
+			  NULL)? 0:1;
+
+	return rc;
+}
+
+/*
+ * LSM hook implementation that determines the sid for the session.
+ */
+
+int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	struct sec_path *sp;
+
+	fl->secid = SECSID_NULL;
+
+	if (skb == NULL)
+		return 0;
+
+	sp = skb->sp;
+	if (sp) {
+		int i, sid_set = 0;
+
+		for (i = sp->len-1; i >= 0; i--) {
+			struct xfrm_state *x = sp->xvec[i];
+			if (selinux_authorizable_xfrm(x)) {
+				struct xfrm_sec_ctx *ctx = x->security;
+
+				if (!sid_set) {
+					fl->secid = ctx->ctx_sid;
+					sid_set = 1;
+				}
+				else if (fl->secid != ctx->ctx_sid)
+					return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Security blob allocation for xfrm_policy and xfrm_state
  * CTX does not have a meaningful value on input
  */
-static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx)
+static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
+	struct xfrm_user_sec_ctx *uctx, struct xfrm_sec_ctx *pol, u32 sid)
 {
 	int rc = 0;
 	struct task_security_struct *tsec = current->security;
-	struct xfrm_sec_ctx *ctx;
+	struct xfrm_sec_ctx *ctx = NULL;
+	char *ctx_str = NULL;
+	u32 str_len;
+	u32 ctx_sid;
+
+	BUG_ON(uctx && pol);
+
+	if (pol)
+		goto from_policy;
 
 	BUG_ON(!uctx);
-	BUG_ON(uctx->ctx_doi != XFRM_SC_ALG_SELINUX);
+
+	if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX)
+		return -EINVAL;
 
 	if (uctx->ctx_len >= PAGE_SIZE)
 		return -ENOMEM;
@@ -141,9 +248,41 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_us
 
 	return rc;
 
+from_policy:
+	BUG_ON(!pol);
+	rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid);
+	if (rc)
+		goto out;
+
+	rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len);
+	if (rc)
+		goto out;
+
+	*ctxp = ctx = kmalloc(sizeof(*ctx) +
+			      str_len,
+			      GFP_ATOMIC);
+
+	if (!ctx) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+
+	ctx->ctx_doi = XFRM_SC_DOI_LSM;
+	ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+	ctx->ctx_sid = ctx_sid;
+	ctx->ctx_len = str_len;
+	memcpy(ctx->ctx_str,
+	       ctx_str,
+	       str_len);
+
+	goto out2;
+
 out:
 	*ctxp = NULL;
 	kfree(ctx);
+out2:
+	kfree(ctx_str);
 	return rc;
 }
 
@@ -157,7 +296,7 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *
 
 	BUG_ON(!xp);
 
-	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx);
+	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0);
 	return err;
 }
 
@@ -217,13 +356,14 @@ int selinux_xfrm_policy_delete(struct xfrm_policy *xp)
  * LSM hook implementation that allocs and transfers sec_ctx spec to
  * xfrm_state.
  */
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx)
+int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx,
+		struct xfrm_sec_ctx *pol, u32 secid)
 {
 	int err;
 
 	BUG_ON(!x);
 
-	err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx);
+	err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, pol, secid);
 	return err;
 }
 
@@ -329,38 +469,30 @@ int selinux_xfrm_state_delete(struct xfrm_state *x)
  * we need to check for unlabelled access since this may not have
  * gone thru the IPSec process.
  */
-int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb)
+int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
+				struct avc_audit_data *ad)
 {
 	int i, rc = 0;
 	struct sec_path *sp;
+	u32 sel_sid = SECINITSID_UNLABELED;
 
 	sp = skb->sp;
 
 	if (sp) {
-		/*
-		 * __xfrm_policy_check does not approve unless xfrm_policy_ok
-		 * says that spi's match for policy and the socket.
-		 *
-		 *  Only need to verify the existence of an authorizable sp.
-		 */
 		for (i = 0; i < sp->len; i++) {
 			struct xfrm_state *x = sp->xvec[i];
 
-			if (x && selinux_authorizable_xfrm(x))
-				goto accept;
+			if (x && selinux_authorizable_xfrm(x)) {
+				struct xfrm_sec_ctx *ctx = x->security;
+				sel_sid = ctx->ctx_sid;
+				break;
+			}
 		}
 	}
 
-	/* check SELinux sock for unlabelled access */
-	rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-			  ASSOCIATION__RECVFROM, NULL);
-	if (rc)
-		goto drop;
-
-accept:
-	return 0;
+	rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__RECVFROM, ad);
 
-drop:
 	return rc;
 }
 
@@ -371,7 +503,8 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb)
  * If we do have a authorizable security association, then it has already been
  * checked in xfrm_policy_lookup hook.
  */
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb)
+int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+					struct avc_audit_data *ad)
 {
 	struct dst_entry *dst;
 	int rc = 0;
@@ -391,7 +524,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb)
 	}
 
 	rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-			  ASSOCIATION__SENDTO, NULL);
+			  ASSOCIATION__SENDTO, ad);
 out:
 	return rc;
 }
-- 
GitLab


From 0d681623d30c6565e8b62889f3aa3f4d4662c3e8 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serue@us.ibm.com>
Date: Mon, 24 Jul 2006 23:30:44 -0700
Subject: [PATCH 0382/1063] [MLSXFRM]: Add security context to acquire messages
 using netlink

This includes the security context of a security association created
for use by IKE in the acquire messages sent to IKE daemons using
netlink/xfrm_user. This would allow the daemons to include the
security context in the negotiation, so that the resultant association
is unique to that security context.

Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 43 ++++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fa79ddc4239e0..dac8db1088bcd 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -911,25 +911,38 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
 	return -1;
 }
 
-static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
 {
-	if (xp->security) {
-		int ctx_size = sizeof(struct xfrm_sec_ctx) +
-				xp->security->ctx_len;
-		struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
-		struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+	int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len;
+	struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+	struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+	uctx->exttype = XFRMA_SEC_CTX;
+	uctx->len = ctx_size;
+	uctx->ctx_doi = s->ctx_doi;
+	uctx->ctx_alg = s->ctx_alg;
+	uctx->ctx_len = s->ctx_len;
+	memcpy(uctx + 1, s->ctx_str, s->ctx_len);
+ 	return 0;
 
-		uctx->exttype = XFRMA_SEC_CTX;
-		uctx->len = ctx_size;
-		uctx->ctx_doi = xp->security->ctx_doi;
-		uctx->ctx_alg = xp->security->ctx_alg;
-		uctx->ctx_len = xp->security->ctx_len;
-		memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len);
+ rtattr_failure:
+	return -1;
+}
+
+static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb)
+{
+	if (x->security) {
+		return copy_sec_ctx(x->security, skb);
 	}
 	return 0;
+}
 
- rtattr_failure:
-	return -1;
+static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	if (xp->security) {
+		return copy_sec_ctx(xp->security, skb);
+	}
+	return 0;
 }
 
 static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
@@ -1710,7 +1723,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
-	if (copy_to_user_sec_ctx(xp, skb))
+	if (copy_to_user_state_sec_ctx(x, skb))
 		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
-- 
GitLab


From 4e2ba18eae7f370c7c3ed96eaca747cc9b39f917 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Mon, 24 Jul 2006 23:31:14 -0700
Subject: [PATCH 0383/1063] [MLSXFRM]: Add security context to acquire messages
 using PF_KEY

This includes the security context of a security association created
for use by IKE in the acquire messages sent to IKE daemons using
PF_KEY. This would allow the daemons to include the security context
in the negotiation, so that the resultant association is unique to
that security context.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3a95b2ee46906..a065e1a67773f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2708,6 +2708,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 #endif
 	int sockaddr_size;
 	int size;
+	struct sadb_x_sec_ctx *sec_ctx;
+	struct xfrm_sec_ctx *xfrm_ctx;
+	int ctx_size = 0;
 	
 	sockaddr_size = pfkey_sockaddr_size(x->props.family);
 	if (!sockaddr_size)
@@ -2723,6 +2726,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	else if (x->id.proto == IPPROTO_ESP)
 		size += count_esp_combs(t);
 
+	if ((xfrm_ctx = x->security)) {
+		ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
+		size +=  sizeof(struct sadb_x_sec_ctx) + ctx_size;
+	}
+
 	skb =  alloc_skb(size + 16, GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
@@ -2818,6 +2826,20 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	else if (x->id.proto == IPPROTO_ESP)
 		dump_esp_combs(skb, t);
 
+	/* security context */
+	if (xfrm_ctx) {
+		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
+				sizeof(struct sadb_x_sec_ctx) + ctx_size);
+		sec_ctx->sadb_x_sec_len =
+		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
+		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+		       xfrm_ctx->ctx_len);
+	}
+
 	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
 }
 
-- 
GitLab


From beb8d13bed80f8388f1a9a107d07ddd342e627e8 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Fri, 4 Aug 2006 23:12:42 -0700
Subject: [PATCH 0384/1063] [MLSXFRM]: Add flow labeling

This labels the flows that could utilize IPSec xfrms at the points the
flows are defined so that IPSec policy and SAs at the right label can
be used.

The following protos are currently not handled, but they should
continue to be able to use single-labeled IPSec like they currently
do.

ipmr
ip_gre
ipip
igmp
sit
sctp
ip6_tunnel (IPv6 over IPv6 tunnel device)
decnet

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h         | 38 +++++++++++++++++++++-----------
 include/net/route.h              |  3 +++
 net/dccp/ipv4.c                  |  1 +
 net/dccp/ipv6.c                  |  6 +++++
 net/ipv4/af_inet.c               |  1 +
 net/ipv4/icmp.c                  |  2 ++
 net/ipv4/inet_connection_sock.c  |  1 +
 net/ipv4/ip_output.c             |  2 ++
 net/ipv4/netfilter/ipt_REJECT.c  |  1 +
 net/ipv4/raw.c                   |  1 +
 net/ipv4/syncookies.c            |  1 +
 net/ipv4/udp.c                   |  1 +
 net/ipv6/af_inet6.c              |  1 +
 net/ipv6/datagram.c              |  2 ++
 net/ipv6/icmp.c                  |  2 ++
 net/ipv6/inet6_connection_sock.c |  1 +
 net/ipv6/ndisc.c                 |  1 +
 net/ipv6/netfilter/ip6t_REJECT.c |  1 +
 net/ipv6/raw.c                   |  1 +
 net/ipv6/tcp_ipv6.c              |  7 ++++++
 net/ipv6/udp.c                   |  2 ++
 net/xfrm/xfrm_policy.c           |  3 +--
 security/dummy.c                 |  7 +++---
 security/selinux/hooks.c         |  8 +++----
 security/selinux/include/xfrm.h  | 14 +-----------
 security/selinux/xfrm.c          | 11 +++++----
 26 files changed, 79 insertions(+), 40 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 2c4921d79d19b..f3909d189fe07 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/key.h>
 #include <linux/xfrm.h>
+#include <net/flow.h>
 
 struct ctl_table;
 
@@ -815,8 +816,8 @@ struct swap_info_struct;
  *	Deallocate security structure.
  * @sk_clone_security:
  *	Clone/copy security structure.
- * @sk_getsid:
- *	Retrieve the LSM-specific sid for the sock to enable caching of network
+ * @sk_getsecid:
+ *	Retrieve the LSM-specific secid for the sock to enable caching of network
  *	authorizations.
  *
  * Security hooks for XFRM operations.
@@ -882,8 +883,9 @@ struct swap_info_struct;
  *	Return 1 if there is a match.
  * @xfrm_decode_session:
  *	@skb points to skb to decode.
- *	@fl points to the flow key to set.
- *	Return 0 if successful decoding.
+ *	@secid points to the flow key secid to set.
+ *	@ckall says if all xfrms used should be checked for same secid.
+ *	Return 0 if ckall is zero or all xfrms used have the same secid.
  *
  * Security hooks affecting all Key Management operations
  *
@@ -1353,7 +1355,7 @@ struct security_operations {
 	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
 	void (*sk_free_security) (struct sock *sk);
 	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
-	unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir);
+	void (*sk_getsecid) (struct sock *sk, u32 *secid);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1370,7 +1372,7 @@ struct security_operations {
 	int (*xfrm_state_pol_flow_match)(struct xfrm_state *x,
 			struct xfrm_policy *xp, struct flowi *fl);
 	int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm);
-	int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl);
+	int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall);
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 	/* key management security hooks */
@@ -2917,9 +2919,9 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
 	return security_ops->sk_clone_security(sk, newsk);
 }
 
-static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
 {
-	return security_ops->sk_getsid(sk, fl, dir);
+	security_ops->sk_getsecid(sk, &fl->secid);
 }
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct socket * sock,
@@ -3047,9 +3049,8 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
 {
 }
 
-static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
 {
-	return 0;
 }
 #endif	/* CONFIG_SECURITY_NETWORK */
 
@@ -3114,9 +3115,16 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_s
 	return security_ops->xfrm_flow_state_match(fl, xfrm);
 }
 
-static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
+static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
+{
+	return security_ops->xfrm_decode_session(skb, secid, 1);
+}
+
+static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl)
 {
-	return security_ops->xfrm_decode_session(skb, fl);
+	int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0);
+
+	BUG_ON(rc);
 }
 #else	/* CONFIG_SECURITY_NETWORK_XFRM */
 static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
@@ -3176,11 +3184,15 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl,
 	return 1;
 }
 
-static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
+static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
 {
 	return 0;
 }
 
+static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl)
+{
+}
+
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 #ifdef CONFIG_KEYS
diff --git a/include/net/route.h b/include/net/route.h
index c4a068692dcc5..7f93ac0e08998 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -32,6 +32,7 @@
 #include <linux/route.h>
 #include <linux/ip.h>
 #include <linux/cache.h>
+#include <linux/security.h>
 
 #ifndef __KERNEL__
 #warning This file is not supposed to be used outside of kernel.
@@ -166,6 +167,7 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst,
 		ip_rt_put(*rp);
 		*rp = NULL;
 	}
+	security_sk_classify_flow(sk, &fl);
 	return ip_route_output_flow(rp, &fl, sk, 0);
 }
 
@@ -182,6 +184,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol,
 		fl.proto = protocol;
 		ip_rt_put(*rp);
 		*rp = NULL;
+		security_sk_classify_flow(sk, &fl);
 		return ip_route_output_flow(rp, &fl, sk, 0);
 	}
 	return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7f56f7e8f5716..386498053b1c0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -678,6 +678,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
 			   	     }
 			  };
 
+	security_skb_classify_flow(skb, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 610c722ac27f0..53d255c014310 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_dport = usin->sin6_port;
 	fl.fl_ip_sport = inet->sport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt != NULL && np->opt->srcrt != NULL) {
 		const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			fl.oif = sk->sk_bound_dev_if;
 			fl.fl_ip_dport = inet->dport;
 			fl.fl_ip_sport = inet->sport;
+			security_sk_classify_flow(sk, &fl);
 
 			err = ip6_dst_lookup(sk, &dst, &fl);
 			if (err) {
@@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
 	fl.oif = inet6_iif(rxskb);
 	fl.fl_ip_dport = dh->dccph_dport;
 	fl.fl_ip_sport = dh->dccph_sport;
+	security_skb_classify_flow(rxskb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
@@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb,
 	fl.oif = inet6_iif(rxskb);
 	fl.fl_ip_dport = dh->dccph_dport;
 	fl.fl_ip_sport = dh->dccph_sport;
+	security_skb_classify_flow(rxskb, &fl);
 
 	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
 		if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
@@ -842,6 +847,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
+		security_sk_classify_flow(sk, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c84a32070f8d0..fc40da3b6d399 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1074,6 +1074,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 		},
 	};
 						
+	security_sk_classify_flow(sk, &fl);
 	err = ip_route_output_flow(&rt, &fl, sk, 0);
 }
 	if (!err)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4c86ac3d882de..6ad797c14163e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 						.saddr = rt->rt_spec_dst,
 						.tos = RT_TOS(skb->nh.iph->tos) } },
 				    .proto = IPPROTO_ICMP };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
@@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 				}
 			}
 		};
+		security_skb_classify_flow(skb_in, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e50a1bfd7ccc2..772b4eac78bcb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 				       { .sport = inet_sk(sk)->sport,
 					 .dport = ireq->rmt_port } } };
 
+	security_sk_classify_flow(sk, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a2ede167e045b..308bdeac3455a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 			 * keep trying until route appears or the connection times
 			 * itself out.
 			 */
+			security_sk_classify_flow(sk, &fl);
 			if (ip_route_output_flow(&rt, &fl, sk, 0))
 				goto no_route;
 		}
@@ -1366,6 +1367,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 					       { .sport = skb->h.th->dest,
 					         .dport = skb->h.th->source } },
 				    .proto = sk->sk_protocol };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			return;
 	}
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 269bc2067cb80..7f905bf2bde57 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
 	fl.proto = IPPROTO_TCP;
 	fl.fl_ip_sport = tcph->dest;
 	fl.fl_ip_dport = tcph->source;
+	security_skb_classify_flow(skb, &fl);
 
 	xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 62b2762a2420b..fe44cb50a1c52 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -484,6 +484,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		if (!inet->hdrincl)
 			raw_probe_proto_opt(&fl, msg);
 
+		security_sk_classify_flow(sk, &fl);
 		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 	}
 	if (err)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e20be3331f674..307dc3c0d635d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -259,6 +259,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 				    .uli_u = { .ports =
 					       { .sport = skb->h.th->dest,
 						 .dport = skb->h.th->source } } };
+		security_sk_classify_flow(sk, &fl);
 		if (ip_route_output_key(&rt, &fl)) {
 			reqsk_free(req);
 			goto out; 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f136cec96d959..a4d005eccc7f6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -603,6 +603,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				    .uli_u = { .ports =
 					       { .sport = inet->sport,
 						 .dport = dport } } };
+		security_sk_classify_flow(sk, &fl);
 		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 		if (err)
 			goto out;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ac85e9c532c20..82a1b1a328dbf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -637,6 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet->dport;
 		fl.fl_ip_sport = inet->sport;
+		security_sk_classify_flow(sk, &fl);
 
 		if (np->opt && np->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b55b4c8e2d18..c73508e090a64 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -156,6 +156,8 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl.oif = np->mcast_oif;
 
+	security_sk_classify_flow(sk, &fl);
+
 	if (flowlabel) {
 		if (flowlabel->opt && flowlabel->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 356a8a7ef22af..dbfce089e916d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -358,6 +358,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	fl.oif = iif;
 	fl.fl_icmp_type = type;
 	fl.fl_icmp_code = code;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -472,6 +473,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
 	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index bf491077b8221..7a51a258615d5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_sport = inet->sport;
 	fl.fl_ip_dport = inet->dport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b50055b9278d8..67cfc3813c32d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -419,6 +419,7 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
 	fl->proto	 	= IPPROTO_ICMPV6;
 	fl->fl_icmp_type	= type;
 	fl->fl_icmp_code	= 0;
+	security_sk_classify_flow(ndisc_socket->sk, fl);
 }
 
 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 8629ba195d2d8..c4eba1aeb3233 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb)
 	ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
 	fl.fl_ip_sport = otcph.dest;
 	fl.fl_ip_dport = otcph.source;
+	security_skb_classify_flow(oldskb, &fl);
 	dst = ip6_route_output(NULL, &fl);
 	if (dst == NULL)
 		return;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 15b862d8acabc..d5040e172292e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -759,6 +759,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
+	security_sk_classify_flow(sk, &fl);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 802a1a6b10376..46922e57e311c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		final_p = &final;
 	}
 
+	security_sk_classify_flow(sk, &fl);
+
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto failure;
@@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			fl.oif = sk->sk_bound_dev_if;
 			fl.fl_ip_dport = inet->dport;
 			fl.fl_ip_sport = inet->sport;
+			security_skb_classify_flow(skb, &fl);
 
 			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
 				sk->sk_err_soft = -err;
@@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
 	fl.oif = inet6_iif(skb);
 	fl.fl_ip_dport = t1->dest;
 	fl.fl_ip_sport = t1->source;
+	security_skb_classify_flow(skb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
@@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
 	fl.oif = inet6_iif(skb);
 	fl.fl_ip_dport = t1->dest;
 	fl.fl_ip_sport = t1->source;
+	security_skb_classify_flow(skb, &fl);
 
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
 		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
@@ -923,6 +929,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
+		security_sk_classify_flow(sk, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d54f246411e9..82c7c9cde2a86 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -782,6 +782,8 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		connected = 0;
 	}
 
+	security_sk_classify_flow(sk, fl);
+
 	err = ip6_sk_dst_lookup(sk, &dst, fl);
 	if (err)
 		goto out;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 79405daadc52b..32c963c905735 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -863,7 +863,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 	u16 family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
 
-	fl->secid = security_sk_sid(sk, fl, dir);
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
@@ -1039,7 +1038,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl);
-	err = security_xfrm_decode_session(skb, fl);
+	err = security_xfrm_decode_session(skb, &fl->secid);
 	xfrm_policy_put_afinfo(afinfo);
 	return err;
 }
diff --git a/security/dummy.c b/security/dummy.c
index c1f10654871e8..c0ff6b9bfd7dc 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -809,9 +809,8 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *
 {
 }
 
-static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid)
 {
-	return 0;
 }
 #endif	/* CONFIG_SECURITY_NETWORK */
 
@@ -866,7 +865,7 @@ static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm
 	return 1;
 }
 
-static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
+static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall)
 {
 	return 0;
 }
@@ -1083,7 +1082,7 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, sk_alloc_security);
 	set_to_dummy_if_null(ops, sk_free_security);
 	set_to_dummy_if_null(ops, sk_clone_security);
-	set_to_dummy_if_null(ops, sk_getsid);
+	set_to_dummy_if_null(ops, sk_getsecid);
  #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef  CONFIG_SECURITY_NETWORK_XFRM
 	set_to_dummy_if_null(ops, xfrm_policy_alloc_security);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5c189da07bc9a..4e5989d584ce4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3561,14 +3561,14 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
 	newssec->peer_sid = ssec->peer_sid;
 }
 
-static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir)
+static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
 {
 	if (!sk)
-		return selinux_no_sk_sid(fl);
+		*secid = SECINITSID_ANY_SOCKET;
 	else {
 		struct sk_security_struct *sksec = sk->sk_security;
 
-		return sksec->sid;
+		*secid = sksec->sid;
 	}
 }
 
@@ -4622,7 +4622,7 @@ static struct security_operations selinux_ops = {
 	.sk_alloc_security =		selinux_sk_alloc_security,
 	.sk_free_security =		selinux_sk_free_security,
 	.sk_clone_security =		selinux_sk_clone_security,
-	.sk_getsid = 			selinux_sk_getsid_security,
+	.sk_getsecid = 			selinux_sk_getsecid,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index f51a3e84bd9b2..8e45c1d588a80 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -19,7 +19,7 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
 int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
 			struct xfrm_policy *xp, struct flowi *fl);
 int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm);
-int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl);
+int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall);
 
 
 /*
@@ -33,18 +33,6 @@ static inline struct inode_security_struct *get_sock_isec(struct sock *sk)
 	return SOCK_INODE(sk->sk_socket)->i_security;
 }
 
-
-static inline u32 selinux_no_sk_sid(struct flowi *fl)
-{
-	/* NOTE: no sock occurs on ICMP reply, forwards, ... */
-	/* icmp_reply: authorize as kernel packet */
-	if (fl && fl->proto == IPPROTO_ICMP) {
-		return SECINITSID_KERNEL;
-	}
-
-	return SECINITSID_ANY_SOCKET;
-}
-
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
 			struct avc_audit_data *ad);
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index a502b0540e3d7..c750ef7af66f0 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -158,11 +158,11 @@ int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
  * LSM hook implementation that determines the sid for the session.
  */
 
-int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
+int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
 {
 	struct sec_path *sp;
 
-	fl->secid = SECSID_NULL;
+	*sid = SECSID_NULL;
 
 	if (skb == NULL)
 		return 0;
@@ -177,10 +177,13 @@ int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl)
 				struct xfrm_sec_ctx *ctx = x->security;
 
 				if (!sid_set) {
-					fl->secid = ctx->ctx_sid;
+					*sid = ctx->ctx_sid;
 					sid_set = 1;
+
+					if (!ckall)
+						break;
 				}
-				else if (fl->secid != ctx->ctx_sid)
+				else if (*sid != ctx->ctx_sid)
 					return -EINVAL;
 			}
 		}
-- 
GitLab


From cb969f072b6d67770b559617f14e767f47e77ece Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Mon, 24 Jul 2006 23:32:20 -0700
Subject: [PATCH 0385/1063] [MLSXFRM]: Default labeling of socket specific
 IPSec policies

This defaults the label of socket-specific IPSec policies to be the
same as that of the socket they are set on.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h        | 19 ++++++++++++++++---
 include/net/xfrm.h              |  2 +-
 net/key/af_key.c                | 15 +++++++++++----
 net/xfrm/xfrm_state.c           |  2 +-
 net/xfrm/xfrm_user.c            | 13 +++++++++++--
 security/dummy.c                |  3 ++-
 security/selinux/include/xfrm.h |  3 ++-
 security/selinux/xfrm.c         | 33 ++++++++++++++++++++++-----------
 8 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index f3909d189fe07..8e3dc6c51a6d8 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -827,8 +827,10 @@ struct swap_info_struct;
  *	used by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level policy update program (e.g., setkey).
+ *	@sk refers to the sock from which to derive the security context.
  *	Allocate a security structure to the xp->security field; the security
- *	field is initialized to NULL when the xfrm_policy is allocated.
+ *	field is initialized to NULL when the xfrm_policy is allocated. Only
+ *	one of sec_ctx or sock can be specified.
  *	Return 0 if operation was successful (memory to allocate, legal context)
  * @xfrm_policy_clone_security:
  *	@old contains an existing xfrm_policy in the SPD.
@@ -1359,7 +1361,8 @@ struct security_operations {
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-	int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp,
+			struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk);
 	int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new);
 	void (*xfrm_policy_free_security) (struct xfrm_policy *xp);
 	int (*xfrm_policy_delete_security) (struct xfrm_policy *xp);
@@ -3057,7 +3060,12 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
 {
-	return security_ops->xfrm_policy_alloc_security(xp, sec_ctx);
+	return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL);
+}
+
+static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk)
+{
+	return security_ops->xfrm_policy_alloc_security(xp, NULL, sk);
 }
 
 static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
@@ -3132,6 +3140,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm
 	return 0;
 }
 
+static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk)
+{
+	return 0;
+}
+
 static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
 {
 	return 0;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3ecd9fa1ed4b9..00bf86e6e82b1 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -362,7 +362,7 @@ struct xfrm_mgr
 	char			*id;
 	int			(*notify)(struct xfrm_state *x, struct km_event *c);
 	int			(*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir);
-	struct xfrm_policy	*(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir);
+	struct xfrm_policy	*(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c);
 };
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a065e1a67773f..797c744a84382 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2843,14 +2843,14 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
 }
 
-static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
+static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
                                                 u8 *data, int len, int *dir)
 {
 	struct xfrm_policy *xp;
 	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
 	struct sadb_x_sec_ctx *sec_ctx;
 
-	switch (family) {
+	switch (sk->sk_family) {
 	case AF_INET:
 		if (opt != IP_IPSEC_POLICY) {
 			*dir = -EOPNOTSUPP;
@@ -2891,7 +2891,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
 	xp->lft.hard_byte_limit = XFRM_INF;
 	xp->lft.soft_packet_limit = XFRM_INF;
 	xp->lft.hard_packet_limit = XFRM_INF;
-	xp->family = family;
+	xp->family = sk->sk_family;
 
 	xp->xfrm_nr = 0;
 	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
@@ -2907,8 +2907,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
 		p += pol->sadb_x_policy_len*8;
 		sec_ctx = (struct sadb_x_sec_ctx *)p;
 		if (len < pol->sadb_x_policy_len*8 +
-		    sec_ctx->sadb_x_sec_len)
+		    sec_ctx->sadb_x_sec_len) {
+			*dir = -EINVAL;
 			goto out;
+		}
 		if ((*dir = verify_sec_ctx_len(p)))
 			goto out;
 		uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
@@ -2918,6 +2920,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
 		if (*dir)
 			goto out;
 	}
+	else {
+		*dir = security_xfrm_sock_policy_alloc(xp, sk);
+		if (*dir)
+			goto out;
+	}
 
 	*dir = pol->sadb_x_policy_dir-1;
 	return xp;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index be02bd981d12f..1c796087ee786 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1026,7 +1026,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 	err = -EINVAL;
 	read_lock(&xfrm_km_lock);
 	list_for_each_entry(km, &xfrm_km_list, list) {
-		pol = km->compile_policy(sk->sk_family, optname, data,
+		pol = km->compile_policy(sk, optname, data,
 					 optlen, &err);
 		if (err >= 0)
 			break;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index dac8db1088bcd..f70e158874d26 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1757,7 +1757,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 /* User gives us xfrm_user_policy_info followed by an array of 0
  * or more templates.
  */
-static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 					       u8 *data, int len, int *dir)
 {
 	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
@@ -1765,7 +1765,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
 	struct xfrm_policy *xp;
 	int nr;
 
-	switch (family) {
+	switch (sk->sk_family) {
 	case AF_INET:
 		if (opt != IP_XFRM_POLICY) {
 			*dir = -EOPNOTSUPP;
@@ -1807,6 +1807,15 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
 	copy_from_user_policy(xp, p);
 	copy_templates(xp, ut, nr);
 
+	if (!xp->security) {
+		int err = security_xfrm_sock_policy_alloc(xp, sk);
+		if (err) {
+			kfree(xp);
+			*dir = err;
+			return NULL;
+		}
+	}
+
 	*dir = p->dir;
 
 	return xp;
diff --git a/security/dummy.c b/security/dummy.c
index c0ff6b9bfd7dc..66cc064049302 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -815,7 +815,8 @@ static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid)
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
+static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk)
 {
 	return 0;
 }
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 8e45c1d588a80..1822c73e5085b 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -7,7 +7,8 @@
 #ifndef _SELINUX_XFRM_H_
 #define _SELINUX_XFRM_H_
 
-int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx);
+int selinux_xfrm_policy_alloc(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk);
 int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new);
 void selinux_xfrm_policy_free(struct xfrm_policy *xp);
 int selinux_xfrm_policy_delete(struct xfrm_policy *xp);
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index c750ef7af66f0..d3690f985135a 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -208,10 +208,8 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
 
 	BUG_ON(uctx && pol);
 
-	if (pol)
-		goto from_policy;
-
-	BUG_ON(!uctx);
+	if (!uctx)
+		goto not_from_user;
 
 	if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX)
 		return -EINVAL;
@@ -251,11 +249,14 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
 
 	return rc;
 
-from_policy:
-	BUG_ON(!pol);
-	rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid);
-	if (rc)
-		goto out;
+not_from_user:
+	if (pol) {
+		rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid);
+		if (rc)
+			goto out;
+	}
+	else
+		ctx_sid = sid;
 
 	rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len);
 	if (rc)
@@ -293,13 +294,23 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
  * LSM hook implementation that allocs and transfers uctx spec to
  * xfrm_policy.
  */
-int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx)
+int selinux_xfrm_policy_alloc(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *uctx, struct sock *sk)
 {
 	int err;
+	u32 sid;
 
 	BUG_ON(!xp);
+	BUG_ON(uctx && sk);
+
+	if (sk) {
+		struct sk_security_struct *ssec = sk->sk_security;
+		sid = ssec->sid;
+	}
+	else
+		sid = SECSID_NULL;
 
-	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0);
+	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid);
 	return err;
 }
 
-- 
GitLab


From 4237c75c0a35535d7f9f2bfeeb4b4df1e068a0bf Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Mon, 24 Jul 2006 23:32:50 -0700
Subject: [PATCH 0386/1063] [MLSXFRM]: Auto-labeling of child sockets

This automatically labels the TCP, Unix stream, and DCCP child sockets,
as well as openreqs, to be at the same MLS level as the peer. This will
result in the selection of appropriately labeled IPSec Security
Associations.

This also uses the sock's sid (as opposed to the isec sid) in SELinux
enforcement of secmark in rcv_skb and postroute_last hooks.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h        |  55 +++++++++++++
 include/net/request_sock.h      |   1 +
 include/net/sock.h              |   1 +
 net/dccp/ipv4.c                 |   3 +
 net/dccp/ipv6.c                 |   7 +-
 net/ipv4/inet_connection_sock.c |   4 +-
 net/ipv4/syncookies.c           |   6 +-
 net/ipv4/tcp_ipv4.c             |   3 +
 net/ipv6/tcp_ipv6.c             |   6 +-
 security/dummy.c                |  24 ++++++
 security/selinux/hooks.c        | 137 ++++++++++++++++++++++----------
 security/selinux/xfrm.c         |   1 -
 12 files changed, 197 insertions(+), 51 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 8e3dc6c51a6d8..bb4c80fdfe7a6 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -90,6 +90,7 @@ extern int cap_netlink_recv(struct sk_buff *skb, int cap);
 struct nfsctl_arg;
 struct sched_param;
 struct swap_info_struct;
+struct request_sock;
 
 /* bprm_apply_creds unsafe reasons */
 #define LSM_UNSAFE_SHARE	1
@@ -819,6 +820,14 @@ struct swap_info_struct;
  * @sk_getsecid:
  *	Retrieve the LSM-specific secid for the sock to enable caching of network
  *	authorizations.
+ * @sock_graft:
+ *	Sets the socket's isec sid to the sock's sid.
+ * @inet_conn_request:
+ *	Sets the openreq's sid to socket's sid with MLS portion taken from peer sid.
+ * @inet_csk_clone:
+ *	Sets the new child socket's sid to the openreq sid.
+ * @req_classify_flow:
+ *	Sets the flow's sid to the openreq sid.
  *
  * Security hooks for XFRM operations.
  *
@@ -1358,6 +1367,11 @@ struct security_operations {
 	void (*sk_free_security) (struct sock *sk);
 	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
 	void (*sk_getsecid) (struct sock *sk, u32 *secid);
+	void (*sock_graft)(struct sock* sk, struct socket *parent);
+	int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb,
+					struct request_sock *req);
+	void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req);
+	void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2926,6 +2940,28 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
 {
 	security_ops->sk_getsecid(sk, &fl->secid);
 }
+
+static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+{
+	security_ops->req_classify_flow(req, fl);
+}
+
+static inline void security_sock_graft(struct sock* sk, struct socket *parent)
+{
+	security_ops->sock_graft(sk, parent);
+}
+
+static inline int security_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
+{
+	return security_ops->inet_conn_request(sk, skb, req);
+}
+
+static inline void security_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+	security_ops->inet_csk_clone(newsk, req);
+}
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct socket * sock,
 					       struct socket * other, 
@@ -3055,6 +3091,25 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
 static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
 {
 }
+
+static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+{
+}
+
+static inline void security_sock_graft(struct sock* sk, struct socket *parent)
+{
+}
+
+static inline int security_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
+{
+	return 0;
+}
+
+static inline void security_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index c5d7f920c352d..8e165ca16bd8f 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -53,6 +53,7 @@ struct request_sock {
 	unsigned long			expires;
 	struct request_sock_ops		*rsk_ops;
 	struct sock			*sk;
+	u32				secid;
 };
 
 static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops)
diff --git a/include/net/sock.h b/include/net/sock.h
index 91cdceb3c0284..337ebec84c700 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -969,6 +969,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 	sk->sk_sleep = &parent->wait;
 	parent->sk = sk;
 	sk->sk_socket = parent;
+	security_sock_graft(sk, parent);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 386498053b1c0..171d363876eeb 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	dccp_openreq_init(req, &dp, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 53d255c014310..231bc7c7e749c 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -424,7 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
-	security_sk_classify_flow(sk, &fl);
+	security_req_classify_flow(req, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -626,7 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb,
 	fl.oif = inet6_iif(rxskb);
 	fl.fl_ip_dport = dh->dccph_dport;
 	fl.fl_ip_sport = dh->dccph_sport;
-	security_skb_classify_flow(rxskb, &fl);
+	security_req_classify_flow(req, &fl);
 
 	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
 		if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
@@ -709,6 +709,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	dccp_openreq_init(req, &dp, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq6 = inet6_rsk(req);
 	ireq = inet_rsk(req);
 	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 772b4eac78bcb..07204391d083b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -327,7 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 				       { .sport = inet_sk(sk)->sport,
 					 .dport = ireq->rmt_port } } };
 
-	security_sk_classify_flow(sk, &fl);
+	security_req_classify_flow(req, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
@@ -510,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 
 		/* Deinitialize accept_queue to trap illegal accesses. */
 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+
+		security_inet_csk_clone(newsk, req);
 	}
 	return newsk;
 }
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 307dc3c0d635d..661e0a4bca72d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	if (!req)
 		goto out;
 
+	if (security_inet_conn_request(sk, skb, req)) {
+		reqsk_free(req);
+		goto out;
+	}
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
 	treq->rcv_isn		= htonl(skb->h.th->seq) - 1;
@@ -259,7 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 				    .uli_u = { .ports =
 					       { .sport = skb->h.th->dest,
 						 .dport = skb->h.th->source } } };
-		security_sk_classify_flow(sk, &fl);
+		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(&rt, &fl)) {
 			reqsk_free(req);
 			goto out; 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4b04c3edd4a95..43f6740244f8f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	tcp_openreq_init(req, &tmp_opt, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 46922e57e311c..302786a11cd67 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -470,7 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
-	security_sk_classify_flow(sk, &fl);
+	security_req_classify_flow(req, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -826,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	tcp_rsk(req)->snt_isn = isn;
 
+	security_inet_conn_request(sk, skb, req);
+
 	if (tcp_v6_send_synack(sk, req, NULL))
 		goto drop;
 
@@ -929,7 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
-		security_sk_classify_flow(sk, &fl);
+		security_req_classify_flow(req, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out;
diff --git a/security/dummy.c b/security/dummy.c
index 66cc064049302..1c45f8e4aad15 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -812,6 +812,26 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *
 static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid)
 {
 }
+
+static inline void dummy_sock_graft(struct sock* sk, struct socket *parent)
+{
+}
+
+static inline int dummy_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
+{
+	return 0;
+}
+
+static inline void dummy_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+}
+
+static inline void dummy_req_classify_flow(const struct request_sock *req,
+			struct flowi *fl)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1084,6 +1104,10 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, sk_free_security);
 	set_to_dummy_if_null(ops, sk_clone_security);
 	set_to_dummy_if_null(ops, sk_getsecid);
+	set_to_dummy_if_null(ops, sock_graft);
+	set_to_dummy_if_null(ops, inet_conn_request);
+	set_to_dummy_if_null(ops, inet_csk_clone);
+	set_to_dummy_if_null(ops, req_classify_flow);
  #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef  CONFIG_SECURITY_NETWORK_XFRM
 	set_to_dummy_if_null(ops, xfrm_policy_alloc_security);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4e5989d584ce4..1dc935f7b919e 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3328,8 +3328,9 @@ static int selinux_socket_unix_stream_connect(struct socket *sock,
 	/* server child socket */
 	ssec = newsk->sk_security;
 	ssec->peer_sid = isec->sid;
-	
-	return 0;
+	err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid);
+
+	return err;
 }
 
 static int selinux_socket_unix_may_send(struct socket *sock,
@@ -3355,11 +3356,29 @@ static int selinux_socket_unix_may_send(struct socket *sock,
 }
 
 static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
-		struct avc_audit_data *ad, u32 sock_sid, u16 sock_class,
-		u16 family, char *addrp, int len)
+		struct avc_audit_data *ad, u16 family, char *addrp, int len)
 {
 	int err = 0;
 	u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0;
+	struct socket *sock;
+	u16 sock_class = 0;
+	u32 sock_sid = 0;
+
+ 	read_lock_bh(&sk->sk_callback_lock);
+ 	sock = sk->sk_socket;
+ 	if (sock) {
+ 		struct inode *inode;
+ 		inode = SOCK_INODE(sock);
+ 		if (inode) {
+ 			struct inode_security_struct *isec;
+ 			isec = inode->i_security;
+ 			sock_sid = isec->sid;
+ 			sock_class = isec->sclass;
+ 		}
+ 	}
+ 	read_unlock_bh(&sk->sk_callback_lock);
+ 	if (!sock_sid)
+  		goto out;
 
 	if (!skb->dev)
 		goto out;
@@ -3419,12 +3438,10 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
 static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	u16 family;
-	u16 sock_class = 0;
 	char *addrp;
 	int len, err = 0;
-	u32 sock_sid = 0;
-	struct socket *sock;
 	struct avc_audit_data ad;
+	struct sk_security_struct *sksec = sk->sk_security;
 
 	family = sk->sk_family;
 	if (family != PF_INET && family != PF_INET6)
@@ -3434,22 +3451,6 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP))
 		family = PF_INET;
 
- 	read_lock_bh(&sk->sk_callback_lock);
- 	sock = sk->sk_socket;
- 	if (sock) {
- 		struct inode *inode;
- 		inode = SOCK_INODE(sock);
- 		if (inode) {
- 			struct inode_security_struct *isec;
- 			isec = inode->i_security;
- 			sock_sid = isec->sid;
- 			sock_class = isec->sclass;
- 		}
- 	}
- 	read_unlock_bh(&sk->sk_callback_lock);
- 	if (!sock_sid)
-  		goto out;
-
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = skb->dev ? skb->dev->name : "[unknown]";
 	ad.u.net.family = family;
@@ -3459,16 +3460,15 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	if (selinux_compat_net)
-		err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid,
-						  sock_class, family,
+		err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family,
 						  addrp, len);
 	else
-		err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET,
+		err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET,
 				   PACKET__RECV, &ad);
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad);
+	err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
 out:	
 	return err;
 }
@@ -3572,6 +3572,49 @@ static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
 	}
 }
 
+void selinux_sock_graft(struct sock* sk, struct socket *parent)
+{
+	struct inode_security_struct *isec = SOCK_INODE(parent)->i_security;
+	struct sk_security_struct *sksec = sk->sk_security;
+
+	isec->sid = sksec->sid;
+}
+
+int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+					   struct request_sock *req)
+{
+	struct sk_security_struct *sksec = sk->sk_security;
+	int err;
+	u32 newsid = 0;
+	u32 peersid;
+
+	err = selinux_xfrm_decode_session(skb, &peersid, 0);
+	BUG_ON(err);
+
+	err = security_sid_mls_copy(sksec->sid, peersid, &newsid);
+	if (err)
+		return err;
+
+	req->secid = newsid;
+	return 0;
+}
+
+void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req)
+{
+	struct sk_security_struct *newsksec = newsk->sk_security;
+
+	newsksec->sid = req->secid;
+	/* NOTE: Ideally, we should also get the isec->sid for the
+	   new socket in sync, but we don't have the isec available yet.
+	   So we will wait until sock_graft to do it, by which
+	   time it will have been created and available. */
+}
+
+void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+{
+	fl->secid = req->secid;
+}
+
 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
 {
 	int err = 0;
@@ -3611,12 +3654,24 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
 #ifdef CONFIG_NETFILTER
 
 static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev,
-					    struct inode_security_struct *isec,
 					    struct avc_audit_data *ad,
 					    u16 family, char *addrp, int len)
 {
-	int err;
+	int err = 0;
 	u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0;
+	struct socket *sock;
+	struct inode *inode;
+	struct inode_security_struct *isec;
+
+	sock = sk->sk_socket;
+	if (!sock)
+		goto out;
+
+	inode = SOCK_INODE(sock);
+	if (!inode)
+		goto out;
+
+	isec = inode->i_security;
 	
 	err = sel_netif_sids(dev, &if_sid, NULL);
 	if (err)
@@ -3681,26 +3736,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum,
 	char *addrp;
 	int len, err = 0;
 	struct sock *sk;
-	struct socket *sock;
-	struct inode *inode;
 	struct sk_buff *skb = *pskb;
-	struct inode_security_struct *isec;
 	struct avc_audit_data ad;
 	struct net_device *dev = (struct net_device *)out;
+	struct sk_security_struct *sksec;
 
 	sk = skb->sk;
 	if (!sk)
 		goto out;
 
-	sock = sk->sk_socket;
-	if (!sock)
-		goto out;
-
-	inode = SOCK_INODE(sock);
-	if (!inode)
-		goto out;
-
-	isec = inode->i_security;
+	sksec = sk->sk_security;
 
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = dev->name;
@@ -3711,16 +3756,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum,
 		goto out;
 
 	if (selinux_compat_net)
-		err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad,
+		err = selinux_ip_postroute_last_compat(sk, dev, &ad,
 						       family, addrp, len);
 	else
-		err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET,
+		err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET,
 				   PACKET__SEND, &ad);
 
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_postroute_last(isec->sid, skb, &ad);
+	err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad);
 out:
 	return err ? NF_DROP : NF_ACCEPT;
 }
@@ -4623,6 +4668,10 @@ static struct security_operations selinux_ops = {
 	.sk_free_security =		selinux_sk_free_security,
 	.sk_clone_security =		selinux_sk_clone_security,
 	.sk_getsecid = 			selinux_sk_getsecid,
+	.sock_graft =			selinux_sock_graft,
+	.inet_conn_request =		selinux_inet_conn_request,
+	.inet_csk_clone =		selinux_inet_csk_clone,
+	.req_classify_flow =		selinux_req_classify_flow,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index d3690f985135a..3e742b850af6e 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -271,7 +271,6 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
 		goto out;
 	}
 
-
 	ctx->ctx_doi = XFRM_SC_DOI_LSM;
 	ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
 	ctx->ctx_sid = ctx_sid;
-- 
GitLab


From a51c64f1e5c2876eab2a32955acd9e8015c91c15 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Thu, 27 Jul 2006 22:01:34 -0700
Subject: [PATCH 0387/1063] [MLSXFRM]: Fix build with SECURITY_NETWORK_XFRM
 disabled.

The following patch will fix the build problem (encountered by Andrew
Morton) when SECURITY_NETWORK_XFRM is not enabled.

As compared to git-net-selinux_xfrm_decode_session-build-fix.patch in
-mm, this patch sets the return parameter sid to SECSID_NULL in
selinux_xfrm_decode_session() and handles this value in the caller
selinux_inet_conn_request() appropriately.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/hooks.c        | 5 +++++
 security/selinux/include/xfrm.h | 7 ++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1dc935f7b919e..33028b3b19ce1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3591,6 +3591,11 @@ int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 	err = selinux_xfrm_decode_session(skb, &peersid, 0);
 	BUG_ON(err);
 
+	if (peersid == SECSID_NULL) {
+		req->secid = sksec->sid;
+		return 0;
+	}
+
 	err = security_sid_mls_copy(sksec->sid, peersid, &newsid);
 	if (err)
 		return err;
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 1822c73e5085b..81eb59890162e 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -20,7 +20,6 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
 int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
 			struct xfrm_policy *xp, struct flowi *fl);
 int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm);
-int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall);
 
 
 /*
@@ -41,6 +40,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
 			struct avc_audit_data *ad);
 u32 selinux_socket_getpeer_stream(struct sock *sk);
 u32 selinux_socket_getpeer_dgram(struct sk_buff *skb);
+int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 #else
 static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
 			struct avc_audit_data *ad)
@@ -63,6 +63,11 @@ static inline int selinux_socket_getpeer_dgram(struct sk_buff *skb)
 {
 	return SECSID_NULL;
 }
+static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+{
+	*sid = SECSID_NULL;
+	return 0;
+}
 #endif
 
 #endif /* _SELINUX_XFRM_H_ */
-- 
GitLab


From 8802f616f6de8576805f32e47602816f141118f2 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 3 Aug 2006 16:45:49 -0700
Subject: [PATCH 0388/1063] [NetLabel]: documentation

Documentation for the NetLabel system, this includes a basic overview
of how NetLabel works, how LSM developers can integrate it into their
favorite LSM, as well as documentation on the CIPSO related sysctl
variables.  Also, due to the difficulty of finding expired IETF
drafts, I am including the IETF CIPSO draft that is the basis of the
NetLabel CIPSO implementation.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 CREDITS                                       |   7 +
 Documentation/00-INDEX                        |   2 +
 Documentation/netlabel/00-INDEX               |  10 +
 Documentation/netlabel/cipso_ipv4.txt         |  48 ++
 .../draft-ietf-cipso-ipsecurity-01.txt        | 791 ++++++++++++++++++
 Documentation/netlabel/introduction.txt       |  46 +
 Documentation/netlabel/lsm_interface.txt      |  47 ++
 Documentation/networking/ip-sysctl.txt        |  35 +
 8 files changed, 986 insertions(+)
 create mode 100644 Documentation/netlabel/00-INDEX
 create mode 100644 Documentation/netlabel/cipso_ipv4.txt
 create mode 100644 Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
 create mode 100644 Documentation/netlabel/introduction.txt
 create mode 100644 Documentation/netlabel/lsm_interface.txt

diff --git a/CREDITS b/CREDITS
index 0fe904ebb7c7d..cc3453a55fb94 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2384,6 +2384,13 @@ N: Thomas Molina
 E: tmolina@cablespeed.com
 D: bug fixes, documentation, minor hackery
 
+N: Paul Moore
+E: paul.moore@hp.com
+D: NetLabel author
+S: Hewlett-Packard
+S: 110 Spit Brook Road
+S: Nashua, NH 03062
+
 N: James Morris
 E: jmorris@namei.org
 W: http://namei.org/
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 5f7f7d7f77d25..02457ec9c94fe 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -184,6 +184,8 @@ mtrr.txt
 	- how to use PPro Memory Type Range Registers to increase performance.
 nbd.txt
 	- info on a TCP implementation of a network block device.
+netlabel/
+	- directory with information on the NetLabel subsystem.
 networking/
 	- directory with info on various aspects of networking with Linux.
 nfsroot.txt
diff --git a/Documentation/netlabel/00-INDEX b/Documentation/netlabel/00-INDEX
new file mode 100644
index 0000000000000..837bf35990e28
--- /dev/null
+++ b/Documentation/netlabel/00-INDEX
@@ -0,0 +1,10 @@
+00-INDEX
+	- this file.
+cipso_ipv4.txt
+	- documentation on the IPv4 CIPSO protocol engine.
+draft-ietf-cipso-ipsecurity-01.txt
+	- IETF draft of the CIPSO protocol, dated 16 July 1992.
+introduction.txt
+	- NetLabel introduction, READ THIS FIRST.
+lsm_interface.txt
+	- documentation on the NetLabel kernel security module API.
diff --git a/Documentation/netlabel/cipso_ipv4.txt b/Documentation/netlabel/cipso_ipv4.txt
new file mode 100644
index 0000000000000..93dacb132c3c1
--- /dev/null
+++ b/Documentation/netlabel/cipso_ipv4.txt
@@ -0,0 +1,48 @@
+NetLabel CIPSO/IPv4 Protocol Engine
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial IP
+Security Option (CIPSO) draft from July 16, 1992.  A copy of this draft can be
+found in this directory, consult '00-INDEX' for the filename.  While the IETF
+draft never made it to an RFC standard, it has become a de-facto standard for
+labeled networking and is used in many trusted operating systems.
+
+ * Outbound Packet Processing
+
+The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by
+adding the CIPSO label to the socket.  This causes all packets leaving the
+system through the socket to have the CIPSO IP option applied.  The socket's
+CIPSO label can be changed at any point in time, however, it is recommended
+that it is set upon the socket's creation.  The LSM can set the socket's CIPSO
+label by using the NetLabel security module API; if the NetLabel "domain" is
+configured to use CIPSO for packet labeling then a CIPSO IP option will be
+generated and attached to the socket.
+
+ * Inbound Packet Processing
+
+The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the
+IP layer without any special handling required by the LSM.  However, in order
+to decode and translate the CIPSO label on the packet the LSM must use the
+NetLabel security module API to extract the security attributes of the packet.
+This is typically done at the socket layer using the 'socket_sock_rcv_skb()'
+LSM hook.
+
+ * Label Translation
+
+The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security
+attributes such as sensitivity level and category to values which are
+appropriate for the host.  These mappings are defined as part of a CIPSO
+Domain Of Interpretation (DOI) definition and are configured through the
+NetLabel user space communication layer.  Each DOI definition can have a
+different security attribute mapping table.
+
+ * Label Translation Cache
+
+The NetLabel system provides a framework for caching security attribute
+mappings from the network labels to the corresponding LSM identifiers.  The
+CIPSO/IPv4 protocol engine supports this caching mechanism.
diff --git a/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
new file mode 100644
index 0000000000000..256c2c9d4f50d
--- /dev/null
+++ b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
@@ -0,0 +1,791 @@
+IETF CIPSO Working Group
+16 July, 1992
+
+
+
+                 COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
+
+
+
+1.    Status
+
+This Internet Draft provides the high level specification for a Commercial
+IP Security Option (CIPSO).  This draft reflects the version as approved by
+the CIPSO IETF Working Group.  Distribution of this memo is unlimited.
+
+This document is an Internet Draft.  Internet Drafts are working documents
+of the Internet Engineering Task Force (IETF), its Areas, and its Working
+Groups. Note that other groups may also distribute working documents as
+Internet Drafts.
+
+Internet Drafts are draft documents valid for a maximum of six months.
+Internet Drafts may be updated, replaced, or obsoleted by other documents
+at any time.  It is not appropriate to use Internet Drafts as reference
+material or to cite them other than as a "working draft" or "work in
+progress."
+
+Please check the I-D abstract listing contained in each Internet Draft
+directory to learn the current status of this or any other Internet Draft.
+
+
+
+
+2.    Background
+
+Currently the Internet Protocol includes two security options.  One of
+these options is the DoD Basic Security Option (BSO) (Type 130) which allows
+IP datagrams to be labeled with security classifications.  This option
+provides sixteen security classifications and a variable number of handling
+restrictions.  To handle additional security information, such as security
+categories or compartments, another security option (Type 133) exists and
+is referred to as the DoD Extended Security Option (ESO).  The values for
+the fixed fields within these two options are administered by the Defense
+Information Systems Agency (DISA).
+
+Computer vendors are now building commercial operating systems with
+mandatory access controls and multi-level security.  These systems are
+no longer built specifically for a particular group in the defense or
+intelligence communities.  They are generally available commercial systems
+for use in a variety of government and civil sector environments.
+
+The small number of ESO format codes can not support all the possible
+applications of a commercial security option.  The BSO and ESO were
+designed to only support the United States DoD.  CIPSO has been designed
+to support multiple security policies.  This Internet Draft provides the
+format and procedures required to support a Mandatory Access Control
+security policy.  Support for additional security policies shall be
+defined in future RFCs.
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 1]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+3.    CIPSO Format
+
+Option type: 134 (Class 0, Number 6, Copy on Fragmentation)
+Option length: Variable
+
+This option permits security related information to be passed between
+systems within a single Domain of Interpretation (DOI).  A DOI is a
+collection of systems which agree on the meaning of particular values
+in the security option.  An authority that has been assigned a DOI
+identifier will define a mapping between appropriate CIPSO field values
+and their human readable equivalent.  This authority will distribute that
+mapping to hosts within the authority's domain.  These mappings may be
+sensitive, therefore a DOI authority is not required to make these
+mappings available to anyone other than the systems that are included in
+the DOI.
+
+This option MUST be copied on fragmentation.  This option appears at most
+once in a datagram.  All multi-octet fields in the option are defined to be
+transmitted in network byte order.  The format of this option is as follows:
+
++----------+----------+------//------+-----------//---------+
+| 10000110 | LLLLLLLL | DDDDDDDDDDDD | TTTTTTTTTTTTTTTTTTTT |
++----------+----------+------//------+-----------//---------+
+
+  TYPE=134    OPTION    DOMAIN OF               TAGS
+              LENGTH    INTERPRETATION
+
+
+                Figure 1. CIPSO Format
+
+
+3.1    Type
+
+This field is 1 octet in length.  Its value is 134.
+
+
+3.2    Length
+
+This field is 1 octet in length.  It is the total length of the option
+including the type and length fields.  With the current IP header length
+restriction of 40 octets the value of this field MUST not exceed 40.
+
+
+3.3    Domain of Interpretation Identifier
+
+This field is an unsigned 32 bit integer.  The value 0 is reserved and MUST
+not appear as the DOI identifier in any CIPSO option.  Implementations
+should assume that the DOI identifier field is not aligned on any particular
+byte boundary.
+
+To conserve space in the protocol, security levels and categories are
+represented by numbers rather than their ASCII equivalent.  This requires
+a mapping table within CIPSO hosts to map these numbers to their
+corresponding ASCII representations.  Non-related groups of systems may
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 2]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+have their own unique mappings.  For example, one group of systems may
+use the number 5 to represent Unclassified while another group may use the
+number 1 to represent that same security level.  The DOI identifier is used
+to identify which mapping was used for the values within the option.
+
+
+3.4    Tag Types
+
+A common format for passing security related information is necessary
+for interoperability.  CIPSO uses sets of "tags" to contain the security
+information relevant to the data in the IP packet.  Each tag begins with
+a tag type identifier followed by the length of the tag and ends with the
+actual security information to be passed.  All multi-octet fields in a tag
+are defined to be transmitted in network byte order.  Like the DOI
+identifier field in the CIPSO header, implementations should assume that
+all tags, as well as fields within a tag, are not aligned on any particular
+octet boundary.   The tag types defined in this document contain alignment
+bytes to assist alignment of some information, however alignment can not
+be guaranteed if CIPSO is not the first IP option.
+
+CIPSO tag types 0 through 127 are reserved for defining standard tag
+formats.  Their definitions will be published in RFCs.  Tag types whose
+identifiers are greater than 127 are defined by the DOI authority and may
+only be meaningful in certain Domains of Interpretation.  For these tag
+types, implementations will require the DOI identifier as well as the tag
+number to determine the security policy and the format associated with the
+tag.  Use of tag types above 127 is restricted to closed networks where
+interoperability with other networks will not be an issue.  Implementations
+that support a tag type greater than 127 MUST support at least one DOI that
+requires only tag types 1 to 127.
+
+Tag type 0 is reserved. Tag types 1, 2, and 5 are defined in this
+Internet Draft.  Types 3 and 4 are reserved for work in progress.
+The standard format for all current and future CIPSO tags is shown below:
+
++----------+----------+--------//--------+
+| TTTTTTTT | LLLLLLLL | IIIIIIIIIIIIIIII |
++----------+----------+--------//--------+
+    TAG       TAG         TAG
+    TYPE      LENGTH      INFORMATION
+
+    Figure 2:  Standard Tag Format
+
+In the three tag types described in this document, the length and count
+restrictions are based on the current IP limitation of 40 octets for all
+IP options.  If the IP header is later expanded, then the length and count
+restrictions specified in this document may increase to use the full area
+provided for IP options.
+
+
+3.4.1    Tag Type Classes
+
+Tag classes consist of tag types that have common processing requirements
+and support the same security policy.  The three tags defined in this
+Internet Draft belong to the Mandatory Access Control (MAC) Sensitivity
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 3]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+class and support the MAC Sensitivity security policy.
+
+
+3.4.2    Tag Type 1
+
+This is referred to as the "bit-mapped" tag type.  Tag type 1 is included
+in the MAC Sensitivity tag type class.  The format of this tag type is as
+follows:
+
++----------+----------+----------+----------+--------//---------+
+| 00000001 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+--------//---------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY    BIT MAP OF
+    TYPE      LENGTH   OCTET      LEVEL          CATEGORIES
+
+            Figure 3. Tag Type 1 Format
+
+
+3.4.2.1    Tag Type
+
+This field is 1 octet in length and has a value of 1.
+
+
+3.4.2.2    Tag Length
+
+This field is 1 octet in length.  It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.2.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category bitmap field on an even octet boundary.  This will
+speed many implementations including router implementations.
+
+
+3.4.2.4    Sensitivity Level
+
+This field is 1 octet in length.  Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.2.5    Bit Map of Categories
+
+The length of this field is variable and ranges from 0 to 30 octets.  This
+provides representation of categories 0 to 239.  The ordering of the bits
+is left to right or MSB to LSB.  For example category 0 is represented by
+the most significant bit of the first byte and category 15 is represented
+by the least significant bit of the second byte.  Figure 4 graphically
+shows this ordering.  Bit N is binary 1 if category N is part of the label
+for the datagram, and bit N is binary 0 if category N is not part of the
+label.  Except for the optimized tag 1 format described in the next section,
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 4]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+minimal encoding SHOULD be used resulting in no trailing zero octets in the
+category bitmap.
+
+        octet 0  octet 1  octet 2  octet 3  octet 4  octet 5
+        XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX . . .
+bit     01234567 89111111 11112222 22222233 33333333 44444444
+number             012345 67890123 45678901 23456789 01234567
+
+            Figure 4. Ordering of Bits in Tag 1 Bit Map
+
+
+3.4.2.6    Optimized Tag 1 Format
+
+Routers work most efficiently when processing fixed length fields.  To
+support these routers there is an optimized form of tag type 1.  The format
+does not change.  The only change is to the category bitmap which is set to
+a constant length of 10 octets.  Trailing octets required to fill out the 10
+octets are zero filled.  Ten octets, allowing for 80 categories, was chosen
+because it makes the total length of the CIPSO option 20 octets.  If CIPSO
+is the only option then the option will be full word aligned and additional
+filler octets will not be required.
+
+
+3.4.3    Tag Type 2
+
+This is referred to as the "enumerated" tag type.  It is used to describe
+large but sparsely populated sets of categories.  Tag type 2 is in the MAC
+Sensitivity tag type class.  The format of this tag type is as follows:
+
++----------+----------+----------+----------+-------------//-------------+
+| 00000010 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+-------------//-------------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY         ENUMERATED
+    TYPE      LENGTH   OCTET      LEVEL               CATEGORIES
+
+                Figure 5. Tag Type 2 Format
+
+
+3.4.3.1     Tag Type
+
+This field is one octet in length and has a value of 2.
+
+
+3.4.3.2    Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.3.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category field on an even octet boundary.  This will
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 5]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+speed many implementations including router implementations.
+
+
+3.4.3.4    Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the
+maximum value.
+
+
+3.4.3.5    Enumerated Categories
+
+In this tag, categories are represented by their actual value rather than
+by their position within a bit field.  The length of each category is 2
+octets.  Up to 15 categories may be represented by this tag.  Valid values
+for categories are 0 to 65534.  Category 65535 is not a valid category
+value.  The categories MUST be listed in ascending order within the tag.
+
+
+3.4.4    Tag Type 5
+
+This is referred to as the "range" tag type.  It is used to represent
+labels where all categories in a range, or set of ranges, are included
+in the sensitivity label.  Tag type 5 is in the MAC Sensitivity tag type
+class.  The format of this tag type is as follows:
+
++----------+----------+----------+----------+------------//-------------+
+| 00000101 | LLLLLLLL | 00000000 | LLLLLLLL |  Top/Bottom | Top/Bottom  |
++----------+----------+----------+----------+------------//-------------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY        CATEGORY RANGES
+    TYPE      LENGTH   OCTET      LEVEL
+
+                     Figure 6. Tag Type 5 Format
+
+
+3.4.4.1     Tag Type
+
+This field is one octet in length and has a value of 5.
+
+
+3.4.4.2    Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.4.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category range field on an even octet boundary.  This will
+speed many implementations including router implementations.
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 6]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+3.4.4.4    Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.4.5    Category Ranges
+
+A category range is a 4 octet field comprised of the 2 octet index of the
+highest numbered category followed by the 2 octet index of the lowest
+numbered category.  These range endpoints are inclusive within the range of
+categories.  All categories within a range are included in the sensitivity
+label.  This tag may contain a maximum of 7 category pairs.  The bottom
+category endpoint for the last pair in the tag MAY be omitted and SHOULD be
+assumed to be 0.  The ranges MUST be non-overlapping and be listed in
+descending order.  Valid values for categories are 0 to 65534.  Category
+65535 is not a valid category value.
+
+
+3.4.5     Minimum Requirements
+
+A CIPSO implementation MUST be capable of generating at least tag type 1 in
+the non-optimized form.  In addition, a CIPSO implementation MUST be able
+to receive any valid tag type 1 even those using the optimized tag type 1
+format.
+
+
+4.    Configuration Parameters
+
+The configuration parameters defined below are required for all CIPSO hosts,
+gateways, and routers that support multiple sensitivity labels.  A CIPSO
+host is defined to be the origination or destination system for an IP
+datagram.  A CIPSO gateway provides IP routing services between two or more
+IP networks and may be required to perform label translations between
+networks.  A CIPSO gateway may be an enhanced CIPSO host or it may just
+provide gateway services with no end system CIPSO capabilities.  A CIPSO
+router is a dedicated IP router that routes IP datagrams between two or more
+IP networks.
+
+An implementation of CIPSO on a host MUST have the capability to reject a
+datagram for reasons that the information contained can not be adequately
+protected by the receiving host or if acceptance may result in violation of
+the host or network security policy.  In addition, a CIPSO gateway or router
+MUST be able to reject datagrams going to networks that can not provide
+adequate protection or may violate the network's security policy.  To
+provide this capability the following minimal set of configuration
+parameters are required for CIPSO implementations:
+
+HOST_LABEL_MAX - This parameter contains the maximum sensitivity label that
+a CIPSO host is authorized to handle.  All datagrams that have a label
+greater than this maximum MUST be rejected by the CIPSO host.  This
+parameter does not apply to CIPSO gateways or routers.  This parameter need
+not be defined explicitly as it can be implicitly derived from the
+PORT_LABEL_MAX parameters for the associated interfaces.
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 7]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+HOST_LABEL_MIN - This parameter contains the minimum sensitivity label that
+a CIPSO host is authorized to handle.  All datagrams that have a label less
+than this minimum MUST be rejected by the CIPSO host.  This parameter does
+not apply to CIPSO gateways or routers.  This parameter need not be defined
+explicitly as it can be implicitly derived from the PORT_LABEL_MIN
+parameters for the associated interfaces.
+
+PORT_LABEL_MAX - This parameter contains the maximum sensitivity label for
+all datagrams that may exit a particular network interface port.  All
+outgoing datagrams that have a label greater than this maximum MUST be
+rejected by the CIPSO system.  The label within this parameter MUST be
+less than or equal to the label within the HOST_LABEL_MAX parameter.  This
+parameter does not apply to CIPSO hosts that support only one network port.
+
+PORT_LABEL_MIN - This parameter contains the minimum sensitivity label for
+all datagrams that may exit a particular network interface port.  All
+outgoing datagrams that have a label less than this minimum MUST be
+rejected by the CIPSO system.  The label within this parameter MUST be
+greater than or equal to the label within the HOST_LABEL_MIN parameter.
+This parameter does not apply to CIPSO hosts that support only one network
+port.
+
+PORT_DOI - This parameter is used to assign a DOI identifier value to a
+particular network interface port.  All CIPSO labels within datagrams
+going out this port MUST use the specified DOI identifier.  All CIPSO
+hosts and gateways MUST support either this parameter, the NET_DOI
+parameter, or the HOST_DOI parameter.
+
+NET_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP network address.  All CIPSO labels within datagrams destined
+for the particular IP network MUST use the specified DOI identifier.  All
+CIPSO hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the HOST_DOI parameter.
+
+HOST_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP host address.  All CIPSO labels within datagrams destined for
+the particular IP host will use the specified DOI identifier.  All CIPSO
+hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the NET_DOI parameter.
+
+This list represents the minimal set of configuration parameters required
+to be compliant.  Implementors are encouraged to add to this list to
+provide enhanced functionality and control.  For example, many security
+policies may require both incoming and outgoing datagrams be checked against
+the port and host label ranges.
+
+
+4.1    Port Range Parameters
+
+The labels represented by the PORT_LABEL_MAX and PORT_LABEL_MIN parameters
+MAY be in CIPSO or local format.  Some CIPSO systems, such as routers, may
+want to have the range parameters expressed in CIPSO format so that incoming
+labels do not have to be converted to a local format before being compared
+against the range.  If multiple DOIs are supported by one of these CIPSO
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 8]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+systems then multiple port range parameters would be needed, one set for
+each DOI supported on a particular port.
+
+The port range will usually represent the total set of labels that may
+exist on the logical network accessed through the corresponding network
+interface.  It may, however, represent a subset of these labels that are
+allowed to enter the CIPSO system.
+
+
+4.2    Single Label CIPSO Hosts
+
+CIPSO implementations that support only one label are not required to
+support the parameters described above.  These limited implementations are
+only required to support a NET_LABEL parameter.  This parameter contains
+the CIPSO label that may be inserted in datagrams that exit the host.  In
+addition, the host MUST reject any incoming datagram that has a label which
+is not equivalent to the NET_LABEL parameter.
+
+
+5.    Handling Procedures
+
+This section describes the processing requirements for incoming and
+outgoing IP datagrams.  Just providing the correct CIPSO label format
+is not enough.  Assumptions will be made by one system on how a
+receiving system will handle the CIPSO label.  Wrong assumptions may
+lead to non-interoperability or even a security incident.  The
+requirements described below represent the minimal set needed for
+interoperability and that provide users some level of confidence.
+Many other requirements could be added to increase user confidence,
+however at the risk of restricting creativity and limiting vendor
+participation.
+
+
+5.1    Input Procedures
+
+All datagrams received through a network port MUST have a security label
+associated with them, either contained in the datagram or assigned to the
+receiving port.  Without this label the host, gateway, or router will not
+have the information it needs to make security decisions.  This security
+label will be obtained from the CIPSO if the option is present in the
+datagram.  See section 5.1.2 for handling procedures for unlabeled
+datagrams.  This label will be compared against the PORT (if appropriate)
+and HOST configuration parameters defined in section 4.
+
+If any field within the CIPSO option, such as the DOI identifier, is not
+recognized the IP datagram is discarded and an ICMP "parameter problem"
+(type 12) is generated and returned.  The ICMP code field is set to "bad
+parameter" (code 0) and the pointer is set to the start of the CIPSO field
+that is unrecognized.
+
+If the contents of the CIPSO are valid but the security label is
+outside of the configured host or port label range, the datagram is
+discarded and an ICMP "destination unreachable" (type 3) is generated
+and returned.  The code field of the ICMP is set to "communication with
+destination network administratively prohibited" (code 9) or to
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 9]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+"communication with destination host administratively prohibited"
+(code 10).  The value of the code field used is dependent upon whether
+the originator of the ICMP message is acting as a CIPSO host or a CIPSO
+gateway.  The recipient of the ICMP message MUST be able to handle either
+value.  The same procedure is performed if a CIPSO can not be added to an
+IP packet because it is too large to fit in the IP options area.
+
+If the error is triggered by receipt of an ICMP message, the message
+is discarded and no response is permitted (consistent with general ICMP
+processing rules).
+
+
+5.1.1    Unrecognized tag types
+
+The default condition for any CIPSO implementation is that an
+unrecognized tag type MUST be treated as a "parameter problem" and
+handled as described in section 5.1.  A CIPSO implementation MAY allow
+the system administrator to identify tag types that may safely be
+ignored.  This capability is an allowable enhancement, not a
+requirement.
+
+
+5.1.2    Unlabeled Packets
+
+A network port may be configured to not require a CIPSO label for all
+incoming  datagrams.  For this configuration a CIPSO label must be
+assigned to that network port and associated with all unlabeled IP
+datagrams.  This capability might be used for single level networks or
+networks that have CIPSO and non-CIPSO hosts and the non-CIPSO hosts
+all operate at the same label.
+
+If a CIPSO option is required and none is found, the datagram is
+discarded and an ICMP "parameter problem" (type 12) is generated and
+returned to the originator of the datagram.  The code field of the ICMP
+is set to "option missing" (code 1) and the ICMP pointer is set to 134
+(the value of the option type for the missing CIPSO option).
+
+
+5.2    Output Procedures
+
+A CIPSO option MUST appear only once in a datagram.  Only one tag type
+from the MAC Sensitivity class MAY be included in a CIPSO option.  Given
+the current set of defined tag types, this means that CIPSO labels at
+first will contain only one tag.
+
+All datagrams leaving a CIPSO system MUST meet the following condition:
+
+        PORT_LABEL_MIN <= CIPSO label <= PORT_LABEL_MAX
+
+If this condition is not satisfied the datagram MUST be discarded.
+If the CIPSO system only supports one port, the HOST_LABEL_MIN and the
+HOST_LABEL_MAX parameters MAY be substituted for the PORT parameters in
+the above condition.
+
+The DOI identifier to be used for all outgoing datagrams is configured by
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 10]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+the administrator.  If port level DOI identifier assignment is used, then
+the PORT_DOI configuration parameter MUST contain the DOI identifier to
+use.  If network level DOI assignment is used, then the NET_DOI parameter
+MUST contain the DOI identifier to use.  And if host level DOI assignment
+is employed, then the HOST_DOI parameter MUST contain the DOI identifier
+to use.  A CIPSO implementation need only support one level of DOI
+assignment.
+
+
+5.3    DOI Processing Requirements
+
+A CIPSO implementation MUST support at least one DOI and SHOULD support
+multiple DOIs.  System and network administrators are cautioned to
+ensure that at least one DOI is common within an IP network to allow for
+broadcasting of IP datagrams.
+
+CIPSO gateways MUST be capable of translating a CIPSO option from one
+DOI to another when forwarding datagrams between networks.  For
+efficiency purposes this capability is only a desired feature for CIPSO
+routers.
+
+
+5.4    Label of ICMP Messages
+
+The CIPSO label to be used on all outgoing ICMP messages MUST be equivalent
+to the label of the datagram that caused the ICMP message.  If the ICMP was
+generated due to a problem associated with the original CIPSO label then the
+following responses are allowed:
+
+  a.  Use the CIPSO label of the original IP datagram
+  b.  Drop the original datagram with no return message generated
+
+In most cases these options will have the same effect.  If you can not
+interpret the label or if it is outside the label range of your host or
+interface then an ICMP message with the same label will probably not be
+able to exit the system.
+
+
+6.    Assignment of DOI Identifier Numbers
+
+Requests for assignment of a DOI identifier number should be addressed to
+the Internet Assigned Numbers Authority (IANA).
+
+
+7.    Acknowledgements
+
+Much of the material in this RFC is based on (and copied from) work
+done by Gary Winiger of Sun Microsystems and published as Commercial
+IP Security Option at the INTEROP 89, Commercial IPSO Workshop.
+
+
+8.    Author's Address
+
+To submit mail for distribution to members of the IETF CIPSO Working
+Group, send mail to: cipso@wdl1.wdl.loral.com.
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 11]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+To be added to or deleted from this distribution, send mail to:
+cipso-request@wdl1.wdl.loral.com.
+
+
+9.    References
+
+RFC 1038, "Draft Revised IP Security Option", M. St. Johns, IETF, January
+1988.
+
+RFC 1108, "U.S. Department of Defense Security Options
+for the Internet Protocol", Stephen Kent, IAB, 1 March, 1991.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 12]
+
+
+
diff --git a/Documentation/netlabel/introduction.txt b/Documentation/netlabel/introduction.txt
new file mode 100644
index 0000000000000..a4ffba1694c8f
--- /dev/null
+++ b/Documentation/netlabel/introduction.txt
@@ -0,0 +1,46 @@
+NetLabel Introduction
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+August 2, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can be used by kernel security modules to attach
+security attributes to outgoing network packets generated from user space
+applications and read security attributes from incoming network packets.  It
+is composed of three main components, the protocol engines, the communication
+layer, and the kernel security module API.
+
+ * Protocol Engines
+
+The protocol engines are responsible for both applying and retrieving the
+network packet's security attributes.  If any translation between the network
+security attributes and those on the host is required then the protocol
+engine will handle those tasks as well.  Other kernel subsystems should
+refrain from calling the protocol engines directly, instead they should use
+the NetLabel kernel security module API described below.
+
+Detailed information about each NetLabel protocol engine can be found in this
+directory, consult '00-INDEX' for filenames.
+
+ * Communication Layer
+
+The communication layer exists to allow NetLabel configuration and monitoring
+from user space.  The NetLabel communication layer uses a message based
+protocol built on top of the Generic NETLINK transport mechanism.  The exact
+formatting of these NetLabel messages as well as the Generic NETLINK family
+names can be found in the 'net/netlabel/' directory as comments in the
+header files as well as in 'include/net/netlabel.h'.
+
+ * Security Module API
+
+The purpose of the NetLabel security module API is to provide a protocol
+independent interface to the underlying NetLabel protocol engines.  In addition
+to protocol independence, the security module API is designed to be completely
+LSM independent which should allow multiple LSMs to leverage the same code
+base.
+
+Detailed information about the NetLabel security module API can be found in the
+'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file
+found in this directory.
diff --git a/Documentation/netlabel/lsm_interface.txt b/Documentation/netlabel/lsm_interface.txt
new file mode 100644
index 0000000000000..98dd9f7430f2f
--- /dev/null
+++ b/Documentation/netlabel/lsm_interface.txt
@@ -0,0 +1,47 @@
+NetLabel Linux Security Module Interface
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can set and retrieve security attributes from
+network packets.  It is intended to be used by LSM developers who want to make
+use of a common code base for several different packet labeling protocols.
+The NetLabel security module API is defined in 'include/net/netlabel.h' but a
+brief overview is given below.
+
+ * NetLabel Security Attributes
+
+Since NetLabel supports multiple different packet labeling protocols and LSMs
+it uses the concept of security attributes to refer to the packet's security
+labels.  The NetLabel security attributes are defined by the
+'netlbl_lsm_secattr' structure in the NetLabel header file.  Internally the
+NetLabel subsystem converts the security attributes to and from the correct
+low-level packet label depending on the NetLabel build time and run time
+configuration.  It is up to the LSM developer to translate the NetLabel
+security attributes into whatever security identifiers are in use for their
+particular LSM.
+
+ * NetLabel LSM Protocol Operations
+
+These are the functions which allow the LSM developer to manipulate the labels
+on outgoing packets as well as read the labels on incoming packets.  Functions
+exist to operate both on sockets as well as the sk_buffs directly.  These high
+level functions are translated into low level protocol operations based on how
+the administrator has configured the NetLabel subsystem.
+
+ * NetLabel Label Mapping Cache Operations
+
+Depending on the exact configuration, translation between the network packet
+label and the internal LSM security identifier can be time consuming.  The
+NetLabel label mapping cache is a caching mechanism which can be used to
+sidestep much of this overhead once a mapping has been established.  Once the
+LSM has received a packet, used NetLabel to decode its security attributes,
+and translated the security attributes into a LSM internal identifier the LSM
+can use the NetLabel caching functions to associate the LSM internal
+identifier with the network packet's label.  This means that in the future
+when an incoming packet matches a cached value not only are the internal
+NetLabel translation mechanisms bypassed but the LSM translation mechanisms are
+bypassed as well which should result in a significant reduction in overhead.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 90ed78110fd49..307cd4ec8edd8 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -375,6 +375,41 @@ tcp_slow_start_after_idle - BOOLEAN
 	be timed out after an idle period.
 	Default: 1
 
+CIPSOv4 Variables:
+
+cipso_cache_enable - BOOLEAN
+	If set, enable additions to and lookups from the CIPSO label mapping
+	cache.  If unset, additions are ignored and lookups always result in a
+	miss.  However, regardless of the setting the cache is still
+	invalidated when required which means you can safely toggle this on and
+	off and the cache will always be "safe".
+	Default: 1
+
+cipso_cache_bucket_size - INTEGER
+	The CIPSO label cache consists of a fixed size hash table with each
+	hash bucket containing a number of cache entries.  This variable limits
+	the number of entries in each hash bucket; the larger the value the
+	more CIPSO label mappings that can be cached.  When the number of
+	entries in a given hash bucket reaches this limit adding new entries
+	causes the oldest entry in the bucket to be removed to make room.
+	Default: 10
+
+cipso_rbm_optfmt - BOOLEAN
+	Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of
+	the CIPSO draft specification (see Documentation/netlabel for details).
+	This means that when set the CIPSO tag will be padded with empty
+	categories in order to make the packet data 32-bit aligned.
+	Default: 0
+
+cipso_rbm_strictvalid - BOOLEAN
+	If set, do a very strict check of the CIPSO option when
+	ip_options_compile() is called.  If unset, relax the checks done during
+	ip_options_compile().  Either way is "safe" as errors are caught
+	elsewhere in the CIPSO processing code but setting this to 0 (False)
+	should result in less work (i.e. it should be faster) but could cause
+	problems with other implementations that require strict checking.
+	Default: 0
+
 IP Variables:
 
 ip_local_port_range - 2 INTEGERS
-- 
GitLab


From 11a03f78fbf15a866ba3bf6359a75cdfd1ced703 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 3 Aug 2006 16:46:20 -0700
Subject: [PATCH 0389/1063] [NetLabel]: core network changes

Changes to the core network stack to support the NetLabel subsystem.  This
includes changes to the IPv4 option handling to support CIPSO labels.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h       |   1 +
 include/net/cipso_ipv4.h | 250 +++++++++++++++++++++++++++++++++
 include/net/inet_sock.h  |   2 +-
 include/net/netlabel.h   | 291 +++++++++++++++++++++++++++++++++++++++
 net/ipv4/ah4.c           |   2 +-
 net/ipv4/ip_options.c    |  19 +++
 6 files changed, 563 insertions(+), 2 deletions(-)
 create mode 100644 include/net/cipso_ipv4.h
 create mode 100644 include/net/netlabel.h

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 4b55cf1df732e..2f4600146f833 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -57,6 +57,7 @@
 #define IPOPT_SEC	(2 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_LSRR	(3 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_TIMESTAMP	(4 |IPOPT_MEASUREMENT)
+#define IPOPT_CIPSO	(6 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_RR	(7 |IPOPT_CONTROL)
 #define IPOPT_SID	(8 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_SSRR	(9 |IPOPT_CONTROL|IPOPT_COPY)
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
new file mode 100644
index 0000000000000..c7175e7258044
--- /dev/null
+++ b/include/net/cipso_ipv4.h
@@ -0,0 +1,250 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188, copies of both documents can be found in the Documentation
+ * directory.  While CIPSO never became a full IETF RFC standard many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _CIPSO_IPV4_H
+#define _CIPSO_IPV4_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <net/netlabel.h>
+
+/* known doi values */
+#define CIPSO_V4_DOI_UNKNOWN          0x00000000
+
+/* tag types */
+#define CIPSO_V4_TAG_INVALID          0
+#define CIPSO_V4_TAG_RBITMAP          1
+#define CIPSO_V4_TAG_ENUM             2
+#define CIPSO_V4_TAG_RANGE            5
+#define CIPSO_V4_TAG_PBITMAP          6
+#define CIPSO_V4_TAG_FREEFORM         7
+
+/* doi mapping types */
+#define CIPSO_V4_MAP_UNKNOWN          0
+#define CIPSO_V4_MAP_STD              1
+#define CIPSO_V4_MAP_PASS             2
+
+/* limits */
+#define CIPSO_V4_MAX_REM_LVLS         256
+#define CIPSO_V4_INV_LVL              0x80000000
+#define CIPSO_V4_MAX_LOC_LVLS         (CIPSO_V4_INV_LVL - 1)
+#define CIPSO_V4_MAX_REM_CATS         65536
+#define CIPSO_V4_INV_CAT              0x80000000
+#define CIPSO_V4_MAX_LOC_CATS         (CIPSO_V4_INV_CAT - 1)
+
+/*
+ * CIPSO DOI definitions
+ */
+
+/* DOI definition struct */
+#define CIPSO_V4_TAG_MAXCNT           5
+struct cipso_v4_doi {
+	u32 doi;
+	u32 type;
+	union {
+		struct cipso_v4_std_map_tbl *std;
+	} map;
+	u8 tags[CIPSO_V4_TAG_MAXCNT];
+
+	u32 valid;
+	struct list_head list;
+	struct rcu_head rcu;
+	struct list_head dom_list;
+};
+
+/* Standard CIPSO mapping table */
+/* NOTE: the highest order bit (i.e. 0x80000000) is an 'invalid' flag, if the
+ *       bit is set then consider that value as unspecified, meaning the
+ *       mapping for that particular level/category is invalid */
+struct cipso_v4_std_map_tbl {
+	struct {
+		u32 *cipso;
+		u32 *local;
+		u32 cipso_size;
+		u32 local_size;
+	} lvl;
+	struct {
+		u32 *cipso;
+		u32 *local;
+		u32 cipso_size;
+		u32 local_size;
+	} cat;
+};
+
+/*
+ * Sysctl Variables
+ */
+
+#ifdef CONFIG_NETLABEL
+extern int cipso_v4_cache_enabled;
+extern int cipso_v4_cache_bucketsize;
+extern int cipso_v4_rbm_optfmt;
+extern int cipso_v4_rbm_strictvalid;
+#endif
+
+/*
+ * Helper Functions
+ */
+
+#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0)
+#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso)
+
+/*
+ * DOI List Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
+int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head));
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
+struct sk_buff *cipso_v4_doi_dump_all(size_t headroom);
+struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom);
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain);
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+			       const char *domain);
+#else
+static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_doi_remove(u32 doi,
+				    void (*callback) (struct rcu_head * head))
+{
+	return 0;
+}
+
+static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+	return NULL;
+}
+
+static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+{
+	return NULL;
+}
+
+static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
+{
+	return NULL;
+}
+
+static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def,
+					  const char *domain)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+					     const char *domain)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+void cipso_v4_cache_invalidate(void);
+int cipso_v4_cache_add(const struct sk_buff *skb,
+		       const struct netlbl_lsm_secattr *secattr);
+#else
+static inline void cipso_v4_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int cipso_v4_cache_add(const struct sk_buff *skb,
+				     const struct netlbl_lsm_secattr *secattr)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * Protocol Handling Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
+int cipso_v4_socket_setopt(struct socket *sock,
+			   unsigned char *opt,
+			   u32 opt_len);
+int cipso_v4_socket_setattr(const struct socket *sock,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr);
+int cipso_v4_socket_getopt(const struct socket *sock,
+			   unsigned char **opt,
+			   u32 *opt_len);
+int cipso_v4_socket_getattr(const struct socket *sock,
+			    struct netlbl_lsm_secattr *secattr);
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+			    struct netlbl_lsm_secattr *secattr);
+int cipso_v4_validate(unsigned char **option);
+#else
+static inline void cipso_v4_error(struct sk_buff *skb,
+				  int error,
+				  u32 gateway)
+{
+	return;
+}
+
+static inline int cipso_v4_socket_setattr(const struct socket *sock,
+				  const struct cipso_v4_doi *doi_def,
+				  const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_socket_getattr(const struct socket *sock,
+					  struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+					  struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_validate(unsigned char **option)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif /* _CIPSO_IPV4_H */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1f4a9a60d4cc4..f4caad56cd035 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -51,7 +51,7 @@ struct ip_options {
 			ts_needtime:1,
 			ts_needaddr:1;
 	unsigned char	router_alert;
-	unsigned char	__pad1;
+	unsigned char	cipso;
 	unsigned char	__pad2;
 	unsigned char	__data[0];
 };
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
new file mode 100644
index 0000000000000..7cae730832c7f
--- /dev/null
+++ b/include/net/netlabel.h
@@ -0,0 +1,291 @@
+/*
+ * NetLabel System
+ *
+ * The NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_H
+#define _NETLABEL_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+
+/*
+ * NetLabel - A management interface for maintaining network packet label
+ *            mapping tables for explicit packet labeling protocols.
+ *
+ * Network protocols such as CIPSO and RIPSO require a label translation layer
+ * to convert the label on the packet into something meaningful on the host
+ * machine.  In the current Linux implementation these mapping tables live
+ * inside the kernel; NetLabel provides a mechanism for user space applications
+ * to manage these mapping tables.
+ *
+ * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to
+ * send messages between kernel and user space.  The general format of a
+ * NetLabel message is shown below:
+ *
+ *  +-----------------+-------------------+--------- --- -- -
+ *  | struct nlmsghdr | struct genlmsghdr | payload
+ *  +-----------------+-------------------+--------- --- -- -
+ *
+ * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal.
+ * The payload is dependent on the subsystem specified in the
+ * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions
+ * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c
+ * file.  All of the fields in the NetLabel payload are NETLINK attributes, the
+ * length of each field is the length of the NETLINK attribute payload, see
+ * include/net/netlink.h for more information on NETLINK attributes.
+ *
+ */
+
+/*
+ * NetLabel NETLINK protocol
+ */
+
+#define NETLBL_PROTO_VERSION            1
+
+/* NetLabel NETLINK types/families */
+#define NETLBL_NLTYPE_NONE              0
+#define NETLBL_NLTYPE_MGMT              1
+#define NETLBL_NLTYPE_MGMT_NAME         "NLBL_MGMT"
+#define NETLBL_NLTYPE_RIPSO             2
+#define NETLBL_NLTYPE_RIPSO_NAME        "NLBL_RIPSO"
+#define NETLBL_NLTYPE_CIPSOV4           3
+#define NETLBL_NLTYPE_CIPSOV4_NAME      "NLBL_CIPSOv4"
+#define NETLBL_NLTYPE_CIPSOV6           4
+#define NETLBL_NLTYPE_CIPSOV6_NAME      "NLBL_CIPSOv6"
+#define NETLBL_NLTYPE_UNLABELED         5
+#define NETLBL_NLTYPE_UNLABELED_NAME    "NLBL_UNLBL"
+
+/* NetLabel return codes */
+#define NETLBL_E_OK                     0
+
+/*
+ * Helper functions
+ */
+
+#define NETLBL_LEN_U8                   nla_total_size(sizeof(u8))
+#define NETLBL_LEN_U16                  nla_total_size(sizeof(u16))
+#define NETLBL_LEN_U32                  nla_total_size(sizeof(u32))
+
+/**
+ * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer
+ * @head: the amount of headroom in bytes
+ * @body: the desired size (minus headroom) in bytes
+ * @gfp_flags: the alloc flags to pass to alloc_skb()
+ *
+ * Description:
+ * Allocate a NETLINK message buffer based on the sizes given in @head and
+ * @body.  If @head is greater than zero skb_reserve() is called to reserve
+ * @head bytes at the start of the buffer.  Returns a valid sk_buff pointer on
+ * success, NULL on failure.
+ *
+ */
+static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head,
+						       size_t body,
+						       int gfp_flags)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags);
+	if (skb == NULL)
+		return NULL;
+	if (head > 0) {
+		skb_reserve(skb, head);
+		if (skb_tailroom(skb) < body) {
+			kfree_skb(skb);
+			return NULL;
+		}
+	}
+
+	return skb;
+}
+
+/*
+ * NetLabel - Kernel API for accessing the network packet label mappings.
+ *
+ * The following functions are provided for use by other kernel modules,
+ * specifically kernel LSM modules, to provide a consistent, transparent API
+ * for dealing with explicit packet labeling protocols such as CIPSO and
+ * RIPSO.  The functions defined here are implemented in the
+ * net/netlabel/netlabel_kapi.c file.
+ *
+ */
+
+/* Domain mapping definition struct */
+struct netlbl_dom_map;
+
+/* Domain mapping operations */
+int netlbl_domhsh_remove(const char *domain);
+
+/* LSM security attributes */
+struct netlbl_lsm_cache {
+	void (*free) (const void *data);
+	void *data;
+};
+struct netlbl_lsm_secattr {
+	char *domain;
+
+	u32 mls_lvl;
+	u32 mls_lvl_vld;
+	unsigned char *mls_cat;
+	size_t mls_cat_len;
+
+	struct netlbl_lsm_cache cache;
+};
+
+/*
+ * LSM security attribute operations
+ */
+
+
+/**
+ * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct
+ * @secattr: the struct to initialize
+ *
+ * Description:
+ * Initialize an already allocated netlbl_lsm_secattr struct.  Returns zero on
+ * success, negative values on error.
+ *
+ */
+static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr)
+{
+	memset(secattr, 0, sizeof(*secattr));
+	return 0;
+}
+
+/**
+ * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct
+ * @secattr: the struct to clear
+ * @clear_cache: cache clear flag
+ *
+ * Description:
+ * Destroys the @secattr struct, including freeing all of the internal buffers.
+ * If @clear_cache is true then free the cache fields, otherwise leave them
+ * intact.  The struct must be reset with a call to netlbl_secattr_init()
+ * before reuse.
+ *
+ */
+static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr,
+					  u32 clear_cache)
+{
+	if (clear_cache && secattr->cache.data != NULL && secattr->cache.free)
+		secattr->cache.free(secattr->cache.data);
+	kfree(secattr->domain);
+	kfree(secattr->mls_cat);
+}
+
+/**
+ * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct
+ * @flags: the memory allocation flags
+ *
+ * Description:
+ * Allocate and initialize a netlbl_lsm_secattr struct.  Returns a valid
+ * pointer on success, or NULL on failure.
+ *
+ */
+static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags)
+{
+	return kzalloc(sizeof(struct netlbl_lsm_secattr), flags);
+}
+
+/**
+ * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct
+ * @secattr: the struct to free
+ * @clear_cache: cache clear flag
+ *
+ * Description:
+ * Frees @secattr including all of the internal buffers.  If @clear_cache is
+ * true then free the cache fields, otherwise leave them intact.
+ *
+ */
+static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr,
+				       u32 clear_cache)
+{
+	netlbl_secattr_destroy(secattr, clear_cache);
+	kfree(secattr);
+}
+
+/*
+ * LSM protocol operations
+ */
+
+#ifdef CONFIG_NETLABEL
+int netlbl_socket_setattr(const struct socket *sock,
+			  const struct netlbl_lsm_secattr *secattr);
+int netlbl_socket_getattr(const struct socket *sock,
+			  struct netlbl_lsm_secattr *secattr);
+int netlbl_skbuff_getattr(const struct sk_buff *skb,
+			  struct netlbl_lsm_secattr *secattr);
+void netlbl_skbuff_err(struct sk_buff *skb, int error);
+#else
+static inline int netlbl_socket_setattr(const struct socket *sock,
+				     const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int netlbl_socket_getattr(const struct socket *sock,
+					struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int netlbl_skbuff_getattr(const struct sk_buff *skb,
+					struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline void netlbl_skbuff_err(struct sk_buff *skb, int error)
+{
+	return;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * LSM label mapping cache operations
+ */
+
+#ifdef CONFIG_NETLABEL
+void netlbl_cache_invalidate(void);
+int netlbl_cache_add(const struct sk_buff *skb,
+		     const struct netlbl_lsm_secattr *secattr);
+#else
+static inline void netlbl_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int netlbl_cache_add(const struct sk_buff *skb,
+				   const struct netlbl_lsm_secattr *secattr)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif /* _NETLABEL_H */
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 2b98943e6b025..008e69d2e4239 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
 		switch (*optptr) {
 		case IPOPT_SEC:
 		case 0x85:	/* Some "Extended Security" crap. */
-		case 0x86:	/* Another "Commercial Security" crap. */
+		case IPOPT_CIPSO:
 		case IPOPT_RA:
 		case 0x80|21:	/* RFC1770 */
 			break;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 406056edc02ba..e0a93b4fa8cc7 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -24,6 +24,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/route.h>
+#include <net/cipso_ipv4.h>
 
 /* 
  * Write options to IP header, record destination address to
@@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 			dopt->is_strictroute = sopt->is_strictroute;
 		}
 	}
+	if (sopt->cipso) {
+		optlen  = sptr[sopt->cipso+1];
+		dopt->cipso = dopt->optlen+sizeof(struct iphdr);
+		memcpy(dptr, sptr+sopt->cipso, optlen);
+		dptr += optlen;
+		dopt->optlen += optlen;
+	}
 	while (dopt->optlen & 3) {
 		*dptr++ = IPOPT_END;
 		dopt->optlen++;
@@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 			if (optptr[2] == 0 && optptr[3] == 0)
 				opt->router_alert = optptr - iph;
 			break;
+		      case IPOPT_CIPSO:
+		        if (opt->cipso) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			opt->cipso = optptr - iph;
+		        if (cipso_v4_validate(&optptr)) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			break;
 		      case IPOPT_SEC:
 		      case IPOPT_SID:
 		      default:
-- 
GitLab


From 446fda4f26822b2d42ab3396aafcedf38a9ff2b6 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 3 Aug 2006 16:48:06 -0700
Subject: [PATCH 0390/1063] [NetLabel]: CIPSOv4 engine

Add support for the Commercial IP Security Option (CIPSO) to the IPv4
network stack.  CIPSO has become a de-facto standard for
trusted/labeled networking amongst existing Trusted Operating Systems
such as Trusted Solaris, HP-UX CMW, etc.  This implementation is
designed to be used with the NetLabel subsystem to provide explicit
packet labeling to LSM developers.

The CIPSO/IPv4 packet labeling works by the LSM calling a NetLabel API
function which attaches a CIPSO label (IPv4 option) to a given socket;
this in turn attaches the CIPSO label to every packet leaving the
socket without any extra processing on the outbound side.  On the
inbound side the individual packet's sk_buff is examined through a
call to a NetLabel API function to determine if a CIPSO/IPv4 label is
present and if so the security attributes of the CIPSO label are
returned to the caller of the NetLabel API function.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h     |    4 +
 net/ipv4/Makefile          |    1 +
 net/ipv4/cipso_ipv4.c      | 1607 ++++++++++++++++++++++++++++++++++++
 net/ipv4/sysctl_net_ipv4.c |   35 +
 4 files changed, 1647 insertions(+)
 create mode 100644 net/ipv4/cipso_ipv4.c

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index e4b1a4d4dcf36..af61d92354090 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -411,6 +411,10 @@ enum
 	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
 	NET_TCP_DMA_COPYBREAK=116,
 	NET_TCP_SLOW_START_AFTER_IDLE=117,
+	NET_CIPSOV4_CACHE_ENABLE=118,
+	NET_CIPSOV4_CACHE_BUCKET_SIZE=119,
+	NET_CIPSOV4_RBM_OPTFMT=120,
+	NET_CIPSOV4_RBM_STRICTVALID=121,
 };
 
 enum {
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4878fc5be85fe..f66049e28aebb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
 		      xfrm4_output.o
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
new file mode 100644
index 0000000000000..b82a101c95c5b
--- /dev/null
+++ b/net/ipv4/cipso_ipv4.c
@@ -0,0 +1,1607 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188, copies of both documents can be found in the Documentation
+ * directory.  While CIPSO never became a full IETF RFC standard many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+struct cipso_v4_domhsh_entry {	/* one LSM domain attached to a DOI */
+	char *domain;		/* kstrdup()'d name, may be NULL */
+	u32 valid;		/* set on add, cleared when unlinked */
+	struct list_head list;	/* link in cipso_v4_doi->dom_list */
+	struct rcu_head rcu;	/* for the deferred free via call_rcu() */
+};
+
+/* List of available DOI definitions */
+/* XXX - Updates should be minimal so having a single lock for the
+ * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
+ * okay. */
+/* XXX - This currently assumes a minimal number of different DOIs in use,
+ * if in practice there are a lot of different DOIs this list should
+ * probably be turned into a hash table or something similar so we
+ * can do quick lookups. */
+DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
+static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
+
+/* Label mapping cache: hash buckets holding copies of seen CIPSO options */
+int cipso_v4_cache_enabled = 1;		/* zero disables lookups and inserts */
+int cipso_v4_cache_bucketsize = 10;	/* max entries kept per bucket */
+#define CIPSO_V4_CACHE_BUCKETBITS     7
+#define CIPSO_V4_CACHE_BUCKETS        (1 << CIPSO_V4_CACHE_BUCKETBITS)
+#define CIPSO_V4_CACHE_REORDERLIMIT   10	/* activity gap that triggers a swap */
+struct cipso_v4_map_cache_bkt {
+	spinlock_t lock;		/* taken around every use of the list */
+	u32 size;			/* number of entries on @list */
+	struct list_head list;		/* entries, new ones go to the head */
+};
+struct cipso_v4_map_cache_entry {
+	u32 hash;			/* jhash of @key, compared first */
+	unsigned char *key;		/* copy of the raw CIPSO option */
+	size_t key_len;			/* length of @key in bytes */
+
+	struct netlbl_lsm_cache lsm_data;	/* LSM payload and its free hook */
+
+	u32 activity;			/* hit counter used for reordering */
+	struct list_head list;		/* link in the bucket's list */
+};
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;	/* bucket array */
+
+/* Restricted bitmap (tag #1) flags */
+int cipso_v4_rbm_optfmt = 0;
+int cipso_v4_rbm_strictvalid = 1;
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit
+ * @bitmap: the bitmap
+ * @bitmap_len: length in bits
+ * @offset: starting offset
+ * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit
+ *
+ * Description:
+ * Starting at @offset, walk the bitmap from left to right until either the
+ * desired bit is found or we reach the end.  Return the bit offset, or -1 if
+ * no such bit exists at or after @offset (including @offset >= @bitmap_len).
+ */
+static int cipso_v4_bitmap_walk(const unsigned char *bitmap,
+				u32 bitmap_len,
+				u32 offset,
+				u8 state)
+{
+	u32 bit_spot;
+	unsigned char bitmask;
+	unsigned char byte;
+
+	/*
+	 * Load each byte only after the loop's bounds check has passed, so a
+	 * walk that starts at, or runs up to, @bitmap_len never reads a byte
+	 * beyond the end of the buffer.  Callers restart the search at
+	 * "previous hit + 1", which equals @bitmap_len once the last bit of
+	 * the bitmap has matched.
+	 */
+	for (bit_spot = offset; bit_spot < bitmap_len; bit_spot++) {
+		/* bits are numbered MSB first within each byte */
+		byte = bitmap[bit_spot / 8];
+		bitmask = 0x80 >> (bit_spot % 8);
+		if (state) {
+			if (byte & bitmask)
+				return bit_spot;
+		} else if ((byte & bitmask) == 0) {
+			return bit_spot;
+		}
+	}
+
+	/* ran off the end of the bitmap without finding a match */
+	return -1;
+}
+
+/**
+ * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap
+ * @bitmap: the bitmap
+ * @bit: the bit
+ * @state: if non-zero, set the bit (1) else clear the bit (0)
+ *
+ * Description:
+ * Set or clear bit number @bit of @bitmap, counting from the most significant
+ * bit of the first byte.  Nothing is returned and @bit is not range checked.
+ */
+static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
+				   u32 bit,
+				   u8 state)
+{
+	unsigned char *target = &bitmap[bit / 8];
+	u8 mask = 0x80 >> (bit % 8);
+
+	/* bit 0 is the MSB of byte 0, bit 7 the LSB of byte 0, and so on */
+	if (!state) {
+		*target &= ~mask;
+		return;
+	}
+
+	*target |= mask;
+}
+
+/**
+ * cipso_v4_doi_domhsh_free - Frees a domain list entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * RCU callback, meant to be handed to call_rcu(), which releases a domain
+ * list entry together with its domain string.  @entry must be the rcu member
+ * embedded in the struct cipso_v4_domhsh_entry that is being freed.
+ *
+ */
+static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
+{
+	struct cipso_v4_domhsh_entry *dom_entry =
+		container_of(entry, struct cipso_v4_domhsh_entry, rcu);
+
+	kfree(dom_entry->domain);
+	kfree(dom_entry);
+}
+
+/**
+ * cipso_v4_cache_entry_free - Frees a cache entry
+ * @entry: the entry to free
+ *
+ * Description:
+ * Runs the LSM supplied free hook, if one is set, on the cached LSM data and
+ * then releases the copied key and the cache entry itself.
+ */
+static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry)
+{
+	if (entry->lsm_data.free != NULL)
+		(*entry->lsm_data.free)(entry->lsm_data.data);
+	kfree(entry->key);
+	kfree(entry);
+}
+
+/**
+ * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache
+ * @key: the hash key
+ * @key_len: the length of the key in bytes
+ *
+ * Description:
+ * The CIPSO tag hashing function: a jhash() over @key with an initial value
+ * of zero.  Returns a 32-bit hash value; callers derive the bucket from it.
+ */
+static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
+{
+	return jhash(key, key_len, 0);
+}
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+/**
+ * cipso_v4_cache_init - Initialize the CIPSO cache
+ *
+ * Description:
+ * Allocates the array of CIPSO_V4_CACHE_BUCKETS cache buckets and prepares
+ * the lock, entry count and list head of each one.  This must run before any
+ * other cache function.  Returns zero on success and -ENOMEM on failure.
+ *
+ */
+static int cipso_v4_cache_init(void)
+{
+	struct cipso_v4_map_cache_bkt *bkt;
+
+	cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS,
+				 sizeof(*cipso_v4_cache), GFP_KERNEL);
+	if (!cipso_v4_cache)
+		return -ENOMEM;
+
+	for (bkt = cipso_v4_cache;
+	     bkt < cipso_v4_cache + CIPSO_V4_CACHE_BUCKETS; bkt++) {
+		spin_lock_init(&bkt->lock);
+		bkt->size = 0;
+		INIT_LIST_HEAD(&bkt->list);
+	}
+
+	return 0;
+}
+
+/**
+ * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
+ *
+ * Description:
+ * Unlinks and frees every entry in every cache bucket and resets the bucket
+ * entry counts to zero.  This function does not return a value.
+ *
+ */
+void cipso_v4_cache_invalidate(void)
+{
+	struct cipso_v4_map_cache_bkt *bkt;
+	struct cipso_v4_map_cache_entry *cur, *next;
+	u32 idx;
+
+	for (idx = 0; idx < CIPSO_V4_CACHE_BUCKETS; idx++) {
+		bkt = &cipso_v4_cache[idx];
+
+		/* empty one bucket at a time under that bucket's own lock */
+		spin_lock(&bkt->lock);
+		list_for_each_entry_safe(cur, next, &bkt->list, list) {
+			list_del(&cur->list);
+			cipso_v4_cache_entry_free(cur);
+		}
+		bkt->size = 0;
+		spin_unlock(&bkt->lock);
+	}
+}
+
+/**
+ * cipso_v4_cache_check - Check the CIPSO cache for a label mapping
+ * @key: the buffer to check
+ * @key_len: buffer length in bytes
+ * @secattr: the security attribute struct to use
+ *
+ * Description:
+ * This function checks the cache to see if a label mapping already exists for
+ * the given key.  If there is a match then the cache is adjusted and the
+ * @secattr struct is populated with the correct LSM security attributes.  The
+ * cache is adjusted in the following manner if the entry is not already the
+ * first in the cache bucket:
+ *
+ *  1. The cache entry's activity counter is incremented
+ *  2. The previous (higher ranking) entry's activity counter is decremented
+ *  3. If the difference between the two activity counters is greater than
+ *     CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped
+ *
+ * Returns zero on success, -ENOENT for a cache miss, and other negative values
+ * on error.
+ */
+static int cipso_v4_cache_check(const unsigned char *key,
+				u32 key_len,
+				struct netlbl_lsm_secattr *secattr)
+{
+	u32 bkt;
+	struct cipso_v4_map_cache_entry *entry;
+	struct cipso_v4_map_cache_entry *prev_entry = NULL;
+	u32 hash;
+
+	if (!cipso_v4_cache_enabled)
+		return -ENOENT;
+
+	hash = cipso_v4_map_cache_hash(key, key_len);
+	/* mask by bucket count; BUCKETBITS - 1 would reach only 4 buckets */
+	bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1);
+	spin_lock(&cipso_v4_cache[bkt].lock);
+	list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
+		if (entry->hash == hash &&
+		    entry->key_len == key_len &&
+		    memcmp(entry->key, key, key_len) == 0) {
+			entry->activity += 1;
+			secattr->cache.free = entry->lsm_data.free;
+			secattr->cache.data = entry->lsm_data.data;
+			if (prev_entry == NULL) {
+				spin_unlock(&cipso_v4_cache[bkt].lock);
+				return 0;
+			}
+
+			if (prev_entry->activity > 0)
+				prev_entry->activity -= 1;
+			if (entry->activity > prev_entry->activity &&
+			    entry->activity - prev_entry->activity >
+			    CIPSO_V4_CACHE_REORDERLIMIT) {
+				__list_del(entry->list.prev, entry->list.next);
+				__list_add(&entry->list,
+					   prev_entry->list.prev,
+					   &prev_entry->list);
+			}
+
+			spin_unlock(&cipso_v4_cache[bkt].lock);
+			return 0;
+		}
+		prev_entry = entry;
+	}
+	spin_unlock(&cipso_v4_cache[bkt].lock);
+
+	return -ENOENT;
+}
+
+/**
+ * cipso_v4_cache_add - Add an entry to the CIPSO cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add a new entry into the CIPSO label mapping cache.  Add the new entry to
+ * head of the cache bucket's list, if the cache bucket is out of room remove
+ * the last entry in the list first.  It is important to note that there is
+ * currently no checking for duplicate keys.  Returns zero on success,
+ * negative values on failure.
+ */
+int cipso_v4_cache_add(const struct sk_buff *skb,
+		       const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	u32 bkt;
+	struct cipso_v4_map_cache_entry *entry = NULL;
+	struct cipso_v4_map_cache_entry *old_entry = NULL;
+	unsigned char *cipso_ptr;
+	u32 cipso_ptr_len;
+
+	if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+		return 0;
+
+	cipso_ptr = CIPSO_V4_OPTPTR(skb);
+	cipso_ptr_len = cipso_ptr[1];
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL)
+		return -ENOMEM;
+	entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
+	if (entry->key == NULL) {
+		ret_val = -ENOMEM;
+		goto cache_add_failure;
+	}
+	memcpy(entry->key, cipso_ptr, cipso_ptr_len);
+	entry->key_len = cipso_ptr_len;
+	entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
+	entry->lsm_data.free = secattr->cache.free;
+	entry->lsm_data.data = secattr->cache.data;
+
+	/* must select the same bucket as the lookup in cipso_v4_cache_check() */
+	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
+	spin_lock(&cipso_v4_cache[bkt].lock);
+	if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+		list_add(&entry->list, &cipso_v4_cache[bkt].list);
+		cipso_v4_cache[bkt].size += 1;
+	} else {
+		old_entry = list_entry(cipso_v4_cache[bkt].list.prev,
+				       struct cipso_v4_map_cache_entry, list);
+		list_del(&old_entry->list);
+		list_add(&entry->list, &cipso_v4_cache[bkt].list);
+		cipso_v4_cache_entry_free(old_entry);
+	}
+	spin_unlock(&cipso_v4_cache[bkt].lock);
+
+	return 0;
+
+cache_add_failure:
+	if (entry)
+		cipso_v4_cache_entry_free(entry);
+	return ret_val;
+}
+
+/*
+ * DOI List Functions
+ */
+
+/**
+ * cipso_v4_doi_search - Searches for a DOI definition
+ * @doi: the DOI to search for
+ *
+ * Description:
+ * Walk the DOI list looking for a valid definition whose DOI equals @doi; the
+ * caller must hold rcu_read_lock().  Returns the definition, or NULL if none.
+ */
+static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
+{
+	struct cipso_v4_doi *doi_def;
+
+	list_for_each_entry_rcu(doi_def, &cipso_v4_doi_list, list) {
+		if (doi_def->valid && doi_def->doi == doi)
+			return doi_def;
+	}
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine
+ * @doi_def: the DOI structure
+ *
+ * Description:
+ * The caller defines a new DOI for use by the CIPSO engine and calls this
+ * function to add it to the list of acceptable domains.  The caller must
+ * ensure that the mapping table specified in @doi_def->map meets all of the
+ * requirements of the mapping type (see cipso_ipv4.h for details).  Returns
+ * zero on success, -EINVAL if @doi_def is NULL or carries the unknown DOI,
+ * and -EEXIST if a valid definition with the same DOI is already listed.
+ */
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
+		return -EINVAL;
+
+	doi_def->valid = 1;
+	INIT_RCU_HEAD(&doi_def->rcu);
+	INIT_LIST_HEAD(&doi_def->dom_list);
+
+	rcu_read_lock();
+	if (cipso_v4_doi_search(doi_def->doi) != NULL)
+		goto doi_add_failure_rlock;	/* already listed */
+	spin_lock(&cipso_v4_doi_list_lock);
+	if (cipso_v4_doi_search(doi_def->doi) != NULL)	/* re-check, locked */
+		goto doi_add_failure_slock;
+	list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return 0;
+
+doi_add_failure_slock:
+	spin_unlock(&cipso_v4_doi_list_lock);
+doi_add_failure_rlock:
+	rcu_read_unlock();
+	return -EEXIST;
+}
+
+/**
+ * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
+ * @doi: the DOI value
+ * @callback: the DOI cleanup/free callback
+ *
+ * Description:
+ * Unlinks the valid definition for @doi from the DOI list, asks NetLabel to
+ * drop every valid domain mapping recorded in the definition's domain list,
+ * invalidates the label cache and finally hands the definition to call_rcu()
+ * so that @callback can free it.  Returns zero on success and -ENOENT if no
+ * valid definition for @doi exists.
+ */
+int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
+{
+	struct cipso_v4_doi *doi_def;
+	struct cipso_v4_domhsh_entry *dom_iter;
+
+	rcu_read_lock();
+	if (cipso_v4_doi_search(doi) != NULL) {
+		spin_lock(&cipso_v4_doi_list_lock);
+		doi_def = cipso_v4_doi_search(doi);	/* re-check, locked */
+		if (doi_def == NULL) {
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			return -ENOENT;
+		}
+		doi_def->valid = 0;
+		list_del_rcu(&doi_def->list);
+		spin_unlock(&cipso_v4_doi_list_lock);
+		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
+			if (dom_iter->valid)
+				netlbl_domhsh_remove(dom_iter->domain);
+		cipso_v4_cache_invalidate();
+		rcu_read_unlock();
+
+		call_rcu(&doi_def->rcu, callback);
+		return 0;
+	}
+	rcu_read_unlock();
+
+	return -ENOENT;
+}
+
+/**
+ * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * @doi: the DOI value
+ *
+ * Description:
+ * Non-static wrapper around cipso_v4_doi_search(): returns the valid DOI
+ * definition matching @doi, or NULL if there is none.  The caller must hold
+ * rcu_read_lock() across the call and while using the returned definition.
+ *
+ */
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+	return cipso_v4_doi_search(doi);
+}
+
+/**
+ * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Dump a list of all the configured DOI values into a sk_buff.  The returned
+ * sk_buff has room at the front of the sk_buff for @headroom bytes.  See
+ * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format.  This
+ * function may fail if another process is changing the DOI list at the same
+ * time.  Returns a pointer to a sk_buff on success, NULL on error.
+ */
+struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	struct cipso_v4_doi *iter;
+	u32 doi_cnt = 0;
+	ssize_t buf_len;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->valid) {
+			doi_cnt += 1;
+			buf_len += 2 * NETLBL_LEN_U32;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto doi_dump_all_failure;
+
+	if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
+		goto doi_dump_all_failure;
+	buf_len -= NETLBL_LEN_U32;
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->valid) {
+			if (buf_len < 2 * NETLBL_LEN_U32)
+				goto doi_dump_all_failure;
+			if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
+				goto doi_dump_all_failure;
+			if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+				goto doi_dump_all_failure;
+			buf_len -= 2 * NETLBL_LEN_U32;
+		}
+	rcu_read_unlock();
+
+	return skb;
+
+doi_dump_all_failure:
+	rcu_read_unlock();
+	/* a sk_buff owns a separate data area, kfree() would not release it */
+	kfree_skb(skb);
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
+ * @doi: the DOI value
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Lookup the DOI definition matching @doi and dump its contents into a
+ * sk_buff.  The returned sk_buff has room at the front of the sk_buff for
+ * @headroom bytes.  See net/netlabel/netlabel_cipso_v4.h for the LIST message
+ * format.  This function may fail if another process is changing the DOI list
+ * at the same time.  Returns a pointer to a sk_buff on success, NULL on error.
+ */
+struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	struct cipso_v4_doi *iter;
+	u32 tag_cnt = 0;
+	u32 lvl_cnt = 0;
+	u32 cat_cnt = 0;
+	ssize_t buf_len;
+	ssize_t tmp;
+
+	rcu_read_lock();
+	iter = cipso_v4_doi_getdef(doi);
+	if (iter == NULL)
+		goto doi_dump_failure;
+	buf_len = NETLBL_LEN_U32;
+	switch (iter->type) {
+	case CIPSO_V4_MAP_PASS:
+		buf_len += NETLBL_LEN_U32;
+		while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+		       iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+			tag_cnt += 1;
+			buf_len += NETLBL_LEN_U8;
+		}
+		break;
+	case CIPSO_V4_MAP_STD:
+		buf_len += 3 * NETLBL_LEN_U32;
+		while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+		       iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+			tag_cnt += 1;
+			buf_len += NETLBL_LEN_U8;
+		}
+		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+			if (iter->map.std->lvl.local[tmp] !=
+			    CIPSO_V4_INV_LVL) {
+				lvl_cnt += 1;
+				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
+			}
+		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+			if (iter->map.std->cat.local[tmp] !=
+			    CIPSO_V4_INV_CAT) {
+				cat_cnt += 1;
+				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
+			}
+		break;
+	}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto doi_dump_failure;
+
+	if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+		goto doi_dump_failure;
+	buf_len -= NETLBL_LEN_U32;
+	if (iter != cipso_v4_doi_getdef(doi))
+		goto doi_dump_failure;
+	switch (iter->type) {
+	case CIPSO_V4_MAP_PASS:
+		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+			goto doi_dump_failure;
+		buf_len -= NETLBL_LEN_U32;
+		for (tmp = 0;
+		     tmp < CIPSO_V4_TAG_MAXCNT &&
+			     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+		     tmp++) {
+			if (buf_len < NETLBL_LEN_U8)
+				goto doi_dump_failure;
+			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+				goto doi_dump_failure;
+			buf_len -= NETLBL_LEN_U8;
+		}
+		break;
+	case CIPSO_V4_MAP_STD:
+		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+			goto doi_dump_failure;
+		if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
+			goto doi_dump_failure;
+		if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
+			goto doi_dump_failure;
+		buf_len -= 3 * NETLBL_LEN_U32;
+		for (tmp = 0;
+		     tmp < CIPSO_V4_TAG_MAXCNT &&
+			     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+		     tmp++) {
+			if (buf_len < NETLBL_LEN_U8)
+				goto doi_dump_failure;
+			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+				goto doi_dump_failure;
+			buf_len -= NETLBL_LEN_U8;
+		}
+		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+			if (iter->map.std->lvl.local[tmp] !=
+			    CIPSO_V4_INV_LVL) {
+				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
+					goto doi_dump_failure;
+				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+					goto doi_dump_failure;
+				if (nla_put_u8(skb,
+					   NLA_U8,
+					   iter->map.std->lvl.local[tmp]) != 0)
+					goto doi_dump_failure;
+				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
+			}
+		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+			if (iter->map.std->cat.local[tmp] !=
+			    CIPSO_V4_INV_CAT) {
+				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
+					goto doi_dump_failure;
+				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+					goto doi_dump_failure;
+				if (nla_put_u16(skb,
+					   NLA_U16,
+					   iter->map.std->cat.local[tmp]) != 0)
+					goto doi_dump_failure;
+				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
+			}
+		break;
+	}
+	rcu_read_unlock();
+
+	return skb;
+
+doi_dump_failure:
+	rcu_read_unlock();
+	/* a sk_buff owns a separate data area, kfree() would not release it */
+	kfree_skb(skb);
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to add
+ *
+ * Description:
+ * Adds the @domain to the DOI specified by @doi_def, this function should
+ * only be called by external functions (i.e. NetLabel).  The entry and a copy
+ * of @domain are allocated with GFP_KERNEL before any lock is taken.  Returns
+ * zero on success, -ENOMEM or -EEXIST (valid duplicate domain) on failure.
+ */
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
+{
+	struct cipso_v4_domhsh_entry *iter;
+	struct cipso_v4_domhsh_entry *new_dom;
+
+	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
+	if (new_dom == NULL)
+		return -ENOMEM;
+	if (domain) {
+		new_dom->domain = kstrdup(domain, GFP_KERNEL);
+		if (new_dom->domain == NULL) {
+			kfree(new_dom);
+			return -ENOMEM;
+		}
+	}
+	new_dom->valid = 1;
+	INIT_RCU_HEAD(&new_dom->rcu);
+
+	rcu_read_lock();
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+		if (iter->valid &&
+		    ((domain != NULL && iter->domain != NULL &&
+		      strcmp(iter->domain, domain) == 0) ||
+		     (domain == NULL && iter->domain == NULL))) {
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			kfree(new_dom->domain);
+			kfree(new_dom);
+			return -EEXIST;
+		}
+	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to remove
+ *
+ * Description:
+ * Removes the @domain from the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel).  The matching
+ * entry is unlinked and then freed through call_rcu().  Returns zero on
+ * success and -ENOENT if no valid entry for @domain is found.
+ */
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+			       const char *domain)
+{
+	struct cipso_v4_domhsh_entry *iter;
+
+	rcu_read_lock();
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+		if (iter->valid &&
+		    ((domain != NULL && iter->domain != NULL &&
+		      strcmp(iter->domain, domain) == 0) ||
+		     (domain == NULL && iter->domain == NULL))) {
+			iter->valid = 0;
+			list_del_rcu(&iter->list);
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
+
+			return 0;
+		}
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return -ENOENT;
+}
+
+/*
+ * Label Mapping Functions
+ */
+
+/**
+ * cipso_v4_map_lvl_valid - Checks to see if the given level is understood
+ * @doi_def: the DOI definition
+ * @level: the level to check
+ *
+ * Description:
+ * Returns zero if @doi_def defines a mapping for the CIPSO level @level and a
+ * negative value if it does not, including levels beyond the mapping table.
+ */
+static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
+{
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		/* a level past the table has no mapping, do not index it */
+		if (level < doi_def->map.std->lvl.cipso_size &&
+		    doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+			return 0;
+		break;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network
+ * @doi_def: the DOI definition
+ * @host_lvl: the host MLS level
+ * @net_lvl: the network/CIPSO MLS level
+ *
+ * Description:
+ * Translate the local MLS level @host_lvl into the CIPSO level of @doi_def.
+ * Returns zero on success, negative values if @host_lvl has no mapping.
+ */
+static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
+				 u32 host_lvl,
+				 u32 *net_lvl)
+{
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		*net_lvl = host_lvl;
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		/* unmapped slots hold CIPSO_V4_INV_LVL, never hand that out */
+		if (host_lvl < doi_def->map.std->lvl.local_size &&
+		    doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) {
+			*net_lvl = doi_def->map.std->lvl.local[host_lvl];
+			return 0;
+		}
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host
+ * @doi_def: the DOI definition
+ * @net_lvl: the network/CIPSO MLS level
+ * @host_lvl: the host MLS level
+ *
+ * Description:
+ * Translate the CIPSO level @net_lvl into the matching local MLS level using
+ * @doi_def and store it in @host_lvl.  Returns zero on success and -EINVAL
+ * when the level has no mapping or the DOI type is not recognised.
+ *
+ */
+static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
+				 u32 net_lvl,
+				 u32 *host_lvl)
+{
+	struct cipso_v4_std_map_tbl *tbl;
+
+	if (doi_def->type == CIPSO_V4_MAP_PASS) {
+		*host_lvl = net_lvl;
+		return 0;
+	}
+	if (doi_def->type != CIPSO_V4_MAP_STD)
+		return -EINVAL;
+
+	/* standard mapping: reject levels outside of, or unset in, the table */
+	tbl = doi_def->map.std;
+	if (net_lvl >= tbl->lvl.cipso_size ||
+	    tbl->lvl.cipso[net_lvl] >= CIPSO_V4_INV_LVL)
+		return -EINVAL;
+
+	*host_lvl = tbl->lvl.cipso[net_lvl];
+	return 0;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid
+ * @doi_def: the DOI definition
+ * @bitmap: category bitmap
+ * @bitmap_len: bitmap length in bytes
+ *
+ * Description:
+ * Checks the given category bitmap against the given DOI definition and
+ * returns a negative value if any of the categories in the bitmap do not have
+ * a valid mapping and a zero value if all of the categories are valid.
+ */
+static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
+				      const unsigned char *bitmap,
+				      u32 bitmap_len)
+{
+	int cat = -1;
+	u32 bitmap_len_bits = bitmap_len * 8;
+	u32 cipso_cat_size;
+	u32 *cipso_array;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		/* touch map.std only for DOI types that use a mapping table */
+		cipso_cat_size = doi_def->map.std->cat.cipso_size;
+		cipso_array = doi_def->map.std->cat.cipso;
+		for (;;) {
+			cat = cipso_v4_bitmap_walk(bitmap, bitmap_len_bits,
+						   cat + 1, 1);
+			if (cat < 0)
+				break;
+			if (cat >= cipso_cat_size ||
+			    cipso_array[cat] >= CIPSO_V4_INV_CAT)
+				return -EFAULT;
+		}
+
+		if (cat == -1)
+			return 0;
+		break;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @host_cat: the category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ * @net_cat: the zero'd out category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ *
+ * Description:
+ * Translate a local MLS category bitmap into the CIPSO bitmap for @doi_def.
+ * A pass-through DOI copies the host bitmap up to and including its last
+ * non-zero byte.  Returns the minimum size in bytes of the network bitmap on
+ * success, negative values otherwise.
+ */
+static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
+				     const unsigned char *host_cat,
+				     u32 host_cat_len,
+				     unsigned char *net_cat,
+				     u32 net_cat_len)
+{
+	int host_spot = -1;
+	u32 net_spot;
+	u32 net_spot_max = 0;
+	u32 host_clen_bits = host_cat_len * 8;
+	u32 net_clen_bits = net_cat_len * 8;
+	u32 host_cat_size;
+	u32 *host_cat_array;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		/* count bytes, not an index, so the last byte is not dropped */
+		net_spot_max = host_cat_len;
+		while (net_spot_max > 0 && host_cat[net_spot_max - 1] == 0)
+			net_spot_max--;
+		if (net_spot_max > net_cat_len)
+			return -EINVAL;
+		memcpy(net_cat, host_cat, net_spot_max);
+		return net_spot_max;
+	case CIPSO_V4_MAP_STD:
+		host_cat_size = doi_def->map.std->cat.local_size;
+		host_cat_array = doi_def->map.std->cat.local;
+		for (;;) {
+			host_spot = cipso_v4_bitmap_walk(host_cat,
+							 host_clen_bits,
+							 host_spot + 1, 1);
+			if (host_spot < 0)
+				break;
+			if (host_spot >= host_cat_size)
+				return -EPERM;
+
+			net_spot = host_cat_array[host_spot];
+			if (net_spot >= net_clen_bits)
+				return -ENOSPC;
+			cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
+			if (net_spot > net_spot_max)
+				net_spot_max = net_spot;
+		}
+
+		if (host_spot == -2)
+			return -EFAULT;
+
+		if (++net_spot_max % 8)
+			return net_spot_max / 8 + 1;
+		return net_spot_max / 8;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ * @host_cat: the zero'd out category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO bitmap to the correct local
+ * MLS category bitmap using the given DOI definition.  Returns the minimum
+ * size in bytes of the host bitmap on success, negative values otherwise.  The
+ * standard mapping table is only consulted for CIPSO_V4_MAP_STD definitions.
+ */
+static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
+				     const unsigned char *net_cat,
+				     u32 net_cat_len,
+				     unsigned char *host_cat,
+				     u32 host_cat_len)
+{
+	u32 host_spot;
+	u32 host_spot_max = 0;
+	int net_spot = -1;
+	u32 net_clen_bits = net_cat_len * 8;
+	u32 host_clen_bits = host_cat_len * 8;
+	u32 net_cat_size;
+	u32 *net_cat_array;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		if (net_cat_len > host_cat_len)
+			return -EINVAL;
+		memcpy(host_cat, net_cat, net_cat_len);
+		return net_cat_len;
+	case CIPSO_V4_MAP_STD:
+		/* touch map.std only for DOI types that use a mapping table */
+		net_cat_size = doi_def->map.std->cat.cipso_size;
+		net_cat_array = doi_def->map.std->cat.cipso;
+		for (;;) {
+			net_spot = cipso_v4_bitmap_walk(net_cat, net_clen_bits,
+							net_spot + 1, 1);
+			if (net_spot < 0)
+				break;
+			if (net_spot >= net_cat_size ||
+			    net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
+				return -EPERM;
+
+			host_spot = net_cat_array[net_spot];
+			if (host_spot >= host_clen_bits)
+				return -ENOSPC;
+			cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
+
+			if (host_spot > host_spot_max)
+				host_spot_max = host_spot;
+		}
+
+		if (net_spot == -2)
+			return -EFAULT;
+
+		if (++host_spot_max % 8)
+			return host_spot_max / 8 + 1;
+		return host_spot_max / 8;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Protocol Handling Functions
+ */
+
+#define CIPSO_V4_HDR_LEN              6
+
+/**
+ * cipso_v4_gentag_hdr - Generate a CIPSO option header
+ * @doi_def: the DOI definition
+ * @len: the total tag length in bytes
+ * @buf: the CIPSO option buffer
+ *
+ * Description:
+ * Write a CIPSO header into the beginning of @buf.  Return zero on success,
+ * negative values on failure.  The DOI sits at offset 2 so it is copied in
+ * bytewise rather than stored through a possibly misaligned u32 pointer.
+ */
+static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
+			       u32 len, unsigned char *buf)
+{
+	u32 doi_net = htonl(doi_def->doi);
+
+	if (CIPSO_V4_HDR_LEN + len > 40)
+		return -ENOSPC;
+
+	buf[0] = IPOPT_CIPSO;
+	buf[1] = CIPSO_V4_HDR_LEN + len;
+	memcpy(&buf[2], &doi_net, sizeof(doi_net));
+	return 0;
+}
+
+#define CIPSO_V4_TAG1_CAT_LEN         30
+
+/**
+ * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Build a complete CIPSO option (header plus one restricted bitmap tag, tag
+ * type #1) in a freshly allocated buffer.  On success the buffer is handed
+ * back through @buffer, its used length through @buffer_len, and zero is
+ * returned; the allocation itself may be larger than the reported length.
+ * On failure the buffer is freed and a negative value is returned.
+ */
+static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
+			       const struct netlbl_lsm_secattr *secattr,
+			       unsigned char **buffer,
+			       u32 *buffer_len)
+{
+	int ret_val = -EPERM;
+	unsigned char *opt_buf;
+	unsigned char *tag;
+	u32 tag_len = 4;
+	u32 alloc_len = CIPSO_V4_HDR_LEN + 4;
+	u32 level;
+
+	/* Only reserve room for the category bitmap when there is one. */
+	if (secattr->mls_cat)
+		alloc_len += CIPSO_V4_TAG1_CAT_LEN;
+	opt_buf = kzalloc(alloc_len, GFP_ATOMIC);
+	if (opt_buf == NULL)
+		return -ENOMEM;
+	tag = &opt_buf[CIPSO_V4_HDR_LEN];
+
+	if (secattr->mls_cat) {
+		ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
+						    secattr->mls_cat,
+						    secattr->mls_cat_len,
+						    &tag[4],
+						    CIPSO_V4_TAG1_CAT_LEN);
+		if (ret_val < 0)
+			goto gentag_failure;
+
+		/* Use the "optimized" format from section 3.4.2.6 of the
+		 * CIPSO draft whenever possible: pad short, non-empty
+		 * bitmaps out to ten bytes. */
+		if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val < 10)
+			ret_val = 10;
+
+		tag_len += ret_val;
+	}
+
+	ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
+	if (ret_val != 0)
+		goto gentag_failure;
+
+	ret_val = cipso_v4_gentag_hdr(doi_def, tag_len, opt_buf);
+	if (ret_val != 0)
+		goto gentag_failure;
+
+	tag[0] = 0x01;
+	tag[1] = tag_len;
+	tag[3] = level;
+
+	*buffer = opt_buf;
+	*buffer_len = CIPSO_V4_HDR_LEN + tag_len;
+
+	return 0;
+
+gentag_failure:
+	kfree(opt_buf);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
+ * attributes in @secattr.  Return zero on success, negative values on
+ * failure.  If the category mapping fails, the bitmap allocated here is
+ * freed and the @secattr category fields are reset.
+ */
+static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
+				 const unsigned char *tag,
+				 struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	u8 tag_len = tag[1];
+	u32 level;
+
+	ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
+	if (ret_val != 0)
+		return ret_val;
+	secattr->mls_lvl = level;
+	secattr->mls_lvl_vld = 1;
+
+	if (tag_len > 4) {
+		switch (doi_def->type) {
+		case CIPSO_V4_MAP_PASS:
+			secattr->mls_cat_len = tag_len - 4;
+			break;
+		case CIPSO_V4_MAP_STD:
+			secattr->mls_cat_len = doi_def->map.std->cat.local_size;
+			break;
+		default:
+			return -EINVAL;
+		}
+		secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
+		if (secattr->mls_cat == NULL)
+			return -ENOMEM;
+		ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, &tag[4],
+						    tag_len - 4,
+						    secattr->mls_cat,
+						    secattr->mls_cat_len);
+		if (ret_val < 0) {
+			kfree(secattr->mls_cat);
+			secattr->mls_cat = NULL; /* no dangling ptr */
+			secattr->mls_cat_len = 0;
+			return ret_val;
+		}
+		secattr->mls_cat_len = ret_val;
+	}
+	return 0;
+}
+
+/**
+ * cipso_v4_validate - Validate a CIPSO option
+ * @option: the start of the option, on error it is set to point to the error
+ *
+ * Description:
+ * This routine is called to validate a CIPSO option, it checks all of the
+ * fields to ensure that they are at least valid, see the draft snippet below
+ * for details.  If the option is valid then a zero value is returned and
+ * the value of @option is unchanged.  If the option is invalid then the
+ * non-zero byte offset of the offending field is returned and @option is
+ * adjusted to point at that field.  From the IETF draft ...
+ *
+ *  "If any field within the CIPSO options, such as the DOI identifier, is not
+ *   recognized the IP datagram is discarded and an ICMP 'parameter problem'
+ *   (type 12) is generated and returned.  The ICMP code field is set to 'bad
+ *   parameter' (code 0) and the pointer is set to the start of the CIPSO field
+ *   that is unrecognized."
+ *
+ */
+int cipso_v4_validate(unsigned char **option)
+{
+	unsigned char *opt = *option;
+	unsigned char *tag;
+	unsigned char opt_iter;
+	unsigned char err_offset = 0;
+	u8 opt_len, tag_len;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 tag_iter;
+
+	/* caller already checks for length values that are too large */
+	opt_len = opt[1];
+	if (opt_len < 8) {
+		err_offset = 1;
+		goto validate_return;
+	}
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
+	if (doi_def == NULL) {
+		err_offset = 2;
+		goto validate_return_locked;
+	}
+
+	opt_iter = 6;
+	tag = opt + opt_iter;
+	while (opt_iter < opt_len) {
+		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
+			if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
+			    ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
+				err_offset = opt_iter;
+				goto validate_return_locked;
+			}
+
+		/* the tag length octet must itself lie inside the option */
+		if (opt_iter + 1 == opt_len) {
+			err_offset = opt_iter;
+			goto validate_return_locked;
+		}
+		tag_len = tag[1];
+		if (tag_len > (opt_len - opt_iter)) {
+			err_offset = opt_iter + 1;
+			goto validate_return_locked;
+		}
+
+		switch (tag[0]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			if (tag_len < 4) {
+				err_offset = opt_iter + 1;
+				goto validate_return_locked;
+			}
+			/* The socket layer repeats this verification, so it
+			 * is safe (and less work) to skip it here.  The draft
+			 * asks for validation here without saying _exactly_
+			 * what to validate ... so make it a sysctl tunable. */
+			if (cipso_v4_rbm_strictvalid) {
+				if (cipso_v4_map_lvl_valid(doi_def,
+							   tag[3]) < 0) {
+					err_offset = opt_iter + 3;
+					goto validate_return_locked;
+				}
+				if (tag_len > 4 &&
+				    cipso_v4_map_cat_rbm_valid(doi_def,
+							    &tag[4],
+							    tag_len - 4) < 0) {
+					err_offset = opt_iter + 4;
+					goto validate_return_locked;
+				}
+			}
+			break;
+		default:
+			err_offset = opt_iter;
+			goto validate_return_locked;
+		}
+
+		tag += tag_len;
+		opt_iter += tag_len;
+	}
+
+validate_return_locked:
+	rcu_read_unlock();
+validate_return:
+	*option = opt + err_offset;
+	return err_offset;
+}
+
+/**
+ * cipso_v4_error - Send the correct response for a bad packet
+ * @skb: the packet
+ * @error: the error code
+ * @gateway: CIPSO gateway flag, picks the 'network' over the 'host' code
+ *
+ * Description:
+ * Send an ICMP 'destination unreachable' message back to the originating host
+ * when @error is -EACCES and @skb is not itself ICMP.  From the IETF draft ...
+ *
+ *  "If the contents of the CIPSO [option] are valid but the security label is
+ *   outside of the configured host or port label range, the datagram is
+ *   discarded and an ICMP 'destination unreachable' (type 3) is generated and
+ *   returned.  The code field of the ICMP is set to 'communication with
+ *   destination network administratively prohibited' (code 9) or to
+ *   'communication with destination host administratively prohibited'
+ *   (code 10).  The value of the code is dependent on whether the originator
+ *   of the ICMP message is acting as a CIPSO host or a CIPSO gateway.  The
+ *   recipient of the ICMP message MUST be able to handle either value.  The
+ *   same procedure is performed if a CIPSO [option] can not be added to an
+ *   IP packet because it is too large to fit in the IP options area."
+ *
+ *  "If the error is triggered by receipt of an ICMP message, the message is
+ *   discarded and no response is permitted (consistent with general ICMP
+ *   processing rules)."
+ *
+ */
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
+{
+	int code = gateway ? ICMP_NET_ANO : ICMP_HOST_ANO;
+
+	/* never answer an ICMP packet, and only -EACCES warrants a reply */
+	if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+		return;
+
+	icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
+}
+
+/**
+ * cipso_v4_socket_setattr - Add a CIPSO option to a socket
+ * @sock: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Generate a CIPSO option from @doi_def and @secattr and install it as the IP
+ * options of @sock->sk, replacing any options already set.  The caller must
+ * have exclusive access to @sock->sk: the socket is either still being
+ * created or is locked via lock_sock(sock->sk).  Returns zero on success
+ * (including when @sock->sk is NULL) and negative values on failure.
+ *
+ */
+int cipso_v4_socket_setattr(const struct socket *sock,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	u32 tag_idx = 0;
+	u32 cipso_len = 0;
+	u32 padded_len;
+	unsigned char *cipso_buf = NULL;
+	struct ip_options *new_opt = NULL;
+	struct ip_options *old_opt;
+	struct sock *sk;
+	struct inet_sock *inet;
+	struct inet_connection_sock *icsk;
+
+	/* A socket from sock_create_lite() has no sock->sk yet.  That is not
+	 * a problem: the only users of such "lite" PF_INET sockets go on to
+	 * accept(), and the socket gets its label as part of the accept(). */
+	sk = sock->sk;
+	if (!sk)
+		return 0;
+
+	/* XXX - Exactly one tag is generated per CIPSO option.  That is not
+	 * a great assumption in general, but it is a safe one for as long
+	 * as the MAC tags are the only ones supported. */
+	do {
+		/* only the restricted bitmap tag can be generated */
+		if (doi_def->tags[tag_idx] != CIPSO_V4_TAG_RBITMAP) {
+			ret_val = -EPERM;
+			goto socket_setattr_failure;
+		}
+		ret_val = cipso_v4_gentag_rbm(doi_def, secattr,
+					      &cipso_buf, &cipso_len);
+		tag_idx++;
+	} while (ret_val != 0 &&
+		 tag_idx < CIPSO_V4_TAG_MAXCNT &&
+		 doi_def->tags[tag_idx] != CIPSO_V4_TAG_INVALID);
+	if (ret_val != 0)
+		goto socket_setattr_failure;
+
+	/* ip_options_get() is off limits here: it goes through
+	 * ip_options_get_alloc(), which allocates with GFP_KERNEL, and we
+	 * can't block.  Build the option block by hand instead, padding the
+	 * CIPSO data out to a multiple of four bytes. */
+	padded_len = (cipso_len + 3) & ~3;
+	new_opt = kzalloc(sizeof(*new_opt) + padded_len, GFP_ATOMIC);
+	if (!new_opt) {
+		ret_val = -ENOMEM;
+		goto socket_setattr_failure;
+	}
+
+	new_opt->optlen = padded_len;
+	new_opt->is_data = 1;
+	memcpy(new_opt->__data, cipso_buf, cipso_len);
+	kfree(cipso_buf);
+	cipso_buf = NULL;
+
+	ret_val = ip_options_compile(new_opt, NULL);
+	if (ret_val != 0)
+		goto socket_setattr_failure;
+
+	/* connection sockets track the option length: update it and resync */
+	inet = inet_sk(sk);
+	if (inet->is_icsk) {
+		icsk = inet_csk(sk);
+		if (inet->opt)
+			icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+		icsk->icsk_ext_hdr_len += new_opt->optlen;
+		icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+	}
+
+	/* swap in the new options and release whatever was there before */
+	old_opt = xchg(&inet->opt, new_opt);
+	kfree(old_opt);
+
+	return 0;
+
+socket_setattr_failure:
+	kfree(cipso_buf);
+	kfree(new_opt);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_socket_getattr - Get the security attributes from a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Look for a CIPSO option attached to @sock->sk, taking the socket lock while
+ * doing so, and translate it into @secattr.  Returns zero on success, -ENOMSG
+ * if the socket carries no CIPSO option and other negative values on failure.
+ *
+ */
+int cipso_v4_socket_getattr(const struct socket *sock,
+			    struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOMSG;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet;
+	struct ip_options *opt;
+	unsigned char *cipso;
+	u32 doi;
+	struct cipso_v4_doi *doi_def;
+
+	lock_sock(sk);
+
+	/* nothing to report unless a CIPSO option is attached to the socket */
+	inet = inet_sk(sk);
+	opt = inet->opt;
+	if (!opt || !opt->cipso)
+		goto out_release;
+
+	/* opt->cipso appears to be an offset from the start of the IP
+	 * header, hence the sizeof(struct iphdr) correction - confirm */
+	cipso = opt->__data + opt->cipso - sizeof(struct iphdr);
+
+	/* a successful cache lookup means there is nothing left to do */
+	ret_val = cipso_v4_cache_check(cipso, cipso[1], secattr);
+	if (ret_val == 0)
+		goto out_release;
+
+	/* cache miss: look up the DOI and parse the first tag by hand */
+	doi = ntohl(*(u32 *)&cipso[2]);
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(doi);
+	if (doi_def != NULL && cipso[6] == CIPSO_V4_TAG_RBITMAP)
+		ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso[6], secattr);
+	rcu_read_unlock();
+
+	/* note: on a miss with no usable DOI/tag the cache error is returned */
+out_release:
+	release_sock(sk);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Translate the CIPSO option carried by @skb into @secattr.  Returns zero on
+ * success, -ENOMSG if the packet has no usable CIPSO option and other negative
+ * values on failure.
+ */
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+			    struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOMSG;
+	unsigned char *cipso;
+	u32 doi;
+	struct cipso_v4_doi *doi_def;
+
+	if (!CIPSO_V4_OPTEXIST(skb))
+		return -ENOMSG;
+
+	/* a successful cache lookup means there is nothing left to do */
+	cipso = CIPSO_V4_OPTPTR(skb);
+	if (cipso_v4_cache_check(cipso, cipso[1], secattr) == 0)
+		return 0;
+
+	/* otherwise look up the DOI, which lives at offset 2 of the option,
+	 * and parse the first tag; the definition is only used while the
+	 * RCU read lock is held */
+	doi = ntohl(*(u32 *)&cipso[2]);
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(doi);
+	if (doi_def != NULL && cipso[6] == CIPSO_V4_TAG_RBITMAP)
+		ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso[6], secattr);
+	rcu_read_unlock();
+
+	/* ret_val is still -ENOMSG here if the DOI was unknown or the tag
+	 * type was not recognised */
+	return ret_val;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * cipso_v4_init - Initialize the CIPSO module
+ *
+ * Description:
+ * Initialize the CIPSO module and prepare it for use.  Always returns zero;
+ * a failure to set up the cache panics instead of being reported.
+ *
+ */
+static int __init cipso_v4_init(void)
+{
+	int err = cipso_v4_cache_init();
+
+	/* a failed cache setup is treated as fatal */
+	if (err != 0)
+		panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n",
+		      err);
+
+	return 0;
+}
+
+subsys_initcall(cipso_v4_init);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 70cea9d08a38f..19b2071ff319b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp.h>
+#include <net/cipso_ipv4.h>
 
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
@@ -697,6 +698,40 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+#ifdef CONFIG_NETLABEL
+	{
+		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,
+		.procname	= "cipso_cache_enable",
+		.data		= &cipso_v4_cache_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_CACHE_BUCKET_SIZE,
+		.procname	= "cipso_cache_bucket_size",
+		.data		= &cipso_v4_cache_bucketsize,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_RBM_OPTFMT,
+		.procname	= "cipso_rbm_optfmt",
+		.data		= &cipso_v4_rbm_optfmt,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_RBM_STRICTVALID,
+		.procname	= "cipso_rbm_strictvalid",
+		.data		= &cipso_v4_rbm_strictvalid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif /* CONFIG_NETLABEL */
 	{ .ctl_name = 0 }
 };
 
-- 
GitLab


From d15c345fe3b8dfda0fa5a1d2143a35fffa746a43 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 3 Aug 2006 16:48:37 -0700
Subject: [PATCH 0391/1063] [NetLabel]: core NetLabel subsystem

Add a new kernel subsystem, NetLabel, to provide explicit packet
labeling services (CIPSO, RIPSO, etc.) to LSM developers.  NetLabel is
designed to work in conjunction with a LSM to intercept and decode
security labels on incoming network packets as well as ensure that
outgoing network packets are labeled according to the security
mechanism employed by the LSM.  The NetLabel subsystem is configured
through a Generic NETLINK interface described in the header files
included in this patch.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Makefile                       |   1 +
 net/netlabel/Kconfig               |  14 +
 net/netlabel/Makefile              |  16 +
 net/netlabel/netlabel_cipso_v4.h   | 217 ++++++++++
 net/netlabel/netlabel_domainhash.c | 513 ++++++++++++++++++++++++
 net/netlabel/netlabel_domainhash.h |  63 +++
 net/netlabel/netlabel_kapi.c       | 231 +++++++++++
 net/netlabel/netlabel_mgmt.c       | 624 +++++++++++++++++++++++++++++
 net/netlabel/netlabel_mgmt.h       | 246 ++++++++++++
 net/netlabel/netlabel_unlabeled.h  |  98 +++++
 net/netlabel/netlabel_user.c       | 158 ++++++++
 net/netlabel/netlabel_user.h       | 214 ++++++++++
 12 files changed, 2395 insertions(+)
 create mode 100644 net/netlabel/Kconfig
 create mode 100644 net/netlabel/Makefile
 create mode 100644 net/netlabel/netlabel_cipso_v4.h
 create mode 100644 net/netlabel/netlabel_domainhash.c
 create mode 100644 net/netlabel/netlabel_domainhash.h
 create mode 100644 net/netlabel/netlabel_kapi.c
 create mode 100644 net/netlabel/netlabel_mgmt.c
 create mode 100644 net/netlabel/netlabel_mgmt.h
 create mode 100644 net/netlabel/netlabel_unlabeled.h
 create mode 100644 net/netlabel/netlabel_user.c
 create mode 100644 net/netlabel/netlabel_user.h

diff --git a/net/Makefile b/net/Makefile
index 065796f5fb177..ad4d14f4bb296 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_DCCP)		+= dccp/
 obj-$(CONFIG_IP_SCTP)		+= sctp/
 obj-$(CONFIG_IEEE80211)		+= ieee80211/
 obj-$(CONFIG_TIPC)		+= tipc/
+obj-$(CONFIG_NETLABEL)		+= netlabel/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
new file mode 100644
index 0000000000000..fe23cb7f1e878
--- /dev/null
+++ b/net/netlabel/Kconfig
@@ -0,0 +1,14 @@
+#
+# NetLabel configuration
+#
+
+config NETLABEL
+	bool "NetLabel subsystem support"
+	depends on NET && SECURITY
+	default n
+	---help---
+	  NetLabel provides support for explicit network packet labeling
+	  protocols such as CIPSO and RIPSO.  For more information see
+	  Documentation/netlabel.
+
+	  If you are unsure, say N.
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
new file mode 100644
index 0000000000000..8af18c0a47d90
--- /dev/null
+++ b/net/netlabel/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the NetLabel subsystem.
+#
+# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
+#
+
+# base objects
+obj-y	:= netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
+
+# management objects
+obj-y	+= netlabel_mgmt.o
+
+# protocol modules
+obj-y	+= netlabel_unlabeled.o
+obj-y	+= netlabel_cipso_v4.o
+
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
new file mode 100644
index 0000000000000..4c6ff4b93004a
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -0,0 +1,217 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_CIPSO_V4
+#define _NETLABEL_CIPSO_V4
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the CIPSO subsystem, all
+ * of which are preceded by the nlmsghdr struct.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an application's message, applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ADD:
+ *   Sent by an application to add a new DOI mapping table, after completion
+ *   of the task the kernel should ACK this message.
+ *
+ *   +---------------+--------------------+---------------------+
+ *   | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ...
+ *   +---------------+--------------------+---------------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *   +-------------- ---- --- -- -
+ *   | mapping data
+ *   +-------------- ---- --- -- -
+ *
+ *     DOI:          the DOI value
+ *     map type:     the mapping table type (defined in the cipso_ipv4.h header
+ *                   as CIPSO_V4_MAP_*)
+ *     tag count:    the number of tags, must be greater than zero
+ *     tag:          the CIPSO tag for the DOI, tags listed first are given
+ *                   higher priority when sending packets
+ *     mapping data: specific to the map type (see below)
+ *
+ *   CIPSO_V4_MAP_STD
+ *
+ *   +------------------+-----------------------+----------------------+
+ *   | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ...
+ *   +------------------+-----------------------+----------------------+
+ *
+ *   +----------------------+---------------------+---------------------+
+ *   | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ...
+ *   +----------------------+---------------------+---------------------+
+ *
+ *   +--------------------------+-------------------------+
+ *   | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
+ *   +--------------------------+-------------------------+
+ *
+ *   +-----------------------------+-----------------------------+
+ *   | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
+ *   +-----------------------------+-----------------------------+
+ *
+ *     levels:         the number of level mappings
+ *     max l level:    the highest local level
+ *     max r level:    the highest remote/CIPSO level
+ *     categories:     the number of category mappings
+ *     max l cat:      the highest local category
+ *     max r cat:      the highest remote/CIPSO category
+ *     local level:    the local part of a level mapping
+ *     CIPSO level:    the remote/CIPSO part of a level mapping
+ *     local category: the local part of a category mapping
+ *     CIPSO category: the remote/CIPSO part of a category mapping
+ *
+ *   CIPSO_V4_MAP_PASS
+ *
+ *   No mapping data is needed for this map type.
+ *
+ * o REMOVE:
+ *   Sent by an application to remove a specific DOI mapping table from the
+ *   CIPSO V4 system.  The kernel should ACK this message.
+ *
+ *   +---------------+
+ *   | DOI (32 bits) |
+ *   +---------------+
+ *
+ *     DOI:          the DOI value
+ *
+ * o LIST:
+ *   Sent by an application to list the details of a DOI definition.  The
+ *   kernel should send an ACK on error or a response as indicated below.  The
+ *   application generated message format is shown below.
+ *
+ *   +---------------+
+ *   | DOI (32 bits) |
+ *   +---------------+
+ *
+ *     DOI:          the DOI value
+ *
+ *   The valid response message format depends on the type of the DOI mapping,
+ *   the known formats are shown below.
+ *
+ *   +--------------------+
+ *   | map type (32 bits) | ...
+ *   +--------------------+
+ *
+ *     map type:       the DOI mapping table type (defined in the cipso_ipv4.h
+ *                     header as CIPSO_V4_MAP_*)
+ *
+ *   (map type == CIPSO_V4_MAP_STD)
+ *
+ *   +----------------+------------------+----------------------+
+ *   | tags (32 bits) | levels (32 bits) | categories (32 bits) | ...
+ *   +----------------+------------------+----------------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *   +--------------------------+-------------------------+
+ *   | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
+ *   +--------------------------+-------------------------+
+ *
+ *   +-----------------------------+-----------------------------+
+ *   | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
+ *   +-----------------------------+-----------------------------+
+ *
+ *     tags:           the number of CIPSO tag types
+ *     levels:         the number of level mappings
+ *     categories:     the number of category mappings
+ *     tag:            the tag number, tags listed first are given higher
+ *                     priority when sending packets
+ *     local level:    the local part of a level mapping
+ *     CIPSO level:    the remote/CIPSO part of a level mapping
+ *     local category: the local part of a category mapping
+ *     CIPSO category: the remote/CIPSO part of a category mapping
+ *
+ *   (map type == CIPSO_V4_MAP_PASS)
+ *
+ *   +----------------+
+ *   | tags (32 bits) | ...
+ *   +----------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *     tags:           the number of CIPSO tag types
+ *     tag:            the tag number, tags listed first are given higher
+ *                     priority when sending packets
+ *
+ * o LISTALL:
+ *   This message is sent by an application to list the valid DOIs on the
+ *   system.  There is no payload and the kernel should respond with an ACK
+ *   or the following message.
+ *
+ *   +---------------------+------------------+-----------------------+
+ *   | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) |
+ *   +---------------------+------------------+-----------------------+
+ *
+ *   +-----------------------+
+ *   | map type #X (32 bits) | ...
+ *   +-----------------------+
+ *
+ *     DOI count:      the number of DOIs
+ *     DOI:            the DOI value
+ *     map type:       the DOI mapping table type (defined in the cipso_ipv4.h
+ *                     header as CIPSO_V4_MAP_*)
+ *
+ */
+
+/* NetLabel CIPSOv4 commands, see the payload descriptions above */
+enum {
+	NLBL_CIPSOV4_C_UNSPEC,
+	NLBL_CIPSOV4_C_ACK,		/* kernel reply: seq + return code */
+	NLBL_CIPSOV4_C_ADD,		/* add a DOI mapping table */
+	NLBL_CIPSOV4_C_REMOVE,		/* remove a DOI mapping table */
+	NLBL_CIPSOV4_C_LIST,		/* list the details of one DOI */
+	NLBL_CIPSOV4_C_LISTALL,		/* list the valid DOIs */
+	__NLBL_CIPSOV4_C_MAX,		/* one past the last command */
+};
+#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_cipsov4_genl_init(void);
+
+#endif
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
new file mode 100644
index 0000000000000..5bb3fad4a1159
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.c
@@ -0,0 +1,513 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
+
+struct netlbl_domhsh_tbl {
+	struct list_head *tbl;	/* array of @size bucket list heads */
+	u32 size;		/* number of buckets, a power of two */
+};
+
+/* Domain hash table, writers are serialized by netlbl_domhsh_lock */
+/* XXX - updates should be so rare that having one spinlock for the entire
+ * hash table should be okay */
+static DEFINE_SPINLOCK(netlbl_domhsh_lock);
+static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
+
+/* Default domain mapping, writers serialized by netlbl_domhsh_def_lock */
+static DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
+static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
+
+/*
+ * Domain Hash Table Helper Functions
+ */
+
+/**
+ * netlbl_domhsh_free_entry - Frees a domain hash table entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to a hash table entry, including its
+ * domain string, can be released safely.
+ *
+ */
+static void netlbl_domhsh_free_entry(struct rcu_head *entry)
+{
+	struct netlbl_dom_map *ptr;
+
+	ptr = container_of(entry, struct netlbl_dom_map, rcu);
+	kfree(ptr->domain);
+	kfree(ptr);
+}
+
+/**
+ * netlbl_domhsh_hash - Hashing function for the domain hash table
+ * @key: the domain name to hash
+ *
+ * Description:
+ * This is the hashing function for the domain hash table, it returns the
+ * correct bucket number for the domain.  The caller is responsible for
+ * calling the rcu_read_[un]lock() functions.
+ *
+ */
+static u32 netlbl_domhsh_hash(const char *key)
+{
+	u32 iter;
+	u32 val;
+	u32 len;
+
+	/* This is taken (with slight modification) from
+	 * security/selinux/ss/symtab.c:symhash() */
+
+	for (iter = 0, val = 0, len = strlen(key); iter < len; iter++)
+		val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter];
+	return val & (rcu_dereference(netlbl_domhsh)->size - 1);
+}
+
+/**
+ * netlbl_domhsh_search - Search for a domain entry
+ * @domain: the domain
+ * @def: return default if no match is found
+ *
+ * Description:
+ * Searches the domain hash table and returns a pointer to the hash table
+ * entry if found, otherwise NULL is returned.  If @def is non-zero and a
+ * match is not found in the domain hash table the default mapping is returned
+ * if it exists.  The caller is responsible for the rcu hash table locks
+ * (i.e. the caller must call rcu_read_[un]lock()).
+ *
+ */
+static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
+{
+	u32 bkt;
+	struct netlbl_dom_map *iter;
+
+	if (domain != NULL) {
+		bkt = netlbl_domhsh_hash(domain);
+		list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list)
+			if (iter->valid && strcmp(iter->domain, domain) == 0)
+				return iter;
+	}
+
+	if (def != 0) {
+		iter = rcu_dereference(netlbl_domhsh_def);
+		if (iter != NULL && iter->valid)
+			return iter;
+	}
+
+	return NULL;
+}
+
+/*
+ * Domain Hash Table Functions
+ */
+
+/**
+ * netlbl_domhsh_init - Init for the domain hash
+ * @size: the number of bits to use for the hash buckets
+ *
+ * Description:
+ * Initializes the domain hash table with 2^@size buckets, should be called
+ * only by netlbl_init() during initialization.  Returns zero on success,
+ * -EINVAL if @size is zero or too large for a u32 bucket count and -ENOMEM
+ * if the table cannot be allocated.
+ */
+int netlbl_domhsh_init(u32 size)
+{
+	u32 iter;
+	struct netlbl_domhsh_tbl *hsh_tbl;
+
+	if (size == 0 || size >= 8 * sizeof(u32))
+		return -EINVAL;
+
+	hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL);
+	if (hsh_tbl == NULL)
+		return -ENOMEM;
+	hsh_tbl->size = 1U << size;
+	hsh_tbl->tbl = kcalloc(hsh_tbl->size,
+			       sizeof(struct list_head),
+			       GFP_KERNEL);
+	if (hsh_tbl->tbl == NULL) {
+		kfree(hsh_tbl);
+		return -ENOMEM;
+	}
+	for (iter = 0; iter < hsh_tbl->size; iter++)
+		INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
+
+	rcu_read_lock();
+	spin_lock(&netlbl_domhsh_lock);
+	rcu_assign_pointer(netlbl_domhsh, hsh_tbl);
+	spin_unlock(&netlbl_domhsh_lock);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * netlbl_domhsh_add - Adds an entry to the domain hash table
+ * @entry: the entry to add
+ *
+ * Description:
+ * Adds a new entry to the domain hash table, or installs it as the default
+ * mapping if @entry->domain is NULL, and handles any updates to the lower
+ * level protocol handler (i.e. CIPSO).  Returns zero on success, -EEXIST if
+ * a matching mapping already exists and other negative values on failure.
+ */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry)
+{
+	int ret_val;
+	u32 bkt;
+
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = 0;
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
+						  entry->domain);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ret_val != 0)
+		return ret_val;
+
+	entry->valid = 1;
+	INIT_RCU_HEAD(&entry->rcu);
+
+	ret_val = 0;
+	rcu_read_lock();
+	if (entry->domain != NULL) {
+		bkt = netlbl_domhsh_hash(entry->domain);
+		spin_lock(&netlbl_domhsh_lock);
+		if (netlbl_domhsh_search(entry->domain, 0) == NULL)
+			list_add_tail_rcu(&entry->list,
+					  &netlbl_domhsh->tbl[bkt]);
+		else
+			ret_val = -EEXIST;
+		spin_unlock(&netlbl_domhsh_lock);
+	} else {
+		/* an entry without a domain is the default mapping */
+		INIT_LIST_HEAD(&entry->list);
+		spin_lock(&netlbl_domhsh_def_lock);
+		if (rcu_dereference(netlbl_domhsh_def) == NULL)
+			rcu_assign_pointer(netlbl_domhsh_def, entry);
+		else
+			ret_val = -EEXIST;
+		spin_unlock(&netlbl_domhsh_def_lock);
+	}
+	rcu_read_unlock();
+
+	if (ret_val != 0) {
+		switch (entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+						       entry->domain) != 0)
+				BUG();
+			break;
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
+ * @entry: the entry to add
+ *
+ * Description:
+ * Adds a new default entry to the domain hash table and handles any updates
+ * to the lower level protocol handler (i.e. CIPSO).  This simply calls
+ * netlbl_domhsh_add() which treats an entry with a NULL domain as the
+ * default.  Returns zero on success, negative on failure.
+ */
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry)
+{
+	return netlbl_domhsh_add(entry);
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove, NULL selects the default mapping
+ *
+ * Description:
+ * Removes an entry from the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO).  The entry itself is released
+ * via call_rcu().  Returns zero on success, -ENOENT if no valid entry was
+ * found, or the error reported by the protocol handler.
+ */
+int netlbl_domhsh_remove(const char *domain)
+{
+	int ret_val = -ENOENT;
+	struct netlbl_dom_map *entry;
+
+	rcu_read_lock();
+	if (domain != NULL)
+		entry = netlbl_domhsh_search(domain, 0);
+	else
+		entry = netlbl_domhsh_search(domain, 1);
+	if (entry == NULL)
+		goto remove_return;
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+						     entry->domain);
+		if (ret_val != 0)
+			goto remove_return;
+		break;
+	}
+	ret_val = 0;
+	if (entry != rcu_dereference(netlbl_domhsh_def)) {
+		spin_lock(&netlbl_domhsh_lock);
+		if (entry->valid) {
+			entry->valid = 0;
+			list_del_rcu(&entry->list);
+		} else
+			ret_val = -ENOENT;
+		spin_unlock(&netlbl_domhsh_lock);
+	} else {
+		spin_lock(&netlbl_domhsh_def_lock);
+		if (entry->valid) {
+			entry->valid = 0;
+			rcu_assign_pointer(netlbl_domhsh_def, NULL);
+		} else
+			ret_val = -ENOENT;
+		spin_unlock(&netlbl_domhsh_def_lock);
+	}
+	if (ret_val == 0)
+		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+
+remove_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove_default - Removes the default entry from the table
+ *
+ * Description:
+ * Removes/resets the default entry for the domain hash table and handles any
+ * updates to the lower level protocol handler (i.e. CIPSO) by calling
+ * netlbl_domhsh_remove() with a NULL domain.  Returns zero on success,
+ * non-zero on failure.
+ */
+int netlbl_domhsh_remove_default(void)
+{
+	return netlbl_domhsh_remove(NULL);
+}
+
+/**
+ * netlbl_domhsh_getentry - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ *
+ * Description:
+ * Look through the domain hash table searching for an entry to match @domain,
+ * falling back to the default mapping if there is no match.  Returns a
+ * pointer to the entry itself (not a copy) or NULL.  The caller is
+ * responsible for ensuring that rcu_read_[un]lock() is called.
+ */
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
+{
+	return netlbl_domhsh_search(domain, 1);
+}
+
+/**
+ * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff
+ * @headroom: the number of bytes to leave free at the front of the sk_buff
+ *
+ * Description:
+ * Dump the domain hash table into a buffer suitable for returning to an
+ * application in response to a NetLabel management DOMAIN message.  This
+ * function may fail if another process is growing the hash table at the same
+ * time.  The returned sk_buff has room at the front of the sk_buff for
+ * @headroom bytes.  See netlabel.h for the DOMAIN message format.  Returns a
+ * pointer to a sk_buff on success, NULL on error.
+ */
+struct sk_buff *netlbl_domhsh_dump(size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	ssize_t buf_len;
+	u32 bkt_iter;
+	u32 dom_cnt = 0;
+	struct netlbl_domhsh_tbl *hsh_tbl;
+	struct netlbl_dom_map *list_iter;
+	ssize_t tmp_len;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	hsh_tbl = rcu_dereference(netlbl_domhsh);
+	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
+		list_for_each_entry_rcu(list_iter,
+					&hsh_tbl->tbl[bkt_iter], list) {
+			buf_len += NETLBL_LEN_U32 +
+				nla_total_size(strlen(list_iter->domain) + 1);
+			switch (list_iter->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				break;
+			case NETLBL_NLTYPE_CIPSOV4:
+				buf_len += 2 * NETLBL_LEN_U32;
+				break;
+			}
+			dom_cnt++;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto dump_failure;
+
+	if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0)
+		goto dump_failure;
+	buf_len -= NETLBL_LEN_U32;
+	hsh_tbl = rcu_dereference(netlbl_domhsh);
+	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
+		list_for_each_entry_rcu(list_iter,
+					&hsh_tbl->tbl[bkt_iter], list) {
+			tmp_len = nla_total_size(strlen(list_iter->domain) +
+						 1);
+			if (buf_len < NETLBL_LEN_U32 + tmp_len)
+				goto dump_failure;
+			if (nla_put_string(skb,
+					   NLA_STRING,
+					   list_iter->domain) != 0)
+				goto dump_failure;
+			if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0)
+				goto dump_failure;
+			buf_len -= NETLBL_LEN_U32 + tmp_len;
+			switch (list_iter->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				break;
+			case NETLBL_NLTYPE_CIPSOV4:
+				if (buf_len < 2 * NETLBL_LEN_U32)
+					goto dump_failure;
+				if (nla_put_u32(skb,
+				       NLA_U32,
+				       list_iter->type_def.cipsov4->type) != 0)
+					goto dump_failure;
+				if (nla_put_u32(skb,
+				       NLA_U32,
+				       list_iter->type_def.cipsov4->doi) != 0)
+					goto dump_failure;
+				buf_len -= 2 * NETLBL_LEN_U32;
+				break;
+			}
+		}
+	rcu_read_unlock();
+
+	return skb;
+
+dump_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return NULL;
+}
+
+/**
+ * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff
+ * @headroom: the number of bytes to leave free at the front of the sk_buff
+ *
+ * Description:
+ * Dump the default domain mapping into a buffer suitable for returning to an
+ * application in response to a NetLabel management DEFDOMAIN message.  This
+ * function may fail if another process is changing the default domain mapping
+ * at the same time.  The returned sk_buff has room at the front of the
+ * sk_buff for @headroom bytes.  See netlabel.h for the DEFDOMAIN message
+ * format.  Returns a pointer to a sk_buff on success, NULL on error.
+ */
+struct sk_buff *netlbl_domhsh_dump_default(size_t headroom)
+{
+	struct sk_buff *skb;
+	ssize_t buf_len;
+	struct netlbl_dom_map *entry;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	entry = rcu_dereference(netlbl_domhsh_def);
+	if (entry != NULL)
+		switch (entry->type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			buf_len += 2 * NETLBL_LEN_U32;
+			break;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto dump_default_failure;
+
+	if (entry != rcu_dereference(netlbl_domhsh_def))
+		goto dump_default_failure;
+	if (entry != NULL) {
+		if (nla_put_u32(skb, NLA_U32, entry->type) != 0)
+			goto dump_default_failure;
+		buf_len -= NETLBL_LEN_U32;
+		switch (entry->type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (buf_len < 2 * NETLBL_LEN_U32)
+				goto dump_default_failure;
+			if (nla_put_u32(skb,
+					NLA_U32,
+					entry->type_def.cipsov4->type) != 0)
+				goto dump_default_failure;
+			if (nla_put_u32(skb,
+					NLA_U32,
+					entry->type_def.cipsov4->doi) != 0)
+				goto dump_default_failure;
+			buf_len -= 2 * NETLBL_LEN_U32;
+			break;
+		}
+	} else if (nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE) != 0)
+		goto dump_default_failure;
+	rcu_read_unlock();
+
+	return skb;
+
+dump_default_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return NULL;
+}
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
new file mode 100644
index 0000000000000..9217863ce0d3a
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.h
@@ -0,0 +1,63 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_DOMAINHASH_H
+#define _NETLABEL_DOMAINHASH_H
+
+/* Domain hash table size */
+/* XXX - currently this number is an uneducated guess */
+#define NETLBL_DOMHSH_BITSIZE       7
+
+/* Domain mapping definition struct */
+struct netlbl_dom_map {
+	char *domain;		/* domain name, NULL for the default mapping */
+	u32 type;		/* protocol type, one of NETLBL_NLTYPE_* */
+	union {
+		struct cipso_v4_doi *cipsov4;
+	} type_def;
+
+	u32 valid;		/* set when added, cleared when removed */
+	struct list_head list;
+	struct rcu_head rcu;
+};
+
+/* init function */
+int netlbl_domhsh_init(u32 size);
+/* Manipulate the domain hash table */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry);
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry);
+int netlbl_domhsh_remove(const char *domain);
+int netlbl_domhsh_remove_default(void);
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
+struct sk_buff *netlbl_domhsh_dump(size_t headroom);
+struct sk_buff *netlbl_domhsh_dump_default(size_t headroom);
+
+#endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
new file mode 100644
index 0000000000000..0fd8aaafe23f7
--- /dev/null
+++ b/net/netlabel/netlabel_kapi.c
@@ -0,0 +1,231 @@
+/*
+ * NetLabel Kernel API
+ *
+ * This file defines the kernel API for the NetLabel system.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_user.h"
+
+/*
+ * LSM Functions
+ */
+
+/**
+ * netlbl_socket_setattr - Label a socket using the correct protocol
+ * @sock: the socket to label
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given socket using the security attributes
+ * specified in @secattr.  This function requires exclusive access to
+ * @sock->sk, which means it either needs to be in the process of being
+ * created or locked via lock_sock(sock->sk).  Returns zero on success,
+ * -ENOENT if no usable mapping exists for @secattr->domain, and other
+ * negative values on failure.
+ */
+int netlbl_socket_setattr(const struct socket *sock,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOENT;
+	struct netlbl_dom_map *dom_entry;
+
+	rcu_read_lock();
+	dom_entry = netlbl_domhsh_getentry(secattr->domain);
+	if (dom_entry == NULL)
+		goto socket_setattr_return;
+	switch (dom_entry->type) {
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_socket_setattr(sock,
+						  dom_entry->type_def.cipsov4,
+						  secattr);
+		break;
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = 0;
+		break;
+	default:
+		ret_val = -ENOENT;
+	}
+
+socket_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/**
+ * netlbl_socket_getattr - Determine the security attributes of a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given socket to see if any NetLabel style labeling has been
+ * applied to the socket, if so it parses the socket label and returns the
+ * security attributes in @secattr.  If the CIPSO lookup fails the result of
+ * netlbl_unlabel_getattr() is returned instead.  Returns zero on success,
+ * negative values on failure.
+ */
+int netlbl_socket_getattr(const struct socket *sock,
+			  struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+
+	ret_val = cipso_v4_socket_getattr(sock, secattr);
+	if (ret_val == 0)
+		return 0;
+
+	return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_getattr - Determine the security attributes of a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Examines the given packet to see if a recognized form of packet labeling
+ * is present, if so it parses the packet label and returns the security
+ * attributes in @secattr.  If the CIPSO lookup fails the result of
+ * netlbl_unlabel_getattr() is returned instead.  Returns zero on success,
+ * negative values on failure.
+ */
+int netlbl_skbuff_getattr(const struct sk_buff *skb,
+			  struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+
+	ret_val = cipso_v4_skbuff_getattr(skb, secattr);
+	if (ret_val == 0)
+		return 0;
+
+	return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_err - Handle a LSM error on a sk_buff
+ * @skb: the packet
+ * @error: the error code
+ *
+ * Description:
+ * Deal with a LSM problem when handling the packet in @skb, typically this is
+ * a permission denied problem (-EACCES).  The correct action is determined
+ * according to the packet's labeling protocol; packets without a CIPSO option
+ * are left alone.
+ */
+void netlbl_skbuff_err(struct sk_buff *skb, int error)
+{
+	if (CIPSO_V4_OPTEXIST(skb))
+		cipso_v4_error(skb, error, 0);
+}
+
+/**
+ * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches
+ *
+ * Description:
+ * For all of the NetLabel protocols that support some form of label mapping
+ * cache, invalidate the cache.  Currently this is only the CIPSOv4 cache.
+ * This function does not return a value.
+ *
+ */
+void netlbl_cache_invalidate(void)
+{
+	cipso_v4_cache_invalidate();
+}
+
+/**
+ * netlbl_cache_add - Add an entry to a NetLabel protocol cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add the LSM security attributes for the given packet to the underlying
+ * NetLabel protocol's label mapping cache.  Returns zero on success, -ENOMSG
+ * if @secattr carries no cache data or the packet has no CIPSO option, and
+ * other negative values on error.
+ */
+int netlbl_cache_add(const struct sk_buff *skb,
+		     const struct netlbl_lsm_secattr *secattr)
+{
+	if (secattr->cache.data == NULL)
+		return -ENOMSG;
+
+	if (CIPSO_V4_OPTEXIST(skb))
+		return cipso_v4_cache_add(skb, secattr);
+
+	return -ENOMSG;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * netlbl_init - Initialize NetLabel
+ *
+ * Description:
+ * Perform the required NetLabel initialization before first use.  Returns
+ * zero on success; any initialization failure results in a panic().
+ */
+static int __init netlbl_init(void)
+{
+	int ret_val;
+
+	printk(KERN_INFO "NetLabel: Initializing\n");
+	printk(KERN_INFO "NetLabel:  domain hash size = %u\n",
+	       (1 << NETLBL_DOMHSH_BITSIZE));
+	printk(KERN_INFO "NetLabel:  protocols ="
+	       " UNLABELED"
+	       " CIPSOv4"
+	       "\n");
+
+	ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
+	if (ret_val != 0)
+		goto init_failure;
+
+	ret_val = netlbl_netlink_init();
+	if (ret_val != 0)
+		goto init_failure;
+
+	ret_val = netlbl_unlabel_defconf();
+	if (ret_val != 0)
+		goto init_failure;
+	printk(KERN_INFO "NetLabel:  unlabeled traffic allowed by default\n");
+
+	return 0;
+
+init_failure:
+	panic("NetLabel: failed to initialize properly (%d)\n", ret_val);
+}
+
+subsys_initcall(netlbl_init);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
new file mode 100644
index 0000000000000..85bc11a1fc46a
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.c
@@ -0,0 +1,624 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_user.h"
+#include "netlabel_mgmt.h"
+
+/* NetLabel Generic NETLINK management family */
+static struct genl_family netlbl_mgmt_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_MGMT_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_mgmt_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADD message and add the domains from the message
+ * to the hash table, see netlabel.h for the message format.  Returns zero on
+ * success, negative values on failure (-EINVAL for short/truncated messages).
+ */
+static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	u32 count;
+	struct netlbl_dom_map *entry = NULL;
+	u32 iter;
+	u32 tmp_val;
+	int tmp_size;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto add_failure;
+
+	ret_val = -EINVAL;	/* the capability check left ret_val at zero */
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_failure;
+	count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	for (iter = 0; iter < count; iter++, entry = NULL) {
+		if (msg_len < NLA_HDRLEN) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (entry == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		tmp_size = nla_len(msg_ptr);
+		if (tmp_size <= 0 || tmp_size > msg_len) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+		if (entry->domain == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		nla_strlcpy(entry->domain, msg_ptr, tmp_size);
+		entry->domain[tmp_size - 1] = '\0';
+		msg_ptr = nla_next(msg_ptr, &msg_len);
+
+		if (msg_len < NETLBL_LEN_U32) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+		entry->type = tmp_val;
+		switch (tmp_val) {
+		case NETLBL_NLTYPE_UNLABELED:
+			ret_val = netlbl_domhsh_add(entry);
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (msg_len < NETLBL_LEN_U32) {
+				ret_val = -EINVAL;
+				goto add_failure;
+			}
+			tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+			/* We should be holding a rcu_read_lock() here
+			 * while we hold the result but since the entry
+			 * will always be deleted when the CIPSO DOI
+			 * is deleted we aren't going to keep the lock. */
+			rcu_read_lock();
+			entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+			if (entry->type_def.cipsov4 == NULL) {
+				rcu_read_unlock();
+				ret_val = -EINVAL;
+				goto add_failure;
+			}
+			ret_val = netlbl_domhsh_add(entry);
+			rcu_read_unlock();
+			break;
+		default:
+			ret_val = -EINVAL;
+		}
+		if (ret_val != 0)
+			goto add_failure;
+	}
+
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				NETLBL_E_OK);
+	return 0;
+
+add_failure:
+	if (entry)
+		kfree(entry->domain);
+	kfree(entry);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVE message and remove the specified domain
+ * mappings.  Returns zero on success, negative values on failure.
+ */
+static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	u32 count;
+	u32 iter;
+	int tmp_size;
+	char *domain;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto remove_return;
+
+	ret_val = -EINVAL;	/* the capability check left ret_val at zero */
+	if (msg_len < NETLBL_LEN_U32)
+		goto remove_return;
+	count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	for (iter = 0; iter < count; iter++) {
+		if (msg_len < NLA_HDRLEN) {
+			ret_val = -EINVAL;
+			goto remove_return;
+		}
+		tmp_size = nla_len(msg_ptr);
+		domain = nla_data(msg_ptr);
+		if (tmp_size <= 0 || tmp_size > msg_len ||
+		    domain[tmp_size - 1] != '\0') {
+			ret_val = -EINVAL;
+			goto remove_return;
+		}
+		ret_val = netlbl_domhsh_remove(domain);
+		if (ret_val != 0)
+			goto remove_return;
+		msg_ptr = nla_next(msg_ptr, &msg_len);
+	}
+
+	ret_val = 0;
+
+remove_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and dumps the domain hash table in a
+ * form suitable for use in a kernel generated LIST message.  If the dump or
+ * the send fails an ACK carrying the error code is sent instead.  Returns
+ * zero on success, negative values on failure.
+ */
+static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb;
+
+	ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL)
+		goto list_failure;
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_LIST);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_adddef - Handle an ADDDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADDDEF message and respond accordingly.  Returns
+ * zero on success, negative values on failure.
+ */
+static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	struct netlbl_dom_map *entry = NULL;
+	u32 tmp_val;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto adddef_failure;
+
+	ret_val = -EINVAL;	/* the capability check left ret_val at zero */
+	if (msg_len < NETLBL_LEN_U32)
+		goto adddef_failure;
+	tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		ret_val = -ENOMEM;
+		goto adddef_failure;
+	}
+
+	entry->type = tmp_val;
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = netlbl_domhsh_add_default(entry);
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		if (msg_len < NETLBL_LEN_U32) {
+			ret_val = -EINVAL;
+			goto adddef_failure;
+		}
+		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+		/* We should be holding a rcu_read_lock here while we
+		 * hold the result but since the entry will always be
+		 * deleted when the CIPSO DOI is deleted we are going
+		 * to skip the lock. */
+		rcu_read_lock();
+		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+		if (entry->type_def.cipsov4 == NULL) {
+			rcu_read_unlock();
+			ret_val = -EINVAL;
+			goto adddef_failure;
+		}
+		ret_val = netlbl_domhsh_add_default(entry);
+		rcu_read_unlock();
+		break;
+	default:
+		ret_val = -EINVAL;
+	}
+	if (ret_val != 0)
+		goto adddef_failure;
+
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				NETLBL_E_OK);
+	return 0;
+
+adddef_failure:
+	kfree(entry);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_removedef - Handle a REMOVEDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVEDEF message and remove the default domain
+ * mapping.  The result is always reported to the sender with an ACK message.
+ * Returns zero on success, negative values on failure.
+ */
+static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto removedef_return;
+
+	ret_val = netlbl_domhsh_remove_default();
+
+removedef_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_listdef - Handle a LISTDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LISTDEF message and dump the default domain
+ * mapping in a form suitable for use in a kernel generated LISTDEF message.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb;
+
+	ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL)
+		goto listdef_failure;
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_LISTDEF);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto listdef_failure;
+
+	return 0;
+
+listdef_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_modules - Handle a MODULES message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated MODULES message and respond accordingly.
+ *
+ */
+static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	size_t data_size;
+	u32 mod_count;
+	struct sk_buff *ans_skb = NULL;
+
+	/* unlabeled + cipsov4 */
+	mod_count = 2;
+
+	data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32;
+	ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto modules_failure;
+
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid, 0,
+				   netlbl_mgmt_gnl_family.id,
+				   NLBL_MGMT_C_MODULES) == NULL)
+		goto modules_failure;
+
+	ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count);
+	if (ret_val != 0)
+		goto modules_failure;
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED);
+	if (ret_val != 0)
+		goto modules_failure;
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4);
+	if (ret_val != 0)
+		goto modules_failure;
+
+	/* The send consumes ans_skb whether or not it succeeds, so forget
+	 * the pointer to avoid freeing it a second time in the error path. */
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	ans_skb = NULL;
+	if (ret_val != 0)
+		goto modules_failure;
+
+	return 0;
+
+modules_failure:
+	kfree_skb(ans_skb);
+	netlbl_netlink_send_ack(info, netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK, -ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_version - Handle a VERSION message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated VERSION message and respond accordingly.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb = NULL;
+
+	ans_skb = netlbl_netlink_alloc_skb(0, GENL_HDRLEN + NETLBL_LEN_U32,
+					   GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto version_failure;
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_mgmt_gnl_family.id,
+				   NLBL_MGMT_C_VERSION) == NULL)
+		goto version_failure;
+
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION);
+	if (ret_val != 0)
+		goto version_failure;
+
+	/* The send consumes ans_skb whether or not it succeeds, so forget
+	 * the pointer to avoid freeing it a second time in the error path. */
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	ans_skb = NULL;
+	if (ret_val != 0)
+		goto version_failure;
+
+	return 0;
+
+version_failure:
+	kfree_skb(ans_skb);
+	netlbl_netlink_send_ack(info, netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK, -ret_val);
+	return ret_val;
+}
+
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_mgmt_genl_c_add = {
+	.cmd = NLBL_MGMT_C_ADD,
+	.flags = 0,
+	.doit = netlbl_mgmt_add,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_remove = {
+	.cmd = NLBL_MGMT_C_REMOVE,
+	.flags = 0,
+	.doit = netlbl_mgmt_remove,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_list = {
+	.cmd = NLBL_MGMT_C_LIST,
+	.flags = 0,
+	.doit = netlbl_mgmt_list,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_adddef = {
+	.cmd = NLBL_MGMT_C_ADDDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_adddef,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_removedef = {
+	.cmd = NLBL_MGMT_C_REMOVEDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_removedef,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_listdef = {
+	.cmd = NLBL_MGMT_C_LISTDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_listdef,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_modules = {
+	.cmd = NLBL_MGMT_C_MODULES,
+	.flags = 0,
+	.doit = netlbl_mgmt_modules,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_version = {
+	.cmd = NLBL_MGMT_C_VERSION,
+	.flags = 0,
+	.doit = netlbl_mgmt_version,
+	.dumpit = NULL,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_mgmt_genl_init - Register the NetLabel management component
+ *
+ * Description:
+ * Register the NetLabel management component with the Generic NETLINK
+ * mechanism.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_mgmt_genl_init(void)
+{
+	int ret_val;
+
+	ret_val = genl_register_family(&netlbl_mgmt_gnl_family);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_add);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_remove);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_list);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_adddef);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_removedef);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_listdef);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_modules);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
+				    &netlbl_mgmt_genl_c_version);
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
new file mode 100644
index 0000000000000..fd6c6acbfa08d
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.h
@@ -0,0 +1,246 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_MGMT_H
+#define _NETLABEL_MGMT_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the management interface,
+ * all of which are preceded by the nlmsghdr struct.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an application's message, applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ADD:
+ *   Sent by an application to add a domain mapping to the NetLabel system.
+ *   The kernel should respond with an ACK.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+-------------------------+
+ *   | domain string (variable) | protocol type (32 bits) | ...
+ *   +--------------------------+-------------------------+
+ *
+ *   +-------------- ---- --- -- -
+ *   | mapping data                ... repeated
+ *   +-------------- ---- --- -- -
+ *
+ *     domain string: the domain string, NULL terminated
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +---------------+
+ *   | doi (32 bits) |
+ *   +---------------+
+ *
+ *     doi:  the CIPSO DOI value
+ *
+ * o REMOVE:
+ *   Sent by an application to remove a domain mapping from the NetLabel
+ *   system.  The kernel should ACK this message.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+
+ *   | domain string (variable) | ...
+ *   +--------------------------+
+ *
+ *     domain string: the domain string, NULL terminated
+ *
+ * o LIST:
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LIST message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a LIST
+ *   message either with a LIST message on success or an ACK message on
+ *   failure.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+
+ *   | domain string (variable) | ...
+ *   +--------------------------+
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     domain string: the domain string, NULL terminated
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +----------------+---------------+
+ *   | type (32 bits) | doi (32 bits) |
+ *   +----------------+---------------+
+ *
+ *     type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
+ *           as CIPSO_V4_MAP_*)
+ *     doi:  the CIPSO DOI value
+ *
+ * o ADDDEF:
+ *   Sent by an application to set the default domain mapping for the NetLabel
+ *   system.  The kernel should respond with an ACK.
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +---------------+
+ *   | doi (32 bits) |
+ *   +---------------+
+ *
+ *     doi:  the CIPSO DOI value
+ *
+ * o REMOVEDEF:
+ *   Sent by an application to remove the default domain mapping from the
+ *   NetLabel system, there is no payload.  The kernel should ACK this message.
+ *
+ * o LISTDEF:
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LISTDEF message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a
+ *   LISTDEF message either with a LISTDEF message on success or an ACK message
+ *   on failure.
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +----------------+---------------+
+ *   | type (32 bits) | doi (32 bits) |
+ *   +----------------+---------------+
+ *
+ *     type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
+ *           as CIPSO_V4_MAP_*)
+ *     doi:  the CIPSO DOI value
+ *
+ * o MODULES:
+ *   Sent by an application to request a list of configured NetLabel modules
+ *   in the kernel.  When sent by an application there is no payload.
+ *
+ *   +-------------------+
+ *   | modules (32 bits) | ...
+ *   +-------------------+
+ *
+ *     modules: the number of modules in the message, if this is an application
+ *              generated message and the value is zero then return a list of
+ *              the configured modules
+ *
+ *   +------------------+
+ *   | module (32 bits) | ... repeated
+ *   +------------------+
+ *
+ *     module: the module number as defined by NETLBL_NLTYPE_*
+ *
+ * o VERSION:
+ *   Sent by an application to request the NetLabel version string.  When sent
+ *   by an application there is no payload.  This message type is also used by
+ *   the kernel to respond to a VERSION request.
+ *
+ *   +-------------------+
+ *   | version (32 bits) |
+ *   +-------------------+
+ *
+ *     version: the protocol version number
+ *
+ */
+
+/* NetLabel Management commands */
+enum {
+	NLBL_MGMT_C_UNSPEC,
+	NLBL_MGMT_C_ACK,
+	NLBL_MGMT_C_ADD,
+	NLBL_MGMT_C_REMOVE,
+	NLBL_MGMT_C_LIST,
+	NLBL_MGMT_C_ADDDEF,
+	NLBL_MGMT_C_REMOVEDEF,
+	NLBL_MGMT_C_LISTDEF,
+	NLBL_MGMT_C_MODULES,
+	NLBL_MGMT_C_VERSION,
+	__NLBL_MGMT_C_MAX,
+};
+#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_mgmt_genl_init(void);
+
+#endif
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
new file mode 100644
index 0000000000000..f300e54e14b65
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -0,0 +1,98 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system.  The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_UNLABELED_H
+#define _NETLABEL_UNLABELED_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the Unlabeled subsystem.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an application's message, applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ACCEPT
+ *   This message is sent from an application to specify if the kernel should
+ *   allow unlabeled packets to pass if they do not match any of the static
+ *   mappings defined in the unlabeled module.
+ *
+ *   +-----------------+
+ *   | allow (32 bits) |
+ *   +-----------------+
+ *
+ *     allow: if true (1) then allow the packets to pass, if false (0) then
+ *            reject the packets
+ *
+ * o LIST
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LIST message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a LIST
+ *   message either with a LIST message on success or an ACK message on
+ *   failure.
+ *
+ *   +-----------------------+
+ *   | accept flag (32 bits) |
+ *   +-----------------------+
+ *
+ *     accept flag: if true (1) then unlabeled packets are allowed to pass,
+ *                  if false (0) then unlabeled packets are rejected
+ *
+ */
+
+/* NetLabel Unlabeled commands */
+enum {
+	NLBL_UNLABEL_C_UNSPEC,
+	NLBL_UNLABEL_C_ACK,
+	NLBL_UNLABEL_C_ACCEPT,
+	NLBL_UNLABEL_C_LIST,
+	__NLBL_UNLABEL_C_MAX,
+};
+#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_unlabel_genl_init(void);
+
+/* Process Unlabeled incoming network packets */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr);
+
+/* Set the default configuration to allow Unlabeled packets */
+int netlbl_unlabel_defconf(void);
+
+#endif
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
new file mode 100644
index 0000000000000..80022221b0a75
--- /dev/null
+++ b/net/netlabel/netlabel_user.c
@@ -0,0 +1,158 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/socket.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_cipso_v4.h"
+#include "netlabel_user.h"
+
+/*
+ * NetLabel NETLINK Setup Functions
+ */
+
+/**
+ * netlbl_netlink_init - Initialize the NETLINK communication channel
+ *
+ * Description:
+ * Call out to the NetLabel components so they can register their families and
+ * commands with the Generic NETLINK mechanism.  Returns zero on success and
+ * non-zero on failure.
+ *
+ */
+int netlbl_netlink_init(void)
+{
+	int ret_val;
+
+	ret_val = netlbl_mgmt_genl_init();
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = netlbl_cipsov4_genl_init();
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = netlbl_unlabel_genl_init();
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
+
+/*
+ * NetLabel Common Protocol Functions
+ */
+
+/**
+ * netlbl_netlink_send_ack - Send an ACK message
+ * @info: the generic NETLINK information
+ * @genl_family: the generic NETLINK family ID value
+ * @ack_cmd: the generic NETLINK family ACK command value
+ * @ret_code: return code to use
+ *
+ * Description:
+ * This function sends an ACK message to the sender of the NETLINK message
+ * specified by @info.
+ *
+ */
+void netlbl_netlink_send_ack(const struct genl_info *info,
+			     u32 genl_family,
+			     u8 ack_cmd,
+			     u32 ret_code)
+{
+	size_t data_size;
+	struct sk_buff *skb;
+
+	data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32;
+	skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
+	if (skb == NULL)
+		return;
+
+	if (netlbl_netlink_hdr_put(skb,
+				   info->snd_pid,
+				   0,
+				   genl_family,
+				   ack_cmd) == NULL)
+		goto send_ack_failure;
+
+	if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0)
+		goto send_ack_failure;
+	if (nla_put_u32(skb, NLA_U32, ret_code) != 0)
+		goto send_ack_failure;
+
+	netlbl_netlink_snd(skb, info->snd_pid);
+	return;
+
+send_ack_failure:
+	kfree_skb(skb);
+}
+
+/*
+ * NETLINK I/O Functions
+ */
+
+/**
+ * netlbl_netlink_snd - Send a NetLabel message
+ * @skb: NetLabel message
+ * @pid: destination PID
+ *
+ * Description:
+ * Sends a unicast NetLabel message over the NETLINK socket.
+ *
+ */
+int netlbl_netlink_snd(struct sk_buff *skb, u32 pid)
+{
+	return genlmsg_unicast(skb, pid);
+}
+
+/**
+ * netlbl_netlink_snd_multicast - Send a multicast NetLabel message
+ * @skb: NetLabel message
+ * @pid: sending PID
+ * @group: multicast group id
+ *
+ * Description:
+ * Sends a multicast NetLabel message over the NETLINK socket to all members
+ * of @group except @pid.
+ *
+ */
+int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group)
+{
+	return genlmsg_multicast(skb, pid, group);
+}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
new file mode 100644
index 0000000000000..ccf237b3a1282
--- /dev/null
+++ b/net/netlabel/netlabel_user.h
@@ -0,0 +1,214 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_USER_H
+#define _NETLABEL_USER_H
+
+#include <linux/skbuff.h>
+#include <linux/capability.h>
+#include <linux/genetlink.h>
+#include <net/netlabel.h>
+#include <net/genetlink.h>
+
+/* NetLabel NETLINK helper functions */
+
+/**
+ * netlbl_netlink_cap_check - Check the NETLINK msg capabilities
+ * @skb: the NETLINK buffer
+ * @req_cap: the required capability
+ *
+ * Description:
+ * Check the NETLINK buffer's capabilities against the required capabilities.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static inline int netlbl_netlink_cap_check(const struct sk_buff *skb,
+					   kernel_cap_t req_cap)
+{
+	if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap))
+		return 0;
+	return -EPERM;
+}
+
+/**
+ * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u8 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len)
+{
+	u8 val = nla_get_u8(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u16 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len)
+{
+	u16 val = nla_get_u16(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u32 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len)
+{
+	u32 val = nla_get_u32(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
+ * @skb: the packet
+ * @pid: the PID of the recipient
+ * @seq: the sequence number
+ * @type: the generic NETLINK message family type
+ * @cmd: command
+ *
+ * Description:
+ * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
+ * struct to the packet.  Returns a pointer to the start of the payload buffer
+ * on success or NULL on failure.
+ *
+ */
+static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
+					   u32 pid,
+					   u32 seq,
+					   int type,
+					   u8 cmd)
+{
+	return genlmsg_put(skb,
+			   pid,
+			   seq,
+			   type,
+			   0,
+			   0,
+			   cmd,
+			   NETLBL_PROTO_VERSION);
+}
+
+/**
+ * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff
+ * @skb: the packet
+ * @pid: the PID of the receipient
+ * @seq: the sequence number
+ * @type: the generic NETLINK message family type
+ * @cmd: command
+ *
+ * Description:
+ * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
+ * struct to the packet.
+ *
+ */
+static inline void netlbl_netlink_hdr_push(struct sk_buff *skb,
+					   u32 pid,
+					   u32 seq,
+					   int type,
+					   u8 cmd)
+
+{
+	struct nlmsghdr *nlh;
+	struct genlmsghdr *hdr;
+
+	nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN));
+	nlh->nlmsg_type = type;
+	nlh->nlmsg_len = skb->len;
+	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_pid = pid;
+	nlh->nlmsg_seq = seq;
+
+	hdr = nlmsg_data(nlh);
+	hdr->cmd = cmd;
+	hdr->version = NETLBL_PROTO_VERSION;
+	hdr->reserved = 0;
+}
+
+/**
+ * netlbl_netlink_payload_len - Return the length of the payload
+ * @skb: the NETLINK buffer
+ *
+ * Description:
+ * This function returns the length of the NetLabel payload.
+ *
+ */
+static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb)
+{
+	return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN;
+}
+
+/**
+ * netlbl_netlink_payload_data - Returns a pointer to the start of the payload
+ * @skb: the NETLINK buffer
+ *
+ * Description:
+ * This function returns a pointer to the start of the NetLabel payload.
+ *
+ */
+static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb)
+{
+  return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) +
+	  GENL_HDRLEN;
+}
+
+/* NetLabel common protocol functions */
+
+void netlbl_netlink_send_ack(const struct genl_info *info,
+			     u32 genl_family,
+			     u8 ack_cmd,
+			     u32 ret_code);
+
+/* NetLabel NETLINK I/O functions */
+
+int netlbl_netlink_init(void);
+int netlbl_netlink_snd(struct sk_buff *skb, u32 pid);
+int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group);
+
+#endif
-- 
GitLab


From 96cb8e3313c7a12e026c1ed510522ae6f6023875 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 3 Aug 2006 16:48:59 -0700
Subject: [PATCH 0392/1063] [NetLabel]: CIPSOv4 and Unlabeled packet
 integration

Add CIPSO/IPv4 and unlabeled packet management to the NetLabel
subsystem.  The CIPSO/IPv4 changes allow the configuration of
CIPSO/IPv4 within the overall NetLabel framework.  The unlabeled
packet changes allows NetLabel to pass unlabeled packets without
error.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_cipso_v4.c  | 542 ++++++++++++++++++++++++++++++
 net/netlabel/netlabel_unlabeled.c | 253 ++++++++++++++
 2 files changed, 795 insertions(+)
 create mode 100644 net/netlabel/netlabel_cipso_v4.c
 create mode 100644 net/netlabel/netlabel_unlabeled.c

diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
new file mode 100644
index 0000000000000..a4f40adc447b0
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -0,0 +1,542 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_user.h"
+#include "netlabel_cipso_v4.h"
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_cipsov4_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
+{
+	struct cipso_v4_doi *ptr;
+
+	ptr = container_of(entry, struct cipso_v4_doi, rcu);
+	if (ptr->type == CIPSO_V4_MAP_STD) {
+		kfree(ptr->map.std->lvl.cipso);
+		kfree(ptr->map.std->lvl.local);
+		kfree(ptr->map.std->cat.cipso);
+		kfree(ptr->map.std->cat.local);
+		/* the table itself is a separate allocation; free it too */
+		kfree(ptr->map.std);
+	}
+	kfree(ptr);
+}
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition
+ * @doi: the DOI value
+ * @msg: the ADD message data
+ * @msg_size: the size of the ADD message buffer
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
+ * and add it to the CIPSO V4 engine.  Return zero on success and non-zero on
+ * error.
+ *
+ */
+static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
+{
+	int ret_val = -EINVAL;
+	int msg_len = msg_size;
+	u32 num_tags;
+	u32 num_lvls;
+	u32 num_cats;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 iter;
+	u32 tmp_val_a;
+	u32 tmp_val_b;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_std_failure;
+	num_tags = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
+		goto add_std_failure;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
+	if (doi_def->map.std == NULL) {
+		/* ->type is still unset: skip the type-driven free path */
+		kfree(doi_def);
+		return -ENOMEM;
+	}
+	doi_def->type = CIPSO_V4_MAP_STD;
+
+	for (iter = 0; iter < num_tags; iter++) {
+		if (msg_len < NETLBL_LEN_U8)
+			goto add_std_failure;
+		doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			break;
+		default:
+			goto add_std_failure;
+		}
+	}
+	if (iter < CIPSO_V4_TAG_MAXCNT)
+		doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+	if (msg_len < 6 * NETLBL_LEN_U32)
+		goto add_std_failure;
+
+	num_lvls = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_lvls == 0)
+		goto add_std_failure;
+	doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len);
+	if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS)
+		goto add_std_failure;
+	doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
+					      sizeof(u32), GFP_KERNEL);
+	if (doi_def->map.std->lvl.local == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len);
+	if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
+		goto add_std_failure;
+	doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
+					      sizeof(u32), GFP_KERNEL);
+	if (doi_def->map.std->lvl.cipso == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+
+	num_cats = netlbl_getinc_u32(&msg, &msg_len);
+	doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len);
+	if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS)
+		goto add_std_failure;
+	doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size,
+					      sizeof(u32), GFP_KERNEL);
+	if (doi_def->map.std->cat.local == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len);
+	if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
+		goto add_std_failure;
+	doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size,
+					      sizeof(u32), GFP_KERNEL);
+	if (doi_def->map.std->cat.cipso == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+
+	/* size the remaining payload in 64 bits so that large user-supplied
+	 * counts cannot wrap the sum and slip past this check */
+	if (msg_len < 0 || (u64)msg_len <
+	    (u64)num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) +
+	    (u64)num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16))
+		goto add_std_failure;
+
+	/* start with every slot invalid, then fill in the supplied mappings */
+	for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++)
+		doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL;
+	for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++)
+		doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL;
+	for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++)
+		doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT;
+	for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++)
+		doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT;
+
+	for (iter = 0; iter < num_lvls; iter++) {
+		tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
+		tmp_val_b = netlbl_getinc_u8(&msg, &msg_len);
+
+		if (tmp_val_a >= doi_def->map.std->lvl.local_size ||
+		    tmp_val_b >= doi_def->map.std->lvl.cipso_size)
+			goto add_std_failure;
+
+		doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a;
+		doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b;
+	}
+
+	for (iter = 0; iter < num_cats; iter++) {
+		tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
+		tmp_val_b = netlbl_getinc_u16(&msg, &msg_len);
+
+		if (tmp_val_a >= doi_def->map.std->cat.local_size ||
+		    tmp_val_b >= doi_def->map.std->cat.cipso_size)
+			goto add_std_failure;
+
+		doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a;
+		doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b;
+	}
+
+	doi_def->doi = doi;
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_std_failure;
+	return 0;
+
+add_std_failure:
+	if (doi_def)
+		netlbl_cipsov4_doi_free(&doi_def->rcu);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition
+ * @doi: the DOI value
+ * @msg: the ADD message data
+ * @msg_size: the size of the ADD message buffer
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message
+ * and add it to the CIPSO V4 engine.  Return zero on success and non-zero on
+ * error.
+ *
+ */
+static int netlbl_cipsov4_add_pass(u32 doi,
+				   struct nlattr *msg,
+				   size_t msg_size)
+{
+	int ret_val = -EINVAL;
+	int msg_len = msg_size;
+	u32 num_tags;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 iter;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_pass_failure;
+	num_tags = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
+		goto add_pass_failure;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL) {
+		ret_val = -ENOMEM;
+		goto add_pass_failure;
+	}
+	doi_def->type = CIPSO_V4_MAP_PASS;
+
+	for (iter = 0; iter < num_tags; iter++) {
+		if (msg_len < NETLBL_LEN_U8)
+			goto add_pass_failure;
+		doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			break;
+		default:
+			goto add_pass_failure;
+		}
+	}
+	if (iter < CIPSO_V4_TAG_MAXCNT)
+		doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+	doi_def->doi = doi;
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_pass_failure;
+	return 0;
+
+add_pass_failure:
+	if (doi_def)
+		netlbl_cipsov4_doi_free(&doi_def->rcu);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new DOI definition based on the given ADD message and add it to the
+ * CIPSO V4 engine.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+	u32 doi;
+	u32 map_type;
+	int msg_len = netlbl_netlink_payload_len(skb);
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto add_return;
+
+	/* the capability check left zero in ret_val; restore the error default
+	 * so a short message or an unknown map type is not ACKed as success */
+	ret_val = -EINVAL;
+	if (msg_len < 2 * NETLBL_LEN_U32)
+		goto add_return;
+
+	doi = netlbl_getinc_u32(&msg, &msg_len);
+	map_type = netlbl_getinc_u32(&msg, &msg_len);
+	switch (map_type) {
+	case CIPSO_V4_MAP_STD:
+		ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len);
+		break;
+	case CIPSO_V4_MAP_PASS:
+		ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len);
+		break;
+	}
+
+add_return:
+	netlbl_netlink_send_ack(info, netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK, -ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and respond accordingly.  Returns
+ * zero on success and negative values on error.
+ *
+ */
+static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	u32 doi;
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+	struct sk_buff *ans_skb;
+
+	if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32)
+		goto list_failure;
+
+	doi = nla_get_u32(msg);
+	ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL) {
+		ret_val = -ENOMEM;
+		goto list_failure;
+	}
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_LIST);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_listall - Handle a LISTALL message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LISTALL message and respond accordingly.  Returns
+ * zero on success and negative values on error.
+ *
+ */
+static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct sk_buff *ans_skb;
+
+	ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL) {
+		ret_val = -ENOMEM;
+		goto listall_failure;
+	}
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_LISTALL);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto listall_failure;
+
+	return 0;
+
+listall_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVE message and respond accordingly.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+	u32 doi;
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto remove_return;
+
+	if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) {
+		ret_val = -EINVAL;
+		goto remove_return;
+	}
+
+	doi = nla_get_u32(msg);
+	ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
+
+remove_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_cipsov4_genl_c_add = {
+	.cmd = NLBL_CIPSOV4_C_ADD,
+	.flags = 0,
+	.doit = netlbl_cipsov4_add,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_remove = {
+	.cmd = NLBL_CIPSOV4_C_REMOVE,
+	.flags = 0,
+	.doit = netlbl_cipsov4_remove,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_list = {
+	.cmd = NLBL_CIPSOV4_C_LIST,
+	.flags = 0,
+	.doit = netlbl_cipsov4_list,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_listall = {
+	.cmd = NLBL_CIPSOV4_C_LISTALL,
+	.flags = 0,
+	.doit = netlbl_cipsov4_listall,
+	.dumpit = NULL,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component
+ *
+ * Description:
+ * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK
+ * mechanism.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_cipsov4_genl_init(void)
+{
+	int ret_val;
+
+	ret_val = genl_register_family(&netlbl_cipsov4_gnl_family);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_add);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_remove);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_list);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_listall);
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
new file mode 100644
index 0000000000000..785f4960e0d3a
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -0,0 +1,253 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system.  The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_user.h"
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+
+/* Accept unlabeled packets flag */
+static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0);
+
+/* NetLabel Generic NETLINK unlabeled family */
+static struct genl_family netlbl_unlabel_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_UNLABELED_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_unlabel_accept - Handle an ACCEPT message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ACCEPT message and set the accept flag accordingly.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+	struct nlattr *data = netlbl_netlink_payload_data(skb);
+	u32 value;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		return ret_val;
+
+	if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) {
+		value = nla_get_u32(data);
+		if (value == 1 || value == 0) {
+			atomic_set(&netlabel_unlabel_accept_flg, value);
+			netlbl_netlink_send_ack(info,
+						netlbl_unlabel_gnl_family.id,
+						NLBL_UNLABEL_C_ACK,
+						NETLBL_E_OK);
+			return 0;
+		}
+	}
+
+	netlbl_netlink_send_ack(info,
+				netlbl_unlabel_gnl_family.id,
+				NLBL_UNLABEL_C_ACK,
+				EINVAL);
+	return -EINVAL;
+}
+
+/**
+ * netlbl_unlabel_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and respond with the current status.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb;
+
+	ans_skb = netlbl_netlink_alloc_skb(0, GENL_HDRLEN + NETLBL_LEN_U32,
+					   GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto list_failure;
+
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_unlabel_gnl_family.id,
+				   NLBL_UNLABEL_C_LIST) == NULL)
+		goto list_failure_free;
+
+	ret_val = nla_put_u32(ans_skb,
+			      NLA_U32,
+			      atomic_read(&netlabel_unlabel_accept_flg));
+	if (ret_val != 0)
+		goto list_failure_free;
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure_free:
+	/* the reply was never handed to netlbl_netlink_snd(); drop it here */
+	kfree_skb(ans_skb);
+list_failure:
+	netlbl_netlink_send_ack(info, netlbl_unlabel_gnl_family.id,
+				NLBL_UNLABEL_C_ACK, -ret_val);
+	return ret_val;
+}
+
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_unlabel_genl_c_accept = {
+	.cmd = NLBL_UNLABEL_C_ACCEPT,
+	.flags = 0,
+	.doit = netlbl_unlabel_accept,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_list = {
+	.cmd = NLBL_UNLABEL_C_LIST,
+	.flags = 0,
+	.doit = netlbl_unlabel_list,
+	.dumpit = NULL,
+};
+
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component
+ *
+ * Description:
+ * Register the unlabeled packet NetLabel component with the Generic NETLINK
+ * mechanism.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_unlabel_genl_init(void)
+{
+	int ret_val;
+
+	ret_val = genl_register_family(&netlbl_unlabel_gnl_family);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+				    &netlbl_unlabel_genl_c_accept);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+				    &netlbl_unlabel_genl_c_list);
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
+
+/*
+ * NetLabel KAPI Hooks
+ */
+
+/**
+ * netlbl_unlabel_getattr - Get the security attributes for an unlabeled packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Determine the security attributes, if any, for an unlabeled packet and return
+ * them in @secattr.  Returns zero on success and negative values on failure.
+ *
+ */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
+{
+	if (atomic_read(&netlabel_unlabel_accept_flg) == 1) {
+		memset(secattr, 0, sizeof(*secattr));
+		return 0;
+	}
+
+	return -ENOMSG;
+}
+
+/**
+ * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets
+ *
+ * Description:
+ * Set the default NetLabel configuration to allow incoming unlabeled packets
+ * and to send unlabeled network traffic by default.
+ *
+ */
+int netlbl_unlabel_defconf(void)
+{
+	int ret_val;
+	struct netlbl_dom_map *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+	entry->type = NETLBL_NLTYPE_UNLABELED;
+	ret_val = netlbl_domhsh_add_default(entry);
+	if (ret_val != 0)
+		return ret_val;
+
+	atomic_set(&netlabel_unlabel_accept_flg, 1);
+
+	return 0;
+}
-- 
GitLab


From 7420ed23a4f77480b5b7b3245e5da30dd24b7575 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Fri, 4 Aug 2006 23:17:57 -0700
Subject: [PATCH 0393/1063] [NetLabel]: SELinux support

Add NetLabel support to the SELinux LSM and modify the
socket_post_create() LSM hook to return an error code.  The most
significant part of this patch is the addition of NetLabel hooks into
the following SELinux LSM hooks:

 * selinux_file_permission()
 * selinux_socket_sendmsg()
 * selinux_socket_post_create()
 * selinux_socket_sock_rcv_skb()
 * selinux_socket_getpeersec_stream()
 * selinux_socket_getpeersec_dgram()
 * selinux_sock_graft()
 * selinux_inet_conn_request()

The basic reasoning behind this patch is that outgoing packets are
"NetLabel'd" by labeling their socket and the NetLabel security
attributes are checked via the additional hook in
selinux_socket_sock_rcv_skb().  NetLabel itself is only a labeling
mechanism, similar to filesystem extended attributes, it is up to the
SELinux enforcement mechanism to perform the actual access checks.

In addition to the changes outlined above this patch also includes
some changes to the extended bitmap (ebitmap) and multi-level security
(mls) code to import and export SELinux TE/MLS attributes into and out
of NetLabel.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h                    |  25 +-
 net/socket.c                                |  13 +-
 security/dummy.c                            |   6 +-
 security/selinux/hooks.c                    |  56 ++-
 security/selinux/include/objsec.h           |   8 +
 security/selinux/include/selinux_netlabel.h | 125 +++++
 security/selinux/ss/ebitmap.c               | 144 ++++++
 security/selinux/ss/ebitmap.h               |   6 +
 security/selinux/ss/mls.c                   | 156 +++++++
 security/selinux/ss/mls.h                   |  21 +
 security/selinux/ss/services.c              | 488 ++++++++++++++++++++
 11 files changed, 1020 insertions(+), 28 deletions(-)
 create mode 100644 security/selinux/include/selinux_netlabel.h

diff --git a/include/linux/security.h b/include/linux/security.h
index bb4c80fdfe7a6..9f56fb8a4a6c3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1341,8 +1341,8 @@ struct security_operations {
 	int (*unix_may_send) (struct socket * sock, struct socket * other);
 
 	int (*socket_create) (int family, int type, int protocol, int kern);
-	void (*socket_post_create) (struct socket * sock, int family,
-				    int type, int protocol, int kern);
+	int (*socket_post_create) (struct socket * sock, int family,
+				   int type, int protocol, int kern);
 	int (*socket_bind) (struct socket * sock,
 			    struct sockaddr * address, int addrlen);
 	int (*socket_connect) (struct socket * sock,
@@ -2824,13 +2824,13 @@ static inline int security_socket_create (int family, int type,
 	return security_ops->socket_create(family, type, protocol, kern);
 }
 
-static inline void security_socket_post_create(struct socket * sock, 
-					       int family,
-					       int type, 
-					       int protocol, int kern)
+static inline int security_socket_post_create(struct socket * sock,
+					      int family,
+					      int type,
+					      int protocol, int kern)
 {
-	security_ops->socket_post_create(sock, family, type,
-					 protocol, kern);
+	return security_ops->socket_post_create(sock, family, type,
+						protocol, kern);
 }
 
 static inline int security_socket_bind(struct socket * sock, 
@@ -2982,11 +2982,12 @@ static inline int security_socket_create (int family, int type,
 	return 0;
 }
 
-static inline void security_socket_post_create(struct socket * sock, 
-					       int family,
-					       int type, 
-					       int protocol, int kern)
+static inline int security_socket_post_create(struct socket * sock,
+					      int family,
+					      int type,
+					      int protocol, int kern)
 {
+	return 0;
 }
 
 static inline int security_socket_bind(struct socket * sock, 
diff --git a/net/socket.c b/net/socket.c
index 6d261bf206fc8..6756e57e1ff0b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -973,11 +973,18 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res)
 		goto out;
 	}
 
-	security_socket_post_create(sock, family, type, protocol, 1);
 	sock->type = type;
+	err = security_socket_post_create(sock, family, type, protocol, 1);
+	if (err)
+		goto out_release;
+
 out:
 	*res = sock;
 	return err;
+out_release:
+	sock_release(sock);
+	sock = NULL;
+	goto out;
 }
 
 /* No kernel lock held - perfect */
@@ -1214,7 +1221,9 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
 	 */
 	module_put(net_families[family]->owner);
 	*res = sock;
-	security_socket_post_create(sock, family, type, protocol, kern);
+	err = security_socket_post_create(sock, family, type, protocol, kern);
+	if (err)
+		goto out_release;
 
 out:
 	net_family_read_unlock();
diff --git a/security/dummy.c b/security/dummy.c
index 1c45f8e4aad15..aeee70565509d 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -709,10 +709,10 @@ static int dummy_socket_create (int family, int type,
 	return 0;
 }
 
-static void dummy_socket_post_create (struct socket *sock, int family, int type,
-				      int protocol, int kern)
+static int dummy_socket_post_create (struct socket *sock, int family, int type,
+				     int protocol, int kern)
 {
-	return;
+	return 0;
 }
 
 static int dummy_socket_bind (struct socket *sock, struct sockaddr *address,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 33028b3b19ce1..2a6bbb921e1ed 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -12,6 +12,8 @@
  *  Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
  *  Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
  *                          <dgoeddel@trustedcs.com>
+ *  Copyright (C) 2006 Hewlett-Packard Development Company, L.P.
+ *                     Paul Moore, <paul.moore@hp.com>
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2,
@@ -74,6 +76,7 @@
 #include "objsec.h"
 #include "netif.h"
 #include "xfrm.h"
+#include "selinux_netlabel.h"
 
 #define XATTR_SELINUX_SUFFIX "selinux"
 #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX
@@ -2395,6 +2398,7 @@ static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t
 
 static int selinux_file_permission(struct file *file, int mask)
 {
+	int rc;
 	struct inode *inode = file->f_dentry->d_inode;
 
 	if (!mask) {
@@ -2406,8 +2410,12 @@ static int selinux_file_permission(struct file *file, int mask)
 	if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
 		mask |= MAY_APPEND;
 
-	return file_has_perm(current, file,
-			     file_mask_to_av(inode->i_mode, mask));
+	rc = file_has_perm(current, file,
+			   file_mask_to_av(inode->i_mode, mask));
+	if (rc)
+		return rc;
+
+	return selinux_netlbl_inode_permission(inode, mask);
 }
 
 static int selinux_file_alloc_security(struct file *file)
@@ -3058,9 +3066,10 @@ static int selinux_socket_create(int family, int type,
 	return err;
 }
 
-static void selinux_socket_post_create(struct socket *sock, int family,
-				       int type, int protocol, int kern)
+static int selinux_socket_post_create(struct socket *sock, int family,
+				      int type, int protocol, int kern)
 {
+	int err = 0;
 	struct inode_security_struct *isec;
 	struct task_security_struct *tsec;
 	struct sk_security_struct *sksec;
@@ -3077,9 +3086,12 @@ static void selinux_socket_post_create(struct socket *sock, int family,
 	if (sock->sk) {
 		sksec = sock->sk->sk_security;
 		sksec->sid = isec->sid;
+		err = selinux_netlbl_socket_post_create(sock,
+							family,
+							isec->sid);
 	}
 
-	return;
+	return err;
 }
 
 /* Range of port numbers used to automatically bind.
@@ -3260,7 +3272,13 @@ static int selinux_socket_accept(struct socket *sock, struct socket *newsock)
 static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg,
  				  int size)
 {
-	return socket_has_perm(current, sock, SOCKET__WRITE);
+	int rc;
+
+	rc = socket_has_perm(current, sock, SOCKET__WRITE);
+	if (rc)
+		return rc;
+
+	return selinux_netlbl_inode_permission(SOCK_INODE(sock), MAY_WRITE);
 }
 
 static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -3468,6 +3486,10 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		goto out;
 
+	err = selinux_netlbl_sock_rcv_skb(sksec, skb, &ad);
+	if (err)
+		goto out;
+
 	err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
 out:	
 	return err;
@@ -3491,8 +3513,9 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op
 		peer_sid = ssec->peer_sid;
 	}
 	else if (isec->sclass == SECCLASS_TCP_SOCKET) {
-		peer_sid = selinux_socket_getpeer_stream(sock->sk);
-
+		peer_sid = selinux_netlbl_socket_getpeersec_stream(sock);
+		if (peer_sid == SECSID_NULL)
+			peer_sid = selinux_socket_getpeer_stream(sock->sk);
 		if (peer_sid == SECSID_NULL) {
 			err = -ENOPROTOOPT;
 			goto out;
@@ -3532,8 +3555,11 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *
 
 	if (sock && (sock->sk->sk_family == PF_UNIX))
 		selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid);
-	else if (skb)
-		peer_secid = selinux_socket_getpeer_dgram(skb);
+	else if (skb) {
+		peer_secid = selinux_netlbl_socket_getpeersec_dgram(skb);
+		if (peer_secid == SECSID_NULL)
+			peer_secid = selinux_socket_getpeer_dgram(skb);
+	}
 
 	if (peer_secid == SECSID_NULL)
 		err = -EINVAL;
@@ -3578,6 +3604,8 @@ void selinux_sock_graft(struct sock* sk, struct socket *parent)
 	struct sk_security_struct *sksec = sk->sk_security;
 
 	isec->sid = sksec->sid;
+
+	selinux_netlbl_sock_graft(sk, parent);
 }
 
 int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
@@ -3585,9 +3613,15 @@ int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 {
 	struct sk_security_struct *sksec = sk->sk_security;
 	int err;
-	u32 newsid = 0;
+	u32 newsid;
 	u32 peersid;
 
+	newsid = selinux_netlbl_inet_conn_request(skb, sksec->sid);
+	if (newsid != SECSID_NULL) {
+		req->secid = newsid;
+		return 0;
+	}
+
 	err = selinux_xfrm_decode_session(skb, &peersid, 0);
 	BUG_ON(err);
 
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 79b9e0af19a0c..0a39bfd1319fe 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -101,6 +101,14 @@ struct sk_security_struct {
 	struct sock *sk;		/* back pointer to sk object */
 	u32 sid;			/* SID of this object */
 	u32 peer_sid;			/* SID of peer */
+#ifdef CONFIG_NETLABEL
+	u16 sclass;			/* sock security class */
+	enum {				/* NetLabel state */
+		NLBL_UNSET = 0,
+		NLBL_REQUIRE,
+		NLBL_LABELED,
+	} nlbl_state;
+#endif
 };
 
 struct key_security_struct {
diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h
new file mode 100644
index 0000000000000..88c463eef1e1b
--- /dev/null
+++ b/security/selinux/include/selinux_netlabel.h
@@ -0,0 +1,125 @@
+/*
+ * SELinux interface to the NetLabel subsystem
+ *
+ * Author : Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _SELINUX_NETLABEL_H_
+#define _SELINUX_NETLABEL_H_
+
+#ifdef CONFIG_NETLABEL
+void selinux_netlbl_cache_invalidate(void);
+int selinux_netlbl_socket_post_create(struct socket *sock,
+				      int sock_family,
+				      u32 sid);
+void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock);
+u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid);
+int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+				struct sk_buff *skb,
+				struct avc_audit_data *ad);
+u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock);
+u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb);
+
+int __selinux_netlbl_inode_permission(struct inode *inode, int mask);
+/**
+ * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled
+ * @inode: the file descriptor's inode
+ * @mask: the permission mask
+ *
+ * Description:
+ * Looks at a file's inode and if it is marked as a socket protected by
+ * NetLabel then verify that the socket has been labeled, if not try to label
+ * the socket now with the inode's SID.  Returns zero on success, negative
+ * values on failure.
+ *
+ */
+static inline int selinux_netlbl_inode_permission(struct inode *inode,
+						  int mask)
+{
+	int rc = 0;
+	struct inode_security_struct *isec;
+	struct sk_security_struct *sksec;
+
+	if (!S_ISSOCK(inode->i_mode))
+		return 0;
+
+	isec = inode->i_security;
+	sksec = SOCKET_I(inode)->sk->sk_security;
+	down(&isec->sem);
+	if (unlikely(sksec->nlbl_state == NLBL_REQUIRE &&
+		     (mask & (MAY_WRITE | MAY_APPEND))))
+		rc = __selinux_netlbl_inode_permission(inode, mask);
+	up(&isec->sem);
+
+	return rc;
+}
+#else
+static inline void selinux_netlbl_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int selinux_netlbl_socket_post_create(struct socket *sock,
+						    int sock_family,
+						    u32 sid)
+{
+	return 0;
+}
+
+static inline void selinux_netlbl_sock_graft(struct sock *sk,
+					     struct socket *sock)
+{
+	return;
+}
+
+static inline u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb,
+						   u32 sock_sid)
+{
+	return SECSID_NULL;
+}
+
+static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+					      struct sk_buff *skb,
+					      struct avc_audit_data *ad)
+{
+	return 0;
+}
+
+static inline u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock)
+{
+	return SECSID_NULL;
+}
+
+static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb)
+{
+	return SECSID_NULL;
+}
+
+static inline int selinux_netlbl_inode_permission(struct inode *inode,
+						  int mask)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 47024a6e18440..4b915eb60c45d 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -3,6 +3,14 @@
  *
  * Author : Stephen Smalley, <sds@epoch.ncsc.mil>
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added ebitmap_export() and ebitmap_import()
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
@@ -59,6 +67,142 @@ int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src)
 	return 0;
 }
 
+/**
+ * ebitmap_export - Export an ebitmap to an unsigned char bitmap string
+ * @src: the ebitmap to export
+ * @dst: the resulting bitmap string
+ * @dst_len: length of dst in bytes
+ *
+ * Description:
+ * Allocate a buffer at least src->highbit bits long and export the extensible
+ * bitmap into the buffer.  The bitmap string will be in little endian format,
+ * i.e. LSB first.  The value returned in dst_len may not be the true size of the
+ * buffer as the length of the buffer is rounded up to a multiple of MAPTYPE.
+ * The caller must free the buffer when finished. Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int ebitmap_export(const struct ebitmap *src,
+		   unsigned char **dst,
+		   size_t *dst_len)
+{
+	size_t bitmap_len;
+	unsigned char *bitmap;
+	struct ebitmap_node *iter_node;
+	MAPTYPE node_val;
+	size_t bitmap_byte;
+	unsigned char bitmask;
+
+	bitmap_len = src->highbit / 8;
+	if (src->highbit % 7)
+		bitmap_len += 1;
+	if (bitmap_len == 0)
+		return -EINVAL;
+
+	bitmap = kzalloc((bitmap_len & ~(sizeof(MAPTYPE) - 1)) +
+			 sizeof(MAPTYPE),
+			 GFP_ATOMIC);
+	if (bitmap == NULL)
+		return -ENOMEM;
+
+	iter_node = src->node;
+	do {
+		bitmap_byte = iter_node->startbit / 8;
+		bitmask = 0x80;
+		node_val = iter_node->map;
+		do {
+			if (bitmask == 0) {
+				bitmap_byte++;
+				bitmask = 0x80;
+			}
+			if (node_val & (MAPTYPE)0x01)
+				bitmap[bitmap_byte] |= bitmask;
+			node_val >>= 1;
+			bitmask >>= 1;
+		} while (node_val > 0);
+		iter_node = iter_node->next;
+	} while (iter_node);
+
+	*dst = bitmap;
+	*dst_len = bitmap_len;
+	return 0;
+}
+
+/**
+ * ebitmap_import - Import an unsigned char bitmap string into an ebitmap
+ * @src: the bitmap string
+ * @src_len: the bitmap length in bytes
+ * @dst: the empty ebitmap
+ *
+ * Description:
+ * This function takes a little endian bitmap string in src and imports it into
+ * the ebitmap pointed to by dst.  Returns zero on success, negative values on
+ * failure.
+ *
+ */
+int ebitmap_import(const unsigned char *src,
+		   size_t src_len,
+		   struct ebitmap *dst)
+{
+	size_t src_off = 0;
+	struct ebitmap_node *node_new;
+	struct ebitmap_node *node_last = NULL;
+	size_t iter;
+	size_t iter_bit;
+	size_t iter_limit;
+	unsigned char src_byte;
+
+	do {
+		iter_limit = src_len - src_off;
+		if (iter_limit >= sizeof(MAPTYPE)) {
+			if (*(MAPTYPE *)&src[src_off] == 0) {
+				src_off += sizeof(MAPTYPE);
+				continue;
+			}
+			iter_limit = sizeof(MAPTYPE);
+		} else {
+			iter = src_off;
+			src_byte = 0;
+			do {
+				src_byte |= src[iter++];
+			} while (iter < src_len && src_byte == 0);
+			if (src_byte == 0)
+				break;
+		}
+
+		node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC);
+		if (unlikely(node_new == NULL)) {
+			ebitmap_destroy(dst);
+			return -ENOMEM;
+		}
+		node_new->startbit = src_off * 8;
+		iter = 0;
+		do {
+			src_byte = src[src_off++];
+			iter_bit = iter++ * 8;
+			while (src_byte != 0) {
+				if (src_byte & 0x80)
+					node_new->map |= MAPBIT << iter_bit;
+				iter_bit++;
+				src_byte <<= 1;
+			}
+		} while (iter < iter_limit);
+
+		if (node_last != NULL)
+			node_last->next = node_new;
+		else
+			dst->node = node_new;
+		node_last = node_new;
+	} while (src_off < src_len);
+
+	if (likely(node_last != NULL))
+		dst->highbit = node_last->startbit + MAPSIZE;
+	else
+		ebitmap_init(dst);
+
+	return 0;
+}
+
 int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
 {
 	struct ebitmap_node *n1, *n2;
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 8bf41055a6cb7..da2d4651b10d4 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -69,6 +69,12 @@ static inline int ebitmap_node_get_bit(struct ebitmap_node * n,
 
 int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
+int ebitmap_export(const struct ebitmap *src,
+		   unsigned char **dst,
+		   size_t *dst_len);
+int ebitmap_import(const unsigned char *src,
+		   size_t src_len,
+		   struct ebitmap *dst);
 int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_get_bit(struct ebitmap *e, unsigned long bit);
 int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index e15f7e0399b8e..119bd6078ba12 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -10,6 +10,13 @@
  *
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support to import/export the MLS label
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -565,3 +572,152 @@ int mls_compute_sid(struct context *scontext,
 	return -EINVAL;
 }
 
+/**
+ * mls_export_lvl - Export the MLS sensitivity levels
+ * @context: the security context
+ * @low: the low sensitivity level
+ * @high: the high sensitivity level
+ *
+ * Description:
+ * Given the security context copy the low MLS sensitivity level into @low
+ * and the high sensitivity level into @high.  The MLS levels are only
+ * exported if the pointers are not NULL, if they are NULL then that level is
+ * not exported.
+ *
+ */
+void mls_export_lvl(const struct context *context, u32 *low, u32 *high)
+{
+	if (!selinux_mls_enabled)
+		return;
+
+	if (low != NULL)
+		*low = context->range.level[0].sens - 1;
+	if (high != NULL)
+		*high = context->range.level[1].sens - 1;
+}
+
+/**
+ * mls_import_lvl - Import the MLS sensitivity levels
+ * @context: the security context
+ * @low: the low sensitivity level
+ * @high: the high sensitivity level
+ *
+ * Description:
+ * Given the security context and the two sensitivity levels, set the MLS levels
+ * in the context according to the two given as parameters.  This function does
+ * not return a value.
+ *
+ */
+void mls_import_lvl(struct context *context, u32 low, u32 high)
+{
+	if (!selinux_mls_enabled)
+		return;
+
+	context->range.level[0].sens = low + 1;
+	context->range.level[1].sens = high + 1;
+}
+
+/**
+ * mls_export_cat - Export the MLS categories
+ * @context: the security context
+ * @low: the low category
+ * @low_len: length of the cat_low bitmap in bytes
+ * @high: the high category
+ * @high_len: length of the cat_high bitmap in bytes
+ *
+ * Description:
+ * Given the security context export the low MLS category bitmap into @low
+ * and the high category bitmap into @high.  The MLS categories are only
+ * exported if the pointers are not NULL, if they are NULL then that level is
+ * not exported.  The caller is responsible for freeing the memory when
+ * finished.  Returns zero on success, negative values on failure.
+ *
+ */
+int mls_export_cat(const struct context *context,
+		   unsigned char **low,
+		   size_t *low_len,
+		   unsigned char **high,
+		   size_t *high_len)
+{
+	int rc = -EPERM;
+
+	if (!selinux_mls_enabled)
+		return 0;
+
+	if (low != NULL) {
+		rc = ebitmap_export(&context->range.level[0].cat,
+				    low,
+				    low_len);
+		if (rc != 0)
+			goto export_cat_failure;
+	}
+	if (high != NULL) {
+		rc = ebitmap_export(&context->range.level[1].cat,
+				    high,
+				    high_len);
+		if (rc != 0)
+			goto export_cat_failure;
+	}
+
+	return 0;
+
+export_cat_failure:
+	if (low != NULL)
+		kfree(*low);
+	if (high != NULL)
+		kfree(*high);
+	return rc;
+}
+
+/**
+ * mls_import_cat - Import the MLS categories
+ * @context: the security context
+ * @low: the low category
+ * @low_len: length of the cat_low bitmap in bytes
+ * @high: the high category
+ * @high_len: length of the cat_high bitmap in bytes
+ *
+ * Description:
+ * Given the security context and the two category bitmap strings import the
+ * categories into the security context.  The MLS categories are only imported
+ * if the pointers are not NULL, if they are NULL they are skipped.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+int mls_import_cat(struct context *context,
+		   const unsigned char *low,
+		   size_t low_len,
+		   const unsigned char *high,
+		   size_t high_len)
+{
+	int rc = -EPERM;
+
+	if (!selinux_mls_enabled)
+		return 0;
+
+	if (low != NULL) {
+		rc = ebitmap_import(low,
+				    low_len,
+				    &context->range.level[0].cat);
+		if (rc != 0)
+			goto import_cat_failure;
+	}
+	if (high != NULL) {
+		if (high == low)
+			rc = ebitmap_cpy(&context->range.level[1].cat,
+					 &context->range.level[0].cat);
+		else
+			rc = ebitmap_import(high,
+					    high_len,
+					    &context->range.level[1].cat);
+		if (rc != 0)
+			goto import_cat_failure;
+	}
+
+	return 0;
+
+import_cat_failure:
+	ebitmap_destroy(&context->range.level[0].cat);
+	ebitmap_destroy(&context->range.level[1].cat);
+	return rc;
+}
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index 90c5e88987fa8..df6032c6d4926 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -10,6 +10,13 @@
  *
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support to import/export the MLS label
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
 
 #ifndef _SS_MLS_H_
 #define _SS_MLS_H_
@@ -62,5 +69,19 @@ int mls_compute_sid(struct context *scontext,
 int mls_setup_user_range(struct context *fromcon, struct user_datum *user,
                          struct context *usercon);
 
+void mls_export_lvl(const struct context *context, u32 *low, u32 *high);
+void mls_import_lvl(struct context *context, u32 low, u32 high);
+
+int mls_export_cat(const struct context *context,
+		   unsigned char **low,
+		   size_t *low_len,
+		   unsigned char **high,
+		   size_t *high_len);
+int mls_import_cat(struct context *context,
+		   const unsigned char *low,
+		   size_t low_len,
+		   const unsigned char *high,
+		   size_t high_len);
+
 #endif	/* _SS_MLS_H */
 
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index b00ec69f0ffd5..910afa1ffc31c 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -13,6 +13,11 @@
  *
  * 	Added conditional policy language extensions
  *
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support for NetLabel
+ *
+ * Copyright (C) 2006 Hewlett-Packard Development Company, L.P.
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  * Copyright (C) 2003 - 2004 Tresys Technology, LLC
  * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
@@ -29,6 +34,8 @@
 #include <linux/sched.h>
 #include <linux/audit.h>
 #include <linux/mutex.h>
+#include <net/sock.h>
+#include <net/netlabel.h>
 
 #include "flask.h"
 #include "avc.h"
@@ -40,6 +47,8 @@
 #include "services.h"
 #include "conditional.h"
 #include "mls.h"
+#include "objsec.h"
+#include "selinux_netlabel.h"
 
 extern void selnl_notify_policyload(u32 seqno);
 unsigned int policydb_loaded_version;
@@ -1241,6 +1250,7 @@ int security_load_policy(void *data, size_t len)
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_netlbl_cache_invalidate();
 		return 0;
 	}
 
@@ -1295,6 +1305,7 @@ int security_load_policy(void *data, size_t len)
 
 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_netlbl_cache_invalidate();
 
 	return 0;
 
@@ -2133,3 +2144,480 @@ void selinux_audit_set_callback(int (*callback)(void))
 {
 	aurule_callback = callback;
 }
+
+#ifdef CONFIG_NETLABEL
+/*
+ * This is the structure we store inside the NetLabel cache block.
+ */
+#define NETLBL_CACHE(x)           ((struct netlbl_cache *)(x))
+#define NETLBL_CACHE_T_NONE       0
+#define NETLBL_CACHE_T_SID        1
+#define NETLBL_CACHE_T_MLS        2
+struct netlbl_cache {
+	u32 type;
+	union {
+		u32 sid;
+		struct mls_range mls_label;
+	} data;
+};
+
+/**
+ * selinux_netlbl_cache_free - Free the NetLabel cached data
+ * @data: the data to free
+ *
+ * Description:
+ * This function is intended to be used as the free() callback inside the
+ * netlbl_lsm_cache structure.
+ *
+ */
+static void selinux_netlbl_cache_free(const void *data)
+{
+	struct netlbl_cache *cache = NETLBL_CACHE(data);
+	switch (cache->type) {
+	case NETLBL_CACHE_T_MLS:
+		ebitmap_destroy(&cache->data.mls_label.level[0].cat);
+		break;
+	}
+	kfree(data);
+}
+
+/**
+ * selinux_netlbl_cache_add - Add an entry to the NetLabel cache
+ * @skb: the packet
+ * @ctx: the SELinux context
+ *
+ * Description:
+ * Attempt to cache the context in @ctx, which was derived from the packet in
+ * @skb, in the NetLabel subsystem cache.
+ *
+ */
+static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx)
+{
+	struct netlbl_cache *cache = NULL;
+	struct netlbl_lsm_secattr secattr;
+
+	netlbl_secattr_init(&secattr);
+
+	cache = kzalloc(sizeof(*cache),	GFP_ATOMIC);
+	if (cache == NULL)
+		goto netlbl_cache_add_failure;
+	secattr.cache.free = selinux_netlbl_cache_free;
+	secattr.cache.data = (void *)cache;
+
+	cache->type = NETLBL_CACHE_T_MLS;
+	if (ebitmap_cpy(&cache->data.mls_label.level[0].cat,
+			&ctx->range.level[0].cat) != 0)
+		goto netlbl_cache_add_failure;
+	cache->data.mls_label.level[1].cat.highbit =
+		cache->data.mls_label.level[0].cat.highbit;
+	cache->data.mls_label.level[1].cat.node =
+		cache->data.mls_label.level[0].cat.node;
+	cache->data.mls_label.level[0].sens = ctx->range.level[0].sens;
+	cache->data.mls_label.level[1].sens = ctx->range.level[0].sens;
+
+	if (netlbl_cache_add(skb, &secattr) != 0)
+		goto netlbl_cache_add_failure;
+
+	return;
+
+netlbl_cache_add_failure:
+	netlbl_secattr_destroy(&secattr, 1);
+}
+
+/**
+ * selinux_netlbl_cache_invalidate - Invalidate the NetLabel cache
+ *
+ * Description:
+ * Invalidate the NetLabel security attribute mapping cache.
+ *
+ */
+void selinux_netlbl_cache_invalidate(void)
+{
+	netlbl_cache_invalidate();
+}
+
+/**
+ * selinux_netlbl_secattr_to_sid - Convert a NetLabel secattr to a SELinux SID
+ * @skb: the network packet
+ * @secattr: the NetLabel packet security attributes
+ * @base_sid: the SELinux SID to use as a context for MLS only attributes
+ * @sid: the SELinux SID
+ *
+ * Description:
+ * Convert the given NetLabel packet security attributes in @secattr into a
+ * SELinux SID.  If the @secattr field does not contain a full SELinux
+ * SID/context then use the context in @base_sid as the foundation.  If @skb
+ * is not NULL attempt to cache as much data as possible.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb,
+					 struct netlbl_lsm_secattr *secattr,
+					 u32 base_sid,
+					 u32 *sid)
+{
+	int rc = -EIDRM;
+	struct context *ctx;
+	struct context ctx_new;
+	struct netlbl_cache *cache;
+
+	POLICY_RDLOCK;
+
+	if (secattr->cache.data) {
+		cache = NETLBL_CACHE(secattr->cache.data);
+		switch (cache->type) {
+		case NETLBL_CACHE_T_SID:
+			*sid = cache->data.sid;
+			rc = 0;
+			break;
+		case NETLBL_CACHE_T_MLS:
+			ctx = sidtab_search(&sidtab, base_sid);
+			if (ctx == NULL)
+				goto netlbl_secattr_to_sid_return;
+
+			ctx_new.user = ctx->user;
+			ctx_new.role = ctx->role;
+			ctx_new.type = ctx->type;
+			ctx_new.range.level[0].sens =
+				cache->data.mls_label.level[0].sens;
+			ctx_new.range.level[0].cat.highbit =
+				cache->data.mls_label.level[0].cat.highbit;
+			ctx_new.range.level[0].cat.node =
+				cache->data.mls_label.level[0].cat.node;
+			ctx_new.range.level[1].sens =
+				cache->data.mls_label.level[1].sens;
+			ctx_new.range.level[1].cat.highbit =
+				cache->data.mls_label.level[1].cat.highbit;
+			ctx_new.range.level[1].cat.node =
+				cache->data.mls_label.level[1].cat.node;
+
+			rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid);
+			break;
+		default:
+			goto netlbl_secattr_to_sid_return;
+		}
+	} else if (secattr->mls_lvl_vld) {
+		ctx = sidtab_search(&sidtab, base_sid);
+		if (ctx == NULL)
+			goto netlbl_secattr_to_sid_return;
+
+		ctx_new.user = ctx->user;
+		ctx_new.role = ctx->role;
+		ctx_new.type = ctx->type;
+		mls_import_lvl(&ctx_new, secattr->mls_lvl, secattr->mls_lvl);
+		if (secattr->mls_cat) {
+			if (mls_import_cat(&ctx_new,
+					   secattr->mls_cat,
+					   secattr->mls_cat_len,
+					   NULL,
+					   0) != 0)
+				goto netlbl_secattr_to_sid_return;
+			ctx_new.range.level[1].cat.highbit =
+				ctx_new.range.level[0].cat.highbit;
+			ctx_new.range.level[1].cat.node =
+				ctx_new.range.level[0].cat.node;
+		} else {
+			ebitmap_init(&ctx_new.range.level[0].cat);
+			ebitmap_init(&ctx_new.range.level[1].cat);
+		}
+		if (mls_context_isvalid(&policydb, &ctx_new) != 1)
+			goto netlbl_secattr_to_sid_return_cleanup;
+
+		rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid);
+		if (rc != 0)
+			goto netlbl_secattr_to_sid_return_cleanup;
+
+		if (skb != NULL)
+			selinux_netlbl_cache_add(skb, &ctx_new);
+		ebitmap_destroy(&ctx_new.range.level[0].cat);
+	} else {
+		*sid = SECINITSID_UNLABELED;
+		rc = 0;
+	}
+
+netlbl_secattr_to_sid_return:
+	POLICY_RDUNLOCK;
+	return rc;
+netlbl_secattr_to_sid_return_cleanup:
+	ebitmap_destroy(&ctx_new.range.level[0].cat);
+	goto netlbl_secattr_to_sid_return;
+}
+
+/**
+ * selinux_netlbl_skbuff_getsid - Get the sid of a packet using NetLabel
+ * @skb: the packet
+ * @base_sid: the SELinux SID to use as a context for MLS only attributes
+ * @sid: the SID
+ *
+ * Description:
+ * Call the NetLabel mechanism to get the security attributes of the given
+ * packet and use those attributes to determine the correct context/SID to
+ * assign to the packet.  Returns zero on success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
+					u32 base_sid,
+					u32 *sid)
+{
+	int rc;
+	struct netlbl_lsm_secattr secattr;
+
+	netlbl_secattr_init(&secattr);
+	rc = netlbl_skbuff_getattr(skb, &secattr);
+	if (rc == 0)
+		rc = selinux_netlbl_secattr_to_sid(skb,
+						   &secattr,
+						   base_sid,
+						   sid);
+	netlbl_secattr_destroy(&secattr, 0);
+
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_setsid - Label a socket using the NetLabel mechanism
+ * @sock: the socket to label
+ * @sid: the SID to use
+ *
+ * Description:
+ * Attempt to label a socket using the NetLabel mechanism using the given
+ * SID.  Returns zero values on success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid)
+{
+	int rc = -ENOENT;
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+	struct netlbl_lsm_secattr secattr;
+	struct context *ctx;
+
+	if (!ss_initialized)
+		return 0;
+
+	POLICY_RDLOCK;
+
+	ctx = sidtab_search(&sidtab, sid);
+	if (ctx == NULL)
+		goto netlbl_socket_setsid_return;
+
+	netlbl_secattr_init(&secattr);
+	secattr.domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1],
+				 GFP_ATOMIC);
+	mls_export_lvl(ctx, &secattr.mls_lvl, NULL);
+	secattr.mls_lvl_vld = 1;
+	mls_export_cat(ctx,
+		       &secattr.mls_cat,
+		       &secattr.mls_cat_len,
+		       NULL,
+		       NULL);
+
+	rc = netlbl_socket_setattr(sock, &secattr);
+	if (rc == 0)
+		sksec->nlbl_state = NLBL_LABELED;
+
+	netlbl_secattr_destroy(&secattr, 0);
+
+netlbl_socket_setsid_return:
+	POLICY_RDUNLOCK;
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_post_create - Label a socket using NetLabel
+ * @sock: the socket to label
+ * @sock_family: the socket family
+ * @sid: the SID to use
+ *
+ * Description:
+ * Attempt to label a socket using the NetLabel mechanism using the given
+ * SID.  Returns zero values on success, negative values on failure.
+ *
+ */
+int selinux_netlbl_socket_post_create(struct socket *sock,
+				      int sock_family,
+				      u32 sid)
+{
+	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	if (sock_family != PF_INET)
+		return 0;
+
+	sksec->sclass = isec->sclass;
+	sksec->nlbl_state = NLBL_REQUIRE;
+	return selinux_netlbl_socket_setsid(sock, sid);
+}
+
+/**
+ * selinux_netlbl_sock_graft - Netlabel the new socket
+ * @sk: the new connection
+ * @sock: the new socket
+ *
+ * Description:
+ * The connection represented by @sk is being grafted onto @sock so set the
+ * socket's NetLabel to match the SID of @sk.
+ *
+ */
+void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
+{
+	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
+	struct sk_security_struct *sksec = sk->sk_security;
+
+	if (sk->sk_family != PF_INET)
+		return;
+
+	sksec->nlbl_state = NLBL_REQUIRE;
+	sksec->peer_sid = sksec->sid;
+	sksec->sclass = isec->sclass;
+
+	/* Try to set the NetLabel on the socket to save time later, if we fail
+	 * here we will pick up the pieces in later calls to
+	 * selinux_netlbl_inode_permission(). */
+	selinux_netlbl_socket_setsid(sock, sksec->sid);
+}
+
+/**
+ * selinux_netlbl_inet_conn_request - Handle a new connection request
+ * @skb: the packet
+ * @sock_sid: the SID of the parent socket
+ *
+ * Description:
+ * If present, use the security attributes of the packet in @skb and the
+ * parent sock's SID to arrive at a SID for the new child sock.  Returns the
+ * SID of the connection or SECSID_NULL on failure.
+ *
+ */
+u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid)
+{
+	int rc;
+	u32 peer_sid;
+
+	rc = selinux_netlbl_skbuff_getsid(skb, sock_sid, &peer_sid);
+	if (rc != 0)
+		return SECSID_NULL;
+
+	if (peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return peer_sid;
+}
+
+/**
+ * __selinux_netlbl_inode_permission - Label a socket using NetLabel
+ * @inode: the file descriptor's inode
+ * @mask: the permission mask
+ *
+ * Description:
+ * Try to label a socket with the inode's SID using NetLabel.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int __selinux_netlbl_inode_permission(struct inode *inode, int mask)
+{
+	int rc;
+	struct socket *sock = SOCKET_I(inode);
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	lock_sock(sock->sk);
+	rc = selinux_netlbl_socket_setsid(sock, sksec->sid);
+	release_sock(sock->sk);
+
+	return rc;
+}
+
+/**
+ * selinux_netlbl_sock_rcv_skb - Do an inbound access check using NetLabel
+ * @sksec: the sock's sk_security_struct
+ * @skb: the packet
+ * @ad: the audit data
+ *
+ * Description:
+ * Fetch the NetLabel security attributes from @skb and perform an access check
+ * against the receiving socket.  Returns zero on success, negative values on
+ * error.
+ *
+ */
+int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+				struct sk_buff *skb,
+				struct avc_audit_data *ad)
+{
+	int rc;
+	u32 netlbl_sid;
+	u32 recv_perm;
+
+	rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid);
+	if (rc != 0)
+		return rc;
+
+	if (netlbl_sid == SECINITSID_UNLABELED)
+		return 0;
+
+	switch (sksec->sclass) {
+	case SECCLASS_UDP_SOCKET:
+		recv_perm = UDP_SOCKET__RECV_MSG;
+		break;
+	case SECCLASS_TCP_SOCKET:
+		recv_perm = TCP_SOCKET__RECV_MSG;
+		break;
+	default:
+		recv_perm = RAWIP_SOCKET__RECV_MSG;
+	}
+
+	rc = avc_has_perm(sksec->sid,
+			  netlbl_sid,
+			  sksec->sclass,
+			  recv_perm,
+			  ad);
+	if (rc == 0)
+		return 0;
+
+	netlbl_skbuff_err(skb, rc);
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_getpeersec_stream - Return the peer SID of a connected socket
+ * @sock: the socket
+ *
+ * Description:
+ * Examine @sock to find the connected peer's SID.  Returns the SID on success
+ * or SECSID_NULL on error.
+ *
+ */
+u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock)
+{
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	if (sksec->peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return sksec->peer_sid;
+}
+
+/**
+ * selinux_netlbl_socket_getpeersec_dgram - Return the SID of a NetLabel packet
+ * @skb: the packet
+ *
+ * Description:
+ * Examine @skb to find the SID assigned to it by NetLabel.  Returns the SID on
+ * success, SECSID_NULL on error.
+ *
+ */
+u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb)
+{
+	int peer_sid;
+	struct sock *sk = skb->sk;
+	struct inode_security_struct *isec;
+
+	if (sk == NULL || sk->sk_socket == NULL)
+		return SECSID_NULL;
+
+	isec = SOCK_INODE(sk->sk_socket)->i_security;
+	if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0)
+		return SECSID_NULL;
+	if (peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return peer_sid;
+}
+#endif /* CONFIG_NETLABEL */
-- 
GitLab


From 8161327311fe4da1684ed08015e141feb9a0a737 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 3 Aug 2006 16:50:39 -0700
Subject: [PATCH 0394/1063] [NetLabel]: tie NetLabel into the Kconfig system

Modify the net/Kconfig file to enable selecting the NetLabel Kconfig
options.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/Kconfig b/net/Kconfig
index 4959a4e1e0fec..eb855b7fa642e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -249,6 +249,8 @@ source "net/ieee80211/Kconfig"
 config WIRELESS_EXT
 	bool
 
+source "net/netlabel/Kconfig"
+
 endif   # if NET
 endmenu # Networking
 
-- 
GitLab


From 5d0bbeeb144f631150881712607345c532e38e7e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 03:37:36 -0700
Subject: [PATCH 0395/1063] [IPV6]: Remove ndiscs rt6_lock dependency

(Ab)using rt6_lock wouldn't work anymore if rt6_lock is
converted into a per table lock.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d9baca062d24c..ce1f49b595b07 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -747,8 +747,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	}
 }
 
-/* Protected by rt6_lock.  */
-static struct dst_entry *ndisc_dst_gc_list;
 static int ipv6_get_mtu(struct net_device *dev);
 
 static inline unsigned int ipv6_advmss(unsigned int mtu)
@@ -769,6 +767,9 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
 	return mtu;
 }
 
+static struct dst_entry *ndisc_dst_gc_list;
+DEFINE_SPINLOCK(ndisc_lock);
+
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
 				  struct neighbour *neigh,
 				  struct in6_addr *addr,
@@ -809,10 +810,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
 	rt->rt6i_dst.plen = 128;
 #endif
 
-	write_lock_bh(&rt6_lock);
+	spin_lock_bh(&ndisc_lock);
 	rt->u.dst.next = ndisc_dst_gc_list;
 	ndisc_dst_gc_list = &rt->u.dst;
-	write_unlock_bh(&rt6_lock);
+	spin_unlock_bh(&ndisc_lock);
 
 	fib6_force_start_gc();
 
@@ -826,8 +827,11 @@ int ndisc_dst_gc(int *more)
 	int freed;
 
 	next = NULL;
+ 	freed = 0;
+
+	spin_lock_bh(&ndisc_lock);
 	pprev = &ndisc_dst_gc_list;
-	freed = 0;
+
 	while ((dst = *pprev) != NULL) {
 		if (!atomic_read(&dst->__refcnt)) {
 			*pprev = dst->next;
@@ -839,6 +843,8 @@ int ndisc_dst_gc(int *more)
 		}
 	}
 
+	spin_unlock_bh(&ndisc_lock);
+
 	return freed;
 }
 
-- 
GitLab


From c71099acce933455123ee505cc75964610a209ad Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:20:06 -0700
Subject: [PATCH 0396/1063] [IPV6]: Multiple Routing Tables

Adds the framework to support multiple IPv6 routing tables.
Currently all automatically generated routes are put into the
same table. This could be changed at a later point after
considering the produced locking overhead.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h   |  39 ++++-
 include/net/ip6_route.h |   3 +-
 net/ipv6/Kconfig        |   6 +
 net/ipv6/addrconf.c     |   6 +-
 net/ipv6/ip6_fib.c      | 144 ++++++++++++++-
 net/ipv6/route.c        | 380 ++++++++++++++++++++++++++--------------
 6 files changed, 441 insertions(+), 137 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a66e9de16a6cb..818411519c89e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -51,6 +51,8 @@ struct rt6key
 	int		plen;
 };
 
+struct fib6_table;
+
 struct rt6_info
 {
 	union {
@@ -71,6 +73,7 @@ struct rt6_info
 	u32				rt6i_flags;
 	u32				rt6i_metric;
 	atomic_t			rt6i_ref;
+	struct fib6_table		*rt6i_table;
 
 	struct rt6key			rt6i_dst;
 	struct rt6key			rt6i_src;
@@ -143,12 +146,43 @@ struct rt6_statistics {
 
 typedef void			(*f_pnode)(struct fib6_node *fn, void *);
 
-extern struct fib6_node		ip6_routing_table;
+struct fib6_table {
+	struct hlist_node	tb6_hlist;
+	u32			tb6_id;
+	rwlock_t		tb6_lock;
+	struct fib6_node	tb6_root;
+};
+
+#define RT6_TABLE_UNSPEC	RT_TABLE_UNSPEC
+#define RT6_TABLE_MAIN		RT_TABLE_MAIN
+#define RT6_TABLE_LOCAL		RT6_TABLE_MAIN
+#define RT6_TABLE_DFLT		RT6_TABLE_MAIN
+#define RT6_TABLE_INFO		RT6_TABLE_MAIN
+#define RT6_TABLE_PREFIX	RT6_TABLE_MAIN
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#define FIB6_TABLE_MIN		1
+#define FIB6_TABLE_MAX		RT_TABLE_MAX
+#else
+#define FIB6_TABLE_MIN		RT_TABLE_MAIN
+#define FIB6_TABLE_MAX		FIB6_TABLE_MIN
+#endif
+
+#define RT6_F_STRICT		1
+#define RT6_F_HAS_SADDR		2
+
+typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *,
+					 struct flowi *, int);
 
 /*
  *	exported functions
  */
 
+extern struct fib6_table *	fib6_get_table(u32 id);
+extern struct fib6_table *	fib6_new_table(u32 id);
+extern struct dst_entry *	fib6_rule_lookup(struct flowi *fl, int flags,
+						 pol_lookup_t lookup);
+
 extern struct fib6_node		*fib6_lookup(struct fib6_node *root,
 					     struct in6_addr *daddr,
 					     struct in6_addr *saddr);
@@ -161,6 +195,9 @@ extern void			fib6_clean_tree(struct fib6_node *root,
 						int (*func)(struct rt6_info *, void *arg),
 						int prune, void *arg);
 
+extern void			fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+					       int prune, void *arg);
+
 extern int			fib6_walk(struct fib6_walker_t *w);
 extern int			fib6_walk_continue(struct fib6_walker_t *w);
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 96b0e66406ecc..d49c8c90eb687 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -58,7 +58,8 @@ extern int			ipv6_route_ioctl(unsigned int cmd, void __user *arg);
 extern int			ip6_route_add(struct in6_rtmsg *rtmsg,
 					      struct nlmsghdr *,
 					      void *rtattr,
-					      struct netlink_skb_parms *req);
+					      struct netlink_skb_parms *req,
+					      u32 table_id);
 extern int			ip6_ins_rt(struct rt6_info *,
 					   struct nlmsghdr *,
 					   void *rtattr,
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 0ba06c0c5d390..159c63d99c810 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -136,3 +136,9 @@ config IPV6_TUNNEL
 
 	  If unsure, say N.
 
+config IPV6_MULTIPLE_TABLES
+	bool "IPv6: Multiple Routing Tables"
+	depends on IPV6 && EXPERIMENTAL
+	---help---
+	  Support multiple routing tables.
+
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c7852b38e03e4..318767fcefdc7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1525,7 +1525,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
 		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX);
 }
 
 /* Create "default" multicast route to the interface */
@@ -1542,7 +1542,7 @@ static void addrconf_add_mroute(struct net_device *dev)
 	rtmsg.rtmsg_ifindex = dev->ifindex;
 	rtmsg.rtmsg_flags = RTF_UP;
 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL);
 }
 
 static void sit_route_add(struct net_device *dev)
@@ -1559,7 +1559,7 @@ static void sit_route_add(struct net_device *dev)
 	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
 	rtmsg.rtmsg_ifindex	= dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN);
 }
 
 static void addrconf_add_lroute(struct net_device *dev)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 764221220afd3..fcd7da830aca8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -26,6 +26,7 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
+#include <linux/list.h>
 
 #ifdef 	CONFIG_PROC_FS
 #include <linux/proc_fs.h>
@@ -147,6 +148,126 @@ static __inline__ void rt6_release(struct rt6_info *rt)
 		dst_free(&rt->u.dst);
 }
 
+static struct fib6_table fib6_main_tbl = {
+	.tb6_id		= RT6_TABLE_MAIN,
+	.tb6_lock	= RW_LOCK_UNLOCKED,
+	.tb6_root	= {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	},
+};
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+
+#define FIB_TABLE_HASHSZ 256
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
+static struct fib6_table *fib6_alloc_table(u32 id)
+{
+	struct fib6_table *table;
+
+	table = kzalloc(sizeof(*table), GFP_ATOMIC);
+	if (table != NULL) {
+		table->tb6_id = id;
+		table->tb6_lock = RW_LOCK_UNLOCKED;
+		table->tb6_root.leaf = &ip6_null_entry;
+		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+	}
+
+	return table;
+}
+
+static void fib6_link_table(struct fib6_table *tb)
+{
+	unsigned int h;
+
+	h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
+
+	/*
+	 * No protection necessary, this is the only list mutation
+	 * operation, tables never disappear once they exist.
+	 */
+	hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
+}
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+	struct fib6_table *tb;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	tb = fib6_get_table(id);
+	if (tb)
+		return tb;
+
+	tb = fib6_alloc_table(id);
+	if (tb != NULL)
+		fib6_link_table(tb);
+
+	return tb;
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+	struct fib6_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) {
+		if (tb->tb6_id == id) {
+			rcu_read_unlock();
+			return tb;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+				   pol_lookup_t lookup)
+{
+	/*
+	 * TODO: Add rule lookup
+	 */
+	struct fib6_table *table = fib6_get_table(RT6_TABLE_MAIN);
+
+	return (struct dst_entry *) lookup(table, fl, flags);
+}
+
+static void __init fib6_tables_init(void)
+{
+	fib6_link_table(&fib6_main_tbl);
+}
+
+#else
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+	return fib6_get_table(id);
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+	return &fib6_main_tbl;
+}
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+				   pol_lookup_t lookup)
+{
+	return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
+}
+
+static void __init fib6_tables_init(void)
+{
+}
+
+#endif
+
 
 /*
  *	Routing Table
@@ -1064,6 +1185,22 @@ void fib6_clean_tree(struct fib6_node *root,
 	fib6_walk(&c.w);
 }
 
+void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+		    int prune, void *arg)
+{
+	int i;
+	struct fib6_table *table;
+
+	for (i = FIB6_TABLE_MIN; i <= FIB6_TABLE_MAX; i++) {
+		table = fib6_get_table(i);
+		if (table != NULL) {
+			write_lock_bh(&table->tb6_lock);
+			fib6_clean_tree(&table->tb6_root, func, prune, arg);
+			write_unlock_bh(&table->tb6_lock);
+		}
+	}
+}
+
 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
 {
 	if (rt->rt6i_flags & RTF_CACHE) {
@@ -1142,11 +1279,8 @@ void fib6_run_gc(unsigned long dummy)
 	}
 	gc_args.more = 0;
 
-
-	write_lock_bh(&rt6_lock);
 	ndisc_dst_gc(&gc_args.more);
-	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_age, 0, NULL);
 
 	if (gc_args.more)
 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
@@ -1165,6 +1299,8 @@ void __init fib6_init(void)
 					   NULL, NULL);
 	if (!fib6_node_kmem)
 		panic("cannot create fib6_nodes cache");
+
+	fib6_tables_init();
 }
 
 void fib6_gc_cleanup(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ce1f49b595b07..73efdadb9ab89 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -140,16 +140,6 @@ struct rt6_info ip6_null_entry = {
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-	.leaf		= &ip6_null_entry,
-	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
-};
-
-/* Protects all the ip6 fib */
-
-DEFINE_RWLOCK(rt6_lock);
-
-
 /* allocate dst with ip6_dst_ops */
 static __inline__ struct rt6_info *ip6_dst_alloc(void)
 {
@@ -188,8 +178,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 		time_after(jiffies, rt->rt6i_expires));
 }
 
+static inline int rt6_need_strict(struct in6_addr *daddr)
+{
+	return (ipv6_addr_type(daddr) &
+		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+}
+
 /*
- *	Route lookup. Any rt6_lock is implied.
+ *	Route lookup. Any table->tb6_lock is implied.
  */
 
 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@@ -441,27 +437,66 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 }
 #endif
 
-struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
-			    int oif, int strict)
+#define BACKTRACK() \
+if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
+	while ((fn = fn->parent) != NULL) { \
+		if (fn->fn_flags & RTN_TL_ROOT) { \
+			dst_hold(&rt->u.dst); \
+			goto out; \
+		} \
+		if (fn->fn_flags & RTN_RTINFO) \
+			goto restart; \
+	} \
+}
+
+static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
+					     struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
-	rt = rt6_device_match(fn->leaf, oif, strict);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+restart:
+	rt = fn->leaf;
+	rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
+	BACKTRACK();
 	dst_hold(&rt->u.dst);
-	rt->u.dst.__use++;
-	read_unlock_bh(&rt6_lock);
+out:
+	read_unlock_bh(&table->tb6_lock);
 
 	rt->u.dst.lastuse = jiffies;
-	if (rt->u.dst.error == 0)
-		return rt;
-	dst_release(&rt->u.dst);
+	rt->u.dst.__use++;
+
+	return rt;
+
+}
+
+struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
+			    int oif, int strict)
+{
+	struct flowi fl = {
+		.oif = oif,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *daddr,
+				/* TODO: saddr */
+			},
+		},
+	};
+	struct dst_entry *dst;
+	int flags = strict ? RT6_F_STRICT : 0;
+
+	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
+	if (dst->error == 0)
+		return (struct rt6_info *) dst;
+
+	dst_release(dst);
+
 	return NULL;
 }
 
-/* ip6_ins_rt is called with FREE rt6_lock.
+/* ip6_ins_rt is called with FREE table->tb6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
    be destroyed.
@@ -471,10 +506,12 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
 		void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
-	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
-	write_unlock_bh(&rt6_lock);
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
+	err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
@@ -532,51 +569,40 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 	return rt;
 }
 
-#define BACKTRACK() \
-if (rt == &ip6_null_entry) { \
-       while ((fn = fn->parent) != NULL) { \
-		if (fn->fn_flags & RTN_ROOT) { \
-			goto out; \
-		} \
-		if (fn->fn_flags & RTN_RTINFO) \
-			goto restart; \
-	} \
-}
-
-
-void ip6_route_input(struct sk_buff *skb)
+struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl,
+				     int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
-	int strict;
+	int strict = 0;
 	int attempts = 3;
 	int err;
 	int reachable = RT6_SELECT_F_REACHABLE;
 
-	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+	if (flags & RT6_F_STRICT)
+		strict = RT6_SELECT_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
 restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
-			 &skb->nh.ipv6h->saddr);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
+	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
 	BACKTRACK();
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
+		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
 	else {
 #if CLONE_OFFLINK_ROUTE
-		nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
+		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
 #else
 		goto out2;
 #endif
@@ -587,7 +613,7 @@ void ip6_route_input(struct sk_buff *skb)
 
 	dst_hold(&rt->u.dst);
 	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
+		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
 		if (!err)
 			goto out2;
 	}
@@ -596,7 +622,7 @@ void ip6_route_input(struct sk_buff *skb)
 		goto out2;
 
 	/*
-	 * Race condition! In the gap, when rt6_lock was
+	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
 	dst_release(&rt->u.dst);
@@ -608,30 +634,54 @@ void ip6_route_input(struct sk_buff *skb)
 		goto restart_2;
 	}
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->u.dst.lastuse = jiffies;
 	rt->u.dst.__use++;
-	skb->dst = (struct dst_entry *) rt;
-	return;
+
+	return rt;
 }
 
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+void ip6_route_input(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct flowi fl = {
+		.iif = skb->dev->ifindex,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = iph->daddr,
+				.saddr = iph->saddr,
+				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
+			},
+		},
+		.proto = iph->nexthdr,
+	};
+	int flags = 0;
+
+	if (rt6_need_strict(&iph->daddr))
+		flags |= RT6_F_STRICT;
+
+	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
+}
+
+static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
+					     struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
-	int strict;
+	int strict = 0;
 	int attempts = 3;
 	int err;
 	int reachable = RT6_SELECT_F_REACHABLE;
 
-	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+	if (flags & RT6_F_STRICT)
+		strict = RT6_SELECT_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
 restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
 	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
@@ -641,7 +691,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 		goto out;
 
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@@ -667,7 +717,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 		goto out2;
 
 	/*
-	 * Race condition! In the gap, when rt6_lock was
+	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
 	dst_release(&rt->u.dst);
@@ -679,11 +729,21 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 		goto restart_2;
 	}
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->u.dst.lastuse = jiffies;
 	rt->u.dst.__use++;
-	return &rt->u.dst;
+	return rt;
+}
+
+struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+{
+	int flags = 0;
+
+	if (rt6_need_strict(&fl->fl6_dst))
+		flags |= RT6_F_STRICT;
+
+	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 }
 
 
@@ -906,7 +966,8 @@ int ipv6_get_hoplimit(struct net_device *dev)
  */
 
 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
-		void *_rtattr, struct netlink_skb_parms *req)
+		  void *_rtattr, struct netlink_skb_parms *req,
+		  u32 table_id)
 {
 	int err;
 	struct rtmsg *r;
@@ -914,6 +975,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	struct rt6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
+	struct fib6_table *table;
 	int addr_type;
 
 	rta = (struct rtattr **) _rtattr;
@@ -937,6 +999,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	if (rtmsg->rtmsg_metric == 0)
 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
 
+	table = fib6_new_table(table_id);
+	if (table == NULL) {
+		err = -ENOBUFS;
+		goto out;
+	}
+
 	rt = ip6_dst_alloc();
 
 	if (rt == NULL) {
@@ -1093,6 +1161,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
 	rt->u.dst.dev = dev;
 	rt->rt6i_idev = idev;
+	rt->rt6i_table = table;
 	return ip6_ins_rt(rt, nlh, _rtattr, req);
 
 out:
@@ -1108,26 +1177,35 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
 
 	err = fib6_del(rt, nlh, _rtattr, req);
 	dst_release(&rt->u.dst);
 
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
+			 void *_rtattr, struct netlink_skb_parms *req,
+			 u32 table_id)
 {
+	struct fib6_table *table;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 	int err = -ESRCH;
 
-	read_lock_bh(&rt6_lock);
+	table = fib6_get_table(table_id);
+	if (table == NULL)
+		return err;
+
+	read_lock_bh(&table->tb6_lock);
 
-	fn = fib6_locate(&ip6_routing_table,
+	fn = fib6_locate(&table->tb6_root,
 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
 	
@@ -1144,12 +1222,12 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
 				continue;
 			dst_hold(&rt->u.dst);
-			read_unlock_bh(&rt6_lock);
+			read_unlock_bh(&table->tb6_lock);
 
 			return ip6_del_rt(rt, nlh, _rtattr, req);
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
@@ -1161,10 +1239,15 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 		  struct neighbour *neigh, u8 *lladdr, int on_link)
 {
 	struct rt6_info *rt, *nrt = NULL;
-	int strict;
 	struct fib6_node *fn;
+	struct fib6_table *table;
 	struct netevent_redirect netevent;
 
+	/* TODO: Very lazy, might need to check all tables */
+	table = fib6_get_table(RT6_TABLE_MAIN);
+	if (table == NULL)
+		return;
+
 	/*
 	 * Get the "current" route for this destination and
 	 * check if the redirect has come from approriate router.
@@ -1175,10 +1258,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 	 * is a bit fuzzy and one might need to check all possible
 	 * routes.
 	 */
-	strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, dest, NULL);
 restart:
 	for (rt = fn->leaf; rt; rt = rt->u.next) {
 		/*
@@ -1201,7 +1283,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 	}
 	if (rt)
 		dst_hold(&rt->u.dst);
-	else if (strict) {
+	else if (rt6_need_strict(dest)) {
 		while ((fn = fn->parent) != NULL) {
 			if (fn->fn_flags & RTN_ROOT)
 				break;
@@ -1209,7 +1291,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 				goto restart;
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt) {
 		if (net_ratelimit())
@@ -1384,6 +1466,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 #ifdef CONFIG_IPV6_SUBTREES
 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 #endif
+		rt->rt6i_table = ort->rt6i_table;
 	}
 	return rt;
 }
@@ -1394,9 +1477,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
+
+	table = fib6_get_table(RT6_TABLE_INFO);
+	if (table == NULL)
+		return NULL;
 
-	write_lock_bh(&rt6_lock);
-	fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+	write_lock_bh(&table->tb6_lock);
+	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
 	if (!fn)
 		goto out;
 
@@ -1411,7 +1499,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
 		break;
 	}
 out:
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1433,7 +1521,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
 		rtmsg.rtmsg_flags |= RTF_DEFAULT;
 	rtmsg.rtmsg_ifindex = ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
 
 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
 }
@@ -1442,12 +1530,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 {	
 	struct rt6_info *rt;
-	struct fib6_node *fn;
+	struct fib6_table *table;
 
-	fn = &ip6_routing_table;
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return NULL;
 
-	write_lock_bh(&rt6_lock);
-	for (rt = fn->leaf; rt; rt=rt->u.next) {
+	write_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -1455,7 +1545,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 	}
 	if (rt)
 		dst_hold(&rt->u.dst);
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1474,28 +1564,31 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
 
 	rtmsg.rtmsg_ifindex = dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
 void rt6_purge_dflt_routers(void)
 {
 	struct rt6_info *rt;
+	struct fib6_table *table;
+
+	/* NOTE: Keep consistent with rt6_get_dflt_router */
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return;
 
 restart:
-	read_lock_bh(&rt6_lock);
-	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+	read_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
 			dst_hold(&rt->u.dst);
-
-			read_unlock_bh(&rt6_lock);
-
+			read_unlock_bh(&table->tb6_lock);
 			ip6_del_rt(rt, NULL, NULL, NULL);
-
 			goto restart;
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 }
 
 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
@@ -1516,10 +1609,12 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
+					    RT6_TABLE_MAIN);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
+					    RT6_TABLE_MAIN);
 			break;
 		default:
 			err = -EINVAL;
@@ -1593,6 +1688,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 	rt->rt6i_dst.plen = 128;
+	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
 
 	atomic_set(&rt->u.dst.__refcnt, 1);
 
@@ -1611,9 +1707,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
 
 void rt6_ifdown(struct net_device *dev)
 {
-	write_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_ifdown, 0, dev);
 }
 
 struct rt6_mtu_change_arg
@@ -1663,13 +1757,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 
 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 {
-	struct rt6_mtu_change_arg arg;
+	struct rt6_mtu_change_arg arg = {
+		.dev = dev,
+		.mtu = mtu,
+	};
 
-	arg.dev = dev;
-	arg.mtu = mtu;
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
 }
 
 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
@@ -1719,7 +1812,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -1729,7 +1822,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
 }
 
 struct rt6_rtnl_dump_arg
@@ -1761,6 +1854,10 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
 	rtm->rtm_src_len = rt->rt6i_src.plen;
 	rtm->rtm_tos = 0;
+	if (rt->rt6i_table)
+		rtm->rtm_table = rt->rt6i_table->tb6_id;
+	else
+		rtm->rtm_table = RT6_TABLE_UNSPEC;
 	rtm->rtm_table = RT_TABLE_MAIN;
 	if (rt->rt6i_flags&RTF_REJECT)
 		rtm->rtm_type = RTN_UNREACHABLE;
@@ -1868,7 +1965,6 @@ static void fib6_dump_end(struct netlink_callback *cb)
 
 	if (w) {
 		cb->args[0] = 0;
-		fib6_walker_unlink(w);
 		kfree(w);
 	}
 	cb->done = (void*)cb->args[1];
@@ -1883,13 +1979,20 @@ static int fib6_dump_done(struct netlink_callback *cb)
 
 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct fib6_table *table;
 	struct rt6_rtnl_dump_arg arg;
 	struct fib6_walker_t *w;
-	int res;
+	int i, res = 0;
 
 	arg.skb = skb;
 	arg.cb = cb;
 
+	/*
+	 * cb->args[0] = pointer to walker structure
+	 * cb->args[1] = saved cb->done() pointer
+	 * cb->args[2] = current table being dumped
+	 */
+
 	w = (void*)cb->args[0];
 	if (w == NULL) {
 		/* New dump:
@@ -1905,24 +2008,48 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 		w = kzalloc(sizeof(*w), GFP_ATOMIC);
 		if (w == NULL)
 			return -ENOMEM;
-		RT6_TRACE("dump<%p", w);
-		w->root = &ip6_routing_table;
 		w->func = fib6_dump_node;
 		w->args = &arg;
 		cb->args[0] = (long)w;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk(w);
-		read_unlock_bh(&rt6_lock);
+		cb->args[2] = FIB6_TABLE_MIN;
 	} else {
 		w->args = &arg;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk_continue(w);
-		read_unlock_bh(&rt6_lock);
+		i = cb->args[2];
+		if (i > FIB6_TABLE_MAX)
+			goto end;
+
+		table = fib6_get_table(i);
+		if (table != NULL) {
+			read_lock_bh(&table->tb6_lock);
+			w->root = &table->tb6_root;
+			res = fib6_walk_continue(w);
+			read_unlock_bh(&table->tb6_lock);
+			if (res != 0) {
+				if (res < 0)
+					fib6_walker_unlink(w);
+				goto end;
+			}
+		}
+
+		fib6_walker_unlink(w);
+		cb->args[2] = ++i;
 	}
-#if RT6_DEBUG >= 3
-	if (res <= 0 && skb->len == 0)
-		RT6_TRACE("%p>dump end\n", w);
-#endif
+
+	for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
+		table = fib6_get_table(i);
+		if (table == NULL)
+			continue;
+
+		read_lock_bh(&table->tb6_lock);
+		w->root = &table->tb6_root;
+		res = fib6_walk(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res)
+			break;
+	}
+end:
+	cb->args[2] = i;
+
 	res = res < 0 ? res : skb->len;
 	/* res < 0 is an error. (really, impossible)
 	   res == 0 means that dump is complete, but skb still can contain data.
@@ -2102,16 +2229,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 
 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
 {
-	struct rt6_proc_arg arg;
-	arg.buffer = buffer;
-	arg.offset = offset;
-	arg.length = length;
-	arg.skip = 0;
-	arg.len = 0;
+	struct rt6_proc_arg arg = {
+		.buffer = buffer,
+		.offset = offset,
+		.length = length,
+	};
 
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	fib6_clean_all(rt6_info_route, 0, &arg);
 
 	*start = buffer;
 	if (offset)
-- 
GitLab


From 14c0b97ddfc2944982d078b8e33b088840068976 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 03:38:38 -0700
Subject: [PATCH 0397/1063] [NET]: Protocol Independent Policy Routing Rules
 Framework

Derived from net/ipv4/fib_rules.c

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  60 ++++++
 include/net/fib_rules.h   |  90 +++++++++
 net/Kconfig               |   3 +
 net/core/Makefile         |   1 +
 net/core/fib_rules.c      | 416 ++++++++++++++++++++++++++++++++++++++
 net/core/rtnetlink.c      |   9 +-
 6 files changed, 577 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/fib_rules.h
 create mode 100644 include/net/fib_rules.h
 create mode 100644 net/core/fib_rules.c

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
new file mode 100644
index 0000000000000..5e503f0ca6e46
--- /dev/null
+++ b/include/linux/fib_rules.h
@@ -0,0 +1,60 @@
+#ifndef __LINUX_FIB_RULES_H
+#define __LINUX_FIB_RULES_H
+
+#include <linux/types.h>
+#include <linux/rtnetlink.h>
+
+/* rule is permanent, and cannot be deleted */
+#define FIB_RULE_PERMANENT	1
+
+struct fib_rule_hdr
+{
+	__u8		family;		/* address family; selects the rules ops */
+	__u8		dst_len;	/* destination prefix length */
+	__u8		src_len;	/* source prefix length */
+	__u8		tos;
+
+	__u8		table;		/* routing table id */
+	__u8		res1;	/* reserved */
+	__u8		res2;	/* reserved */
+	__u8		action;		/* one of FR_ACT_* */
+
+	__u32		flags;		/* FIB_RULE_* */
+};
+
+enum
+{
+	FRA_UNSPEC,
+	FRA_DST,	/* destination address */
+	FRA_SRC,	/* source address */
+	FRA_IFNAME,	/* interface name */
+	FRA_UNUSED1,
+	FRA_UNUSED2,
+	FRA_PRIORITY,	/* priority/preference */
+	FRA_UNUSED3,
+	FRA_UNUSED4,
+	FRA_UNUSED5,
+	FRA_FWMARK,	/* netfilter mark (IPv4) */
+	FRA_FLOW,	/* flow/class id */
+	__FRA_MAX
+};
+
+#define FRA_MAX (__FRA_MAX - 1)
+
+enum
+{
+	FR_ACT_UNSPEC,
+	FR_ACT_TO_TBL,		/* Pass to fixed table */
+	FR_ACT_RES1,
+	FR_ACT_RES2,
+	FR_ACT_RES3,
+	FR_ACT_RES4,
+	FR_ACT_BLACKHOLE,	/* Drop without notification */
+	FR_ACT_UNREACHABLE,	/* Drop with ENETUNREACH */
+	FR_ACT_PROHIBIT,	/* Drop with EACCES */
+	__FR_ACT_MAX,
+};
+
+#define FR_ACT_MAX (__FR_ACT_MAX - 1)
+
+#endif
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
new file mode 100644
index 0000000000000..61375d9e53f82
--- /dev/null
+++ b/include/net/fib_rules.h
@@ -0,0 +1,90 @@
+#ifndef __NET_FIB_RULES_H
+#define __NET_FIB_RULES_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/fib_rules.h>
+#include <net/flow.h>
+#include <net/netlink.h>
+
+struct fib_rule
+{
+	struct list_head	list;	/* entry in ops->rules_list */
+	atomic_t		refcnt;	/* rule is freed via RCU at zero */
+	int			ifindex; /* 0: any device, -1: ifname unresolved */
+	char			ifname[IFNAMSIZ];
+	u32			pref;	/* list is kept in ascending pref order */
+	u32			flags;	/* FIB_RULE_* */
+	u32			table;
+	u8			action;	/* FR_ACT_* */
+	struct rcu_head		rcu;	/* used by fib_rule_put() for call_rcu() */
+};
+
+struct fib_lookup_arg
+{
+	void			*lookup_ptr;
+	void			*result;
+	struct fib_rule		*rule;
+};
+
+struct fib_rules_ops
+{
+	int			family;		/* one ops set per family */
+	struct list_head	list;		/* entry in the global ops list */
+	int			rule_size;	/* bytes allocated per rule */
+	/* Callbacks: all are mandatory except default_pref, which may be NULL. */
+	int			(*action)(struct fib_rule *,
+					  struct flowi *, int,
+					  struct fib_lookup_arg *);
+	int			(*match)(struct fib_rule *,
+					 struct flowi *, int);
+	int			(*configure)(struct fib_rule *,
+					     struct sk_buff *,
+					     struct nlmsghdr *,
+					     struct fib_rule_hdr *,
+					     struct nlattr **);
+	int			(*compare)(struct fib_rule *,
+					   struct fib_rule_hdr *,
+					   struct nlattr **);
+	int			(*fill)(struct fib_rule *, struct sk_buff *,
+					struct nlmsghdr *,
+					struct fib_rule_hdr *);
+	u32			(*default_pref)(void);
+
+	int			nlgroup;	/* group for change notifications */
+	struct nla_policy	*policy;	/* passed to nlmsg_parse() */
+	struct list_head	*rules_list;	/* head of this family's rules */
+	struct module		*owner;		/* pinned while ops are in use */
+};
+
+static inline void fib_rule_get(struct fib_rule *rule)
+{
+	atomic_inc(&rule->refcnt);
+}
+
+static inline void fib_rule_put_rcu(struct rcu_head *head)
+{
+	struct fib_rule *rule = container_of(head, struct fib_rule, rcu);
+	kfree(rule);
+}
+
+static inline void fib_rule_put(struct fib_rule *rule)
+{
+	if (atomic_dec_and_test(&rule->refcnt))
+		call_rcu(&rule->rcu, fib_rule_put_rcu);
+}
+
+extern int			fib_rules_register(struct fib_rules_ops *);
+extern int			fib_rules_unregister(struct fib_rules_ops *);
+
+extern int			fib_rules_lookup(struct fib_rules_ops *,
+						 struct flowi *, int flags,
+						 struct fib_lookup_arg *);
+
+extern int			fib_nl_newrule(struct sk_buff *,
+					       struct nlmsghdr *, void *);
+extern int			fib_nl_delrule(struct sk_buff *,
+					       struct nlmsghdr *, void *);
+extern int			fib_rules_dump(struct sk_buff *,
+					       struct netlink_callback *, int);
+#endif
diff --git a/net/Kconfig b/net/Kconfig
index eb855b7fa642e..6528a935622cc 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -251,6 +251,9 @@ config WIRELESS_EXT
 
 source "net/netlabel/Kconfig"
 
+config FIB_RULES
+	bool
+
 endif   # if NET
 endmenu # Networking
 
diff --git a/net/core/Makefile b/net/core/Makefile
index 2645ba428d489..119568077dab3 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_WIRELESS_EXT) += wireless.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
new file mode 100644
index 0000000000000..6cdad24038e26
--- /dev/null
+++ b/net/core/fib_rules.c
@@ -0,0 +1,416 @@
+/*
+ * net/core/fib_rules.c		Generic Routing Rules
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <net/fib_rules.h>
+
+static LIST_HEAD(rules_ops);
+static DEFINE_SPINLOCK(rules_mod_lock);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops);
+
+static struct fib_rules_ops *lookup_rules_ops(int family)
+{
+	struct fib_rules_ops *ops;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &rules_ops, list) {
+		if (ops->family == family) {
+			if (!try_module_get(ops->owner))
+				ops = NULL;	/* could not pin the owner module */
+			rcu_read_unlock();
+			return ops;	/* caller releases via rules_ops_put() */
+		}
+	}
+	rcu_read_unlock();
+	/* No ops registered for this family. */
+	return NULL;
+}
+
+static void rules_ops_put(struct fib_rules_ops *ops)
+{
+	if (ops)
+		module_put(ops->owner);
+}
+
+int fib_rules_register(struct fib_rules_ops *ops)
+{
+	int err = -EEXIST;
+	struct fib_rules_ops *o;
+
+	if (ops->rule_size < sizeof(struct fib_rule))
+		return -EINVAL;	/* too small to hold a struct fib_rule */
+	/* Every callback except default_pref is mandatory. */
+	if (ops->match == NULL || ops->configure == NULL ||
+	    ops->compare == NULL || ops->fill == NULL ||
+	    ops->action == NULL)
+		return -EINVAL;
+	/* Only one ops set per address family; a duplicate yields -EEXIST. */
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list)
+		if (ops->family == o->family)
+			goto errout;
+
+	list_add_tail_rcu(&ops->list, &rules_ops);
+	err = 0;
+errout:
+	spin_unlock(&rules_mod_lock);
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_register);
+
+static void cleanup_ops(struct fib_rules_ops *ops)
+{
+	struct fib_rule *rule, *tmp;
+
+	list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+		list_del_rcu(&rule->list);
+		fib_rule_put(rule);
+	}
+}
+
+int fib_rules_unregister(struct fib_rules_ops *ops)
+{
+	int err = 0;
+	struct fib_rules_ops *o;
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list) {
+		if (o == ops) {
+			list_del_rcu(&o->list);
+			cleanup_ops(ops);	/* unlink and put every rule */
+			goto out;
+		}
+	}
+	/* ops was not on the registration list. */
+	err = -ENOENT;
+out:
+	spin_unlock(&rules_mod_lock);
+	/* Wait for RCU readers that may still be walking the lists. */
+	synchronize_rcu();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_unregister);
+
+int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
+		     int flags, struct fib_lookup_arg *arg)
+{
+	struct fib_rule *rule;
+	int err;
+
+	rcu_read_lock();
+	/* Walk rules in list order; the first action not returning -EAGAIN wins. */
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+		if (rule->ifindex && (rule->ifindex != fl->iif))
+			continue;
+
+		if (!ops->match(rule, fl, flags))
+			continue;
+
+		err = ops->action(rule, fl, flags, arg);
+		if (err != -EAGAIN) {
+			fib_rule_get(rule);	/* caller must fib_rule_put() it */
+			arg->rule = rule;
+			goto out;
+		}
+	}
+	/* Ran out of rules; arg->rule is left untouched on this path. */
+	err = -ENETUNREACH;
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_lookup);
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule, *r, *last = NULL;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+	/* RTM_NEWRULE: resolve the family's ops (pins the owner module). */
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+	err = -EINVAL;	/* parsing left 0 here; bare gotos below must fail */
+	if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ)
+		goto errout;
+
+	rule = kzalloc(ops->rule_size, GFP_KERNEL);
+	if (rule == NULL) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	if (tb[FRA_PRIORITY])
+		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+
+	if (tb[FRA_IFNAME]) {
+		struct net_device *dev;
+
+		rule->ifindex = -1;	/* stays -1 while no such device exists */
+		if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME],
+				IFNAMSIZ) >= IFNAMSIZ)
+			goto errout_free;
+
+		dev = __dev_get_by_name(rule->ifname);
+		if (dev)
+			rule->ifindex = dev->ifindex;
+	}
+
+	rule->action = frh->action;
+	rule->flags = frh->flags;
+	rule->table = frh->table;
+
+	if (!rule->pref && ops->default_pref)
+		rule->pref = ops->default_pref();
+
+	err = ops->configure(rule, skb, nlh, frh, tb);
+	if (err < 0)
+		goto errout_free;
+	/* Insert behind the last rule whose pref does not exceed ours. */
+	list_for_each_entry(r, ops->rules_list, list) {
+		if (r->pref > rule->pref)
+			break;
+		last = r;
+	}
+
+	fib_rule_get(rule);	/* reference held by the rules list */
+
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, ops->rules_list);
+
+	notify_rule_change(RTM_NEWRULE, rule, ops);
+	rules_ops_put(ops);
+	return 0;
+
+errout_free:
+	kfree(rule);
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+	/* RTM_DELRULE: resolve the family's ops (pins the owner module). */
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+	/* Unset selectors are wildcards; the first matching rule is deleted. */
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (frh->action && (frh->action != rule->action))
+			continue;
+
+		if (frh->table && (frh->table != rule->table))
+			continue;
+
+		if (tb[FRA_PRIORITY] &&
+		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
+			continue;
+
+		if (tb[FRA_IFNAME] &&
+		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+			continue;
+
+		if (!ops->compare(rule, frh, tb))
+			continue;
+
+		if (rule->flags & FIB_RULE_PERMANENT) {
+			err = -EPERM;
+			goto errout;
+		}
+
+		list_del_rcu(&rule->list);
+		synchronize_rcu();
+		notify_rule_change(RTM_DELRULE, rule, ops);
+		fib_rule_put(rule);	/* drop the reference held by the list */
+		rules_ops_put(ops);
+		return 0;
+	}
+
+	err = -ENOENT;
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
+			    u32 pid, u32 seq, int type, int flags,
+			    struct fib_rules_ops *ops)
+{
+	struct nlmsghdr *nlh;
+	struct fib_rule_hdr *frh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
+	if (nlh == NULL)
+		return -1;
+	/* Generic header fields; family, lengths and tos are left to ops->fill. */
+	frh = nlmsg_data(nlh);
+	frh->table = rule->table;
+	frh->res1 = 0;
+	frh->res2 = 0;
+	frh->action = rule->action;
+	frh->flags = rule->flags;
+	/* Optional attributes are emitted only when set on the rule. */
+	if (rule->ifname[0])
+		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+
+	if (rule->pref)
+		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+	/* Let the family add its own header fields and attributes. */
+	if (ops->fill(rule, skb, nlh, frh) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
+}
+
+int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+{
+	int idx = 0;
+	struct fib_rule *rule;
+	struct fib_rules_ops *ops;
+
+	ops = lookup_rules_ops(family);
+	if (ops == NULL)
+		return -EAFNOSUPPORT;
+	/* Dump rules in list order; cb->args[0] counts those already sent. */
+	rcu_read_lock();
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+		if (idx < cb->args[0])
+			goto skip;
+
+		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
+				     NLM_F_MULTI, ops) < 0)
+			break;	/* skb is full; resume from idx next time */
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	rules_ops_put(ops);
+
+	return skb->len;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops)
+{
+	int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128);
+	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+
+	if (skb == NULL)
+		netlink_set_err(rtnl, 0, ops->nlgroup, ENOBUFS);
+	else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) {
+		kfree_skb(skb);
+		netlink_set_err(rtnl, 0, ops->nlgroup, EINVAL);
+	} else
+		netlink_broadcast(rtnl, skb, 0, ops->nlgroup, GFP_KERNEL);
+}
+
+static void attach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list) {
+		if (rule->ifindex == -1 &&
+		    strcmp(dev->name, rule->ifname) == 0)
+			rule->ifindex = dev->ifindex;
+	}
+}
+
+static void detach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list)
+		if (rule->ifindex == dev->ifindex)
+			rule->ifindex = -1;
+}
+
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+	rcu_read_lock();
+	/* Keep rule->ifindex in sync as devices are registered and removed. */
+	switch (event) {
+	case NETDEV_REGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			attach_rules(ops->rules_list, dev);
+		break;
+
+	case NETDEV_UNREGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			detach_rules(ops->rules_list, dev);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fib_rules_notifier = {
+	.notifier_call = fib_rules_event,
+};
+
+static int __init fib_rules_init(void)
+{
+	return register_netdevice_notifier(&fib_rules_notifier);
+}
+
+subsys_initcall(fib_rules_init);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 30cc1ba6ed5c4..aa7cff2257b1d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -49,6 +49,7 @@
 #include <net/udp.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
+#include <net/fib_rules.h>
 #include <net/netlink.h>
 #ifdef CONFIG_NET_WIRELESS_RTNETLINK
 #include <linux/wireless.h>
@@ -103,7 +104,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
 	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
 	[RTM_FAM(RTM_NEWNEIGH)]     = NLMSG_LENGTH(sizeof(struct ndmsg)),
-	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct rtmsg)),
+	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
 	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
 	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
 	[RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)),
@@ -120,7 +121,7 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
 	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
 	[RTM_FAM(RTM_NEWNEIGH)]     = NDA_MAX,
-	[RTM_FAM(RTM_NEWRULE)]      = RTA_MAX,
+	[RTM_FAM(RTM_NEWRULE)]      = FRA_MAX,
 	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
@@ -757,6 +758,10 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
 	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
 	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
+#ifdef CONFIG_FIB_RULES
+	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
+	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
+#endif
 	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
 	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
 	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
-- 
GitLab


From 101367c2f8c464ea96643192673aa18d88e6336d Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 03:39:02 -0700
Subject: [PATCH 0398/1063] [IPV6]: Policy Routing Rules

Adds support for policy routing rules including a new
local table for routes with a local destination.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |   2 +
 include/net/ip6_fib.h     |   9 +-
 include/net/ip6_route.h   |   5 +
 net/ipv6/Kconfig          |   1 +
 net/ipv6/Makefile         |   1 +
 net/ipv6/addrconf.c       |   1 +
 net/ipv6/fib6_rules.c     | 251 ++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_fib.c        |  21 ++--
 net/ipv6/route.c          |  50 ++++++++
 9 files changed, 329 insertions(+), 12 deletions(-)
 create mode 100644 net/ipv6/fib6_rules.c

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index facd9ee37b76f..bf353538ae93d 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -889,6 +889,8 @@ enum rtnetlink_groups {
 	RTNLGRP_NOP4,
 	RTNLGRP_IPV6_PREFIX,
 #define RTNLGRP_IPV6_PREFIX	RTNLGRP_IPV6_PREFIX
+	RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE	RTNLGRP_IPV6_RULE
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 818411519c89e..7b47e8d5a765b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -155,7 +155,6 @@ struct fib6_table {
 
 #define RT6_TABLE_UNSPEC	RT_TABLE_UNSPEC
 #define RT6_TABLE_MAIN		RT_TABLE_MAIN
-#define RT6_TABLE_LOCAL		RT6_TABLE_MAIN
 #define RT6_TABLE_DFLT		RT6_TABLE_MAIN
 #define RT6_TABLE_INFO		RT6_TABLE_MAIN
 #define RT6_TABLE_PREFIX	RT6_TABLE_MAIN
@@ -163,9 +162,11 @@ struct fib6_table {
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB6_TABLE_MIN		1
 #define FIB6_TABLE_MAX		RT_TABLE_MAX
+#define RT6_TABLE_LOCAL		RT_TABLE_LOCAL
 #else
 #define FIB6_TABLE_MIN		RT_TABLE_MAIN
 #define FIB6_TABLE_MAX		FIB6_TABLE_MIN
+#define RT6_TABLE_LOCAL		RT6_TABLE_MAIN
 #endif
 
 #define RT6_F_STRICT		1
@@ -221,5 +222,11 @@ extern void			fib6_run_gc(unsigned long dummy);
 extern void			fib6_gc_cleanup(void);
 
 extern void			fib6_init(void);
+
+extern void			fib6_rules_init(void);
+extern void			fib6_rules_cleanup(void);
+extern int			fib6_rules_dump(struct sk_buff *,
+						struct netlink_callback *);
+
 #endif
 #endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index d49c8c90eb687..9bfa3cc6cedb9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -41,6 +41,11 @@ struct pol_chain {
 
 extern struct rt6_info	ip6_null_entry;
 
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+extern struct rt6_info	ip6_prohibit_entry;
+extern struct rt6_info	ip6_blk_hole_entry;
+#endif
+
 extern int ip6_rt_gc_interval;
 
 extern void			ip6_route_input(struct sk_buff *skb);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 159c63d99c810..36a6c2b79889e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -139,6 +139,7 @@ config IPV6_TUNNEL
 config IPV6_MULTIPLE_TABLES
 	bool "IPv6: Multiple Routing Tables"
 	depends on IPV6 && EXPERIMENTAL
+	select FIB_RULES
 	---help---
 	  Support multiple routing tables.
 
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 386e0a6269481..9eebf6091279d 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -13,6 +13,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-objs += $(ipv6-y)
 
 obj-$(CONFIG_INET6_AH) += ah6.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 318767fcefdc7..ed766eebc0227 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3528,6 +3528,7 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
 	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
 				      .dumpit	= inet6_dump_fib, },
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib6_rules_dump,   },
 };
 
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
new file mode 100644
index 0000000000000..c3c8195744ee4
--- /dev/null
+++ b/net/ipv6/fib6_rules.c
@@ -0,0 +1,251 @@
+/*
+ * net/ipv6/fib6_rules.c	IPv6 Routing Policy Rules
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors
+ *	Thomas Graf		<tgraf@suug.ch>
+ *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
+ */
+
+#include <linux/config.h>
+#include <linux/netdevice.h>
+
+#include <net/fib_rules.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netlink.h>
+
+struct fib6_rule
+{
+	struct fib_rule		common;
+	struct rt6key		src;
+	struct rt6key		dst;
+	u8			tclass;
+};
+
+static struct fib_rules_ops fib6_rules_ops;
+
+static struct fib6_rule main_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFE,
+		.action =	FR_ACT_TO_TBL,
+		.table =	RT6_TABLE_MAIN,
+	},
+};
+
+static struct fib6_rule local_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0,
+		.action =	FR_ACT_TO_TBL,
+		.table =	RT6_TABLE_LOCAL,
+		.flags =	FIB_RULE_PERMANENT,
+	},
+};
+
+static LIST_HEAD(fib6_rules);
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+				   pol_lookup_t lookup)
+{
+	struct fib_lookup_arg arg = { .lookup_ptr = lookup, };
+
+	fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg);
+	if (arg.rule)
+		fib_rule_put(arg.rule);
+	if (arg.result)
+		return (struct dst_entry *) arg.result;
+	dst_hold(&ip6_null_entry.u.dst);	/* no rule yielded a route */
+	return &ip6_null_entry.u.dst;
+}
+
+/* Per-rule action: -EAGAIN (result NULL) makes the caller try the next rule. */
+int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+		     int flags, struct fib_lookup_arg *arg)
+{
+	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
+	pol_lookup_t lookup = arg->lookup_ptr;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		rt = &ip6_null_entry;
+		goto discard_pkt;
+	default:
+	case FR_ACT_BLACKHOLE:
+		rt = &ip6_blk_hole_entry;
+		goto discard_pkt;
+	case FR_ACT_PROHIBIT:
+		rt = &ip6_prohibit_entry;
+		goto discard_pkt;
+	}
+	table = fib6_get_table(rule->table);
+	if (table)
+		rt = lookup(table, flp, flags);
+	if (rt != &ip6_null_entry)
+		goto out;
+	dst_release(&rt->u.dst);	/* miss here: give the reference back */
+	rt = NULL;
+	goto out;
+discard_pkt:
+	dst_hold(&rt->u.dst);
+out:
+	arg->result = rt;
+	return rt == NULL ? -EAGAIN : 0;
+}
+
+
+static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	struct fib6_rule *r = (struct fib6_rule *) rule;
+
+	if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
+		return 0;
+	/* The source prefix is checked only when RT6_F_HAS_SADDR is set. */
+	if ((flags & RT6_F_HAS_SADDR) &&
+	    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+		return 0;
+
+	return 1;	/* rule applies to this flow */
+}
+
+static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING },	/* indexed by FRA_* */
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .minlen = sizeof(struct in6_addr) },
+	[FRA_DST]	= { .minlen = sizeof(struct in6_addr) },
+};
+
+static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (frh->src_len > 128 || frh->dst_len > 128 ||
+	    (frh->tos & ~IPV6_FLOWINFO_MASK))
+		goto errout;
+	/* Table rules need a concrete table id that fib6_new_table() accepts. */
+	if (rule->action == FR_ACT_TO_TBL) {
+		if (rule->table == RT6_TABLE_UNSPEC)
+			goto errout;
+
+		if (fib6_new_table(rule->table) == NULL) {
+			err = -ENOBUFS;
+			goto errout;
+		}
+	}
+	/* Addresses are optional; only supplied attributes are copied in. */
+	if (tb[FRA_SRC])
+		nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
+			   sizeof(struct in6_addr));
+
+	if (tb[FRA_DST])
+		nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
+			   sizeof(struct in6_addr));
+
+	rule6->src.plen = frh->src_len;
+	rule6->dst.plen = frh->dst_len;
+	rule6->tclass = frh->tos;
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (frh->src_len && (rule6->src.plen != frh->src_len))
+		return 0;
+	/* As with src_len above, zero header fields are not compared. */
+	if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
+		return 0;
+
+	if (frh->tos && (rule6->tclass != frh->tos))
+		return 0;
+	/* Addresses are compared only when the request supplies them. */
+	if (tb[FRA_SRC] &&
+	    nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
+		return 0;
+
+	if (tb[FRA_DST] &&
+	    nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
+		return 0;
+
+	return 1;	/* every supplied selector matched */
+}
+
+static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	frh->family = AF_INET6;
+	frh->dst_len = rule6->dst.plen;
+	frh->src_len = rule6->src.plen;
+	frh->tos = rule6->tclass;
+
+	if (rule6->dst.plen)
+		NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
+			&rule6->dst.addr);
+
+	if (rule6->src.plen)
+		NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
+			&rule6->src.addr);
+
+	return 0;
+
+nla_put_failure:
+	return -ENOBUFS;
+}
+
+int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return fib_rules_dump(skb, cb, AF_INET6);
+}
+
+static u32 fib6_rule_default_pref(void)
+{
+	return 0x3FFF;
+}
+
+static struct fib_rules_ops fib6_rules_ops = {
+	.family			= AF_INET6,
+	.rule_size		= sizeof(struct fib6_rule),
+	.action			= fib6_rule_action,
+	.match			= fib6_rule_match,
+	.configure		= fib6_rule_configure,
+	.compare		= fib6_rule_compare,
+	.fill			= fib6_rule_fill,
+	.default_pref		= fib6_rule_default_pref,
+	.nlgroup		= RTNLGRP_IPV6_RULE,
+	.policy			= fib6_rule_policy,
+	.rules_list		= &fib6_rules,
+	.owner			= THIS_MODULE,
+};
+
+void __init fib6_rules_init(void)
+{
+	list_add_tail(&local_rule.common.list, &fib6_rules);
+	list_add_tail(&main_rule.common.list, &fib6_rules);
+
+	fib_rules_register(&fib6_rules_ops);
+}
+
+void fib6_rules_cleanup(void)
+{
+	fib_rules_unregister(&fib6_rules_ops);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index fcd7da830aca8..ce226c14bef5b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -159,6 +159,15 @@ static struct fib6_table fib6_main_tbl = {
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
+static struct fib6_table fib6_local_tbl = {
+	.tb6_id		= RT6_TABLE_LOCAL,
+	.tb6_lock	= RW_LOCK_UNLOCKED,
+	.tb6_root 	= {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	},
+};
+
 #define FIB_TABLE_HASHSZ 256
 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
@@ -228,20 +237,10 @@ struct fib6_table *fib6_get_table(u32 id)
 	return NULL;
 }
 
-struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
-				   pol_lookup_t lookup)
-{
-	/*
-	 * TODO: Add rule lookup
-	 */
-	struct fib6_table *table = fib6_get_table(RT6_TABLE_MAIN);
-
-	return (struct dst_entry *) lookup(table, fl, flags);
-}
-
 static void __init fib6_tables_init(void)
 {
 	fib6_link_table(&fib6_main_tbl);
+	fib6_link_table(&fib6_local_tbl);
 }
 
 #else
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 73efdadb9ab89..438977e2085df 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -140,6 +140,50 @@ struct rt6_info ip6_null_entry = {
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+
+struct rt6_info ip6_prohibit_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EACCES,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_prohibit_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
+
+struct rt6_info ip6_blk_hole_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EINVAL,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
+
+#endif
+
 /* allocate dst with ip6_dst_ops */
 static __inline__ struct rt6_info *ip6_dst_alloc(void)
 {
@@ -2408,10 +2452,16 @@ void __init ip6_route_init(void)
 #ifdef CONFIG_XFRM
 	xfrm6_init();
 #endif
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	fib6_rules_init();
+#endif
 }
 
 void ip6_route_cleanup(void)
 {
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	fib6_rules_cleanup();
+#endif
 #ifdef CONFIG_PROC_FS
 	proc_net_remove("ipv6_route");
 	proc_net_remove("rt6_stats");
-- 
GitLab


From e1ef4bf23b1ced0bf78a1c98289f746486e5c912 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 03:39:22 -0700
Subject: [PATCH 0399/1063] [IPV4]: Use Protocol Independent Policy Routing
 Rules Framework

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h    |  14 +-
 net/ipv4/Kconfig        |   1 +
 net/ipv4/devinet.c      |   4 +-
 net/ipv4/fib_frontend.c |   2 +-
 net/ipv4/fib_rules.c    | 605 ++++++++++++++++------------------------
 5 files changed, 249 insertions(+), 377 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a095d1dec7a41..14c82e611c95f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -18,6 +18,7 @@
 
 #include <net/flow.h>
 #include <linux/seq_file.h>
+#include <net/fib_rules.h>
 
 /* WARNING: The ordering of these elements must match ordering
  *          of RTA_* rtnetlink attribute numbers.
@@ -203,9 +204,8 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result
 #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
 
 extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
-extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
+extern int fib_lookup(struct flowi *flp, struct fib_result *res);
 extern struct fib_table *__fib_new_table(int id);
-extern void fib_rule_put(struct fib_rule *r);
 
 static inline struct fib_table *fib_get_table(int id)
 {
@@ -251,15 +251,15 @@ extern u32  __fib_res_prefsrc(struct fib_result *res);
 extern struct fib_table *fib_hash_init(int id);
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-/* Exported by fib_rules.c */
+extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
+
+extern void __init fib4_rules_init(void);
+extern void __exit fib4_rules_cleanup(void);
 
-extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
 #ifdef CONFIG_NET_CLS_ROUTE
 extern u32 fib_rules_tclass(struct fib_result *res);
 #endif
-extern void fib_rules_init(void);
+
 #endif
 
 static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 3b5d504a74be6..1650b64415aa8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -88,6 +88,7 @@ config IP_FIB_HASH
 config IP_MULTIPLE_TABLES
 	bool "IP: policy routing"
 	depends on IP_ADVANCED_ROUTER
+	select FIB_RULES
 	---help---
 	  Normally, a router decides what to do with a received packet based
 	  solely on the packet's final destination address. If you say Y here,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a6cc31d911ebf..9f3ffbec32966 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1151,9 +1151,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
 				      .dumpit	= inet_dump_fib,	},
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-	[RTM_NEWRULE  - RTM_BASE] = { .doit	= inet_rtm_newrule,	},
-	[RTM_DELRULE  - RTM_BASE] = { .doit	= inet_rtm_delrule,	},
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= inet_dump_rules,	},
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
 #endif
 };
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ba2a70745a63e..fe4a53d4d10dc 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -656,7 +656,7 @@ void __init ip_fib_init(void)
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
 #else
-	fib_rules_init();
+	fib4_rules_init();
 #endif
 
 	register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 79b04718bdfdd..23ec6ae1a0f69 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -5,9 +5,8 @@
  *
  *		IPv4 Forwarding Information Base: policy rules.
  *
- * Version:	$Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * 		Thomas Graf <tgraf@suug.ch>
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -19,129 +18,154 @@
  *		Marc Boucher	:	routing by fwmark
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/inetdevice.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
-
 #include <net/ip.h>
-#include <net/protocol.h>
 #include <net/route.h>
 #include <net/tcp.h>
-#include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/fib_rules.h>
 
-#define FRprintk(a...)
+static struct fib_rules_ops fib4_rules_ops;
 
-struct fib_rule
+struct fib4_rule
 {
-	struct hlist_node hlist;
-	atomic_t	r_clntref;
-	u32		r_preference;
-	unsigned char	r_table;
-	unsigned char	r_action;
-	unsigned char	r_dst_len;
-	unsigned char	r_src_len;
-	u32		r_src;
-	u32		r_srcmask;
-	u32		r_dst;
-	u32		r_dstmask;
-	u32		r_srcmap;
-	u8		r_flags;
-	u8		r_tos;
+	struct fib_rule		common;
+	u8			dst_len;
+	u8			src_len;
+	u8			tos;
+	u32			src;
+	u32			srcmask;
+	u32			dst;
+	u32			dstmask;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	u32		r_fwmark;
+	u32			fwmark;
 #endif
-	int		r_ifindex;
 #ifdef CONFIG_NET_CLS_ROUTE
-	__u32		r_tclassid;
+	u32			tclassid;
 #endif
-	char		r_ifname[IFNAMSIZ];
-	int		r_dead;
-	struct		rcu_head rcu;
 };
 
-static struct fib_rule default_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_preference =	0x7FFF,
-	.r_table =	RT_TABLE_DEFAULT,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule default_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFF,
+		.table =	RT_TABLE_DEFAULT,
+		.action =	FR_ACT_TO_TBL,
+	},
 };
 
-static struct fib_rule main_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_preference =	0x7FFE,
-	.r_table =	RT_TABLE_MAIN,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule main_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFE,
+		.table =	RT_TABLE_MAIN,
+		.action =	FR_ACT_TO_TBL,
+	},
 };
 
-static struct fib_rule local_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_table =	RT_TABLE_LOCAL,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule local_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.table =	RT_TABLE_LOCAL,
+		.action =	FR_ACT_TO_TBL,
+		.flags =	FIB_RULE_PERMANENT,
+	},
 };
 
-static struct hlist_head fib_rules;
+static LIST_HEAD(fib4_rules);
+
+#ifdef CONFIG_NET_CLS_ROUTE
+u32 fib_rules_tclass(struct fib_result *res)
+{
+	return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
+}
+#endif
 
-/* writer func called from netlink -- rtnl_sem hold*/
+int fib_lookup(struct flowi *flp, struct fib_result *res)
+{
+	struct fib_lookup_arg arg = {
+		.result = res,
+	};
+	int err;
 
-static void rtmsg_rule(int, struct fib_rule *);
+	err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
+	res->r = arg.rule;
 
-int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+	return err;
+}
+
+int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags,
+		     struct fib_lookup_arg *arg)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct fib_rule *r;
-	struct hlist_node *node;
-	int err = -ESRCH;
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
-		    rtm->rtm_src_len == r->r_src_len &&
-		    rtm->rtm_dst_len == r->r_dst_len &&
-		    (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
-		    rtm->rtm_tos == r->r_tos &&
-#ifdef CONFIG_IP_ROUTE_FWMARK
-		    (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
-		    (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
-		    (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
-		    (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
-		    (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-			err = -EPERM;
-			if (r == &local_rule)
-				break;
-
-			hlist_del_rcu(&r->hlist);
-			r->r_dead = 1;
-			rtmsg_rule(RTM_DELRULE, r);
-			fib_rule_put(r);
-			err = 0;
-			break;
-		}
+	int err = -EAGAIN;
+	struct fib_table *tbl;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		goto errout;
+
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		goto errout;
+
+	case FR_ACT_BLACKHOLE:
+	default:
+		err = -EINVAL;
+		goto errout;
 	}
+
+	if ((tbl = fib_get_table(rule->table)) == NULL)
+		goto errout;
+
+	err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+	if (err > 0)
+		err = -EAGAIN;
+errout:
 	return err;
 }
 
-/* Allocate new unique table id */
+
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
+{
+	if (res->r && res->r->action == FR_ACT_TO_TBL &&
+	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+		struct fib_table *tb;
+		if ((tb = fib_get_table(res->r->table)) != NULL)
+			tb->tb_select_default(tb, flp, res);
+	}
+}
+
+static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	struct fib4_rule *r = (struct fib4_rule *) rule;
+	u32 daddr = fl->fl4_dst;
+	u32 saddr = fl->fl4_src;
+
+	if (((saddr ^ r->src) & r->srcmask) ||
+	    ((daddr ^ r->dst) & r->dstmask))
+		return 0;
+
+	if (r->tos && (r->tos != fl->fl4_tos))
+		return 0;
+
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (r->fwmark && (r->fwmark != fl->fl4_fwmark))
+		return 0;
+#endif
+
+	return 1;
+}
 
 static struct fib_table *fib_empty_table(void)
 {
@@ -153,329 +177,178 @@ static struct fib_table *fib_empty_table(void)
 	return NULL;
 }
 
-static inline void fib_rule_put_rcu(struct rcu_head *head)
-{
-	struct fib_rule *r = container_of(head, struct fib_rule, rcu);
-	kfree(r);
-}
+static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .type = NLA_U32 },
+	[FRA_DST]	= { .type = NLA_U32 },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FLOW]	= { .type = NLA_U32 },
+};
 
-void fib_rule_put(struct fib_rule *r)
+static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
 {
-	if (atomic_dec_and_test(&r->r_clntref)) {
-		if (r->r_dead)
-			call_rcu(&r->rcu, fib_rule_put_rcu);
-		else
-			printk("Freeing alive rule %p\n", r);
-	}
-}
+	int err = -EINVAL;
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-/* writer func called from netlink -- rtnl_sem hold*/
+	if (frh->src_len > 32 || frh->dst_len > 32 ||
+	    (frh->tos & ~IPTOS_TOS_MASK))
+		goto errout;
 
-int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
-{
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct fib_rule *r, *new_r, *last = NULL;
-	struct hlist_node *node = NULL;
-	unsigned char table_id;
-
-	if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
-	    (rtm->rtm_tos & ~IPTOS_TOS_MASK))
-		return -EINVAL;
-
-	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
-		return -EINVAL;
-
-	table_id = rtm->rtm_table;
-	if (table_id == RT_TABLE_UNSPEC) {
-		struct fib_table *table;
-		if (rtm->rtm_type == RTN_UNICAST) {
-			if ((table = fib_empty_table()) == NULL)
-				return -ENOBUFS;
-			table_id = table->tb_id;
-		}
-	}
+	if (rule->table == RT_TABLE_UNSPEC) {
+		if (rule->action == FR_ACT_TO_TBL) {
+			struct fib_table *table;
 
-	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
-	if (!new_r)
-		return -ENOMEM;
-
-	if (rta[RTA_SRC-1])
-		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
-	if (rta[RTA_DST-1])
-		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
-	if (rta[RTA_GATEWAY-1])
-		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
-	new_r->r_src_len = rtm->rtm_src_len;
-	new_r->r_dst_len = rtm->rtm_dst_len;
-	new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
-	new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
-	new_r->r_tos = rtm->rtm_tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-	if (rta[RTA_PROTOINFO-1])
-		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
-#endif
-	new_r->r_action = rtm->rtm_type;
-	new_r->r_flags = rtm->rtm_flags;
-	if (rta[RTA_PRIORITY-1])
-		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	new_r->r_table = table_id;
-	if (rta[RTA_IIF-1]) {
-		struct net_device *dev;
-		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
-		new_r->r_ifindex = -1;
-		dev = __dev_get_by_name(new_r->r_ifname);
-		if (dev)
-			new_r->r_ifindex = dev->ifindex;
-	}
-#ifdef CONFIG_NET_CLS_ROUTE
-	if (rta[RTA_FLOW-1])
-		memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
-#endif
-	r = container_of(fib_rules.first, struct fib_rule, hlist);
+			table = fib_empty_table();
+			if (table == NULL) {
+				err = -ENOBUFS;
+				goto errout;
+			}
 
-	if (!new_r->r_preference) {
-		if (r && r->hlist.next != NULL) {
-			r = container_of(r->hlist.next, struct fib_rule, hlist);
-			if (r->r_preference)
-				new_r->r_preference = r->r_preference - 1;
+			rule->table = table->tb_id;
 		}
 	}
 
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_preference > new_r->r_preference)
-			break;
-		last = r;
-	}
-	atomic_inc(&new_r->r_clntref);
+	if (tb[FRA_SRC])
+		rule4->src = nla_get_u32(tb[FRA_SRC]);
 
-	if (last)
-		hlist_add_after_rcu(&last->hlist, &new_r->hlist);
-	else
-		hlist_add_before_rcu(&new_r->hlist, &r->hlist);
+	if (tb[FRA_DST])
+		rule4->dst = nla_get_u32(tb[FRA_DST]);
 
-	rtmsg_rule(RTM_NEWRULE, new_r);
-	return 0;
-}
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (tb[FRA_FWMARK])
+		rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+#endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
-u32 fib_rules_tclass(struct fib_result *res)
-{
-	if (res->r)
-		return res->r->r_tclassid;
-	return 0;
-}
+	if (tb[FRA_FLOW])
+		rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
 #endif
 
-/* callers should hold rtnl semaphore */
-
-static void fib_rules_detach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct fib_rule *r;
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_ifindex == dev->ifindex)
-			r->r_ifindex = -1;
+	rule4->src_len = frh->src_len;
+	rule4->srcmask = inet_make_mask(rule4->src_len);
+	rule4->dst_len = frh->dst_len;
+	rule4->dstmask = inet_make_mask(rule4->dst_len);
+	rule4->tos = frh->tos;
 
-	}
-}
-
-/* callers should hold rtnl semaphore */
-
-static void fib_rules_attach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct fib_rule *r;
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
-			r->r_ifindex = dev->ifindex;
-	}
+	err = 0;
+errout:
+	return err;
 }
 
-int fib_lookup(const struct flowi *flp, struct fib_result *res)
+static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
 {
-	int err;
-	struct fib_rule *r, *policy;
-	struct fib_table *tb;
-	struct hlist_node *node;
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	u32 daddr = flp->fl4_dst;
-	u32 saddr = flp->fl4_src;
+	if (frh->src_len && (rule4->src_len != frh->src_len))
+		return 0;
 
-FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
-	NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
+	if (frh->dst_len && (rule4->dst_len != frh->dst_len))
+		return 0;
 
-	rcu_read_lock();
+	if (frh->tos && (rule4->tos != frh->tos))
+		return 0;
 
-	hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
-		if (((saddr^r->r_src) & r->r_srcmask) ||
-		    ((daddr^r->r_dst) & r->r_dstmask) ||
-		    (r->r_tos && r->r_tos != flp->fl4_tos) ||
 #ifdef CONFIG_IP_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
+	if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
 #endif
-		    (r->r_ifindex && r->r_ifindex != flp->iif))
-			continue;
-
-FRprintk("tb %d r %d ", r->r_table, r->r_action);
-		switch (r->r_action) {
-		case RTN_UNICAST:
-			policy = r;
-			break;
-		case RTN_UNREACHABLE:
-			rcu_read_unlock();
-			return -ENETUNREACH;
-		default:
-		case RTN_BLACKHOLE:
-			rcu_read_unlock();
-			return -EINVAL;
-		case RTN_PROHIBIT:
-			rcu_read_unlock();
-			return -EACCES;
-		}
 
-		if ((tb = fib_get_table(r->r_table)) == NULL)
-			continue;
-		err = tb->tb_lookup(tb, flp, res);
-		if (err == 0) {
-			res->r = policy;
-			if (policy)
-				atomic_inc(&policy->r_clntref);
-			rcu_read_unlock();
-			return 0;
-		}
-		if (err < 0 && err != -EAGAIN) {
-			rcu_read_unlock();
-			return err;
-		}
-	}
-FRprintk("FAILURE\n");
-	rcu_read_unlock();
-	return -ENETUNREACH;
-}
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
+		return 0;
+#endif
 
-void fib_select_default(const struct flowi *flp, struct fib_result *res)
-{
-	if (res->r && res->r->r_action == RTN_UNICAST &&
-	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
-		struct fib_table *tb;
-		if ((tb = fib_get_table(res->r->r_table)) != NULL)
-			tb->tb_select_default(tb, flp, res);
-	}
-}
+	if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC])))
+		return 0;
 
-static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	struct net_device *dev = ptr;
+	if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST])))
+		return 0;
 
-	if (event == NETDEV_UNREGISTER)
-		fib_rules_detach(dev);
-	else if (event == NETDEV_REGISTER)
-		fib_rules_attach(dev);
-	return NOTIFY_DONE;
+	return 1;
 }
 
+static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-static struct notifier_block fib_rules_notifier = {
-	.notifier_call =fib_rules_event,
-};
+	frh->family = AF_INET;
+	frh->dst_len = rule4->dst_len;
+	frh->src_len = rule4->src_len;
+	frh->tos = rule4->tos;
 
-static __inline__ int inet_fill_rule(struct sk_buff *skb,
-				     struct fib_rule *r,
-				     u32 pid, u32 seq, int event,
-				     unsigned int flags)
-{
-	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
-
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
-	rtm->rtm_family = AF_INET;
-	rtm->rtm_dst_len = r->r_dst_len;
-	rtm->rtm_src_len = r->r_src_len;
-	rtm->rtm_tos = r->r_tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	if (r->r_fwmark)
-		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
+	if (rule4->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
 #endif
-	rtm->rtm_table = r->r_table;
-	rtm->rtm_protocol = 0;
-	rtm->rtm_scope = 0;
-	rtm->rtm_type = r->r_action;
-	rtm->rtm_flags = r->r_flags;
-
-	if (r->r_dst_len)
-		RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
-	if (r->r_src_len)
-		RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
-	if (r->r_ifname[0])
-		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
-	if (r->r_preference)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
-	if (r->r_srcmap)
-		RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
+
+	if (rule4->dst_len)
+		NLA_PUT_U32(skb, FRA_DST, rule4->dst);
+
+	if (rule4->src_len)
+		NLA_PUT_U32(skb, FRA_SRC, rule4->src);
+
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (r->r_tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid);
+	if (rule4->tclassid)
+		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
 #endif
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	return 0;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	return -ENOBUFS;
 }
 
-/* callers should hold rtnl semaphore */
+int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return fib_rules_dump(skb, cb, AF_INET);
+}
 
-static void rtmsg_rule(int event, struct fib_rule *r)
+static u32 fib4_rule_default_pref(void)
 {
-	int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128);
-	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
-
-	if (!skb)
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
-	else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
-		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
-	} else {
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&fib4_rules)) {
+		pos = fib4_rules.next;
+		if (pos->next != &fib4_rules) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
 	}
+
+	return 0;
 }
 
-int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static struct fib_rules_ops fib4_rules_ops = {
+	.family		= AF_INET,
+	.rule_size	= sizeof(struct fib4_rule),
+	.action		= fib4_rule_action,
+	.match		= fib4_rule_match,
+	.configure	= fib4_rule_configure,
+	.compare	= fib4_rule_compare,
+	.fill		= fib4_rule_fill,
+	.default_pref	= fib4_rule_default_pref,
+	.nlgroup	= RTNLGRP_IPV4_RULE,
+	.policy		= fib4_rule_policy,
+	.rules_list	= &fib4_rules,
+	.owner		= THIS_MODULE,
+};
+
+void __init fib4_rules_init(void)
 {
-	int idx = 0;
-	int s_idx = cb->args[0];
-	struct fib_rule *r;
-	struct hlist_node *node;
-
-	rcu_read_lock();
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (idx < s_idx)
-			goto next;
-		if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
-				   cb->nlh->nlmsg_seq,
-				   RTM_NEWRULE, NLM_F_MULTI) < 0)
-			break;
-next:
-		idx++;
-	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
+	list_add_tail(&local_rule.common.list, &fib4_rules);
+	list_add_tail(&main_rule.common.list, &fib4_rules);
+	list_add_tail(&default_rule.common.list, &fib4_rules);
 
-	return skb->len;
+	fib_rules_register(&fib4_rules_ops);
 }
 
-void __init fib_rules_init(void)
+void __exit fib4_rules_cleanup(void)
 {
-	INIT_HLIST_HEAD(&fib_rules);
-	hlist_add_head(&local_rule.hlist, &fib_rules);
-	hlist_add_after(&local_rule.hlist, &main_rule.hlist);
-	hlist_add_after(&main_rule.hlist, &default_rule.hlist);
-	register_netdevice_notifier(&fib_rules_notifier);
+	fib_rules_unregister(&fib4_rules_ops);
 }
-- 
GitLab


From fe4944e59c357f945f81bc67edb7ed1392e875ad Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:03:05 -0700
Subject: [PATCH 0400/1063] [NETLINK]: Extend netlink messaging interface

Adds:
 nlmsg_get_pos()                 return current position in message
 nlmsg_trim()                    trim part of message
 nla_reserve_nohdr(skb, len)     reserve room for an attribute w/o hdr
 nla_put_nohdr(skb, len, data)   add attribute w/o hdr
 nla_find_nested()               find attribute in nested attributes

Fixes nlmsg_new() to take allocation flags and consider size.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h   | 74 ++++++++++++++++++++++++++++++++++------
 kernel/taskstats.c      |  2 +-
 net/netlink/attr.c      | 75 +++++++++++++++++++++++++++++++++++++++++
 net/netlink/genetlink.c |  2 +-
 4 files changed, 141 insertions(+), 12 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 640c26a90cf1f..3a5e40b1e0450 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -35,6 +35,8 @@
  *   nlmsg_put()			add a netlink message to an skb
  *   nlmsg_put_answer()			callback based nlmsg_put()
  *   nlmsg_end()			finanlize netlink message
+ *   nlmsg_get_pos()			return current position in message
+ *   nlmsg_trim()			trim part of message
  *   nlmsg_cancel()			cancel message construction
  *   nlmsg_free()			free a netlink message
  *
@@ -80,8 +82,10 @@
  *   struct nlattr			netlink attribtue header
  *
  * Attribute Construction:
- *   nla_reserve(skb, type, len)	reserve skb tailroom for an attribute
+ *   nla_reserve(skb, type, len)	reserve room for an attribute
+ *   nla_reserve_nohdr(skb, len)	reserve room for an attribute w/o hdr
  *   nla_put(skb, type, len, data)	add attribute to skb
+ *   nla_put_nohdr(skb, len, data)	add attribute w/o hdr
  *
  * Attribute Construction for Basic Types:
  *   nla_put_u8(skb, type, value)	add u8 attribute to skb
@@ -139,6 +143,7 @@
  *   nla_next(nla, remaining)		get next netlink attribute
  *   nla_validate()			validate a stream of attributes
  *   nla_find()				find attribute in stream of attributes
+ *   nla_find_nested()			find attribute in nested attributes
  *   nla_parse()			parse and validate stream of attrs
  *   nla_parse_nested()			parse nested attribuets
  *   nla_for_each_attr()		loop over all attributes
@@ -203,12 +208,18 @@ extern int		nla_memcmp(const struct nlattr *nla, const void *data,
 extern int		nla_strcmp(const struct nlattr *nla, const char *str);
 extern struct nlattr *	__nla_reserve(struct sk_buff *skb, int attrtype,
 				      int attrlen);
+extern void *		__nla_reserve_nohdr(struct sk_buff *skb, int attrlen);
 extern struct nlattr *	nla_reserve(struct sk_buff *skb, int attrtype,
 				    int attrlen);
+extern void *		nla_reserve_nohdr(struct sk_buff *skb, int attrlen);
 extern void		__nla_put(struct sk_buff *skb, int attrtype,
 				  int attrlen, const void *data);
+extern void		__nla_put_nohdr(struct sk_buff *skb, int attrlen,
+					const void *data);
 extern int		nla_put(struct sk_buff *skb, int attrtype,
 				int attrlen, const void *data);
+extern int		nla_put_nohdr(struct sk_buff *skb, int attrlen,
+				      const void *data);
 
 /**************************************************************************
  * Netlink Messages
@@ -453,12 +464,13 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
 /**
  * nlmsg_new - Allocate a new netlink message
  * @size: maximum size of message
+ * @flags: the type of memory to allocate.
  *
  * Use NLMSG_GOODSIZE if size isn't know and you need a good default size.
  */
-static inline struct sk_buff *nlmsg_new(int size)
+static inline struct sk_buff *nlmsg_new(int size, gfp_t flags)
 {
-	return alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	return alloc_skb(size, flags);
 }
 
 /**
@@ -479,6 +491,32 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return skb->len;
 }
 
+/**
+ * nlmsg_get_pos - return current position in netlink message
+ * @skb: socket buffer the message is stored in
+ *
+ * Returns a pointer to the current tail of the message.
+ */
+static inline void *nlmsg_get_pos(struct sk_buff *skb)
+{
+	return skb->tail;
+}
+
+/**
+ * nlmsg_trim - Trim message to a mark
+ * @skb: socket buffer the message is stored in
+ * @mark: mark to trim to
+ *
+ * Trims the message to the provided mark. Returns -1.
+ */
+static inline int nlmsg_trim(struct sk_buff *skb, void *mark)
+{
+	if (mark)
+		skb_trim(skb, (unsigned char *) mark - skb->data);
+
+	return -1;
+}
+
 /**
  * nlmsg_cancel - Cancel construction of a netlink message
  * @skb: socket buffer the message is stored in
@@ -489,9 +527,7 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
  */
 static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	skb_trim(skb, (unsigned char *) nlh - skb->data);
-
-	return -1;
+	return nlmsg_trim(skb, nlh);
 }
 
 /**
@@ -630,6 +666,18 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
 	return (struct nlattr *) ((char *) nla + totlen);
 }
 
+/**
+ * nla_find_nested - find attribute in a set of nested attributes
+ * @nla: attribute containing the nested attributes
+ * @attrtype: type of attribute to look for
+ *
+ * Returns the first attribute which matches the specified type.
+ */
+static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype)
+{
+	return nla_find(nla_data(nla), nla_len(nla), attrtype);
+}
+
 /**
  * nla_parse_nested - parse nested attributes
  * @tb: destination array with maxtype+1 elements
@@ -862,10 +910,7 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
  */
 static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
 {
-	if (start)
-		skb_trim(skb, (unsigned char *) start - skb->data);
-
-	return -1;
+	return nlmsg_trim(skb, start);
 }
 
 /**
@@ -880,4 +925,13 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
 	     nla_ok(pos, rem); \
 	     pos = nla_next(pos, &(rem)))
 
+/**
+ * nla_for_each_nested - iterate over nested attributes
+ * @pos: loop counter, set to current attribute
+ * @nla: attribute containing the nested attributes
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_nested(pos, nla, rem) \
+	nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem)
+
 #endif
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index e781876573304..2ed4040d0dc56 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -75,7 +75,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
 	/*
 	 * If new attributes are added, please revisit this allocation
 	 */
-	skb = nlmsg_new(size);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index dddbd15135a87..136e529e5780c 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -254,6 +254,26 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 	return nla;
 }
 
+/**
+ * __nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the payload.
+ */
+void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+	void *start;
+
+	start = skb_put(skb, NLA_ALIGN(attrlen));
+	memset(start, 0, NLA_ALIGN(attrlen));
+
+	return start;
+}
+
 /**
  * nla_reserve - reserve room for attribute on the skb
  * @skb: socket buffer to reserve room on
@@ -274,6 +294,24 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 	return __nla_reserve(skb, attrtype, attrlen);
 }
 
+/**
+ * nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+		return NULL;
+
+	return __nla_reserve_nohdr(skb, attrlen);
+}
+
 /**
  * __nla_put - Add a netlink attribute to a socket buffer
  * @skb: socket buffer to add attribute to
@@ -293,6 +331,22 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
 	memcpy(nla_data(nla), data, attrlen);
 }
 
+/**
+ * __nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute payload.
+ */
+void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+	void *start;
+
+	start = __nla_reserve_nohdr(skb, attrlen);
+	memcpy(start, data, attrlen);
+}
 
 /**
  * nla_put - Add a netlink attribute to a socket buffer
@@ -313,15 +367,36 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
 	return 0;
 }
 
+/**
+ * nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -1 if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+		return -1;
+
+	__nla_put_nohdr(skb, attrlen, data);
+	return 0;
+}
 
 EXPORT_SYMBOL(nla_validate);
 EXPORT_SYMBOL(nla_parse);
 EXPORT_SYMBOL(nla_find);
 EXPORT_SYMBOL(nla_strlcpy);
 EXPORT_SYMBOL(__nla_reserve);
+EXPORT_SYMBOL(__nla_reserve_nohdr);
 EXPORT_SYMBOL(nla_reserve);
+EXPORT_SYMBOL(nla_reserve_nohdr);
 EXPORT_SYMBOL(__nla_put);
+EXPORT_SYMBOL(__nla_put_nohdr);
 EXPORT_SYMBOL(nla_put);
+EXPORT_SYMBOL(nla_put_nohdr);
 EXPORT_SYMBOL(nla_memcpy);
 EXPORT_SYMBOL(nla_memcmp);
 EXPORT_SYMBOL(nla_strcmp);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a298f77cc3e38..75bb47a898dd7 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -440,7 +440,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
 	struct sk_buff *skb;
 	int err;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE);
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
 		return ERR_PTR(-ENOBUFS);
 
-- 
GitLab


From bf8b79e444a748963c71d2a58709e1ce5597e1b5 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:03:29 -0700
Subject: [PATCH 0401/1063] [NETLINK]: Convert core netlink handling to new
 netlink api

Fixes a theoretical memory and locking leak when the size of
the netlink header would exceed the skb tailroom.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 41 +++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8b85036ba8e39..0f36ddc0b72d7 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1147,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
 	err = -ENOBUFS;
-	skb = alloc_skb(len, GFP_KERNEL);
+	skb = nlmsg_new(len, GFP_KERNEL);
 	if (skb==NULL)
 		goto out;
 
@@ -1341,19 +1341,18 @@ static int netlink_dump(struct sock *sk)
 	struct netlink_callback *cb;
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
-	int len;
+	int len, err = -ENOBUFS;
 	
 	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
 	if (!skb)
-		return -ENOBUFS;
+		goto errout;
 
 	spin_lock(&nlk->cb_lock);
 
 	cb = nlk->cb;
 	if (cb == NULL) {
-		spin_unlock(&nlk->cb_lock);
-		kfree_skb(skb);
-		return -EINVAL;
+		err = -EINVAL;
+		goto errout_skb;
 	}
 
 	len = cb->dump(skb, cb);
@@ -1365,8 +1364,12 @@ static int netlink_dump(struct sock *sk)
 		return 0;
 	}
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
-	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
+	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
+	if (!nlh)
+		goto errout_skb;
+
+	memcpy(nlmsg_data(nlh), &len, sizeof(len));
+
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, skb->len);
 
@@ -1378,8 +1381,11 @@ static int netlink_dump(struct sock *sk)
 	netlink_destroy_callback(cb);
 	return 0;
 
-nlmsg_failure:
-	return -ENOBUFS;
+errout_skb:
+	spin_unlock(&nlk->cb_lock);
+	kfree_skb(skb);
+errout:
+	return err;
 }
 
 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1431,11 +1437,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 	int size;
 
 	if (err == 0)
-		size = NLMSG_SPACE(sizeof(struct nlmsgerr));
+		size = nlmsg_total_size(sizeof(*errmsg));
 	else
-		size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));
+		size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh));
 
-	skb = alloc_skb(size, GFP_KERNEL);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
@@ -1451,16 +1457,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 
 	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
-	errmsg = NLMSG_DATA(rep);
+	errmsg = nlmsg_data(rep);
 	errmsg->error = err;
-	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
+	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
 }
 
 static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 						     struct nlmsghdr *, int *))
 {
-	unsigned int total_len;
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -1470,8 +1475,6 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
 			return 0;
 
-		total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
-
 		if (cb(skb, nlh, &err) < 0) {
 			/* Not an error, but we have to interrupt processing
 			 * here. Note: that in this case we do not pull
@@ -1483,7 +1486,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 		} else if (nlh->nlmsg_flags & NLM_F_ACK)
 			netlink_ack(skb, nlh, 0);
 
-		skb_pull(skb, total_len);
+		netlink_queue_skip(nlh, skb);
 	}
 
 	return 0;
-- 
GitLab


From 5c7539781d392629fb40b04aad9a1f197b66cd01 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:03:53 -0700
Subject: [PATCH 0402/1063] [IPV4]: Convert address addition to new netlink api

Adds rtm_to_ifaddr() transforming a netlink message to a
struct in_ifaddr. Fixes various unvalidated netlink attributes
causing memory corruptions when left empty by userspace
applications.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 108 +++++++++++++++++++++++++++++++--------------
 1 file changed, 75 insertions(+), 33 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9f3ffbec32966..6b297c8697e60 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -62,6 +62,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
+#include <net/netlink.h>
 
 struct ipv4_devconf ipv4_devconf = {
 	.accept_redirects = 1,
@@ -78,6 +79,14 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
 	.accept_source_route = 1,
 };
 
+static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
+	[IFA_LOCAL]     	= { .type = NLA_U32 },
+	[IFA_ADDRESS]   	= { .type = NLA_U32 },
+	[IFA_BROADCAST] 	= { .type = NLA_U32 },
+	[IFA_ANYCAST]   	= { .type = NLA_U32 },
+	[IFA_LABEL]     	= { .type = NLA_STRING },
+};
+
 static void rtmsg_ifa(int event, struct in_ifaddr *);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -451,57 +460,90 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 	return -EADDRNOTAVAIL;
 }
 
-static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
 {
-	struct rtattr **rta = arg;
+	struct nlattr *tb[IFA_MAX+1];
+	struct in_ifaddr *ifa;
+	struct ifaddrmsg *ifm;
 	struct net_device *dev;
 	struct in_device *in_dev;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
-	struct in_ifaddr *ifa;
-	int rc = -EINVAL;
+	int err = -EINVAL;
 
-	ASSERT_RTNL();
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+	if (err < 0)
+		goto errout;
 
-	if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1])
-		goto out;
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
+		goto errout;
 
-	rc = -ENODEV;
-	if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
-		goto out;
+	dev = __dev_get_by_index(ifm->ifa_index);
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
 
-	rc = -ENOBUFS;
-	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
+	in_dev = __in_dev_get_rtnl(dev);
+	if (in_dev == NULL) {
 		in_dev = inetdev_init(dev);
-		if (!in_dev)
-			goto out;
+		if (in_dev == NULL) {
+			err = -ENOBUFS;
+			goto errout;
+		}
 	}
 
-	if ((ifa = inet_alloc_ifa()) == NULL)
-		goto out;
+	ifa = inet_alloc_ifa();
+	if (ifa == NULL) {
+		/*
+		 * A potential indev allocation can be left alive, it stays
+		 * assigned to its device and is destroy with it.
+		 */
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	in_dev_hold(in_dev);
+
+	if (tb[IFA_ADDRESS] == NULL)
+		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 
-	if (!rta[IFA_ADDRESS - 1])
-		rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
-	memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
-	memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
-	if (rta[IFA_BROADCAST - 1])
-		memcpy(&ifa->ifa_broadcast,
-		       RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
-	if (rta[IFA_ANYCAST - 1])
-		memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
 	ifa->ifa_flags = ifm->ifa_flags;
 	ifa->ifa_scope = ifm->ifa_scope;
-	in_dev_hold(in_dev);
-	ifa->ifa_dev   = in_dev;
-	if (rta[IFA_LABEL - 1])
-		rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ);
+	ifa->ifa_dev = in_dev;
+
+	ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]);
+	ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]);
+
+	if (tb[IFA_BROADCAST])
+		ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]);
+
+	if (tb[IFA_ANYCAST])
+		ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]);
+
+	if (tb[IFA_LABEL])
+		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 	else
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 
-	rc = inet_insert_ifa(ifa);
-out:
-	return rc;
+	return ifa;
+
+errout:
+	return ERR_PTR(err);
+}
+
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct in_ifaddr *ifa;
+
+	ASSERT_RTNL();
+
+	ifa = rtm_to_ifaddr(nlh);
+	if (IS_ERR(ifa))
+		return PTR_ERR(ifa);
+
+	return inet_insert_ifa(ifa);
 }
 
 /*
-- 
GitLab


From dfdd5fd4e93d98e06be9ac9db84e3b98c6c26706 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:04:17 -0700
Subject: [PATCH 0403/1063] [IPV4]: Convert address deletion to new netlink api

Fixes various unvalidated netlink attributes causing
memory corruptions when left empty by userspace.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6b297c8697e60..309640e9ede1f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -430,34 +430,48 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
 
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr **rta = arg;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in_device *in_dev;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
 	struct in_ifaddr *ifa, **ifap;
+	int err = -EINVAL;
 
 	ASSERT_RTNL();
 
-	if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
-		goto out;
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	ifm = nlmsg_data(nlh);
+	in_dev = inetdev_by_index(ifm->ifa_index);
+	if (in_dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
+
 	__in_dev_put(in_dev);
 
 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 	     ifap = &ifa->ifa_next) {
-		if ((rta[IFA_LOCAL - 1] &&
-		     memcmp(RTA_DATA(rta[IFA_LOCAL - 1]),
-			    &ifa->ifa_local, 4)) ||
-		    (rta[IFA_LABEL - 1] &&
-		     rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) ||
-		    (rta[IFA_ADDRESS - 1] &&
-		     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
-		      !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]),
-			      	      ifa))))
+		if (tb[IFA_LOCAL] &&
+		    ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL]))
 			continue;
+
+		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
+			continue;
+
+		if (tb[IFA_ADDRESS] &&
+		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
+		    !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa)))
+			continue;
+
 		inet_del_ifa(in_dev, ifap, 1);
 		return 0;
 	}
-out:
-	return -EADDRNOTAVAIL;
+
+	err = -EADDRNOTAVAIL;
+errout:
+	return err;
 }
 
 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
-- 
GitLab


From 47f68512d2685431f1781830dfcbab31bda87644 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:04:36 -0700
Subject: [PATCH 0404/1063] [IPV4]: Convert address dumping to new netlink api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 309640e9ede1f..80bf5b2ea2e6c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1112,32 +1112,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+
 	if (ifa->ifa_address)
-		RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
+		NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address);
+
 	if (ifa->ifa_local)
-		RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
+		NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local);
+
 	if (ifa->ifa_broadcast)
-		RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
+		NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
+
 	if (ifa->ifa_anycast)
-		RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
+		NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast);
+
 	if (ifa->ifa_label[0])
-		RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
+
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1185,17 +1190,16 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 
 static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
 {
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128);
-	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+	struct sk_buff *skb;
 
-	if (!skb)
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
 		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
 	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
-	} else {
+	} else
 		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
-	}
 }
 
 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
-- 
GitLab


From 1823730fbc89fadde72a7bb3b7bdf03cc7b8835c Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:04:54 -0700
Subject: [PATCH 0405/1063] [IPv4]: Move interface address bits to
 linux/if_addr.h

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_addr.h   | 53 ++++++++++++++++++++++++++++++++++++
 include/linux/rtnetlink.h | 56 ---------------------------------------
 net/core/rtnetlink.c      |  1 +
 net/decnet/dn_dev.c       |  1 +
 net/ipv4/devinet.c        |  1 +
 net/ipv4/fib_frontend.c   |  1 +
 net/ipv6/addrconf.c       |  1 +
 net/ipv6/ndisc.c          |  1 +
 8 files changed, 59 insertions(+), 56 deletions(-)
 create mode 100644 include/linux/if_addr.h

diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
new file mode 100644
index 0000000000000..e1590454db596
--- /dev/null
+++ b/include/linux/if_addr.h
@@ -0,0 +1,53 @@
+#ifndef __LINUX_IF_ADDR_H
+#define __LINUX_IF_ADDR_H
+
+#include <linux/netlink.h>
+
+struct ifaddrmsg
+{
+	__u8		ifa_family;
+	__u8		ifa_prefixlen;	/* The prefix length		*/
+	__u8		ifa_flags;	/* Flags			*/
+	__u8		ifa_scope;	/* Address scope		*/
+	__u32		ifa_index;	/* Link index			*/
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ */
+enum
+{
+	IFA_UNSPEC,
+	IFA_ADDRESS,
+	IFA_LOCAL,
+	IFA_LABEL,
+	IFA_BROADCAST,
+	IFA_ANYCAST,
+	IFA_CACHEINFO,
+	IFA_MULTICAST,
+	__IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY		0x01
+#define IFA_F_TEMPORARY		IFA_F_SECONDARY
+
+#define IFA_F_DEPRECATED	0x20
+#define IFA_F_TENTATIVE		0x40
+#define IFA_F_PERMANENT		0x80
+
+struct ifa_cacheinfo
+{
+	__u32	ifa_prefered;
+	__u32	ifa_valid;
+	__u32	cstamp; /* created timestamp, hundredths of seconds */
+	__u32	tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+#endif
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index bf353538ae93d..890c4d4038b61 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -384,62 +384,6 @@ struct rta_session
 };
 
 
-/*********************************************************
- *		Interface address.
- ****/
-
-struct ifaddrmsg
-{
-	unsigned char	ifa_family;
-	unsigned char	ifa_prefixlen;	/* The prefix length		*/
-	unsigned char	ifa_flags;	/* Flags			*/
-	unsigned char	ifa_scope;	/* See above			*/
-	int		ifa_index;	/* Link index			*/
-};
-
-enum
-{
-	IFA_UNSPEC,
-	IFA_ADDRESS,
-	IFA_LOCAL,
-	IFA_LABEL,
-	IFA_BROADCAST,
-	IFA_ANYCAST,
-	IFA_CACHEINFO,
-	IFA_MULTICAST,
-	__IFA_MAX
-};
-
-#define IFA_MAX (__IFA_MAX - 1)
-
-/* ifa_flags */
-
-#define IFA_F_SECONDARY		0x01
-#define IFA_F_TEMPORARY		IFA_F_SECONDARY
-
-#define IFA_F_DEPRECATED	0x20
-#define IFA_F_TENTATIVE		0x40
-#define IFA_F_PERMANENT		0x80
-
-struct ifa_cacheinfo
-{
-	__u32	ifa_prefered;
-	__u32	ifa_valid;
-	__u32	cstamp; /* created timestamp, hundredths of seconds */
-	__u32	tstamp; /* updated timestamp, hundredths of seconds */
-};
-
-
-#define IFA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
-#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
-
-/*
-   Important comment:
-   IFA_ADDRESS is prefix address, rather than local interface address.
-   It makes no difference for normally configured broadcast interfaces,
-   but for point-to-point IFA_ADDRESS is DESTINATION address,
-   local address is supplied in IFA_LOCAL attribute.
- */
 
 /**************************************************************
  *		Neighbour discovery.
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index aa7cff2257b1d..35712031e2c33 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/security.h>
 #include <linux/mutex.h>
+#include <linux/if_addr.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 476455fbdb030..632c5a90b5895 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -34,6 +34,7 @@
 #include <linux/seq_file.h>
 #include <linux/timer.h>
 #include <linux/string.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/skbuff.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 80bf5b2ea2e6c..398e7b9ca66b4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -43,6 +43,7 @@
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/if_addr.h>
 #include <linux/if_ether.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index fe4a53d4d10dc..a83f1aa8034e3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,6 +32,7 @@
 #include <linux/inet.h>
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ed766eebc0227..c2a4db843e514 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -48,6 +48,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_arcnet.h>
 #include <linux/if_infiniband.h>
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 67cfc3813c32d..5743e8bffefd5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -62,6 +62,7 @@
 #include <linux/sysctl.h>
 #endif
 
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
-- 
GitLab


From da5e0494c542dddc56a1f1edfd30310ea30f41ff Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 10 Aug 2006 21:17:37 -0700
Subject: [PATCH 0406/1063] [NET]: Convert link modification to new netlink api

Transforms do_setlink() into rtnl_setlink() using the new
netlink api. A warning message printed to the console is
added in the event that a change request fails while part
of the change request has been committed already. The ioctl()
based nature of net devices makes it almost impossible to
move on to atomic netlink operations without obsoleting
some of the functionality.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 191 ++++++++++++++++++++++---------------------
 1 file changed, 100 insertions(+), 91 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 35712031e2c33..2adc966d981e3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -336,52 +336,69 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
 	return skb->len;
 }
 
-static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
+	[IFLA_IFNAME]		= { .type = NLA_STRING },
+	[IFLA_MAP]		= { .minlen = sizeof(struct rtnl_link_ifmap) },
+	[IFLA_MTU]		= { .type = NLA_U32 },
+	[IFLA_TXQLEN]		= { .type = NLA_U32 },
+	[IFLA_WEIGHT]		= { .type = NLA_U32 },
+	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
+	[IFLA_LINKMODE]		= { .type = NLA_U8 },
+};
+
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ifinfomsg  *ifm = NLMSG_DATA(nlh);
-	struct rtattr    **ida = arg;
+	struct ifinfomsg *ifm;
 	struct net_device *dev;
-	int err, send_addr_notify = 0;
+	int err, send_addr_notify = 0, modified = 0;
+	struct nlattr *tb[IFLA_MAX+1];
+	char ifname[IFNAMSIZ];
 
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
+
+	if (tb[IFLA_IFNAME] &&
+	    nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ) >= IFNAMSIZ)
+		return -EINVAL;
+
+	err = -EINVAL;
+	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index >= 0)
 		dev = dev_get_by_index(ifm->ifi_index);
-	else if (ida[IFLA_IFNAME - 1]) {
-		char ifname[IFNAMSIZ];
-
-		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-		                   IFNAMSIZ) >= IFNAMSIZ)
-			return -EINVAL;
+	else if (tb[IFLA_IFNAME])
 		dev = dev_get_by_name(ifname);
-	} else
-		return -EINVAL;
+	else
+		goto errout;
 
-	if (!dev)
-		return -ENODEV;
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
 
-	err = -EINVAL;
+	if (tb[IFLA_ADDRESS] &&
+	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+		goto errout_dev;
 
-	if (ifm->ifi_flags)
-		dev_change_flags(dev, ifm->ifi_flags);
+	if (tb[IFLA_BROADCAST] &&
+	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+		goto errout_dev;
 
-	if (ida[IFLA_MAP - 1]) {
+	if (tb[IFLA_MAP]) {
 		struct rtnl_link_ifmap *u_map;
 		struct ifmap k_map;
 
 		if (!dev->set_config) {
 			err = -EOPNOTSUPP;
-			goto out;
+			goto errout_dev;
 		}
 
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto out;
+			goto errout_dev;
 		}
-		
-		if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
-			goto out;
-
-		u_map = RTA_DATA(ida[IFLA_MAP - 1]);
 
+		u_map = nla_data(tb[IFLA_MAP]);
 		k_map.mem_start = (unsigned long) u_map->mem_start;
 		k_map.mem_end = (unsigned long) u_map->mem_end;
 		k_map.base_addr = (unsigned short) u_map->base_addr;
@@ -390,119 +407,111 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		k_map.port = (unsigned char) u_map->port;
 
 		err = dev->set_config(dev, &k_map);
+		if (err < 0)
+			goto errout_dev;
 
-		if (err)
-			goto out;
+		modified = 1;
 	}
 
-	if (ida[IFLA_ADDRESS - 1]) {
+	if (tb[IFLA_ADDRESS]) {
 		struct sockaddr *sa;
 		int len;
 
 		if (!dev->set_mac_address) {
 			err = -EOPNOTSUPP;
-			goto out;
+			goto errout_dev;
 		}
+
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto out;
+			goto errout_dev;
 		}
-		if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-			goto out;
 
 		len = sizeof(sa_family_t) + dev->addr_len;
 		sa = kmalloc(len, GFP_KERNEL);
 		if (!sa) {
 			err = -ENOMEM;
-			goto out;
+			goto errout_dev;
 		}
 		sa->sa_family = dev->type;
-		memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]),
+		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
 		       dev->addr_len);
 		err = dev->set_mac_address(dev, sa);
 		kfree(sa);
 		if (err)
-			goto out;
+			goto errout_dev;
 		send_addr_notify = 1;
+		modified = 1;
 	}
 
-	if (ida[IFLA_BROADCAST - 1]) {
-		if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-			goto out;
-		memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
-		       dev->addr_len);
-		send_addr_notify = 1;
+	if (tb[IFLA_MTU]) {
+		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
 	}
 
-	if (ida[IFLA_MTU - 1]) {
-		if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
-		err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
-
-		if (err)
-			goto out;
-
+	/*
+	 * Interface selected by interface index but interface
+	 * name provided implies that a name change has been
+	 * requested.
+	 */
+	if (ifm->ifi_index >= 0 && ifname[0]) {
+		err = dev_change_name(dev, ifname);
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
 	}
 
-	if (ida[IFLA_TXQLEN - 1]) {
-		if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+	if (tb[IFLA_WIRELESS]) {
+		/* Call Wireless Extensions.
+		 * Various stuff checked in there... */
+		err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]));
+		if (err < 0)
+			goto errout_dev;
+	}
+#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-		dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
+	if (tb[IFLA_BROADCAST]) {
+		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
+		send_addr_notify = 1;
 	}
 
-	if (ida[IFLA_WEIGHT - 1]) {
-		if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
 
-		dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
-	}
+	if (ifm->ifi_flags)
+		dev_change_flags(dev, ifm->ifi_flags);
 
-	if (ida[IFLA_OPERSTATE - 1]) {
-		if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-			goto out;
+	if (tb[IFLA_TXQLEN])
+		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 
-		set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1])));
-	}
+	if (tb[IFLA_WEIGHT])
+		dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
 
-	if (ida[IFLA_LINKMODE - 1]) {
-		if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-			goto out;
+	if (tb[IFLA_OPERSTATE])
+		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 
+	if (tb[IFLA_LINKMODE]) {
 		write_lock_bh(&dev_base_lock);
-		dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1]));
+		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
 		write_unlock_bh(&dev_base_lock);
 	}
 
-	if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
-		char ifname[IFNAMSIZ];
-
-		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-		                   IFNAMSIZ) >= IFNAMSIZ)
-			goto out;
-		err = dev_change_name(dev, ifname);
-		if (err)
-			goto out;
-	}
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (ida[IFLA_WIRELESS - 1]) {
-
-		/* Call Wireless Extensions.
-		 * Various stuff checked in there... */
-		err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len);
-		if (err)
-			goto out;
-	}
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 	err = 0;
 
-out:
+errout_dev:
+	if (err < 0 && modified && net_ratelimit())
+		printk(KERN_WARNING "A link change request failed with "
+		       "some changes comitted already. Interface %s may "
+		       "have been left with an inconsistent configuration, "
+		       "please check.\n", dev->name);
+
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 
 	dev_put(dev);
+errout:
 	return err;
 }
 
@@ -753,7 +762,7 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 					 .doit   = do_getlink,
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 					 .dumpit = rtnetlink_dump_ifinfo },
-	[RTM_SETLINK     - RTM_BASE] = { .doit   = do_setlink		 },
+	[RTM_SETLINK     - RTM_BASE] = { .doit   = rtnl_setlink		 },
 	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
 	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
-- 
GitLab


From b60c5115f4abf0b961a18682889798dcfbe6a801 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:05:34 -0700
Subject: [PATCH 0407/1063] [NET]: Convert link dumping to new netlink api

Transforms netlink code to dump link tables to use the new
netlink api. Makes rtnl_getlink() available regardless of the
availability of the wireless extensions.

Adding copy_rtnl_link_stats() avoids the structural dependency
of struct rtnl_link_stats on struct net_device_stats and thus
avoids troubles later on.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 281 +++++++++++++++++++++----------------------
 1 file changed, 137 insertions(+), 144 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2adc966d981e3..93ba04fb84442 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -218,41 +218,73 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 	}
 }
 
-static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change, 
-				 unsigned int flags)
+static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+				 struct net_device_stats *b)
 {
-	struct ifinfomsg *r;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
-
-	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_UNSPEC;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev_get_flags(dev);
-	r->ifi_change = change;
+	a->rx_packets = b->rx_packets;
+	a->tx_packets = b->tx_packets;
+	a->rx_bytes = b->rx_bytes;
+	a->tx_bytes = b->tx_bytes;
+	a->rx_errors = b->rx_errors;
+	a->tx_errors = b->tx_errors;
+	a->rx_dropped = b->rx_dropped;
+	a->tx_dropped = b->tx_dropped;
+
+	a->multicast = b->multicast;
+	a->collisions = b->collisions;
+
+	a->rx_length_errors = b->rx_length_errors;
+	a->rx_over_errors = b->rx_over_errors;
+	a->rx_crc_errors = b->rx_crc_errors;
+	a->rx_frame_errors = b->rx_frame_errors;
+	a->rx_fifo_errors = b->rx_fifo_errors;
+	a->rx_missed_errors = b->rx_missed_errors;
+
+	a->tx_aborted_errors = b->tx_aborted_errors;
+	a->tx_carrier_errors = b->tx_carrier_errors;
+	a->tx_fifo_errors = b->tx_fifo_errors;
+	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a->tx_window_errors = b->tx_window_errors;
+
+	a->rx_compressed = b->rx_compressed;
+	a->tx_compressed = b->tx_compressed;
+};
 
-	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+			    void *iwbuf, int iwbuflen, int type, u32 pid,
+			    u32 seq, u32 change, unsigned int flags)
+{
+	struct ifinfomsg *ifm;
+	struct nlmsghdr *nlh;
 
-	if (1) {
-		u32 txqlen = dev->tx_queue_len;
-		RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
-	}
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	if (1) {
-		u32 weight = dev->weight;
-		RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
-	}
+	ifm = nlmsg_data(nlh);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->__ifi_pad = 0;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_flags = dev_get_flags(dev);
+	ifm->ifi_change = change;
+
+	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+	NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
+	NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
+	NLA_PUT_U8(skb, IFLA_OPERSTATE,
+		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
+	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+
+	if (dev->ifindex != dev->iflink)
+		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+
+	if (dev->master)
+		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
 
-	if (1) {
-		u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN;
-		u8 link_mode = dev->link_mode;
-		RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
-		RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode);
-	}
+	if (dev->qdisc_sleeping)
+		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
 
 	if (1) {
 		struct rtnl_link_ifmap map = {
@@ -263,58 +295,38 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			.dma         = dev->dma,
 			.port        = dev->if_port,
 		};
-		RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+		NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
 	}
 
 	if (dev->addr_len) {
-		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
-		RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
-	}
-
-	if (1) {
-		u32 mtu = dev->mtu;
-		RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
-	}
-
-	if (dev->ifindex != dev->iflink) {
-		u32 iflink = dev->iflink;
-		RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
-	}
-
-	if (dev->qdisc_sleeping)
-		RTA_PUT(skb, IFLA_QDISC,
-			strlen(dev->qdisc_sleeping->ops->id) + 1,
-			dev->qdisc_sleeping->ops->id);
-	
-	if (dev->master) {
-		u32 master = dev->master->ifindex;
-		RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
+		NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+		NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
 	}
 
 	if (dev->get_stats) {
-		unsigned long *stats = (unsigned long*)dev->get_stats(dev);
+		struct net_device_stats *stats = dev->get_stats(dev);
 		if (stats) {
-			struct rtattr  *a;
-			__u32	       *s;
-			int		i;
-			int		n = sizeof(struct rtnl_link_stats)/4;
-
-			a = __RTA_PUT(skb, IFLA_STATS, n*4);
-			s = RTA_DATA(a);
-			for (i=0; i<n; i++)
-				s[i] = stats[i];
+			struct nlattr *attr;
+
+			attr = nla_reserve(skb, IFLA_STATS,
+					   sizeof(struct rtnl_link_stats));
+			if (attr == NULL)
+				goto nla_put_failure;
+
+			copy_rtnl_link_stats(nla_data(attr), stats);
 		}
 	}
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	if (iwbuf)
+		NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
-static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->args[0];
@@ -324,10 +336,9 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
-					  NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, 0,
-					  NLM_F_MULTI) <= 0)
+		if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
+				     NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -515,84 +526,69 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return err;
 }
 
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct ifinfomsg  *ifm = NLMSG_DATA(in_nlh);
-	struct rtattr    **ida = arg;
-	struct net_device *dev;
-	struct ifinfomsg *r;
-	struct nlmsghdr  *nlh;
-	int err = -ENOBUFS;
-	struct sk_buff *skb;
-	unsigned char	 *b;
-	char *iw_buf = NULL;
+	struct ifinfomsg *ifm;
+	struct nlattr *tb[IFLA_MAX+1];
+	struct net_device *dev = NULL;
+	struct sk_buff *nskb;
+	char *iw_buf = NULL, *iw = NULL;
 	int iw_buf_len = 0;
+	int err, payload;
 
-	if (ifm->ifi_index >= 0)
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index >= 0) {
 		dev = dev_get_by_index(ifm->ifi_index);
-	else
+		if (dev == NULL)
+			return -ENODEV;
+	} else
 		return -EINVAL;
-	if (!dev)
-		return -ENODEV;
 
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (ida[IFLA_WIRELESS - 1]) {
 
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+	if (tb[IFLA_WIRELESS]) {
 		/* Call Wireless Extensions. We need to know the size before
 		 * we can alloc. Various stuff checked in there... */
-		err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len);
-		if (err)
-			goto out;
+		err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]),
+					     &iw_buf, &iw_buf_len);
+		if (err < 0)
+			goto errout;
+
+		iw += IW_EV_POINT_OFF;
 	}
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-	/* Create a skb big enough to include all the data.
-	 * Some requests are way bigger than 4k... Jean II */
-	skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)),
-			GFP_KERNEL);
-	if (!skb)
-		goto out;
-	b = skb->tail;
-
-	/* Put in the message the usual good stuff */
-	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq,
-			RTM_NEWLINK, sizeof(*r));
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_UNSPEC;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev->flags;
-	r->ifi_change = 0;
-
-	/* Put the wireless payload if it exist */
-	if(iw_buf != NULL)
-		RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len,
-			iw_buf + IW_EV_POINT_OFF);
-
-	nlh->nlmsg_len = skb->tail - b;
-
-	/* Needed ? */
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+	payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) +
+			      nla_total_size(iw_buf_len));
+	nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	if (nskb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
+			       NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
+	if (err <= 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT);
 	if (err > 0)
 		err = 0;
-out:
-	if(iw_buf != NULL)
-		kfree(iw_buf);
+errout:
+	kfree(iw_buf);
 	dev_put(dev);
-	return err;
 
-rtattr_failure:
-nlmsg_failure:
-	kfree_skb(skb);
-	goto out;
+	return err;
 }
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->family;
@@ -623,11 +619,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 			       sizeof(struct rtnl_link_ifmap) +
 			       sizeof(struct rtnl_link_stats) + 128);
 
-	skb = alloc_skb(size, GFP_KERNEL);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb)
 		return;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) {
+	if (rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -757,14 +753,11 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
-	[RTM_GETLINK     - RTM_BASE] = {
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-					 .doit   = do_getlink,
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-					 .dumpit = rtnetlink_dump_ifinfo },
+	[RTM_GETLINK     - RTM_BASE] = { .doit   = rtnl_getlink,
+					 .dumpit = rtnl_dump_ifinfo	 },
 	[RTM_SETLINK     - RTM_BASE] = { .doit   = rtnl_setlink		 },
-	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
-	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
+	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
 	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
 	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
@@ -772,7 +765,7 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
 	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
 #endif
-	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
 	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
 };
-- 
GitLab


From 0844565fb8a9418f5a860aa480c1aef70319c9a2 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 4 Aug 2006 23:05:56 -0700
Subject: [PATCH 0408/1063] [NET]: Move netlink interface bits to linux/if.h

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if.h        | 129 ++++++++++++++++++++++++++++++++++++
 include/linux/rtnetlink.h | 133 +-------------------------------------
 2 files changed, 130 insertions(+), 132 deletions(-)

diff --git a/include/linux/if.h b/include/linux/if.h
index 374e20ad8b0d0..cd080d7653247 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -212,5 +212,134 @@ struct ifconf
 #define	ifc_buf	ifc_ifcu.ifcu_buf		/* buffer address	*/
 #define	ifc_req	ifc_ifcu.ifcu_req		/* array of structures	*/
 
+/* The struct should be in sync with struct net_device_stats */
+struct rtnl_link_stats
+{
+	__u32	rx_packets;		/* total packets received	*/
+	__u32	tx_packets;		/* total packets transmitted	*/
+	__u32	rx_bytes;		/* total bytes received 	*/
+	__u32	tx_bytes;		/* total bytes transmitted	*/
+	__u32	rx_errors;		/* bad packets received		*/
+	__u32	tx_errors;		/* packet transmit problems	*/
+	__u32	rx_dropped;		/* no space in linux buffers	*/
+	__u32	tx_dropped;		/* no space available in linux	*/
+	__u32	multicast;		/* multicast packets received	*/
+	__u32	collisions;
+
+	/* detailed rx_errors: */
+	__u32	rx_length_errors;
+	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u32	rx_frame_errors;	/* recv'd frame alignment error */
+	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u32	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u32	tx_aborted_errors;
+	__u32	tx_carrier_errors;
+	__u32	tx_fifo_errors;
+	__u32	tx_heartbeat_errors;
+	__u32	tx_window_errors;
+
+	/* for cslip etc */
+	__u32	rx_compressed;
+	__u32	tx_compressed;
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap
+{
+	__u64	mem_start;
+	__u64	mem_end;
+	__u64	base_addr;
+	__u16	irq;
+	__u8	dma;
+	__u8	port;
+};
+
+enum
+{
+	IFLA_UNSPEC,
+	IFLA_ADDRESS,
+	IFLA_BROADCAST,
+	IFLA_IFNAME,
+	IFLA_MTU,
+	IFLA_LINK,
+	IFLA_QDISC,
+	IFLA_STATS,
+	IFLA_COST,
+#define IFLA_COST IFLA_COST
+	IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+	IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+	IFLA_PROTINFO,		/* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+	IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+	IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+	IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+	IFLA_OPERSTATE,
+	IFLA_LINKMODE,
+	__IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* ifi_flags.
+
+   IFF_* flags.
+
+   The only change is:
+   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+   more not changeable by user. They describe link media
+   characteristics and set by device driver.
+
+   Comments:
+   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+   - If neither of these three flags are set;
+     the interface is NBMA.
+
+   - IFF_MULTICAST does not mean anything special:
+   multicasts can be used on all not-NBMA links.
+   IFF_MULTICAST means that this media uses special encapsulation
+   for multicast frames. Apparently, all IFF_POINTOPOINT and
+   IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+   For usual devices it is equal ifi_index.
+   If it is a "virtual interface" (f.e. tunnel), ifi_link
+   can point to real physical interface (f.e. for bandwidth calculations),
+   or maybe 0, what means, that real media is unknown (usual
+   for IPIP tunnels, when route to endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum
+{
+	IFLA_INET6_UNSPEC,
+	IFLA_INET6_FLAGS,	/* link flags			*/
+	IFLA_INET6_CONF,	/* sysctl parameters		*/
+	IFLA_INET6_STATS,	/* statistics			*/
+	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
+	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
+	__IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
+
+struct ifla_cacheinfo
+{
+	__u32	max_reasm_len;
+	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
+	__u32	reachable_time;
+	__u32	retrans_time;
+};
 
 #endif /* _LINUX_IF_H */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 890c4d4038b61..84f3eb426da20 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -2,6 +2,7 @@
 #define __LINUX_RTNETLINK_H
 
 #include <linux/netlink.h>
+#include <linux/if.h>
 
 /****
  *		Routing/neighbour discovery messages.
@@ -607,138 +608,6 @@ struct prefix_cacheinfo
 	__u32	valid_time;
 };
 
-/* The struct should be in sync with struct net_device_stats */
-struct rtnl_link_stats
-{
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
-	__u32	collisions;
-
-	/* detailed rx_errors: */
-	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
-
-	/* detailed tx_errors */
-	__u32	tx_aborted_errors;
-	__u32	tx_carrier_errors;
-	__u32	tx_fifo_errors;
-	__u32	tx_heartbeat_errors;
-	__u32	tx_window_errors;
-	
-	/* for cslip etc */
-	__u32	rx_compressed;
-	__u32	tx_compressed;
-};
-
-/* The struct should be in sync with struct ifmap */
-struct rtnl_link_ifmap
-{
-	__u64	mem_start;
-	__u64	mem_end;
-	__u64	base_addr;
-	__u16	irq;
-	__u8	dma;
-	__u8	port;
-};
-
-enum
-{
-	IFLA_UNSPEC,
-	IFLA_ADDRESS,
-	IFLA_BROADCAST,
-	IFLA_IFNAME,
-	IFLA_MTU,
-	IFLA_LINK,
-	IFLA_QDISC,
-	IFLA_STATS,
-	IFLA_COST,
-#define IFLA_COST IFLA_COST
-	IFLA_PRIORITY,
-#define IFLA_PRIORITY IFLA_PRIORITY
-	IFLA_MASTER,
-#define IFLA_MASTER IFLA_MASTER
-	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
-#define IFLA_WIRELESS IFLA_WIRELESS
-	IFLA_PROTINFO,		/* Protocol specific information for a link */
-#define IFLA_PROTINFO IFLA_PROTINFO
-	IFLA_TXQLEN,
-#define IFLA_TXQLEN IFLA_TXQLEN
-	IFLA_MAP,
-#define IFLA_MAP IFLA_MAP
-	IFLA_WEIGHT,
-#define IFLA_WEIGHT IFLA_WEIGHT
-	IFLA_OPERSTATE,
-	IFLA_LINKMODE,
-	__IFLA_MAX
-};
-
-
-#define IFLA_MAX (__IFLA_MAX - 1)
-
-#define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
-#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
-
-/* ifi_flags.
-
-   IFF_* flags.
-
-   The only change is:
-   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
-   more not changeable by user. They describe link media
-   characteristics and set by device driver.
-
-   Comments:
-   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
-   - If neither of these three flags are set;
-     the interface is NBMA.
-
-   - IFF_MULTICAST does not mean anything special:
-   multicasts can be used on all not-NBMA links.
-   IFF_MULTICAST means that this media uses special encapsulation
-   for multicast frames. Apparently, all IFF_POINTOPOINT and
-   IFF_BROADCAST devices are able to use multicasts too.
- */
-
-/* IFLA_LINK.
-   For usual devices it is equal ifi_index.
-   If it is a "virtual interface" (f.e. tunnel), ifi_link
-   can point to real physical interface (f.e. for bandwidth calculations),
-   or maybe 0, what means, that real media is unknown (usual
-   for IPIP tunnels, when route to endpoint is allowed to change)
- */
-
-/* Subtype attributes for IFLA_PROTINFO */
-enum
-{
-	IFLA_INET6_UNSPEC,
-	IFLA_INET6_FLAGS,	/* link flags			*/
-	IFLA_INET6_CONF,	/* sysctl parameters		*/
-	IFLA_INET6_STATS,	/* statistics			*/
-	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
-	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
-	__IFLA_INET6_MAX
-};
-
-#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
-
-struct ifla_cacheinfo
-{
-	__u32	max_reasm_len;
-	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
-	__u32	reachable_time;
-	__u32	retrans_time;
-};
 
 /*****************************************************************
  *		Traffic control messages.
-- 
GitLab


From 8584d6df39db5601965f9bc5e3bf2fea833ad7bb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Aug 2006 00:56:16 -0700
Subject: [PATCH 0409/1063] [NETFILTER]: netbios conntrack: fix compile

Fix compile breakage caused by the move of IFA_F_SECONDARY to the new
<linux/if_addr.h> header file.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index a566a81325b2d..3d0b438783db0 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -21,6 +21,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/if_addr.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/route.h>
-- 
GitLab


From 84fa7933a33f806bbbaae6775e87459b1ec584c0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 29 Aug 2006 16:44:56 -0700
Subject: [PATCH 0410/1063] [NET]: Replace CHECKSUM_HW by
 CHECKSUM_PARTIAL/CHECKSUM_COMPLETE

Replace CHECKSUM_HW by CHECKSUM_PARTIAL (for outgoing packets, whose
checksum still needs to be completed) and CHECKSUM_COMPLETE (for
incoming packets, for which the device supplied the full checksum).

Patch originally from Herbert Xu, updated by myself for 2.6.18-rc3.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/he.c                            |  2 +-
 drivers/net/3c59x.c                         |  2 +-
 drivers/net/8139cp.c                        |  6 +++---
 drivers/net/acenic.c                        |  8 ++++----
 drivers/net/bnx2.c                          |  2 +-
 drivers/net/cassini.c                       |  4 ++--
 drivers/net/chelsio/sge.c                   | 10 +++++-----
 drivers/net/dl2k.c                          |  2 +-
 drivers/net/e1000/e1000_main.c              |  8 ++++----
 drivers/net/forcedeth.c                     |  3 ++-
 drivers/net/gianfar.c                       |  2 +-
 drivers/net/hamachi.c                       |  2 +-
 drivers/net/ibm_emac/ibm_emac_core.c        |  2 +-
 drivers/net/ioc3-eth.c                      |  2 +-
 drivers/net/ixgb/ixgb_main.c                |  2 +-
 drivers/net/mv643xx_eth.c                   |  2 +-
 drivers/net/myri10ge/myri10ge.c             |  8 ++++----
 drivers/net/ns83820.c                       |  2 +-
 drivers/net/r8169.c                         |  2 +-
 drivers/net/s2io.c                          |  2 +-
 drivers/net/sk98lin/skge.c                  |  6 +++---
 drivers/net/skge.c                          |  4 ++--
 drivers/net/sky2.c                          |  6 +++---
 drivers/net/starfire.c                      |  6 +++---
 drivers/net/sungem.c                        |  4 ++--
 drivers/net/sunhme.c                        |  6 +++---
 drivers/net/tg3.c                           |  6 +++---
 drivers/net/typhoon.c                       |  2 +-
 drivers/net/via-rhine.c                     |  2 +-
 drivers/net/via-velocity.c                  |  2 +-
 include/linux/netdevice.h                   |  4 ++--
 include/linux/skbuff.h                      | 17 +++++++++--------
 net/core/datagram.c                         |  4 ++--
 net/core/dev.c                              | 12 ++++++------
 net/core/netpoll.c                          |  2 +-
 net/core/skbuff.c                           | 14 +++++++-------
 net/ipv4/icmp.c                             |  2 +-
 net/ipv4/igmp.c                             |  2 +-
 net/ipv4/ip_fragment.c                      |  2 +-
 net/ipv4/ip_gre.c                           |  4 ++--
 net/ipv4/ip_output.c                        |  6 +++---
 net/ipv4/ipvs/ip_vs_proto_tcp.c             |  8 ++++----
 net/ipv4/ipvs/ip_vs_proto_udp.c             |  8 ++++----
 net/ipv4/netfilter.c                        |  2 +-
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c |  3 +--
 net/ipv4/netfilter/ip_conntrack_proto_udp.c |  3 +--
 net/ipv4/netfilter/ip_nat_standalone.c      |  5 +++--
 net/ipv4/netfilter/ip_queue.c               |  6 +++---
 net/ipv4/netfilter/ipt_ECN.c                |  9 +++++----
 net/ipv4/netfilter/ipt_TCPMSS.c             |  5 +++--
 net/ipv4/tcp.c                              |  8 ++++----
 net/ipv4/tcp_ipv4.c                         |  6 +++---
 net/ipv4/tcp_output.c                       | 18 ++++++++----------
 net/ipv4/udp.c                              |  6 +++---
 net/ipv4/xfrm4_output.c                     |  4 ++--
 net/ipv6/exthdrs.c                          |  2 +-
 net/ipv6/icmp.c                             |  2 +-
 net/ipv6/ip6_output.c                       |  2 +-
 net/ipv6/netfilter.c                        |  2 +-
 net/ipv6/netfilter/ip6_queue.c              |  6 +++---
 net/ipv6/netfilter/nf_conntrack_reasm.c     |  6 +++---
 net/ipv6/raw.c                              |  2 +-
 net/ipv6/reassembly.c                       |  6 +++---
 net/ipv6/tcp_ipv6.c                         |  6 +++---
 net/ipv6/udp.c                              |  2 +-
 net/ipv6/xfrm6_output.c                     |  4 ++--
 net/netfilter/nf_conntrack_proto_tcp.c      |  3 +--
 net/netfilter/nf_conntrack_proto_udp.c      |  3 +--
 net/netfilter/nfnetlink_queue.c             |  6 +++---
 net/packet/af_packet.c                      |  2 +-
 net/sched/sch_netem.c                       |  4 ++--
 net/sunrpc/socklib.c                        |  2 +-
 72 files changed, 168 insertions(+), 169 deletions(-)

diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index ffcb9fd31c38f..41e052fecd7fd 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1912,7 +1912,7 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 				skb->tail = skb->data + skb->len;
 #ifdef USE_CHECKSUM_HW
 				if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) {
-					skb->ip_summed = CHECKSUM_HW;
+					skb->ip_summed = CHECKSUM_COMPLETE;
 					skb->csum = TCP_CKSUM(skb->data,
 							he_vcc->pdu_len);
 				}
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 80e8ca013e448..29dede2eaa850 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -2077,7 +2077,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	vp->tx_ring[entry].next = 0;
 #if DO_ZEROCOPY
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
 	else
 			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 1428bb7715afd..a48b211c489df 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -813,7 +813,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 
 		if (mss)
 			flags |= LargeSend | ((mss & MSSMask) << MSSShift);
-		else if (skb->ip_summed == CHECKSUM_HW) {
+		else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			const struct iphdr *ip = skb->nh.iph;
 			if (ip->protocol == IPPROTO_TCP)
 				flags |= IPCS | TCPCS;
@@ -867,7 +867,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 			if (mss)
 				ctrl |= LargeSend |
 					((mss & MSSMask) << MSSShift);
-			else if (skb->ip_summed == CHECKSUM_HW) {
+			else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				if (ip->protocol == IPPROTO_TCP)
 					ctrl |= IPCS | TCPCS;
 				else if (ip->protocol == IPPROTO_UDP)
@@ -898,7 +898,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 		txd->addr = cpu_to_le64(first_mapping);
 		wmb();
 
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			if (ip->protocol == IPPROTO_TCP)
 				txd->opts1 = cpu_to_le32(first_eor | first_len |
 							 FirstFrag | DescOwn |
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 1c01e9b3d07c8..826548644d7b1 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -2040,7 +2040,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
 		 */
 		if (bd_flags & BD_FLG_TCP_UDP_SUM) {
 			skb->csum = htons(csum);
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 		} else {
 			skb->ip_summed = CHECKSUM_NONE;
 		}
@@ -2511,7 +2511,7 @@ static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		mapping = ace_map_tx_skb(ap, skb, skb, idx);
 		flagsize = (skb->len << 16) | (BD_FLG_END);
-		if (skb->ip_summed == CHECKSUM_HW)
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flagsize |= BD_FLG_TCP_UDP_SUM;
 #if ACENIC_DO_VLAN
 		if (vlan_tx_tag_present(skb)) {
@@ -2534,7 +2534,7 @@ static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		mapping = ace_map_tx_skb(ap, skb, NULL, idx);
 		flagsize = (skb_headlen(skb) << 16);
-		if (skb->ip_summed == CHECKSUM_HW)
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flagsize |= BD_FLG_TCP_UDP_SUM;
 #if ACENIC_DO_VLAN
 		if (vlan_tx_tag_present(skb)) {
@@ -2560,7 +2560,7 @@ static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev)
 					       PCI_DMA_TODEVICE);
 
 			flagsize = (frag->size << 16);
-			if (skb->ip_summed == CHECKSUM_HW)
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				flagsize |= BD_FLG_TCP_UDP_SUM;
 			idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
 
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 652eb05a6c2df..7857b46301244 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4423,7 +4423,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	ring_prod = TX_RING_IDX(prod);
 
 	vlan_tag_flags = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM;
 	}
 
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index a31544ccb3c49..558fdb8ad2dc2 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2167,7 +2167,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 			cas_page_unmap(addr);
 	}
 	skb->csum = ntohs(i ^ 0xffff);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_COMPLETE;
 	skb->protocol = eth_type_trans(skb, cp->dev);
 	return len;
 }
@@ -2821,7 +2821,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 	}
 
 	ctrl = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u64 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u64) (skb->h.raw - skb->data);
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 61b3754f50ff6..ddd0bdb498f4f 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1470,9 +1470,9 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 
 		if (!(adapter->flags & UDP_CSUM_CAPABLE) &&
-		    skb->ip_summed == CHECKSUM_HW &&
+		    skb->ip_summed == CHECKSUM_PARTIAL &&
 		    skb->nh.iph->protocol == IPPROTO_UDP)
-			if (unlikely(skb_checksum_help(skb, 0))) {
+			if (unlikely(skb_checksum_help(skb))) {
 				dev_kfree_skb_any(skb);
 				return NETDEV_TX_OK;
 			}
@@ -1495,11 +1495,11 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl));
 		cpl->opcode = CPL_TX_PKT;
 		cpl->ip_csum_dis = 1;    /* SW calculates IP csum */
-		cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 0 : 1;
+		cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_PARTIAL ? 0 : 1;
 		/* the length field isn't used so don't bother setting it */
 
-		st->tx_cso += (skb->ip_summed == CHECKSUM_HW);
-		sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW);
+		st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL);
+		sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_PARTIAL);
 		sge->stats.tx_reg_pkts++;
 	}
 	cpl->iff = dev->if_port;
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index 402961e68c89f..b74e67654764f 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -611,7 +611,7 @@ start_xmit (struct sk_buff *skb, struct net_device *dev)
 	txdesc = &np->tx_ring[entry];
 
 #if 0
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		txdesc->status |=
 		    cpu_to_le64 (TCPChecksumEnable | UDPChecksumEnable |
 				 IPChecksumEnable);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 98ef9f85482f0..2ab9f96f5dab8 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2600,7 +2600,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 	unsigned int i;
 	uint8_t css;
 
-	if (likely(skb->ip_summed == CHECKSUM_HW)) {
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		css = skb->h.raw - skb->data;
 
 		i = tx_ring->next_to_use;
@@ -2927,11 +2927,11 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	/* reserve a descriptor for the offload context */
-	if ((mss) || (skb->ip_summed == CHECKSUM_HW))
+	if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
 		count++;
 	count++;
 #else
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		count++;
 #endif
 
@@ -3608,7 +3608,7 @@ e1000_rx_checksum(struct e1000_adapter *adapter,
 		 */
 		csum = ntohl(csum ^ 0xFFFF);
 		skb->csum = csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 	adapter->hw_csum_good++;
 }
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 11b8f1b43dd59..32cacf115f754 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1503,7 +1503,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
 	else
 #endif
-	tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
+	tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
+			 NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
 
 	/* vlan tag */
 	if (np->vlangrp && vlan_tx_tag_present(skb)) {
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index ebbbd6ca6204e..ba960913c034f 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -947,7 +947,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Set up checksumming */
 	if (likely((dev->features & NETIF_F_IP_CSUM)
-			&& (CHECKSUM_HW == skb->ip_summed))) {
+			&& (CHECKSUM_PARTIAL == skb->ip_summed))) {
 		fcb = gfar_add_fcb(skb, txbdp);
 		status |= TXBD_TOE;
 		gfar_tx_checksum(skb, fcb);
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index 409c6aab0411c..763373ae96660 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1648,7 +1648,7 @@ static int hamachi_rx(struct net_device *dev)
 						* could do the pseudo myself and return
 						* CHECKSUM_UNNECESSARY
 						*/
-						skb->ip_summed = CHECKSUM_HW;
+						skb->ip_summed = CHECKSUM_COMPLETE;
 					}
 				}	
 			}
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
index 82468e2dc7991..57e214d85e9ac 100644
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1036,7 +1036,7 @@ static inline u16 emac_tx_csum(struct ocp_enet_private *dev,
 			       struct sk_buff *skb)
 {
 #if defined(CONFIG_IBM_EMAC_TAH)
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		++dev->stats.tx_packets_csum;
 		return EMAC_TX_CTRL_TAH_CSUM;
 	}
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 68d8af7df08e3..65f897ddb9205 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1387,7 +1387,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * MAC header which should not be summed and the TCP/UDP pseudo headers
 	 * manually.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int proto = ntohs(skb->nh.iph->protocol);
 		unsigned int csoff;
 		struct iphdr *ih = skb->nh.iph;
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 7bbd447289b58..9405b44f32148 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1232,7 +1232,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb)
 	unsigned int i;
 	uint8_t css, cso;
 
-	if(likely(skb->ip_summed == CHECKSUM_HW)) {
+	if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		css = skb->h.raw - skb->data;
 		cso = (skb->h.raw + skb->csum) - skb->data;
 
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index eeab1df5bef3f..38df58fdb358e 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1147,7 +1147,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 	desc->byte_cnt = length;
 	desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		BUG_ON(skb->protocol != ETH_P_IP);
 
 		cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM |
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 9bdd43ab3573a..9f16681d0e7ee 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -930,7 +930,7 @@ static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, u16 hw_csum)
 	    (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) ||
 	     vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) {
 		skb->csum = hw_csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 }
 
@@ -973,7 +973,7 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 		if ((skb->protocol == ntohs(ETH_P_IP)) ||
 		    (skb->protocol == ntohs(ETH_P_IPV6))) {
 			skb->csum = ntohs((u16) csum);
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 		} else
 			myri10ge_vlan_ip_csum(skb, ntohs((u16) csum));
 	}
@@ -1897,13 +1897,13 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
 	pseudo_hdr_offset = 0;
 	odd_flag = 0;
 	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
-	if (likely(skb->ip_summed == CHECKSUM_HW)) {
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		cksum_offset = (skb->h.raw - skb->data);
 		pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data;
 		/* If the headers are excessively large, then we must
 		 * fall back to a software checksum */
 		if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) {
-			if (skb_checksum_help(skb, 0))
+			if (skb_checksum_help(skb))
 				goto drop;
 			cksum_offset = 0;
 			pseudo_hdr_offset = 0;
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 0e76859c90a2a..5143f5dbb2e53 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -1153,7 +1153,7 @@ static int ns83820_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	if (!nr_frags)
 		frag = NULL;
 	extsts = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		extsts |= EXTSTS_IPPKT;
 		if (IPPROTO_TCP == skb->nh.iph->protocol)
 			extsts |= EXTSTS_TCPPKT;
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 4c2f575faad70..d9b960aa9b0db 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2169,7 +2169,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
 		if (mss)
 			return LargeSend | ((mss & MSSMask) << MSSShift);
 	}
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		const struct iphdr *ip = skb->nh.iph;
 
 		if (ip->protocol == IPPROTO_TCP)
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index e72e0e099060f..5b3713f622d72 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -3893,7 +3893,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 		txdp->Control_1 |= TXD_TCP_LSO_MSS(s2io_tcp_mss(skb));
 	}
 #endif
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		txdp->Control_2 |=
 		    (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN |
 		     TXD_TX_CKO_UDP_EN);
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index ee62845d3ac9d..eb3b35180c2fa 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -1559,7 +1559,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 	pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32);
 	pTxd->pMBuf     = pMessage;
 
-	if (pMessage->ip_summed == CHECKSUM_HW) {
+	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdrlen = pMessage->h.raw - pMessage->data;
 		u16 offset = hdrlen + pMessage->csum;
 
@@ -1678,7 +1678,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 	/* 
 	** Does the HW need to evaluate checksum for TCP or UDP packets? 
 	*/
-	if (pMessage->ip_summed == CHECKSUM_HW) {
+	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdrlen = pMessage->h.raw - pMessage->data;
 		u16 offset = hdrlen + pMessage->csum;
 
@@ -2158,7 +2158,7 @@ SK_U64			PhysAddr;
 
 #ifdef USE_SK_RX_CHECKSUM
 		pMsg->csum = pRxd->TcpSums & 0xffff;
-		pMsg->ip_summed = CHECKSUM_HW;
+		pMsg->ip_summed = CHECKSUM_COMPLETE;
 #else
 		pMsg->ip_summed = CHECKSUM_NONE;
 #endif
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index ad878dfddef46..b3d6fa3d6df4b 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2338,7 +2338,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	td->dma_lo = map;
 	td->dma_hi = map >> 32;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int offset = skb->h.raw - skb->data;
 
 		/* This seems backwards, but it is what the sk98lin
@@ -2642,7 +2642,7 @@ static inline struct sk_buff *skge_rx_get(struct skge_port *skge,
 	skb->dev = skge->netdev;
 	if (skge->rx_csum) {
 		skb->csum = csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 
 	skb->protocol = eth_type_trans(skb, skge->netdev);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 933e87f1cc687..8e92566b587ee 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1163,7 +1163,7 @@ static unsigned tx_le_req(const struct sk_buff *skb)
 	if (skb_is_gso(skb))
 		++count;
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		++count;
 
 	return count;
@@ -1272,7 +1272,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 #endif
 
 	/* Handle TCP checksum offload */
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdr = skb->h.raw - skb->data;
 		u16 offset = hdr + skb->csum;
 
@@ -2000,7 +2000,7 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do)
 #endif
 		case OP_RXCHKS:
 			skb = sky2->rx_ring[sky2->rx_next].skb;
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 			skb->csum = le16_to_cpu(status);
 			break;
 
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index c0a62b00ffc89..2607aa51d8e03 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1230,7 +1230,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev)
 	}
 
 #if defined(ZEROCOPY) && defined(HAS_BROKEN_FIRMWARE)
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK))
 			return NETDEV_TX_OK;
 	}
@@ -1252,7 +1252,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev)
 				status |= TxDescIntr;
 				np->reap_tx = 0;
 			}
-			if (skb->ip_summed == CHECKSUM_HW) {
+			if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				status |= TxCalTCP;
 				np->stats.tx_compressed++;
 			}
@@ -1499,7 +1499,7 @@ static int __netdev_rx(struct net_device *dev, int *quota)
 		 * Until then, the printk stays. :-) -Ion
 		 */
 		else if (le16_to_cpu(desc->status2) & 0x0040) {
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 			skb->csum = le16_to_cpu(desc->csum);
 			printk(KERN_DEBUG "%s: checksum_hw, status2 = %#x\n", dev->name, le16_to_cpu(desc->status2));
 		}
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index d7b1d1882cab5..b388651b78361 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -855,7 +855,7 @@ static int gem_rx(struct gem *gp, int work_to_do)
 		}
 
 		skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff);
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->protocol = eth_type_trans(skb, gp->dev);
 
 		netif_receive_skb(skb);
@@ -1026,7 +1026,7 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	unsigned long flags;
 
 	ctrl = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u64 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u64) (skb->h.raw - skb->data);
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index c6f5bc3c042ff..17981da22730e 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -1207,7 +1207,7 @@ static void happy_meal_transceiver_check(struct happy_meal *hp, void __iomem *tr
  * flags, thus:
  *
  * 	skb->csum = rxd->rx_flags & 0xffff;
- * 	skb->ip_summed = CHECKSUM_HW;
+ * 	skb->ip_summed = CHECKSUM_COMPLETE;
  *
  * before sending off the skb to the protocols, and we are good as gold.
  */
@@ -2074,7 +2074,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
 
 		/* This card is _fucking_ hot... */
 		skb->csum = ntohs(csum ^ 0xffff);
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 
 		RXD(("len=%d csum=%4x]", len, csum));
 		skb->protocol = eth_type_trans(skb, dev);
@@ -2268,7 +2268,7 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev)
  	u32 tx_flags;
 
 	tx_flags = TXFLAG_OWN;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u32 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u32) (skb->h.raw - skb->data);
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index eafabb253f08d..6f5d3a38c5822 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3851,11 +3851,11 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb->h.th->check = 0;
 
 	}
-	else if (skb->ip_summed == CHECKSUM_HW)
+	else if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #else
 	mss = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #endif
 #if TG3_VLAN_TAG_USED
@@ -3981,7 +3981,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 
 	entry = tp->tx_prod;
 	base_flags = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #if TG3_TSO_SUPPORT != 0
 	mss = 0;
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 4103c37172f9b..c6e601dc6bbcf 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -830,7 +830,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
 	first_txd->addrHi = (u64)((unsigned long) skb) >> 32;
 	first_txd->processFlags = 0;
 
-	if(skb->ip_summed == CHECKSUM_HW) {
+	if(skb->ip_summed == CHECKSUM_PARTIAL) {
 		/* The 3XP will figure out if this is UDP/TCP */
 		first_txd->processFlags |= TYPHOON_TX_PF_TCP_CHKSUM;
 		first_txd->processFlags |= TYPHOON_TX_PF_UDP_CHKSUM;
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index ae971080e2e43..66547159bfd9b 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1230,7 +1230,7 @@ static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev)
 	rp->tx_skbuff[entry] = skb;
 
 	if ((rp->quirks & rqRhineI) &&
-	    (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_HW)) {
+	    (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_PARTIAL)) {
 		/* Must use alignment buffer. */
 		if (skb->len > PKT_BUF_SZ) {
 			/* packet too long, drop it */
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index aa9cd92f46b2d..f1e0c746a388e 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -2002,7 +2002,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Handle hardware checksum
 	 */
 	if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
-				 && (skb->ip_summed == CHECKSUM_HW)) {
+				 && (skb->ip_summed == CHECKSUM_PARTIAL)) {
 		struct iphdr *ip = skb->nh.iph;
 		if (ip->protocol == IPPROTO_TCP)
 			td_ptr->tdesc1.TCR |= TCR0_TCPCK;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 50a4719512ede..4f2c2b6beb5e1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -976,7 +976,7 @@ extern void		dev_mcast_init(void);
 extern int		netdev_max_backlog;
 extern int		weight_p;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
-extern int skb_checksum_help(struct sk_buff *skb, int inward);
+extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
@@ -1012,7 +1012,7 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
 	return skb_is_gso(skb) &&
 	       (!skb_gso_ok(skb, dev->features) ||
-		unlikely(skb->ip_summed != CHECKSUM_HW));
+		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
 /* On bonding slaves other than the currently active slave, suppress
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 755e9cddac47e..85577a4ffa618 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -34,8 +34,9 @@
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
 
 #define CHECKSUM_NONE 0
-#define CHECKSUM_HW 1
+#define CHECKSUM_PARTIAL 1
 #define CHECKSUM_UNNECESSARY 2
+#define CHECKSUM_COMPLETE 3
 
 #define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
 				 ~(SMP_CACHE_BYTES - 1))
@@ -56,17 +57,17 @@
  *	      Apparently with secret goal to sell you new device, when you
  *	      will add new protocol to your host. F.e. IPv6. 8)
  *
- *	HW: the most generic way. Device supplied checksum of _all_
+ *	COMPLETE: the most generic way. Device supplied checksum of _all_
  *	    the packet as seen by netif_rx in skb->csum.
  *	    NOTE: Even if device supports only some protocols, but
- *	    is able to produce some skb->csum, it MUST use HW,
+ *	    is able to produce some skb->csum, it MUST use COMPLETE,
  *	    not UNNECESSARY.
  *
  * B. Checksumming on output.
  *
  *	NONE: skb is checksummed by protocol or csum is not required.
  *
- *	HW: device is required to csum packet as seen by hard_start_xmit
+ *	PARTIAL: device is required to csum packet as seen by hard_start_xmit
  *	from skb->h.raw to the end and to record the checksum
  *	at skb->h.raw+skb->csum.
  *
@@ -1261,14 +1262,14 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
  *	@len: length of data pulled
  *
  *	After doing a pull on a received packet, you need to call this to
- *	update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE
- *	so that it can be recomputed from scratch.
+ *	update the CHECKSUM_COMPLETE checksum, or set ip_summed to
+ *	CHECKSUM_NONE so that it can be recomputed from scratch.
  */
 
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
 }
 
@@ -1287,7 +1288,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 {
 	if (likely(len >= skb->len))
 		return 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 	return __pskb_trim(skb, len);
 }
diff --git a/net/core/datagram.c b/net/core/datagram.c
index aecddcc304012..f558c61aecc76 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb)
 
 	sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
 	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
@@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 			goto fault;
 		if ((unsigned short)csum_fold(csum))
 			goto csum_error;
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		iov->iov_len -= chunk;
 		iov->iov_base += chunk;
diff --git a/net/core/dev.c b/net/core/dev.c
index d4a1ec3bded5f..fc82f6f6e1c17 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1166,12 +1166,12 @@ EXPORT_SYMBOL(netif_device_attach);
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
  */
-int skb_checksum_help(struct sk_buff *skb, int inward)
+int skb_checksum_help(struct sk_buff *skb)
 {
 	unsigned int csum;
 	int ret = 0, offset = skb->h.raw - skb->data;
 
-	if (inward)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		goto out_set_summed;
 
 	if (unlikely(skb_shinfo(skb)->gso_size)) {
@@ -1223,7 +1223,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	skb->mac_len = skb->nh.raw - skb->data;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
@@ -1232,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
-			if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -1444,11 +1444,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 	/* If packet is not checksummed and device does not support
 	 * checksumming for this protocol, complete checksumming here.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    (!(dev->features & NETIF_F_GEN_CSUM) &&
 	     (!(dev->features & NETIF_F_IP_CSUM) ||
 	      skb->protocol != htons(ETH_P_IP))))
-	      	if (skb_checksum_help(skb, 0))
+	      	if (skb_checksum_help(skb))
 	      		goto out_kfree_skb;
 
 gso:
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 471da451cd487..ead5920c26d64 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -110,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 
 	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
 
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
 	    !(u16)csum_fold(csum_add(psum, skb->csum)))
 		return 0;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c54f3664bce5b..8a476f1956e58 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1397,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	unsigned int csum;
 	long csstart;
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		csstart = skb->h.raw - skb->data;
 	else
 		csstart = skb_headlen(skb);
@@ -1411,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
 					      skb->len - csstart, 0);
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		long csstuff = csstart + skb->csum;
 
 		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
@@ -1898,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
  *	@len: length of data pulled
  *
  *	This function performs an skb_pull on the packet and updates
- *	update the CHECKSUM_HW checksum.  It should be used on receive
- *	path processing instead of skb_pull unless you know that the
- *	checksum difference is zero (e.g., a valid IP header) or you
- *	are setting ip_summed to CHECKSUM_NONE.
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_pull unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
  */
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 {
@@ -1994,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		frag = skb_shinfo(nskb)->frags;
 		k = 0;
 
-		nskb->ip_summed = CHECKSUM_HW;
+		nskb->ip_summed = CHECKSUM_PARTIAL;
 		nskb->csum = skb->csum;
 		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6ad797c14163e..6d223e5c67410 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -930,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb)
 	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8e8117c19e4db..7003e763d9700 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb)
 		goto drop;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b84b53a475265..8d7f107c2eefd 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &ip_frag_mem);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0f9b3a31997be..e66f6ff2e1989 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -576,7 +576,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 		if (flags&GRE_CSUM) {
 			switch (skb->ip_summed) {
-			case CHECKSUM_HW:
+			case CHECKSUM_COMPLETE:
 				csum = (u16)csum_fold(skb->csum);
 				if (!csum)
 					break;
@@ -584,7 +584,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 			case CHECKSUM_NONE:
 				skb->csum = 0;
 				csum = __skb_checksum_complete(skb);
-				skb->ip_summed = CHECKSUM_HW;
+				skb->ip_summed = CHECKSUM_COMPLETE;
 			}
 			offset += 4;
 		}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 308bdeac3455a..1b9b6742ef772 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -680,7 +680,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
 {
 	struct iovec *iov = from;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
 			return -EFAULT;
 	} else {
@@ -736,7 +736,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 		sk->sk_sndmsg_off = 0;
 	}
@@ -844,7 +844,7 @@ int ip_append_data(struct sock *sk,
 	    length + fragheaderlen <= mtu &&
 	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
 	    !exthdrlen)
-		csummode = CHECKSUM_HW;
+		csummode = CHECKSUM_PARTIAL;
 
 	inet->cork.length += length;
 	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bc28b1160a3aa..820e8318d10df 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
 				      skb->len - tcphoff,
 				      skb->nh.iph->protocol, skb->csum)) {
@@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 		}
 		break;
 	default:
-		/* CHECKSUM_UNNECESSARY */
+		/* No need to checksum. */
 		break;
 	}
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 89d9175d8f288..90c8166c0ec12 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 		case CHECKSUM_NONE:
 			skb->csum = skb_checksum(skb, udphoff,
 						 skb->len - udphoff, 0);
-		case CHECKSUM_HW:
+		case CHECKSUM_COMPLETE:
 			if (csum_tcpudp_magic(skb->nh.iph->saddr,
 					      skb->nh.iph->daddr,
 					      skb->len - udphoff,
@@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 			}
 			break;
 		default:
-			/* CHECKSUM_UNNECESSARY */
+			/* No need to checksum. */
 			break;
 		}
 	}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6a9e34b794bc7..f88347de21a99 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 	unsigned int csum = 0;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
 			break;
 		if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index fb920e76ec106..9de81ff645d50 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -865,8 +865,7 @@ static int tcp_error(struct sk_buff *skb,
   
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 9b2c16b4d2ffd..e58e52f145536 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 17de077a79016..f4f00c816d874 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -111,8 +111,9 @@ ip_nat_fn(unsigned int hooknum,
 		       & htons(IP_MF|IP_OFFSET)));
 
 	/* If we had a hardware checksum before, it's now invalid */
-	if ((*pskb)->ip_summed == CHECKSUM_HW)
-		if (skb_checksum_help(*pskb, (out == NULL)))
+	if ((*pskb)->ip_summed == CHECKSUM_PARTIAL ||
+	    (*pskb)->ip_summed == CHECKSUM_COMPLETE)
+		if (skb_checksum_help(*pskb))
 			return NF_DROP;
 
 	ct = ip_conntrack_get(*pskb, &ctinfo);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 198ac36db8612..276a964ee6cfc 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 		break;
 	
 	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
 		}
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4adf5c9d34f52..4ec43f98fe492 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -49,7 +49,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 
 /* Return 0 if there was an error. */
 static inline int
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
 	u_int16_t diffs[2];
@@ -70,8 +70,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
 		return 0;
 	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
 
-	if ((*pskb)->ip_summed == CHECKSUM_HW &&
-	    skb_checksum_help(*pskb, inward))
+	if (((*pskb)->ip_summed == CHECKSUM_PARTIAL ||
+	     (*pskb)->ip_summed == CHECKSUM_COMPLETE) &&
+	    skb_checksum_help(*pskb))
 		return 0;
 
 	diffs[0] = ((u_int16_t *)tcph)[6];
@@ -106,7 +107,7 @@ target(struct sk_buff **pskb,
 
 	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
 	    && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
-		if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+		if (!set_ect_tcp(pskb, einfo))
 			return NF_DROP;
 
 	return IPT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index ef2fe5b3f0d8f..c998dc0fcd159 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -62,8 +62,9 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	if ((*pskb)->ip_summed == CHECKSUM_HW &&
-	    skb_checksum_help(*pskb, out == NULL))
+	if (((*pskb)->ip_summed == CHECKSUM_PARTIAL ||
+	     (*pskb)->ip_summed == CHECKSUM_COMPLETE) &&
+	    skb_checksum_help(*pskb))
 		return NF_DROP;
 
 	iph = (*pskb)->nh.iph;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 934396bb1376f..b0124e69ab383 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -568,7 +568,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		skb->truesize += copy;
 		sk->sk_wmem_queued += copy;
 		sk->sk_forward_alloc -= copy;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		tp->write_seq += copy;
 		TCP_SKB_CB(skb)->end_seq += copy;
 		skb_shinfo(skb)->gso_segs = 0;
@@ -723,7 +723,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				 * Check whether we can use HW checksum.
 				 */
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
-					skb->ip_summed = CHECKSUM_HW;
+					skb->ip_summed = CHECKSUM_PARTIAL;
 
 				skb_entail(sk, tp, skb);
 				copy = size_goal;
@@ -2205,7 +2205,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 		th->fin = th->psh = 0;
 
 		th->check = ~csum_fold(th->check + delta);
-		if (skb->ip_summed != CHECKSUM_HW)
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
 			th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 							   skb->csum));
 
@@ -2219,7 +2219,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 
 	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
 	th->check = ~csum_fold(th->check + delta);
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 						   skb->csum));
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 43f6740244f8f..b2aa512a30e9d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcphdr *th = skb->h.th;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
 		skb->csum = offsetof(struct tcphdr, check);
 	} else {
@@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 	th->check = 0;
 	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
 	skb->csum = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
@@ -973,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 
 static int tcp_v4_checksum_init(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
 				  skb->nh.iph->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4f3ffe1b3b44..9252a50c4b490 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Copy and checksum data tail into the new buffer. */
 		buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
 						       nsize, 0);
@@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
 		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
 	} else {
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb_split(skb, buff, len);
 	}
 
@@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 		__pskb_trim_head(skb, len - skb_headlen(skb));
 
 	TCP_SKB_CB(skb)->seq += len;
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 
 	skb->truesize	     -= len;
 	sk->sk_wmem_queued   -= len;
@@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	/* This packet was never sent out yet, so no SACK bits. */
 	TCP_SKB_CB(buff)->sacked = 0;
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 
 	/* Fix up tso_factor for both original and new SKB.  */
@@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
-		nskb->ip_summed = CHECKSUM_HW;
+	nskb->ip_summed = skb->ip_summed;
 
 	len = 0;
 	while (len < probe_size) {
@@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			                           ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
-				if (skb->ip_summed != CHECKSUM_HW)
+				if (skb->ip_summed != CHECKSUM_PARTIAL)
 					skb->csum = csum_partial(skb->data, skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
@@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
-		if (next_skb->ip_summed == CHECKSUM_HW)
-			skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = next_skb->ip_summed;
 
-		if (skb->ip_summed != CHECKSUM_HW)
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
 			skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
 
 		/* Update sequence range on original skb. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a4d005eccc7f6..87152510980c8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -429,7 +429,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
 		/*
 		 * Only one fragment on the socket.
 		 */
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			skb->csum = offsetof(struct udphdr, check);
 			uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
 					up->len, IPPROTO_UDP, 0);
@@ -448,7 +448,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together.
 		 */
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			int offset = (unsigned char *)uh - skb->data;
 			skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -1088,7 +1088,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 {
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else if (skb->ip_summed == CHECKSUM_HW) {
+	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d16f863cf6876..4a96a9e3ef3bc 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -48,8 +48,8 @@ static int xfrm4_output_one(struct sk_buff *skb)
 	struct xfrm_state *x = dst->xfrm;
 	int err;
 	
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
 		if (err)
 			goto error_nolock;
 	}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 86dac106873b7..05afa6b1912b7 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -294,7 +294,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
 	i = n - --hdr->segments_left;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index dbfce089e916d..1030551076741 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -606,7 +606,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 
 	/* Perform checksum. */
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
 				     skb->csum))
 			break;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4fb47a2529131..65514f21c1864 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -866,7 +866,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 		sk->sk_sndmsg_off = 0;
 	}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 395a417ba9554..580b1aba6722b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 	unsigned int csum = 0;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN)
 			break;
 		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 968a14be0d057..c01c126224e2a 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 		break;
 	
 	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
 		}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 00d5583807f76..7a4e4c2e31972 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -408,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
  		return -1;
 	}
 
- 	if (skb->ip_summed == CHECKSUM_HW)
+ 	if (skb->ip_summed == CHECKSUM_COMPLETE)
  		skb->csum = csum_sub(skb->csum,
  				     csum_partial(skb->nh.raw,
 						  (u8*)(fhdr + 1) - skb->nh.raw,
@@ -640,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &nf_ct_frag6_mem);
@@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	head->nh.ipv6h->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_HW)
+	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
 	fq->fragments = NULL;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d5040e172292e..d4af1cb5e19fe 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -334,7 +334,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 	if (!rp->checksum)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		skb_postpull_rcsum(skb, skb->nh.raw,
 		                   skb->h.raw - skb->nh.raw);
 		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4e299c69e1c60..a8623d2b0879c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -433,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
  		return;
 	}
 
- 	if (skb->ip_summed == CHECKSUM_HW)
+ 	if (skb->ip_summed == CHECKSUM_COMPLETE)
  		skb->csum = csum_sub(skb->csum,
  				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
 
@@ -647,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &ip6_frag_mem);
@@ -662,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	*skb_in = head;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_HW)
+	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
 	IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 302786a11cd67..7f1b660493b78 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -545,7 +545,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcphdr *th = skb->h.th;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
 		skb->csum = offsetof(struct tcphdr, check);
 	} else {
@@ -570,7 +570,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
 				     IPPROTO_TCP, 0);
 	skb->csum = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
@@ -1033,7 +1033,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 static int tcp_v6_checksum_init(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
 				  &skb->nh.ipv6h->daddr,skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 82c7c9cde2a86..780b89f6dfcc3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -475,7 +475,7 @@ static int udpv6_rcv(struct sk_buff **pskb)
 		uh = skb->h.uh;
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
 	    !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c8c8b44a0f581..6d111743e5083 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -41,8 +41,8 @@ static int xfrm6_output_one(struct sk_buff *skb)
 	struct xfrm_state *x = dst->xfrm;
 	int err;
 	
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
 		if (err)
 			goto error_nolock;
 	}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index af8adcba23a72..308d2abd7ee58 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -823,8 +823,7 @@ static int tcp_error(struct sk_buff *skb,
   
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index ae07ebe3ab375..d36e03139e8b5 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
 	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 49ef41e34c483..eddfbe4441a2a 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		break;
 	
 	case NFQNL_COPY_PACKET:
-		if (entskb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entskb,
-		                               outdev == NULL))) {
+		if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
+		     entskb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entskb))) {
 			spin_unlock_bh(&queue->lock);
 			return NULL;
 		}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4172a5235916e..300215bdbf466 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -586,7 +586,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
-			if (skb->ip_summed == CHECKSUM_HW)
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				status |= TP_STATUS_CSUMNOTREADY;
 		}
 	}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a08ec4c7c55d7..45939bafbdf89 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	 */
 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
-		    || (skb->ip_summed == CHECKSUM_HW
-			&& skb_checksum_help(skb, 0))) {
+		    || (skb->ip_summed == CHECKSUM_PARTIAL
+			&& skb_checksum_help(skb))) {
 			sch->qstats.drops++;
 			return NET_XMIT_DROP;
 		}
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index eb330d4f66d6c..6f17527b9e695 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 		return -1;
 	if ((unsigned short)csum_fold(desc.csum))
 		return -1;
-	if (unlikely(skb->ip_summed == CHECKSUM_HW))
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 		netdev_rx_csum_fault(skb->dev);
 	return 0;
 no_checksum:
-- 
GitLab


From 4cf411de49c65140b3c259748629b561c0d3340f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Aug 2006 00:58:33 -0700
Subject: [PATCH 0411/1063] [NETFILTER]: Get rid of HW checksum invalidation

Update hardware checksums incrementally to avoid breaking GSO.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h                  |  6 +++
 include/linux/netfilter_ipv4/ip_nat.h      |  4 --
 include/linux/netfilter_ipv4/ip_nat_core.h |  8 +--
 net/ipv4/netfilter/ip_nat_core.c           | 52 ++++++++-----------
 net/ipv4/netfilter/ip_nat_helper.c         | 59 +++++++++++++++-------
 net/ipv4/netfilter/ip_nat_proto_gre.c      |  5 +-
 net/ipv4/netfilter/ip_nat_proto_icmp.c     |  8 +--
 net/ipv4/netfilter/ip_nat_proto_tcp.c      |  7 ++-
 net/ipv4/netfilter/ip_nat_proto_udp.c      | 15 ++++--
 net/ipv4/netfilter/ip_nat_standalone.c     | 10 +---
 net/ipv4/netfilter/ipt_ECN.c               | 19 +++----
 net/ipv4/netfilter/ipt_REJECT.c            |  1 +
 net/ipv4/netfilter/ipt_TCPMSS.c            | 39 ++++++--------
 net/netfilter/core.c                       | 22 ++++++++
 14 files changed, 138 insertions(+), 117 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 10168e26a8466..b7e67d1d4382b 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -282,6 +282,12 @@ extern void nf_invalidate_cache(int pf);
    Returns true or false. */
 extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
 
+extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval,
+				u_int32_t csum);
+extern u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+				      u_int32_t oldval, u_int32_t newval,
+				      u_int16_t csum, int pseudohdr);
+
 struct nf_afinfo {
 	unsigned short	family;
 	unsigned int	(*checksum)(struct sk_buff *skb, unsigned int hook,
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index e9f5ed1d9f680..98f8407e4cb50 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -72,10 +72,6 @@ extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack,
 extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack *ignored_conntrack);
 
-/* Calculate relative checksum. */
-extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
-				    u_int32_t newval,
-				    u_int16_t oldcheck);
 #else  /* !__KERNEL__: iptables wants this to compile. */
 #define ip_nat_multi_range ip_nat_multi_range_compat
 #endif /*__KERNEL__*/
diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h
index 30db23f06b037..60566f9fd7b38 100644
--- a/include/linux/netfilter_ipv4/ip_nat_core.h
+++ b/include/linux/netfilter_ipv4/ip_nat_core.h
@@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct ip_conntrack *ct,
 			       unsigned int hooknum,
 			       struct sk_buff **pskb);
 
-extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-					 struct ip_conntrack *ct,
-					 enum ip_nat_manip_type manip,
-					 enum ip_conntrack_dir dir);
+extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+					 enum ip_conntrack_info ctinfo,
+					 unsigned int hooknum,
+					 struct sk_buff **pskb);
 #endif /* _IP_NAT_CORE_H */
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1741d555ad0dd..4c540d03d48ee 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -101,18 +101,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
 	write_unlock_bh(&ip_nat_lock);
 }
 
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong.  Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-				      oldcheck^0xFFFF));
-}
-EXPORT_SYMBOL(ip_nat_cheat_check);
-
 /* Is this tuple already taken? (not by us) */
 int
 ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -378,12 +366,12 @@ manip_pkt(u_int16_t proto,
 	iph = (void *)(*pskb)->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
-		iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->saddr, target->src.ip,
+					    iph->check);
 		iph->saddr = target->src.ip;
 	} else {
-		iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
+					    iph->check);
 		iph->daddr = target->dst.ip;
 	}
 	return 1;
@@ -423,10 +411,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct,
 EXPORT_SYMBOL_GPL(ip_nat_packet);
 
 /* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-				  struct ip_conntrack *ct,
-				  enum ip_nat_manip_type manip,
-				  enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned int hooknum,
+				  struct sk_buff **pskb)
 {
 	struct {
 		struct icmphdr icmp;
@@ -434,7 +422,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 	} *inside;
 	struct ip_conntrack_tuple inner, target;
 	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
+	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
 
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -443,12 +433,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
-		hdrlen = (*pskb)->nh.iph->ihl * 4;
-		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
-						(*pskb)->len - hdrlen, 0)))
-			return 0;
-	}
+	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+		return 0;
 
 	/* Must be RELATED */
 	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +473,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 		       !manip))
 		return 0;
 
-	/* Reloading "inside" here since manip_pkt inner. */
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	inside->icmp.checksum = 0;
-	inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
-						       (*pskb)->len - hdrlen,
-						       0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		/* Reloading "inside" here since manip_pkt inner. */
+		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside->icmp.checksum = 0;
+		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+							       (*pskb)->len - hdrlen,
+							       0));
+	}
 
 	/* Change outer to look the reply to an incoming packet
 	 * (proto 0 means don't invert per-proto part). */
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index cbcaa45370ae5..021c3daae3edb 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -165,7 +165,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 {
 	struct iphdr *iph;
 	struct tcphdr *tcph;
-	int datalen;
+	int oldlen, datalen;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return 0;
@@ -180,13 +180,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 	iph = (*pskb)->nh.iph;
 	tcph = (void *)iph + iph->ihl*4;
 
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
 			match_offset, match_len, rep_buffer, rep_len);
 
 	datalen = (*pskb)->len - iph->ihl*4;
-	tcph->check = 0;
-	tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
-				   csum_partial((char *)tcph, datalen, 0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		tcph->check = 0;
+		tcph->check = tcp_v4_check(tcph, datalen,
+					   iph->saddr, iph->daddr,
+					   csum_partial((char *)tcph,
+					   		datalen, 0));
+	} else
+		tcph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   tcph->check, 1);
 
 	if (rep_len != match_len) {
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -221,6 +230,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 {
 	struct iphdr *iph;
 	struct udphdr *udph;
+	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
 	iph = (*pskb)->nh.iph;
@@ -238,22 +248,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 
 	iph = (*pskb)->nh.iph;
 	udph = (void *)iph + iph->ihl*4;
+
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
 			match_offset, match_len, rep_buffer, rep_len);
 
 	/* update the length of the UDP packet */
-	udph->len = htons((*pskb)->len - iph->ihl*4);
+	datalen = (*pskb)->len - iph->ihl*4;
+	udph->len = htons(datalen);
 
 	/* fix udp checksum if udp checksum was previously calculated */
-	if (udph->check) {
-		int datalen = (*pskb)->len - iph->ihl * 4;
+	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+		return 1;
+
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		udph->check = 0;
 		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 		                                datalen, IPPROTO_UDP,
 		                                csum_partial((char *)udph,
 		                                             datalen, 0));
-	}
-
+		if (!udph->check)
+			udph->check = -1;
+	} else
+		udph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   udph->check, 1);
 	return 1;
 }
 EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -293,11 +313,14 @@ sack_adjust(struct sk_buff *skb,
 			ntohl(sack->start_seq), new_start_seq,
 			ntohl(sack->end_seq), new_end_seq);
 
-		tcph->check = 
-			ip_nat_cheat_check(~sack->start_seq, new_start_seq,
-					   ip_nat_cheat_check(~sack->end_seq, 
-						   	      new_end_seq,
-							      tcph->check));
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->start_seq,
+						   new_start_seq,
+						   tcph->check, 0);
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->end_seq,
+						   new_end_seq,
+						   tcph->check, 0);
 		sack->start_seq = new_start_seq;
 		sack->end_seq = new_end_seq;
 		sackoff += sizeof(*sack);
@@ -381,10 +404,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
 	newack = htonl(newack);
 
-	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
-					 ip_nat_cheat_check(~tcph->ack_seq, 
-					 		    newack, 
-							    tcph->check));
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
+					   tcph->check, 0);
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
+					   tcph->check, 0);
 
 	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 38acfdf540ebe..70a65372225a2 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -130,9 +130,10 @@ gre_manip_pkt(struct sk_buff **pskb,
 			if (greh->csum) {
 				/* FIXME: Never tested this code... */
 				*(gre_csum(greh)) = 
-					ip_nat_cheat_check(~*(gre_key(greh)),
+					nf_proto_csum_update(*pskb,
+							~*(gre_key(greh)),
 							tuple->dst.u.gre.key,
-							*(gre_csum(greh)));
+							*(gre_csum(greh)), 0);
 			}
 			*(gre_key(greh)) = tuple->dst.u.gre.key;
 			break;
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 31a3f4ccb99cc..ec50cc295317f 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb,
 		return 0;
 
 	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-
-	hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
-					    tuple->src.u.icmp.id,
-					    hdr->checksum);
+	hdr->checksum = nf_proto_csum_update(*pskb,
+					     hdr->un.echo.id ^ 0xFFFF,
+					     tuple->src.u.icmp.id,
+					     hdr->checksum, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index a3d14079eba6e..72a6307bd2db9 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(oldport ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+	hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
+	hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
+					  hdr->check, 0);
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index ec6053fdc867d..5da196ae758cf 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
 		newport = tuple->dst.u.udp.port;
 		portptr = &hdr->dest;
 	}
-	if (hdr->check) /* 0 is a special case meaning no checksum */
-		hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(*portptr ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+
+	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
+		hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
+						  hdr->check, 1);
+		hdr->check = nf_proto_csum_update(*pskb,
+						  *portptr ^ 0xFFFF, newport,
+						  hdr->check, 0);
+		if (!hdr->check)
+			hdr->check = -1;
+	}
 	*portptr = newport;
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index f4f00c816d874..f3b778355432a 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -110,12 +110,6 @@ ip_nat_fn(unsigned int hooknum,
 	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
 		       & htons(IP_MF|IP_OFFSET)));
 
-	/* If we had a hardware checksum before, it's now invalid */
-	if ((*pskb)->ip_summed == CHECKSUM_PARTIAL ||
-	    (*pskb)->ip_summed == CHECKSUM_COMPLETE)
-		if (skb_checksum_help(*pskb))
-			return NF_DROP;
-
 	ct = ip_conntrack_get(*pskb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
 	   have dropped it.  Hence it's the user's responsibilty to
@@ -146,8 +140,8 @@ ip_nat_fn(unsigned int hooknum,
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
-							   CTINFO2DIR(ctinfo)))
+			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
+							   hooknum, pskb))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4ec43f98fe492..35916c74fe4eb 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -52,7 +52,7 @@ static inline int
 set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
-	u_int16_t diffs[2];
+	u_int16_t oldval;
 
 	/* Not enought header? */
 	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,23 +70,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 		return 0;
 	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
 
-	if (((*pskb)->ip_summed == CHECKSUM_PARTIAL ||
-	     (*pskb)->ip_summed == CHECKSUM_COMPLETE) &&
-	    skb_checksum_help(*pskb))
-		return 0;
-
-	diffs[0] = ((u_int16_t *)tcph)[6];
+	oldval = ((u_int16_t *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
 		tcph->ece = einfo->proto.tcp.ece;
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
-	diffs[1] = ((u_int16_t *)tcph)[6];
-	diffs[0] = diffs[0] ^ 0xFFFF;
 
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
-		tcph->check = csum_fold(csum_partial((char *)diffs,
-						     sizeof(diffs),
-						     tcph->check^0xFFFF));
+	tcph->check = nf_proto_csum_update((*pskb),
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 7f905bf2bde57..95c6662b663c5 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -185,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	tcph->urg_ptr = 0;
 
 	/* Adjust TCP checksum */
+	nskb->ip_summed = CHECKSUM_NONE;
 	tcph->check = 0;
 	tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
 				   nskb->nh.iph->saddr,
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index c998dc0fcd159..0fce85e055071 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -27,14 +27,6 @@ MODULE_DESCRIPTION("iptables TCP MSS modification module");
 #define DEBUGP(format, args...)
 #endif
 
-static u_int16_t
-cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-                                      oldcheck^0xFFFF));
-}
-
 static inline unsigned int
 optlen(const u_int8_t *opt, unsigned int offset)
 {
@@ -62,11 +54,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	if (((*pskb)->ip_summed == CHECKSUM_PARTIAL ||
-	     (*pskb)->ip_summed == CHECKSUM_COMPLETE) &&
-	    skb_checksum_help(*pskb))
-		return NF_DROP;
-
 	iph = (*pskb)->nh.iph;
 	tcplen = (*pskb)->len - iph->ihl*4;
 
@@ -120,9 +107,10 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = (newmss & 0x00ff);
 
-			tcph->check = cheat_check(htons(oldmss)^0xFFFF,
-						  htons(newmss),
-						  tcph->check);
+			tcph->check = nf_proto_csum_update(*pskb,
+							   htons(oldmss)^0xFFFF,
+							   htons(newmss),
+							   tcph->check, 0);
 
 			DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
 			       "->%u.%u.%u.%u:%hu changed TCP MSS option"
@@ -162,8 +150,10 @@ ipt_tcpmss_target(struct sk_buff **pskb,
  	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
-	tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
-				  htons(tcplen + TCPOLEN_MSS), tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb,
+					   htons(tcplen) ^ 0xFFFF,
+				           htons(tcplen + TCPOLEN_MSS),
+					   tcph->check, 1);
 	tcplen += TCPOLEN_MSS;
 
 	opt[0] = TCPOPT_MSS;
@@ -171,16 +161,19 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = (newmss & 0x00ff);
 
-	tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
+					   tcph->check, 0);
 
 	oldval = ((u_int16_t *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
-	tcph->check = cheat_check(oldval ^ 0xFFFF,
-				  ((u_int16_t *)tcph)[6], tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb,
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 
 	newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
-	iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
-				 newtotlen, iph->check);
+	iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
+				    newtotlen, iph->check);
 	iph->tot_len = newtotlen;
 
 	DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5d29d5e23624d..27f639f3ac2a5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -222,6 +222,28 @@ int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
 }
 EXPORT_SYMBOL(skb_make_writable);
 
+u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum)
+{
+	u_int32_t diff[] = { oldval, newval };
+
+	return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum));
+}
+EXPORT_SYMBOL(nf_csum_update);
+
+u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+			       u_int32_t oldval, u_int32_t newval,
+			       u_int16_t csum, int pseudohdr)
+{
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		csum = nf_csum_update(oldval, newval, csum);
+		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+			skb->csum = nf_csum_update(oldval, newval, skb->csum);
+	} else if (pseudohdr)
+		csum = ~nf_csum_update(oldval, newval, ~csum);
+
+	return csum;
+}
+EXPORT_SYMBOL(nf_proto_csum_update);
 
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
-- 
GitLab


From 394f545db6e7e4d7a6a2fa3f543b755ca39d58ac Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Aug 2006 00:58:52 -0700
Subject: [PATCH 0412/1063] [NETFILTER]: nf_queue: handle GSO packets

Handle GSO packets in nf_queue by segmenting them before queueing to
avoid breaking GSO in case they get mangled.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/core.c         |  2 +-
 net/netfilter/nf_internals.h |  2 +-
 net/netfilter/nf_queue.c     | 80 +++++++++++++++++++++++++++---------
 3 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 27f639f3ac2a5..d80b935b3a922 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -182,7 +182,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
 		ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK)  == NF_QUEUE) {
 		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-		if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
+		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 	}
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 86e392bfe8330..a981971ce1d5f 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -23,7 +23,7 @@ extern unsigned int nf_iterate(struct list_head *head,
 				int hook_thresh);
 
 /* nf_queue.c */
-extern int nf_queue(struct sk_buff **skb, 
+extern int nf_queue(struct sk_buff *skb,
 		    struct list_head *elem, 
 		    int pf, unsigned int hook,
 		    struct net_device *indev,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 662a869593bff..4d8936ed581d2 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -74,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
  * Any packet that leaves via this function must come back 
  * through nf_reinject().
  */
-int nf_queue(struct sk_buff **skb, 
-	     struct list_head *elem, 
-	     int pf, unsigned int hook,
-	     struct net_device *indev,
-	     struct net_device *outdev,
-	     int (*okfn)(struct sk_buff *),
-	     unsigned int queuenum)
+static int __nf_queue(struct sk_buff *skb,
+		      struct list_head *elem,
+		      int pf, unsigned int hook,
+		      struct net_device *indev,
+		      struct net_device *outdev,
+		      int (*okfn)(struct sk_buff *),
+		      unsigned int queuenum)
 {
 	int status;
 	struct nf_info *info;
@@ -94,14 +94,14 @@ int nf_queue(struct sk_buff **skb,
 	read_lock(&queue_handler_lock);
 	if (!queue_handler[pf]) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
 	afinfo = nf_get_afinfo(pf);
 	if (!afinfo) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -109,9 +109,9 @@ int nf_queue(struct sk_buff **skb,
 	if (!info) {
 		if (net_ratelimit())
 			printk(KERN_ERR "OOM queueing packet %p\n",
-			       *skb);
+			       skb);
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -130,15 +130,15 @@ int nf_queue(struct sk_buff **skb,
 	if (outdev) dev_hold(outdev);
 
 #ifdef CONFIG_BRIDGE_NETFILTER
-	if ((*skb)->nf_bridge) {
-		physindev = (*skb)->nf_bridge->physindev;
+	if (skb->nf_bridge) {
+		physindev = skb->nf_bridge->physindev;
 		if (physindev) dev_hold(physindev);
-		physoutdev = (*skb)->nf_bridge->physoutdev;
+		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev) dev_hold(physoutdev);
 	}
 #endif
-	afinfo->saveroute(*skb, info);
-	status = queue_handler[pf]->outfn(*skb, info, queuenum,
+	afinfo->saveroute(skb, info);
+	status = queue_handler[pf]->outfn(skb, info, queuenum,
 					  queue_handler[pf]->data);
 
 	read_unlock(&queue_handler_lock);
@@ -153,7 +153,7 @@ int nf_queue(struct sk_buff **skb,
 #endif
 		module_put(info->elem->owner);
 		kfree(info);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 
 		return 1;
 	}
@@ -161,6 +161,46 @@ int nf_queue(struct sk_buff **skb,
 	return 1;
 }
 
+int nf_queue(struct sk_buff *skb,
+	     struct list_head *elem,
+	     int pf, unsigned int hook,
+	     struct net_device *indev,
+	     struct net_device *outdev,
+	     int (*okfn)(struct sk_buff *),
+	     unsigned int queuenum)
+{
+	struct sk_buff *segs;
+
+	if (!skb_is_gso(skb))
+		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+				  queuenum);
+
+	switch (pf) {
+	case AF_INET:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case AF_INET6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	}
+
+	segs = skb_gso_segment(skb, 0);
+	kfree_skb(skb);
+	if (unlikely(IS_ERR(segs)))
+		return 1;
+
+	do {
+		struct sk_buff *nskb = segs->next;
+
+		segs->next = NULL;
+		if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
+				queuenum))
+			kfree_skb(segs);
+		segs = nskb;
+	} while (segs);
+	return 1;
+}
+
 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		 unsigned int verdict)
 {
@@ -224,9 +264,9 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
-		if (!nf_queue(&skb, elem, info->pf, info->hook, 
-			      info->indev, info->outdev, info->okfn,
-			      verdict >> NF_VERDICT_BITS))
+		if (!__nf_queue(skb, elem, info->pf, info->hook,
+				info->indev, info->outdev, info->okfn,
+				verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 		break;
 	default:
-- 
GitLab


From d7aba67f814729647c938ac6da2d5224b790f926 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 5 Aug 2006 02:20:42 -0700
Subject: [PATCH 0413/1063] [IPV6]: Fix thinko in rt6_fill_node

This looks like a mistake: the table ID is overwritten again with
RT_TABLE_MAIN right after it has been set.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 438977e2085df..ff5affe2636cc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1902,7 +1902,6 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		rtm->rtm_table = rt->rt6i_table->tb6_id;
 	else
 		rtm->rtm_table = RT6_TABLE_UNSPEC;
-	rtm->rtm_table = RT_TABLE_MAIN;
 	if (rt->rt6i_flags&RTF_REJECT)
 		rtm->rtm_type = RTN_UNREACHABLE;
 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
-- 
GitLab


From 6c813a7297e3af4cd7c3458e09e9ee3d161c6830 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 6 Aug 2006 22:22:47 -0700
Subject: [PATCH 0414/1063] [IPV6]: Fix crash in ip6_del_rt

ip6_null_entry doesn't have rt6i_table set, so when trying to delete it the
kernel crashes dereferencing table->tb6_lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ff5affe2636cc..41c5905d31913 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1223,6 +1223,9 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct
 	int err;
 	struct fib6_table *table;
 
+	if (rt == &ip6_null_entry)
+		return -ENOENT;
+
 	table = rt->rt6i_table;
 	write_lock_bh(&table->tb6_lock);
 
-- 
GitLab


From 3226f6881719e61e00e92b4c85a8ef49aa4d42b1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 6 Aug 2006 22:24:08 -0700
Subject: [PATCH 0415/1063] [IPV6]: Fix policy routing lookup

When the lookup in a table returns ip6_null_entry the policy routing lookup
returns it instead of continuing in the next table, which effectively means
it only searches the local table.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index c3c8195744ee4..94a46ec967a41 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -94,8 +94,10 @@ int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 
 	if (rt != &ip6_null_entry)
 		goto out;
-
 	dst_release(&rt->u.dst);
+	rt = NULL;
+	goto out;
+
 discard_pkt:
 	dst_hold(&rt->u.dst);
 out:
-- 
GitLab


From a14a49d2b7b9290e87751f21f503f1954267d4c4 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 7 Aug 2006 17:53:08 -0700
Subject: [PATCH 0416/1063] [NEIGH]: Convert neighbour deletion to new netlink
 api

Fixes:
  Return ENOENT if the neighbour is not found (was EINVAL)
  Return EAFNOSUPPORT if no table matches the specified
  address family.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 53 ++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index fe2113f54e2b6..39c07cc66ee74 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -30,6 +30,7 @@
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
@@ -1440,48 +1441,62 @@ int neigh_table_clear(struct neigh_table *tbl)
 
 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *dst_attr;
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err = -EINVAL;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	if (nlmsg_len(nlh) < sizeof(*ndm))
+		goto out;
+
+	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+	if (dst_attr == NULL)
 		goto out;
 
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		struct neighbour *n;
+		struct neighbour *neigh;
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(dst_attr) < tbl->key_len)
 			goto out_dev_put;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
 			goto out_dev_put;
 		}
 
-		if (!dev)
-			goto out;
+		if (dev == NULL)
+			goto out_dev_put;
 
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
-			err = neigh_update(n, NULL, NUD_FAILED, 
-					   NEIGH_UPDATE_F_OVERRIDE|
-					   NEIGH_UPDATE_F_ADMIN);
-			neigh_release(n);
+		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+		if (neigh == NULL) {
+			err = -ENOENT;
+			goto out_dev_put;
 		}
+
+		err = neigh_update(neigh, NULL, NUD_FAILED,
+				   NEIGH_UPDATE_F_OVERRIDE |
+				   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
-- 
GitLab


From 5208debd0f1da07bbb350f8b0b142775d4f002ea Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 7 Aug 2006 17:55:40 -0700
Subject: [PATCH 0417/1063] [NEIGH]: Convert neighbour addition to new netlink
 api

Fixes:
    Return EAFNOSUPPORT if no table matches the specified
    address family.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 90 +++++++++++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 39 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 39c07cc66ee74..6036f43c1fd6d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1506,76 +1506,88 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *tb[NDA_MAX+1];
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
+		goto out;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	err = -EINVAL;
+	if (tb[NDA_DST] == NULL)
 		goto out;
 
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+
+		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+			goto out_dev_put;
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		int override = 1;
-		struct neighbour *n;
+		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+		struct neighbour *neigh;
+		void *dst, *lladdr;
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(tb[NDA_DST]) < tbl->key_len)
 			goto out_dev_put;
+		dst = nla_data(tb[NDA_DST]);
+		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = -ENOBUFS;
-			if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
-				err = 0;
+			err = 0;
+			if (pneigh_lookup(tbl, dst, dev, 1) == NULL)
+				err = -ENOBUFS;
 			goto out_dev_put;
 		}
 
-		err = -EINVAL;
-		if (!dev)
-			goto out;
-		if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len)
+		if (dev == NULL)
 			goto out_dev_put;
+
+		neigh = neigh_lookup(tbl, dst, dev);
+		if (neigh == NULL) {
+			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+				err = -ENOENT;
+				goto out_dev_put;
+			}
 	
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
-			if (nlh->nlmsg_flags & NLM_F_EXCL) {
-				err = -EEXIST;
-				neigh_release(n);
+			neigh = __neigh_lookup_errno(tbl, dst, dev);
+			if (IS_ERR(neigh)) {
+				err = PTR_ERR(neigh);
 				goto out_dev_put;
 			}
-			
-			override = nlh->nlmsg_flags & NLM_F_REPLACE;
-		} else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
-			err = -ENOENT;
-			goto out_dev_put;
 		} else {
-			n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
-			if (IS_ERR(n)) {
-				err = PTR_ERR(n);
+			if (nlh->nlmsg_flags & NLM_F_EXCL) {
+				err = -EEXIST;
+				neigh_release(neigh);
 				goto out_dev_put;
 			}
-		}
 
-		err = neigh_update(n,
-				   lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
-				   ndm->ndm_state,
-				   (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
-				   NEIGH_UPDATE_F_ADMIN);
+			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
+		}
 
-		neigh_release(n);
+		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
-- 
GitLab


From 8b8aec508302d4e63fd88f47894805115277f70f Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 7 Aug 2006 17:56:37 -0700
Subject: [PATCH 0418/1063] [NEIGH]: Convert neighbour dumping to new netlink
 api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 106 +++++++++++++++++++++----------------------
 1 file changed, 51 insertions(+), 55 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6036f43c1fd6d..5490afd23b826 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1901,48 +1901,49 @@ int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event, unsigned int flags)
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
+			   u32 pid, u32 seq, int type, unsigned int flags)
 {
 	unsigned long now = jiffies;
-	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
-	int locked = 0;
-	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
-					 sizeof(struct ndmsg), flags);
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndm->ndm_family	 = n->ops->family;
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family	 = neigh->ops->family;
 	ndm->ndm_pad1    = 0;
 	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags	 = n->flags;
-	ndm->ndm_type	 = n->type;
-	ndm->ndm_ifindex = n->dev->ifindex;
-	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
-	read_lock_bh(&n->lock);
-	locked		 = 1;
-	ndm->ndm_state	 = n->nud_state;
-	if (n->nud_state & NUD_VALID)
-		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
-	ci.ndm_used	 = now - n->used;
-	ci.ndm_confirmed = now - n->confirmed;
-	ci.ndm_updated	 = now - n->updated;
-	ci.ndm_refcnt	 = atomic_read(&n->refcnt) - 1;
-	probes = atomic_read(&n->probes);
-	read_unlock_bh(&n->lock);
-	locked		 = 0;
-	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
-	RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes);
-	nlh->nlmsg_len	 = skb->tail - b;
-	return skb->len;
+	ndm->ndm_flags	 = neigh->flags;
+	ndm->ndm_type	 = neigh->type;
+	ndm->ndm_ifindex = neigh->dev->ifindex;
 
-nlmsg_failure:
-rtattr_failure:
-	if (locked)
-		read_unlock_bh(&n->lock);
-	skb_trim(skb, b - skb->data);
-	return -1;
+	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
+
+	read_lock_bh(&neigh->lock);
+	ndm->ndm_state	 = neigh->nud_state;
+	if ((neigh->nud_state & NUD_VALID) &&
+	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
+		read_unlock_bh(&neigh->lock);
+		goto nla_put_failure;
+	}
+
+	ci.ndm_used	 = now - neigh->used;
+	ci.ndm_confirmed = now - neigh->confirmed;
+	ci.ndm_updated	 = now - neigh->updated;
+	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
+	read_unlock_bh(&neigh->lock);
+
+	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
+	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 
@@ -1986,7 +1987,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 	int t, family, s_t;
 
 	read_lock(&neigh_tbl_lock);
-	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 	s_t = cb->args[0];
 
 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
@@ -2367,39 +2368,34 @@ static struct file_operations neigh_stat_seq_fops = {
 #ifdef CONFIG_ARPD
 void neigh_app_ns(struct neighbour *n)
 {
-	struct nlmsghdr  *nlh;
-	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+	struct sk_buff *skb;
 
-	if (!skb)
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (skb == NULL)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, NLM_F_REQUEST) <= 0)
 		kfree_skb(skb);
-		return;
+	else {
+		NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
+		netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
 	}
-	nlh			   = (struct nlmsghdr *)skb->data;
-	nlh->nlmsg_flags	   = NLM_F_REQUEST;
-	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
 }
 
 static void neigh_app_notify(struct neighbour *n)
 {
-	struct nlmsghdr *nlh;
-	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+	struct sk_buff *skb;
 
-	if (!skb)
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (skb == NULL)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) <= 0)
 		kfree_skb(skb);
-		return;
+	else {
+		NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
+		netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
 	}
-	nlh			   = (struct nlmsghdr *)skb->data;
-	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
 }
 
 #endif /* CONFIG_ARPD */
-- 
GitLab


From 9067c722cf6930adf1df2d169de9094dd90b0c33 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 7 Aug 2006 17:57:44 -0700
Subject: [PATCH 0419/1063] [NEIGH]: Move netlink neighbour bits to
 linux/neighbour.h

Moves netlink neighbour bits to linux/neighbour.h. Also
moves bits to be exported to userspace from net/neighbour.h
to linux/neighbour.h and removes the __KERNEL__ guards from
net/neighbour.h, since userspace is not supposed to be using
that header.

rtnetlink_rcv_msg() is no longer required to parse attributes
for the neighbour layer, so remove the dependency on the obsolete
and buggy rta_buf.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/neighbour.h | 65 +++++++++++++++++++++++++++++++++++++++
 include/linux/rtnetlink.h | 63 -------------------------------------
 include/net/neighbour.h   | 39 ++---------------------
 net/core/rtnetlink.c      |  2 --
 4 files changed, 67 insertions(+), 102 deletions(-)
 create mode 100644 include/linux/neighbour.h

diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
new file mode 100644
index 0000000000000..8e8293d86fbb4
--- /dev/null
+++ b/include/linux/neighbour.h
@@ -0,0 +1,65 @@
+#ifndef __LINUX_NEIGHBOUR_H
+#define __LINUX_NEIGHBOUR_H
+
+#include <linux/netlink.h>
+
+struct ndmsg
+{
+	__u8		ndm_family;
+	__u8		ndm_pad1;
+	__u16		ndm_pad2;
+	__s32		ndm_ifindex;
+	__u16		ndm_state;
+	__u8		ndm_flags;
+	__u8		ndm_type;
+};
+
+enum
+{
+	NDA_UNSPEC,
+	NDA_DST,
+	NDA_LLADDR,
+	NDA_CACHEINFO,
+	NDA_PROBES,
+	__NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ *	Neighbor Cache Entry Flags
+ */
+
+#define NTF_PROXY	0x08	/* == ATF_PUBL */
+#define NTF_ROUTER	0x80
+
+/*
+ *	Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE	0x01
+#define NUD_REACHABLE	0x02
+#define NUD_STALE	0x04
+#define NUD_DELAY	0x08
+#define NUD_PROBE	0x10
+#define NUD_FAILED	0x20
+
+/* Dummy states */
+#define NUD_NOARP	0x40
+#define NUD_PERMANENT	0x80
+#define NUD_NONE	0x00
+
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
+   and make no address resolution or NUD.
+   NUD_PERMANENT is also cannot be deleted by garbage collectors.
+ */
+
+struct nda_cacheinfo
+{
+	__u32		ndm_confirmed;
+	__u32		ndm_used;
+	__u32		ndm_updated;
+	__u32		ndm_refcnt;
+};
+
+#endif
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 84f3eb426da20..9750f0214c225 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -386,69 +386,6 @@ struct rta_session
 
 
 
-/**************************************************************
- *		Neighbour discovery.
- ****/
-
-struct ndmsg
-{
-	unsigned char	ndm_family;
-	unsigned char	ndm_pad1;
-	unsigned short	ndm_pad2;
-	int		ndm_ifindex;	/* Link index			*/
-	__u16		ndm_state;
-	__u8		ndm_flags;
-	__u8		ndm_type;
-};
-
-enum
-{
-	NDA_UNSPEC,
-	NDA_DST,
-	NDA_LLADDR,
-	NDA_CACHEINFO,
-	NDA_PROBES,
-	__NDA_MAX
-};
-
-#define NDA_MAX (__NDA_MAX - 1)
-
-#define NDA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
-#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg))
-
-/*
- *	Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_ROUTER	0x80
-
-/*
- *	Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE	0x01
-#define NUD_REACHABLE	0x02
-#define NUD_STALE	0x04
-#define NUD_DELAY	0x08
-#define NUD_PROBE	0x10
-#define NUD_FAILED	0x20
-
-/* Dummy states */
-#define NUD_NOARP	0x40
-#define NUD_PERMANENT	0x80
-#define NUD_NONE	0x00
-
-
-struct nda_cacheinfo
-{
-	__u32		ndm_confirmed;
-	__u32		ndm_used;
-	__u32		ndm_updated;
-	__u32		ndm_refcnt;
-};
-
-
 /*****************************************************************
  *		Neighbour tables specific messages.
  *
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 4901ee446879e..74c4b6ff8a5c5 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -1,6 +1,8 @@
 #ifndef _NET_NEIGHBOUR_H
 #define _NET_NEIGHBOUR_H
 
+#include <linux/neighbour.h>
+
 /*
  *	Generic neighbour manipulation
  *
@@ -14,40 +16,6 @@
  *		- Add neighbour cache statistics like rtstat
  */
 
-/* The following flags & states are exported to user space,
-   so that they should be moved to include/linux/ directory.
- */
-
-/*
- *	Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_ROUTER	0x80
-
-/*
- *	Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE	0x01
-#define NUD_REACHABLE	0x02
-#define NUD_STALE	0x04
-#define NUD_DELAY	0x08
-#define NUD_PROBE	0x10
-#define NUD_FAILED	0x20
-
-/* Dummy states */
-#define NUD_NOARP	0x40
-#define NUD_PERMANENT	0x80
-#define NUD_NONE	0x00
-
-/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
-   and make no address resolution or NUD.
-   NUD_PERMANENT is also cannot be deleted by garbage collectors.
- */
-
-#ifdef __KERNEL__
-
 #include <asm/atomic.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
@@ -374,6 +342,3 @@ struct neighbour_cb {
 #define NEIGH_CB(skb)	((struct neighbour_cb *)(skb)->cb)
 
 #endif
-#endif
-
-
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 93ba04fb84442..78ccbd4c4e376 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -104,7 +104,6 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
 	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
 	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
-	[RTM_FAM(RTM_NEWNEIGH)]     = NLMSG_LENGTH(sizeof(struct ndmsg)),
 	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
 	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
 	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
@@ -121,7 +120,6 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
 	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
 	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
-	[RTM_FAM(RTM_NEWNEIGH)]     = NDA_MAX,
 	[RTM_FAM(RTM_NEWRULE)]      = FRA_MAX,
 	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
-- 
GitLab


From 6b3f8674bccbb2e784d01e44373fb730af6cb149 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 7 Aug 2006 17:58:53 -0700
Subject: [PATCH 0420/1063] [NEIGH]: Convert neighbour table modification to
 new netlink api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 172 ++++++++++++++++++++++++++-----------------
 1 file changed, 104 insertions(+), 68 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5490afd23b826..5a0b8f48a0996 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1754,28 +1754,61 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
 	return NULL;
 }
 
+static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
+	[NDTA_NAME]		= { .type = NLA_STRING },
+	[NDTA_THRESH1]		= { .type = NLA_U32 },
+	[NDTA_THRESH2]		= { .type = NLA_U32 },
+	[NDTA_THRESH3]		= { .type = NLA_U32 },
+	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
+	[NDTA_PARMS]		= { .type = NLA_NESTED },
+};
+
+static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
+	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
+	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
+	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
+	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
+	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
+	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
+};
+
 int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct neigh_table *tbl;
-	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
-	struct rtattr **tb = arg;
-	int err = -EINVAL;
+	struct ndtmsg *ndtmsg;
+	struct nlattr *tb[NDTA_MAX+1];
+	int err;
 
-	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
-		return -EINVAL;
+	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+			  nl_neightbl_policy);
+	if (err < 0)
+		goto errout;
+
+	if (tb[NDTA_NAME] == NULL) {
+		err = -EINVAL;
+		goto errout;
+	}
 
+	ndtmsg = nlmsg_data(nlh);
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
 			continue;
 
-		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
 			break;
 	}
 
 	if (tbl == NULL) {
 		err = -ENOENT;
-		goto errout;
+		goto errout_locked;
 	}
 
 	/* 
@@ -1784,86 +1817,89 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	 */
 	write_lock_bh(&tbl->lock);
 
-	if (tb[NDTA_THRESH1 - 1])
-		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
-
-	if (tb[NDTA_THRESH2 - 1])
-		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
-
-	if (tb[NDTA_THRESH3 - 1])
-		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
-
-	if (tb[NDTA_GC_INTERVAL - 1])
-		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
-
-	if (tb[NDTA_PARMS - 1]) {
-		struct rtattr *tbp[NDTPA_MAX];
+	if (tb[NDTA_PARMS]) {
+		struct nlattr *tbp[NDTPA_MAX+1];
 		struct neigh_parms *p;
-		u32 ifindex = 0;
+		int i, ifindex = 0;
 
-		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
-			goto rtattr_failure;
+		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
+				       nl_ntbl_parm_policy);
+		if (err < 0)
+			goto errout_tbl_lock;
 
-		if (tbp[NDTPA_IFINDEX - 1])
-			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+		if (tbp[NDTPA_IFINDEX])
+			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
 
 		p = lookup_neigh_params(tbl, ifindex);
 		if (p == NULL) {
 			err = -ENOENT;
-			goto rtattr_failure;
+			goto errout_tbl_lock;
 		}
-	
-		if (tbp[NDTPA_QUEUE_LEN - 1])
-			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
-
-		if (tbp[NDTPA_PROXY_QLEN - 1])
-			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
-
-		if (tbp[NDTPA_APP_PROBES - 1])
-			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
-
-		if (tbp[NDTPA_UCAST_PROBES - 1])
-			p->ucast_probes =
-			   RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
 
-		if (tbp[NDTPA_MCAST_PROBES - 1])
-			p->mcast_probes =
-			   RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
-
-		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
-			p->base_reachable_time =
-			   RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
-
-		if (tbp[NDTPA_GC_STALETIME - 1])
-			p->gc_staletime =
-			   RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+		for (i = 1; i <= NDTPA_MAX; i++) {
+			if (tbp[i] == NULL)
+				continue;
 
-		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
-			p->delay_probe_time =
-			   RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+			switch (i) {
+			case NDTPA_QUEUE_LEN:
+				p->queue_len = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_PROXY_QLEN:
+				p->proxy_qlen = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_APP_PROBES:
+				p->app_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_UCAST_PROBES:
+				p->ucast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_MCAST_PROBES:
+				p->mcast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_BASE_REACHABLE_TIME:
+				p->base_reachable_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_GC_STALETIME:
+				p->gc_staletime = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_DELAY_PROBE_TIME:
+				p->delay_probe_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_RETRANS_TIME:
+				p->retrans_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_ANYCAST_DELAY:
+				p->anycast_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_PROXY_DELAY:
+				p->proxy_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_LOCKTIME:
+				p->locktime = nla_get_msecs(tbp[i]);
+				break;
+			}
+		}
+	}
 
-		if (tbp[NDTPA_RETRANS_TIME - 1])
-			p->retrans_time =
-			   RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+	if (tb[NDTA_THRESH1])
+		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
 
-		if (tbp[NDTPA_ANYCAST_DELAY - 1])
-			p->anycast_delay =
-			   RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+	if (tb[NDTA_THRESH2])
+		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
 
-		if (tbp[NDTPA_PROXY_DELAY - 1])
-			p->proxy_delay =
-			   RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
+	if (tb[NDTA_THRESH3])
+		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
 
-		if (tbp[NDTPA_LOCKTIME - 1])
-			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
-	}
+	if (tb[NDTA_GC_INTERVAL])
+		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
 
 	err = 0;
 
-rtattr_failure:
+errout_tbl_lock:
 	write_unlock_bh(&tbl->lock);
-errout:
+errout_locked:
 	read_unlock(&neigh_tbl_lock);
+errout:
 	return err;
 }
 
-- 
GitLab


From ca860fb39b4aa1479e2fea67435a2c1eac9ce789 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 7 Aug 2006 18:00:18 -0700
Subject: [PATCH 0421/1063] [NEIGH]: Convert neighbour table dumping to new
 netlink api

Also fixes skipping of already dumped neighbours.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 141 +++++++++++++++++++++++--------------------
 1 file changed, 74 insertions(+), 67 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5a0b8f48a0996..2f4e06a134572 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1597,56 +1597,59 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 {
-	struct rtattr *nest = NULL;
-	
-	nest = RTA_NEST(skb, NDTA_PARMS);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NDTA_PARMS);
+	if (nest == NULL)
+		return -ENOBUFS;
 
 	if (parms->dev)
-		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
-
-	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
-	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
-	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
-	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
-	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
-	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
-	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
-	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
 		      parms->base_reachable_time);
-	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
-	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
-	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
-	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
-	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
-	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
 
-	return RTA_NEST_END(skb, nest);
+	return nla_nest_end(skb, nest);
 
-rtattr_failure:
-	return RTA_NEST_CANCEL(skb, nest);
+nla_put_failure:
+	return nla_nest_cancel(skb, nest);
 }
 
-static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
-			      struct netlink_callback *cb)
+static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
+			      u32 pid, u32 seq, int type, int flags)
 {
 	struct nlmsghdr *nlh;
 	struct ndtmsg *ndtmsg;
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
-			       NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndtmsg = NLMSG_DATA(nlh);
+	ndtmsg = nlmsg_data(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
 	ndtmsg->ndtm_pad1   = 0;
 	ndtmsg->ndtm_pad2   = 0;
 
-	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
-	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
-	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
-	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
-	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
 
 	{
 		unsigned long now = jiffies;
@@ -1665,7 +1668,7 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
 		};
 
-		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
 	}
 
 	{
@@ -1690,55 +1693,50 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
 		}
 
-		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
 	}
 
 	BUG_ON(tbl->parms.dev);
 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_END(skb, nlh);
+	return nlmsg_end(skb, nlh);
 
-rtattr_failure:
+nla_put_failure:
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_CANCEL(skb, nlh);
- 
-nlmsg_failure:
-	return -1;
+	return nlmsg_cancel(skb, nlh);
 }
 
-static int neightbl_fill_param_info(struct neigh_table *tbl,
+static int neightbl_fill_param_info(struct sk_buff *skb,
+				    struct neigh_table *tbl,
 				    struct neigh_parms *parms,
-				    struct sk_buff *skb,
-				    struct netlink_callback *cb)
+				    u32 pid, u32 seq, int type,
+				    unsigned int flags)
 {
 	struct ndtmsg *ndtmsg;
 	struct nlmsghdr *nlh;
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
-			       NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndtmsg = NLMSG_DATA(nlh);
+	ndtmsg = nlmsg_data(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
 	ndtmsg->ndtm_pad1   = 0;
 	ndtmsg->ndtm_pad2   = 0;
-	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
 
-	if (neightbl_fill_parms(skb, parms) < 0)
-		goto rtattr_failure;
+	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
+	    neightbl_fill_parms(skb, parms) < 0)
+		goto errout;
 
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_END(skb, nlh);
-
-rtattr_failure:
+	return nlmsg_end(skb, nlh);
+errout:
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_CANCEL(skb, nlh);
-
-nlmsg_failure:
-	return -1;
+	return nlmsg_cancel(skb, nlh);
 }
  
 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
@@ -1905,34 +1903,43 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx, family;
-	int s_idx = cb->args[0];
+	int family, tidx, nidx = 0;
+	int tbl_skip = cb->args[0];
+	int neigh_skip = cb->args[1];
 	struct neigh_table *tbl;
 
-	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 
 	read_lock(&neigh_tbl_lock);
-	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
 		struct neigh_parms *p;
 
-		if (idx < s_idx || (family && tbl->family != family))
+		if (tidx < tbl_skip || (family && tbl->family != family))
 			continue;
 
-		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+				       NLM_F_MULTI) <= 0)
 			break;
 
-		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
-			if (idx < s_idx)
+		for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
+			if (nidx < neigh_skip)
 				continue;
 
-			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+			if (neightbl_fill_param_info(skb, tbl, p,
+						     NETLINK_CB(cb->skb).pid,
+						     cb->nlh->nlmsg_seq,
+						     RTM_NEWNEIGHTBL,
+						     NLM_F_MULTI) <= 0)
 				goto out;
 		}
 
+		neigh_skip = 0;
 	}
 out:
 	read_unlock(&neigh_tbl_lock);
-	cb->args[0] = idx;
+	cb->args[0] = tidx;
+	cb->args[1] = nidx;
 
 	return skb->len;
 }
-- 
GitLab


From b63bbc5006a0a62fabc81c4f77e95f16ff16f340 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 7 Aug 2006 18:00:57 -0700
Subject: [PATCH 0422/1063] [NEIGH]: Move netlink neighbour table bits to
 linux/neighbour.h

rtnetlink_rcv_msg() is no longer required to parse attributes
for the neighbour tables layer, so remove the dependency on the
obsolete and buggy rta_buf.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/neighbour.h |  94 +++++++++++++++++++++++++++++++++++
 include/linux/rtnetlink.h | 101 --------------------------------------
 net/core/rtnetlink.c      |   2 -
 3 files changed, 94 insertions(+), 103 deletions(-)

diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index 8e8293d86fbb4..bd3bbf668cdb0 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -62,4 +62,98 @@ struct nda_cacheinfo
 	__u32		ndm_refcnt;
 };
 
+/*****************************************************************
+ *		Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats
+{
+	__u64		ndts_allocs;
+	__u64		ndts_destroys;
+	__u64		ndts_hash_grows;
+	__u64		ndts_res_failed;
+	__u64		ndts_lookups;
+	__u64		ndts_hits;
+	__u64		ndts_rcv_probes_mcast;
+	__u64		ndts_rcv_probes_ucast;
+	__u64		ndts_periodic_gc_runs;
+	__u64		ndts_forced_gc_runs;
+};
+
+enum {
+	NDTPA_UNSPEC,
+	NDTPA_IFINDEX,			/* u32, unchangeable */
+	NDTPA_REFCNT,			/* u32, read-only */
+	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
+	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
+	NDTPA_RETRANS_TIME,		/* u64, msecs */
+	NDTPA_GC_STALETIME,		/* u64, msecs */
+	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
+	NDTPA_QUEUE_LEN,		/* u32 */
+	NDTPA_APP_PROBES,		/* u32 */
+	NDTPA_UCAST_PROBES,		/* u32 */
+	NDTPA_MCAST_PROBES,		/* u32 */
+	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_QLEN,		/* u32 */
+	NDTPA_LOCKTIME,			/* u64, msecs */
+	__NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+	__u8		ndtm_family;
+	__u8		ndtm_pad1;
+	__u16		ndtm_pad2;
+};
+
+struct ndt_config
+{
+	__u16		ndtc_key_len;
+	__u16		ndtc_entry_size;
+	__u32		ndtc_entries;
+	__u32		ndtc_last_flush;	/* delta to now in msecs */
+	__u32		ndtc_last_rand;		/* delta to now in msecs */
+	__u32		ndtc_hash_rnd;
+	__u32		ndtc_hash_mask;
+	__u32		ndtc_hash_chain_gc;
+	__u32		ndtc_proxy_qlen;
+};
+
+enum {
+	NDTA_UNSPEC,
+	NDTA_NAME,			/* char *, unchangeable */
+	NDTA_THRESH1,			/* u32 */
+	NDTA_THRESH2,			/* u32 */
+	NDTA_THRESH3,			/* u32 */
+	NDTA_CONFIG,			/* struct ndt_config, read-only */
+	NDTA_PARMS,			/* nested TLV NDTPA_* */
+	NDTA_STATS,			/* struct ndt_stats, read-only */
+	NDTA_GC_INTERVAL,		/* u64, msecs */
+	__NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
 #endif
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 9750f0214c225..784a1a29490ec 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -384,107 +384,6 @@ struct rta_session
 	} u;
 };
 
-
-
-/*****************************************************************
- *		Neighbour tables specific messages.
- *
- * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
- * NLM_F_DUMP flag set. Every neighbour table configuration is
- * spread over multiple messages to avoid running into message
- * size limits on systems with many interfaces. The first message
- * in the sequence transports all not device specific data such as
- * statistics, configuration, and the default parameter set.
- * This message is followed by 0..n messages carrying device
- * specific parameter sets.
- * Although the ordering should be sufficient, NDTA_NAME can be
- * used to identify sequences. The initial message can be identified
- * by checking for NDTA_CONFIG. The device specific messages do
- * not contain this TLV but have NDTPA_IFINDEX set to the
- * corresponding interface index.
- *
- * To change neighbour table attributes, send RTM_SETNEIGHTBL
- * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
- * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
- * otherwise. Device specific parameter sets can be changed by
- * setting NDTPA_IFINDEX to the interface index of the corresponding
- * device.
- ****/
-
-struct ndt_stats
-{
-	__u64		ndts_allocs;
-	__u64		ndts_destroys;
-	__u64		ndts_hash_grows;
-	__u64		ndts_res_failed;
-	__u64		ndts_lookups;
-	__u64		ndts_hits;
-	__u64		ndts_rcv_probes_mcast;
-	__u64		ndts_rcv_probes_ucast;
-	__u64		ndts_periodic_gc_runs;
-	__u64		ndts_forced_gc_runs;
-};
-
-enum {
-	NDTPA_UNSPEC,
-	NDTPA_IFINDEX,			/* u32, unchangeable */
-	NDTPA_REFCNT,			/* u32, read-only */
-	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
-	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
-	NDTPA_RETRANS_TIME,		/* u64, msecs */
-	NDTPA_GC_STALETIME,		/* u64, msecs */
-	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
-	NDTPA_QUEUE_LEN,		/* u32 */
-	NDTPA_APP_PROBES,		/* u32 */
-	NDTPA_UCAST_PROBES,		/* u32 */
-	NDTPA_MCAST_PROBES,		/* u32 */
-	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
-	NDTPA_PROXY_DELAY,		/* u64, msecs */
-	NDTPA_PROXY_QLEN,		/* u32 */
-	NDTPA_LOCKTIME,			/* u64, msecs */
-	__NDTPA_MAX
-};
-#define NDTPA_MAX (__NDTPA_MAX - 1)
-
-struct ndtmsg
-{
-	__u8		ndtm_family;
-	__u8		ndtm_pad1;
-	__u16		ndtm_pad2;
-};
-
-struct ndt_config
-{
-	__u16		ndtc_key_len;
-	__u16		ndtc_entry_size;
-	__u32		ndtc_entries;
-	__u32		ndtc_last_flush;	/* delta to now in msecs */
-	__u32		ndtc_last_rand;		/* delta to now in msecs */
-	__u32		ndtc_hash_rnd;
-	__u32		ndtc_hash_mask;
-	__u32		ndtc_hash_chain_gc;
-	__u32		ndtc_proxy_qlen;
-};
-
-enum {
-	NDTA_UNSPEC,
-	NDTA_NAME,			/* char *, unchangeable */
-	NDTA_THRESH1,			/* u32 */
-	NDTA_THRESH2,			/* u32 */
-	NDTA_THRESH3,			/* u32 */
-	NDTA_CONFIG,			/* struct ndt_config, read-only */
-	NDTA_PARMS,			/* nested TLV NDTPA_* */
-	NDTA_STATS,			/* struct ndt_stats, read-only */
-	NDTA_GC_INTERVAL,		/* u64, msecs */
-	__NDTA_MAX
-};
-#define NDTA_MAX (__NDTA_MAX - 1)
-
-#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
-		     NLMSG_ALIGN(sizeof(struct ndtmsg))))
-#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
-
-
 /****
  *		General form of address family dependent message.
  ****/
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78ccbd4c4e376..a1b783a6afc61 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -112,7 +112,6 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
-	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -125,7 +124,6 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
-	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
-- 
GitLab


From ac5a488ef252ed673cb067843e411f8cc43f7ab9 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Mon, 7 Aug 2006 20:57:31 -0700
Subject: [PATCH 0423/1063] [NET]: Round out in-kernel sockets API

This patch implements wrapper functions that provide a convenient way
to access the sockets API for in-kernel users like sunrpc, cifs &
ocfs2 etc and any future users.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  19 ++++++++
 net/socket.c        | 113 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index b20c53c744130..19da2c08d7b6b 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -208,6 +208,25 @@ extern int   	     kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 				    struct kvec *vec, size_t num,
 				    size_t len, int flags);
 
+extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
+		       int addrlen);
+extern int kernel_listen(struct socket *sock, int backlog);
+extern int kernel_accept(struct socket *sock, struct socket **newsock,
+			 int flags);
+extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
+			  int addrlen, int flags);
+extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+			      int *addrlen);
+extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+			      int *addrlen);
+extern int kernel_getsockopt(struct socket *sock, int level, int optname,
+			     char *optval, int *optlen);
+extern int kernel_setsockopt(struct socket *sock, int level, int optname,
+			     char *optval, int optlen);
+extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+			   size_t size, int flags);
+extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
+
 #ifndef CONFIG_SMP
 #define SOCKOPS_WRAPPED(name) name
 #define SOCKOPS_WRAP(name, fam)
diff --git a/net/socket.c b/net/socket.c
index 6756e57e1ff0b..2eaebf934a1a5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2170,6 +2170,109 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd,
 }
 #endif
 
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+	return sock->ops->bind(sock, addr, addrlen);
+}
+
+int kernel_listen(struct socket *sock, int backlog)
+{
+	return sock->ops->listen(sock, backlog);
+}
+
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+			       newsock);
+	if (err < 0)
+		goto done;
+
+	err = sock->ops->accept(sock, *newsock, flags);
+	if (err < 0) {
+		sock_release(*newsock);
+		goto done;
+	}
+
+	(*newsock)->ops = sock->ops;
+
+done:
+	return err;
+}
+
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+                   int flags)
+{
+	return sock->ops->connect(sock, addr, addrlen, flags);
+}
+
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+			 int *addrlen)
+{
+	return sock->ops->getname(sock, addr, addrlen, 0);
+}
+
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+			 int *addrlen)
+{
+	return sock->ops->getname(sock, addr, addrlen, 1);
+}
+
+int kernel_getsockopt(struct socket *sock, int level, int optname,
+			char *optval, int *optlen)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	if (level == SOL_SOCKET)
+		err = sock_getsockopt(sock, level, optname, optval, optlen);
+	else
+		err = sock->ops->getsockopt(sock, level, optname, optval,
+					    optlen);
+	set_fs(oldfs);
+	return err;
+}
+
+int kernel_setsockopt(struct socket *sock, int level, int optname,
+			char *optval, int optlen)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	if (level == SOL_SOCKET)
+		err = sock_setsockopt(sock, level, optname, optval, optlen);
+	else
+		err = sock->ops->setsockopt(sock, level, optname, optval,
+					    optlen);
+	set_fs(oldfs);
+	return err;
+}
+
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+		    size_t size, int flags)
+{
+	if (sock->ops->sendpage)
+		return sock->ops->sendpage(sock, page, offset, size, flags);
+
+	return sock_no_sendpage(sock, page, offset, size, flags);
+}
+
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	err = sock->ops->ioctl(sock, cmd, arg);
+	set_fs(oldfs);
+
+	return err;
+}
+
 /* ABI emulation layers need these two */
 EXPORT_SYMBOL(move_addr_to_kernel);
 EXPORT_SYMBOL(move_addr_to_user);
@@ -2186,3 +2289,13 @@ EXPORT_SYMBOL(sock_wake_async);
 EXPORT_SYMBOL(sockfd_lookup);
 EXPORT_SYMBOL(kernel_sendmsg);
 EXPORT_SYMBOL(kernel_recvmsg);
+EXPORT_SYMBOL(kernel_bind);
+EXPORT_SYMBOL(kernel_listen);
+EXPORT_SYMBOL(kernel_accept);
+EXPORT_SYMBOL(kernel_connect);
+EXPORT_SYMBOL(kernel_getsockname);
+EXPORT_SYMBOL(kernel_getpeername);
+EXPORT_SYMBOL(kernel_getsockopt);
+EXPORT_SYMBOL(kernel_setsockopt);
+EXPORT_SYMBOL(kernel_sendpage);
+EXPORT_SYMBOL(kernel_sock_ioctl);
-- 
GitLab


From e6242e928ef1e4ed853f909a7479e4934f4bcb70 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Mon, 7 Aug 2006 20:58:01 -0700
Subject: [PATCH 0424/1063] [SUNRPC]: Update to use in-kernel sockets API.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/svcsock.c  | 38 ++++++++++++++------------------------
 net/sunrpc/xprtsock.c |  8 ++++----
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d9a95732df46a..953aff89bcac1 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 	/* send head */
 	if (slen == xdr->head[0].iov_len)
 		flags = 0;
-	len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
+	len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
 	if (len != xdr->head[0].iov_len)
 		goto out;
 	slen -= xdr->head[0].iov_len;
@@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 	while (pglen > 0) {
 		if (slen == size)
 			flags = 0;
-		result = sock->ops->sendpage(sock, *ppage, base, size, flags);
+		result = kernel_sendpage(sock, *ppage, base, size, flags);
 		if (result > 0)
 			len += result;
 		if (result != size)
@@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 	}
 	/* send tail */
 	if (xdr->tail[0].iov_len) {
-		result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], 
+		result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
 					     ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
 					     xdr->tail[0].iov_len, 0);
 
@@ -434,13 +434,10 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 static int
 svc_recv_available(struct svc_sock *svsk)
 {
-	mm_segment_t	oldfs;
 	struct socket	*sock = svsk->sk_sock;
 	int		avail, err;
 
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
-	set_fs(oldfs);
+	err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
 
 	return (err >= 0)? avail : err;
 }
@@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
 	 * at accept time. FIXME
 	 */
 	alen = sizeof(rqstp->rq_addr);
-	sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
+	kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen);
 
 	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
 		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
@@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk)
 	struct svc_serv	*serv = svsk->sk_server;
 	struct socket	*sock = svsk->sk_sock;
 	struct socket	*newsock;
-	const struct proto_ops *ops;
 	struct svc_sock	*newsvsk;
 	int		err, slen;
 
@@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk)
 	if (!sock)
 		return;
 
-	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
-	if (err) {
+	clear_bit(SK_CONN, &svsk->sk_flags);
+	err = kernel_accept(sock, &newsock, O_NONBLOCK);
+	if (err < 0) {
 		if (err == -ENOMEM)
 			printk(KERN_WARNING "%s: no more sockets!\n",
 			       serv->sv_name);
-		return;
-	}
-
-	dprintk("svc: tcp_accept %p allocated\n", newsock);
-	newsock->ops = ops = sock->ops;
-
-	clear_bit(SK_CONN, &svsk->sk_flags);
-	if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
-		if (err != -EAGAIN && net_ratelimit())
+		else if (err != -EAGAIN && net_ratelimit())
 			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
 				   serv->sv_name, -err);
-		goto failed;		/* aborted connection or whatever */
+		return;
 	}
+
 	set_bit(SK_CONN, &svsk->sk_flags);
 	svc_sock_enqueue(svsk);
 
 	slen = sizeof(sin);
-	err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
+	err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen);
 	if (err < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
@@ -1406,14 +1396,14 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
 	if (sin != NULL) {
 		if (type == SOCK_STREAM)
 			sock->sk->sk_reuse = 1; /* allow address reuse */
-		error = sock->ops->bind(sock, (struct sockaddr *) sin,
+		error = kernel_bind(sock, (struct sockaddr *) sin,
 						sizeof(*sin));
 		if (error < 0)
 			goto bummer;
 	}
 
 	if (protocol == IPPROTO_TCP) {
-		if ((error = sock->ops->listen(sock, 64)) < 0)
+		if ((error = kernel_listen(sock, 64)) < 0)
 			goto bummer;
 	}
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 441bd53f5eca8..8b319e375049b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -207,7 +207,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
 		base &= ~PAGE_CACHE_MASK;
 	}
 
-	sendpage = sock->ops->sendpage ? : sock_no_sendpage;
+	sendpage = kernel_sendpage;
 	do {
 		int flags = XS_SENDMSG_FLAGS;
 
@@ -986,7 +986,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
 
 	do {
 		myaddr.sin_port = htons(port);
-		err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
+		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
 						sizeof(myaddr));
 		if (err == 0) {
 			xprt->port = port;
@@ -1081,7 +1081,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
 	 */
 	memset(&any, 0, sizeof(any));
 	any.sa_family = AF_UNSPEC;
-	result = sock->ops->connect(sock, &any, sizeof(any), 0);
+	result = kernel_connect(sock, &any, sizeof(any), 0);
 	if (result)
 		dprintk("RPC:      AF_UNSPEC connect return code %d\n",
 				result);
@@ -1151,7 +1151,7 @@ static void xs_tcp_connect_worker(void *args)
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
-	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
+	status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
 			sizeof(xprt->addr), O_NONBLOCK);
 	dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
 			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
-- 
GitLab


From 8ce11e6a9faf1f1c849b77104adc1642c46aee95 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 7 Aug 2006 21:50:48 -0700
Subject: [PATCH 0425/1063] [NET]: Make code static.

This patch makes needlessly global code static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h              | 4 ----
 net/ipv4/cipso_ipv4.c              | 2 +-
 net/ipv4/fib_rules.c               | 4 ++--
 net/ipv6/fib6_rules.c              | 4 ++--
 net/ipv6/ip6_fib.c                 | 6 +++---
 net/ipv6/route.c                   | 6 +++---
 net/netlabel/netlabel_domainhash.c | 4 ++--
 7 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 7b47e8d5a765b..c0660cea9a2f6 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -192,10 +192,6 @@ struct fib6_node		*fib6_locate(struct fib6_node *root,
 					     struct in6_addr *daddr, int dst_len,
 					     struct in6_addr *saddr, int src_len);
 
-extern void			fib6_clean_tree(struct fib6_node *root,
-						int (*func)(struct rt6_info *, void *arg),
-						int prune, void *arg);
-
 extern void			fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
 					       int prune, void *arg);
 
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index b82a101c95c5b..80a2a0911b49a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -60,7 +60,7 @@ struct cipso_v4_domhsh_entry {
  * if in practice there are a lot of different DOIs this list should
  * probably be turned into a hash table or something similar so we
  * can do quick lookups. */
-DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
+static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
 static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
 
 /* Label mapping cache */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 23ec6ae1a0f69..03d1e8a43a48a 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -101,8 +101,8 @@ int fib_lookup(struct flowi *flp, struct fib_result *res)
 	return err;
 }
 
-int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags,
-		     struct fib_lookup_arg *arg)
+static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
 {
 	int err = -EAGAIN;
 	struct fib_table *tbl;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 94a46ec967a41..bf9bba83b852e 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -66,8 +66,8 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
 	return (struct dst_entry *) arg.result;
 }
 
-int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
-		     int flags, struct fib_lookup_arg *arg)
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
 {
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ce226c14bef5b..1f2316187ca40 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1169,9 +1169,9 @@ static int fib6_clean_node(struct fib6_walker_t *w)
  *	ignoring pure split nodes) will be scanned.
  */
 
-void fib6_clean_tree(struct fib6_node *root,
-		     int (*func)(struct rt6_info *, void *arg),
-		     int prune, void *arg)
+static void fib6_clean_tree(struct fib6_node *root,
+			    int (*func)(struct rt6_info *, void *arg),
+			    int prune, void *arg)
 {
 	struct fib6_cleaner_t c;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 41c5905d31913..e08d84063c1fd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -613,8 +613,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 	return rt;
 }
 
-struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl,
-				     int flags)
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+					    struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
@@ -872,7 +872,7 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
 }
 
 static struct dst_entry *ndisc_dst_gc_list;
-DEFINE_SPINLOCK(ndisc_lock);
+static DEFINE_SPINLOCK(ndisc_lock);
 
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
 				  struct neighbour *neigh,
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 5bb3fad4a1159..0489a1378101b 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -50,11 +50,11 @@ struct netlbl_domhsh_tbl {
 /* Domain hash table */
 /* XXX - updates should be so rare that having one spinlock for the entire
  * hash table should be okay */
-DEFINE_SPINLOCK(netlbl_domhsh_lock);
+static DEFINE_SPINLOCK(netlbl_domhsh_lock);
 static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
 
 /* Default domain mapping */
-DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
+static DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
 static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
 
 /*
-- 
GitLab


From 8423a9aadfaa135fd5fd1ab8bbd4a1e76b4143c9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 7 Aug 2006 21:54:37 -0700
Subject: [PATCH 0426/1063] [IPV6]: Protect RTM_GETRULE table entry with
 IPV6_MULTIPLE_TABLES ifdef

This is how IPv4 handles this case too.

Based upon a patch from Andrew Morton.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c2a4db843e514..9ba1e811ba502 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3529,7 +3529,9 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
 	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
 				      .dumpit	= inet6_dump_fib, },
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	[RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib6_rules_dump,   },
+#endif
 };
 
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
-- 
GitLab


From 0298f36a579b5bd7f10f6f6d57e5929977a865a1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 7 Aug 2006 21:56:52 -0700
Subject: [PATCH 0427/1063] [IPV4]: Kill fib4_rules_cleanup().

As noted by Adrian Bunk this function is totally unused.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 1 -
 net/ipv4/fib_rules.c | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 14c82e611c95f..adf73586bc050 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -254,7 +254,6 @@ extern struct fib_table *fib_hash_init(int id);
 extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 extern void __init fib4_rules_init(void);
-extern void __exit fib4_rules_cleanup(void);
 
 #ifdef CONFIG_NET_CLS_ROUTE
 extern u32 fib_rules_tclass(struct fib_result *res);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 03d1e8a43a48a..d242e5291fccc 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -347,8 +347,3 @@ void __init fib4_rules_init(void)
 
 	fib_rules_register(&fib4_rules_ops);
 }
-
-void __exit fib4_rules_cleanup(void)
-{
-	fib_rules_unregister(&fib4_rules_ops);
-}
-- 
GitLab


From 1a01912ae0a5666c4c24eaae2b4821711e2ad79a Mon Sep 17 00:00:00 2001
From: Louis Nyffenegger <louis.nyffenegger@gmail.com>
Date: Tue, 8 Aug 2006 00:56:11 -0700
Subject: [PATCH 0428/1063] [INET]: Remove is_setbyuser patch

The value is_setbyuser from struct ip_options is never used and set
only one time (http://linux-net.osdl.org/index.php/TODO#IPV4).
This little patch removes it from the kernel source.

Signed-off-by: Louis Nyffenegger <louis.nyffenegger@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 4 +---
 net/ipv4/ip_options.c   | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f4caad56cd035..f6242710f2ffe 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -27,7 +27,6 @@
 /** struct ip_options - IP Options
  *
  * @faddr - Saved first hop address
- * @is_setbyuser - Set by setsockopt?
  * @is_data - Options in __data, rather than skb
  * @is_strictroute - Strict source route
  * @srr_is_hit - Packet destination addr was our one
@@ -42,8 +41,7 @@ struct ip_options {
 	unsigned char	srr;
 	unsigned char	rr;
 	unsigned char	ts;
-	unsigned char	is_setbyuser:1,
-			is_data:1,
+	unsigned char	is_data:1,
 			is_strictroute:1,
 			srr_is_hit:1,
 			is_changed:1,
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index e0a93b4fa8cc7..e7437c0913266 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -525,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp,
 		opt->__data[optlen++] = IPOPT_END;
 	opt->optlen = optlen;
 	opt->is_data = 1;
-	opt->is_setbyuser = 1;
 	if (optlen && ip_options_compile(opt, NULL)) {
 		kfree(opt);
 		return -EINVAL;
-- 
GitLab


From 99a92ff50424146ba01a222248fd47a1cd55b78f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 8 Aug 2006 02:18:10 -0700
Subject: [PATCH 0429/1063] [IPV4]: Uninline inet_lookup_listener

By modern standards this function is way too big to be inlined.  It's
even bigger than __inet_lookup_listener :)

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 33 ++-------------------------------
 net/ipv4/inet_hashtables.c    | 35 ++++++++++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 98e0bb3014fea..bd513f3b9c7ee 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -271,39 +271,10 @@ static inline int inet_iif(const struct sk_buff *skb)
 	return ((struct rtable *)skb->dst)->rt_iif;
 }
 
-extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
-					   const u32 daddr,
-					   const unsigned short hnum,
-					   const int dif);
-
-/* Optimize the common listener case. */
-static inline struct sock *
+extern struct sock *
 		inet_lookup_listener(struct inet_hashinfo *hashinfo,
 				     const u32 daddr,
-				     const unsigned short hnum, const int dif)
-{
-	struct sock *sk = NULL;
-	const struct hlist_head *head;
-
-	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
-	if (!hlist_empty(head)) {
-		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
-		if (inet->num == hnum && !sk->sk_node.next &&
-		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
-		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-		    !sk->sk_bound_dev_if)
-			goto sherry_cache;
-		sk = __inet_lookup_listener(head, daddr, hnum, dif);
-	}
-	if (sk) {
-sherry_cache:
-		sock_hold(sk);
-	}
-	read_unlock(&hashinfo->lhash_lock);
-	return sk;
-}
+				     const unsigned short hnum, const int dif);
 
 /* Socket demux engine toys. */
 #ifdef __BIG_ENDIAN
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 95fac5532994d..bfc39066e7307 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock);
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
-struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-				    const unsigned short hnum, const int dif)
+static struct sock *__inet_lookup_listener(const struct hlist_head *head,
+					   const u32 daddr,
+					   const unsigned short hnum,
+					   const int dif)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -159,7 +161,34 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
 	return result;
 }
 
-EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+/* Optimize the common listener case. */
+struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
+				  const u32 daddr, const unsigned short hnum,
+				  const int dif)
+{
+	struct sock *sk = NULL;
+	const struct hlist_head *head;
+
+	read_lock(&hashinfo->lhash_lock);
+	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	if (!hlist_empty(head)) {
+		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+		if (inet->num == hnum && !sk->sk_node.next &&
+		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+		    !sk->sk_bound_dev_if)
+			goto sherry_cache;
+		sk = __inet_lookup_listener(head, daddr, hnum, dif);
+	}
+	if (sk) {
+sherry_cache:
+		sock_hold(sk);
+	}
+	read_unlock(&hashinfo->lhash_lock);
+	return sk;
+}
+EXPORT_SYMBOL_GPL(inet_lookup_listener);
 
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
-- 
GitLab


From b14295532421c40f82ee099fdbd3d011f022e756 Mon Sep 17 00:00:00 2001
From: Ville Nuorvala <vnuorval@tcs.hut.fi>
Date: Tue, 8 Aug 2006 16:44:17 -0700
Subject: [PATCH 0430/1063] [IPV6]: Make sure fib6_rule_lookup doesn't return
 NULL

The callers of fib6_rule_lookup don't expect it to return NULL,
therefore it must return ip6_null_entry whenever fib_rule_lookup fails.

Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index bf9bba83b852e..22a2fdb098319 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -63,7 +63,11 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
 	if (arg.rule)
 		fib_rule_put(arg.rule);
 
-	return (struct dst_entry *) arg.result;
+	if (arg.result)
+		return (struct dst_entry *) arg.result;
+
+	dst_hold(&ip6_null_entry.u.dst);
+	return &ip6_null_entry.u.dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
-- 
GitLab


From 832b4c5e184391773e462653aa862a8cab71f38d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 29 Aug 2006 16:48:09 -0700
Subject: [PATCH 0431/1063] [IPV4] fib: convert reader/writer to spinlock

There is no point in using a more expensive reader/writer lock
for a low contention lock like the fib_info_lock. The only
reader case is in handling route redirects.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 51738000f3dc4..38bca473c7e29 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -49,7 +49,7 @@
 
 #define FSprintk(a...)
 
-static DEFINE_RWLOCK(fib_info_lock);
+static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_hash_size;
@@ -159,7 +159,7 @@ void free_fib_info(struct fib_info *fi)
 
 void fib_release_info(struct fib_info *fi)
 {
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
@@ -172,7 +172,7 @@ void fib_release_info(struct fib_info *fi)
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 }
 
 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -254,7 +254,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
 	struct fib_nh *nh;
 	unsigned int hash;
 
-	read_lock(&fib_info_lock);
+	spin_lock(&fib_info_lock);
 
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];
@@ -262,12 +262,12 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
 		if (nh->nh_dev == dev &&
 		    nh->nh_gw == gw &&
 		    !(nh->nh_flags&RTNH_F_DEAD)) {
-			read_unlock(&fib_info_lock);
+			spin_unlock(&fib_info_lock);
 			return 0;
 		}
 	}
 
-	read_unlock(&fib_info_lock);
+	spin_unlock(&fib_info_lock);
 
 	return -1;
 }
@@ -598,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	unsigned int old_size = fib_hash_size;
 	unsigned int i, bytes;
 
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
 	fib_hash_size = new_size;
@@ -639,7 +639,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	}
 	fib_info_laddrhash = new_laddrhash;
 
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 
 	bytes = old_size * sizeof(struct hlist_head *);
 	fib_hash_free(old_info_hash, bytes);
@@ -820,7 +820,7 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	hlist_add_head(&fi->fib_hash,
 		       &fib_info_hash[fib_info_hashfn(fi)]);
 	if (fi->fib_prefsrc) {
@@ -839,7 +839,7 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 	return fi;
 
 err_inval:
-- 
GitLab


From 8f491069b40be5d627007a343f99759e9da6a178 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 9 Aug 2006 15:47:12 -0700
Subject: [PATCH 0432/1063] [IPV4]: Use network-order dport for all visible
 inet_lookup_*

Right now most inet_lookup_* functions take a host-order hnum instead
of a network-order dport because that's how it is represented
internally.

This means that users of these functions have to be careful about
using the right byte-order.  To add more confusion, inet_lookup takes
a network-order dport unlike all other functions.

So this patch changes all visible inet_lookup functions to take a
dport and move all dport->hnum conversion inside them.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 31 ++++++++++++++++++++++++-------
 net/dccp/ipv4.c               | 10 +++++-----
 net/ipv4/inet_hashtables.c    | 18 +++++++++---------
 net/ipv4/tcp_ipv4.c           | 10 +++++-----
 4 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index bd513f3b9c7ee..b4491c9e2a5a0 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -271,10 +271,16 @@ static inline int inet_iif(const struct sk_buff *skb)
 	return ((struct rtable *)skb->dst)->rt_iif;
 }
 
-extern struct sock *
-		inet_lookup_listener(struct inet_hashinfo *hashinfo,
-				     const u32 daddr,
-				     const unsigned short hnum, const int dif);
+extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+					   const u32 daddr,
+					   const unsigned short hnum,
+					   const int dif);
+
+static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
+						u32 daddr, u16 dport, int dif)
+{
+	return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif);
+}
 
 /* Socket demux engine toys. */
 #ifdef __BIG_ENDIAN
@@ -362,14 +368,25 @@ static inline struct sock *
 	goto out;
 }
 
+static inline struct sock *
+	inet_lookup_established(struct inet_hashinfo *hashinfo,
+				const u32 saddr, const u16 sport,
+				const u32 daddr, const u16 dport,
+				const int dif)
+{
+	return __inet_lookup_established(hashinfo, saddr, sport, daddr,
+					 ntohs(dport), dif);
+}
+
 static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
 					 const u32 saddr, const u16 sport,
-					 const u32 daddr, const u16 hnum,
+					 const u32 daddr, const u16 dport,
 					 const int dif)
 {
+	u16 hnum = ntohs(dport);
 	struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
 						    hnum, dif);
-	return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
+	return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif);
 }
 
 static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
@@ -380,7 +397,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
 	struct sock *sk;
 
 	local_bh_disable();
-	sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+	sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif);
 	local_bh_enable();
 
 	return sk;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 171d363876eeb..9a1a76a7dc41f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -608,10 +608,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	if (req != NULL)
 		return dccp_check_req(sk, skb, req, prev);
 
-	nsk = __inet_lookup_established(&dccp_hashinfo,
-					iph->saddr, dh->dccph_sport,
-					iph->daddr, ntohs(dh->dccph_dport),
-					inet_iif(skb));
+	nsk = inet_lookup_established(&dccp_hashinfo,
+				      iph->saddr, dh->dccph_sport,
+				      iph->daddr, dh->dccph_dport,
+				      inet_iif(skb));
 	if (nsk != NULL) {
 		if (nsk->sk_state != DCCP_TIME_WAIT) {
 			bh_lock_sock(nsk);
@@ -925,7 +925,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 	 * 	Look up flow ID in table and get corresponding socket */
 	sk = __inet_lookup(&dccp_hashinfo,
 			   skb->nh.iph->saddr, dh->dccph_sport,
-			   skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+			   skb->nh.iph->daddr, dh->dccph_dport,
 			   inet_iif(skb));
 
 	/* 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index bfc39066e7307..fb296c9a7f3fe 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -124,10 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock);
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
-static struct sock *__inet_lookup_listener(const struct hlist_head *head,
-					   const u32 daddr,
-					   const unsigned short hnum,
-					   const int dif)
+static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+					      const u32 daddr,
+					      const unsigned short hnum,
+					      const int dif)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -162,9 +162,9 @@ static struct sock *__inet_lookup_listener(const struct hlist_head *head,
 }
 
 /* Optimize the common listener case. */
-struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
-				  const u32 daddr, const unsigned short hnum,
-				  const int dif)
+struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+				    const u32 daddr, const unsigned short hnum,
+				    const int dif)
 {
 	struct sock *sk = NULL;
 	const struct hlist_head *head;
@@ -179,7 +179,7 @@ struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
 		    !sk->sk_bound_dev_if)
 			goto sherry_cache;
-		sk = __inet_lookup_listener(head, daddr, hnum, dif);
+		sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
 	}
 	if (sk) {
 sherry_cache:
@@ -188,7 +188,7 @@ struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
 	read_unlock(&hashinfo->lhash_lock);
 	return sk;
 }
-EXPORT_SYMBOL_GPL(inet_lookup_listener);
+EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b2aa512a30e9d..2973dee0a489e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -951,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
-					th->source, skb->nh.iph->daddr,
-					ntohs(th->dest), inet_iif(skb));
+	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+				      th->source, skb->nh.iph->daddr,
+				      th->dest, inet_iif(skb));
 
 	if (nsk) {
 		if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1090,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
 	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
-			   skb->nh.iph->daddr, ntohs(th->dest),
+			   skb->nh.iph->daddr, th->dest,
 			   inet_iif(skb));
 
 	if (!sk)
@@ -1168,7 +1168,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
 							skb->nh.iph->daddr,
-							ntohs(th->dest),
+							th->dest,
 							inet_iif(skb));
 		if (sk2) {
 			inet_twsk_deschedule((struct inet_timewait_sock *)sk,
-- 
GitLab


From a8731cbf61c8768ea129780b70dc7dfc6795aad4 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Wed, 9 Aug 2006 15:56:46 -0700
Subject: [PATCH 0433/1063] [DECNET]: Convert rules to use generic code

This patch converts the DECnet rules code to use the generic
rules system created by Thomas Graf <tgraf@suug.ch>.

Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |   3 +-
 include/net/dn_fib.h      |   8 +-
 net/decnet/Kconfig        |   1 +
 net/decnet/af_decnet.c    |   1 +
 net/decnet/dn_dev.c       |   3 +-
 net/decnet/dn_fib.c       |   1 +
 net/decnet/dn_route.c     |   3 +-
 net/decnet/dn_rules.c     | 494 ++++++++++++++------------------------
 net/decnet/dn_table.c     |   1 +
 9 files changed, 196 insertions(+), 319 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 784a1a29490ec..0aaffa2ae666e 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -534,7 +534,8 @@ enum rtnetlink_groups {
 	RTNLGRP_NOP2,
 	RTNLGRP_DECnet_ROUTE,
 #define RTNLGRP_DECnet_ROUTE	RTNLGRP_DECnet_ROUTE
-	RTNLGRP_NOP3,
+	RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE	RTNLGRP_DECnet_RULE
 	RTNLGRP_NOP4,
 	RTNLGRP_IPV6_PREFIX,
 #define RTNLGRP_IPV6_PREFIX	RTNLGRP_IPV6_PREFIX
diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index a15dcf0d5c1e6..32bc8ce5c5ce8 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -22,7 +22,7 @@ struct dn_kern_rta
 };
 
 struct dn_fib_res {
-	struct dn_fib_rule *r;
+	struct fib_rule *r;
 	struct dn_fib_info *fi;
 	unsigned char prefixlen;
 	unsigned char nh_sel;
@@ -147,10 +147,8 @@ extern void dn_fib_table_cleanup(void);
  */
 extern void dn_fib_rules_init(void);
 extern void dn_fib_rules_cleanup(void);
-extern void dn_fib_rule_put(struct dn_fib_rule *);
-extern __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags);
 extern unsigned dnet_addr_type(__le16 addr);
-extern int dn_fib_lookup(const struct flowi *fl, struct dn_fib_res *res);
+extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res);
 
 /*
  * rtnetlink interface
@@ -176,7 +174,7 @@ static inline void dn_fib_res_put(struct dn_fib_res *res)
 	if (res->fi)
 		dn_fib_info_put(res->fi);
 	if (res->r)
-		dn_fib_rule_put(res->r);
+		fib_rule_put(res->r);
 }
 
 extern struct dn_fib_table *dn_fib_tables[];
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 92f2ec46fd224..36e72cb145b04 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -27,6 +27,7 @@ config DECNET
 config DECNET_ROUTER
 	bool "DECnet: router support (EXPERIMENTAL)"
 	depends on DECNET && EXPERIMENTAL
+	select FIB_RULES
 	---help---
 	  Add support for turning your DECnet Endnode into a level 1 or 2
 	  router.  This is an experimental, but functional option.  If you
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 5486247735f6a..70e027375682c 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -130,6 +130,7 @@ Version 0.0.6    2.1.110   07-aug-98   Eduardo Marcelo Serrat
 #include <linux/poll.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_nsp.h>
 #include <net/dn_dev.h>
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 632c5a90b5895..88ea7a13bb242 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -46,6 +46,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_route.h>
@@ -1418,8 +1419,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
 	[RTM_DELROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_delroute,	},
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
 				      .dumpit	= dn_fib_dump,		},
-	[RTM_NEWRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_newrule,	},
-	[RTM_DELRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_delrule,	},
 	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= dn_fib_dump_rules,	},
 #else
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index fa20e2efcfc1b..846df3954a633 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -34,6 +34,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 743e9fcf7c5ae..5e6f4616ca10a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -80,6 +80,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_nsp.h>
@@ -1284,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		dev_hold(out_dev);
 
 		if (res.r)
-			src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags);
+			src_map = fl.fld_src; /* no NAT support for now */
 
 		gateway = DN_FIB_RES_GW(res);
 		if (res.type == RTN_NAT) {
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 6986be754ef28..096f1273e714f 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -11,259 +11,198 @@
  *
  *
  * Changes:
+ *              Steve Whitehouse <steve@chygwyn.com>
+ *              Updated for Thomas Graf's generic rules
  *
  */
-#include <linux/string.h>
 #include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
 #include <linux/init.h>
-#include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
 #include <linux/netdevice.h>
-#include <linux/timer.h>
 #include <linux/spinlock.h>
-#include <linux/in_route.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
 
+static struct fib_rules_ops dn_fib_rules_ops;
+
 struct dn_fib_rule
 {
-	struct hlist_node	r_hlist;
-	atomic_t		r_clntref;
-	u32			r_preference;
-	unsigned char		r_table;
-	unsigned char		r_action;
-	unsigned char		r_dst_len;
-	unsigned char		r_src_len;
-	__le16			r_src;
-	__le16			r_srcmask;
-	__le16			r_dst;
-	__le16			r_dstmask;
-	__le16			r_srcmap;
-	u8			r_flags;
+	struct fib_rule		common;
+	unsigned char		dst_len;
+	unsigned char		src_len;
+	__le16			src;
+	__le16			srcmask;
+	__le16			dst;
+	__le16			dstmask;
+	__le16			srcmap;
+	u8			flags;
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	u32			r_fwmark;
+	u32			fwmark;
 #endif
-	int			r_ifindex;
-	char			r_ifname[IFNAMSIZ];
-	int			r_dead;
-	struct rcu_head		rcu;
 };
 
 static struct dn_fib_rule default_rule = {
-	.r_clntref =		ATOMIC_INIT(2),
-	.r_preference =		0x7fff,
-	.r_table =		RT_TABLE_MAIN,
-	.r_action =		RTN_UNICAST
+	.common = {
+		.refcnt =		ATOMIC_INIT(2),
+		.pref =			0x7fff,
+		.table =		RT_TABLE_MAIN,
+		.action =		FR_ACT_TO_TBL,
+	},
 };
 
-static struct hlist_head dn_fib_rules;
+static LIST_HEAD(dn_fib_rules);
+
 
-int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-	int err = -ESRCH;
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) &&
-			rtm->rtm_src_len == r->r_src_len &&
-			rtm->rtm_dst_len == r->r_dst_len &&
-			(!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) &&
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-			(!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
-			(!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
-			(!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
-			(!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
-			(!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-
-			err = -EPERM;
-			if (r == &default_rule)
-				break;
-
-			hlist_del_rcu(&r->r_hlist);
-			r->r_dead = 1;
-			dn_fib_rule_put(r);
-			err = 0;
-			break;
-		}
-	}
+	struct fib_lookup_arg arg = {
+		.result = res,
+	};
+	int err;
+
+	err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg);
+	res->r = arg.rule;
 
 	return err;
 }
 
-static inline void dn_fib_rule_put_rcu(struct rcu_head *head)
+int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags,
+		       struct fib_lookup_arg *arg)
 {
-	struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu);
-	kfree(r);
-}
+	int err = -EAGAIN;
+	struct dn_fib_table *tbl;
 
-void dn_fib_rule_put(struct dn_fib_rule *r)
-{
-	if (atomic_dec_and_test(&r->r_clntref)) {
-		if (r->r_dead)
-			call_rcu(&r->rcu, dn_fib_rule_put_rcu);
-		else
-			printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n");
+	switch(rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		goto errout;
+
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		goto errout;
+
+	case FR_ACT_BLACKHOLE:
+	default:
+		err = -EINVAL;
+		goto errout;
 	}
+
+	tbl = dn_fib_get_table(rule->table, 0);
+	if (tbl == NULL)
+		goto errout;
+
+	err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result);
+	if (err > 0)
+		err = -EAGAIN;
+errout:
+	return err;
 }
 
+static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .type = NLA_U16 },
+	[FRA_DST]	= { .type = NLA_U16 },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+};
 
-int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r, *new_r, *last = NULL;
-	struct hlist_node *node = NULL;
-	unsigned char table_id;
-
-	if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16)
-		return -EINVAL;
-
-	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
-		return -EINVAL;
-
-	if (rtm->rtm_type == RTN_NAT)
-		return -EINVAL;
-
-	table_id = rtm->rtm_table;
-	if (table_id == RT_TABLE_UNSPEC) {
-		struct dn_fib_table *tb;
-		if (rtm->rtm_type == RTN_UNICAST) {
-			if ((tb = dn_fib_empty_table()) == NULL)
-				return -ENOBUFS;
-			table_id = tb->n;
-		}
-	}
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+	u16 daddr = fl->fld_dst;
+	u16 saddr = fl->fld_src;
+
+	if (((saddr ^ r->src) & r->srcmask) ||
+	    ((daddr ^ r->dst) & r->dstmask))
+		return 0;
 
-	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
-	if (!new_r)
-		return -ENOMEM;
-
-	if (rta[RTA_SRC-1])
-		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2);
-	if (rta[RTA_DST-1])
-		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2);
-	if (rta[RTA_GATEWAY-1])
-		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2);
-	new_r->r_src_len = rtm->rtm_src_len;
-	new_r->r_dst_len = rtm->rtm_dst_len;
-	new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len);
-	new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len);
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (rta[RTA_PROTOINFO-1])
-		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
+	if (r->fwmark && (r->fwmark != fl->fld_fwmark))
+		return 0;
 #endif
-	new_r->r_action = rtm->rtm_type;
-	new_r->r_flags = rtm->rtm_flags;
-	if (rta[RTA_PRIORITY-1])
-		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	new_r->r_table = table_id;
-	if (rta[RTA_IIF-1]) {
-		struct net_device *dev;
-		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
-		new_r->r_ifindex = -1;
-		dev = dev_get_by_name(new_r->r_ifname);
-		if (dev) {
-			new_r->r_ifindex = dev->ifindex;
-			dev_put(dev);
-		}
-	}
 
-	r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist);
-	if (!new_r->r_preference) {
-		if (r && r->r_hlist.next != NULL) {
-			r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist);
-			if (r->r_preference)
-				new_r->r_preference = r->r_preference - 1;
+	return 1;
+}
+
+static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+				 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+				 struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+	if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos)
+		goto  errout;
+
+	if (rule->table == RT_TABLE_UNSPEC) {
+		if (rule->action == FR_ACT_TO_TBL) {
+			struct dn_fib_table *table;
+
+			table = dn_fib_empty_table();
+			if (table == NULL) {
+				err = -ENOBUFS;
+				goto errout;
+			}
+
+			rule->table = table->n;
 		}
 	}
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_preference > new_r->r_preference)
-			break;
-		last = r;
-	}
-	atomic_inc(&new_r->r_clntref);
+	if (tb[FRA_SRC])
+		r->src = nla_get_u16(tb[FRA_SRC]);
 
-	if (last)
-		hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist);
-	else
-		hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist);
-	return 0;
-}
+	if (tb[FRA_DST])
+		r->dst = nla_get_u16(tb[FRA_DST]);
 
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (tb[FRA_FWMARK])
+		r->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+#endif
+
+	r->src_len = frh->src_len;
+	r->srcmask = dnet_make_mask(r->src_len);
+	r->dst_len = frh->dst_len;
+	r->dstmask = dnet_make_mask(r->dst_len);
+	err = 0;
+errout:
+	return err;
+}
 
-int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res)
+static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
 {
-	struct dn_fib_rule *r, *policy;
-	struct dn_fib_table *tb;
-	__le16 saddr = flp->fld_src;
-	__le16 daddr = flp->fld_dst;
-	struct hlist_node *node;
-	int err;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+	if (frh->src_len && (r->src_len != frh->src_len))
+		return 0;
 
-	rcu_read_lock();
+	if (frh->dst_len && (r->dst_len != frh->dst_len))
+		return 0;
 
-	hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) {
-		if (((saddr^r->r_src) & r->r_srcmask) ||
-		    ((daddr^r->r_dst) & r->r_dstmask) ||
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) ||
+	if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
 #endif
-		    (r->r_ifindex && r->r_ifindex != flp->iif))
-			continue;
-
-		switch(r->r_action) {
-			case RTN_UNICAST:
-			case RTN_NAT:
-				policy = r;
-				break;
-			case RTN_UNREACHABLE:
-				rcu_read_unlock();
-				return -ENETUNREACH;
-			default:
-			case RTN_BLACKHOLE:
-				rcu_read_unlock();
-				return -EINVAL;
-			case RTN_PROHIBIT:
-				rcu_read_unlock();
-				return -EACCES;
-		}
 
-		if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL)
-			continue;
-		err = tb->lookup(tb, flp, res);
-		if (err == 0) {
-			res->r = policy;
-			if (policy)
-				atomic_inc(&policy->r_clntref);
-			rcu_read_unlock();
-			return 0;
-		}
-		if (err < 0 && err != -EAGAIN) {
-			rcu_read_unlock();
-			return err;
-		}
-	}
+	if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
+		return 0;
+
+	if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST])))
+		return 0;
 
-	rcu_read_unlock();
-	return -ESRCH;
+	return 1;
 }
 
 unsigned dnet_addr_type(__le16 addr)
@@ -284,142 +223,77 @@ unsigned dnet_addr_type(__le16 addr)
 	return ret;
 }
 
-__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags)
+static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			    struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
 {
-	struct dn_fib_rule *r = res->r;
-
-	if (r->r_action == RTN_NAT) {
-		int addrtype = dnet_addr_type(r->r_srcmap);
-
-		if (addrtype == RTN_NAT) {
-			saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
-			*flags |= RTCF_SNAT;
-		} else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
-			saddr = r->r_srcmap;
-			*flags |= RTCF_MASQ;
-		}
-	}
-	return saddr;
-}
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-static void dn_fib_rules_detach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
+	frh->family = AF_DECnet;
+	frh->dst_len = r->dst_len;
+	frh->src_len = r->src_len;
+	frh->tos = 0;
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == dev->ifindex)
-			r->r_ifindex = -1;
-	}
-}
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (r->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark);
+#endif
+	if (r->dst_len)
+		NLA_PUT_U16(skb, FRA_DST, r->dst);
+	if (r->src_len)
+		NLA_PUT_U16(skb, FRA_SRC, r->src);
 
-static void dn_fib_rules_attach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
+	return 0;
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
-			r->r_ifindex = dev->ifindex;
-	}
+nla_put_failure:
+	return -ENOBUFS;
 }
 
-static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+static u32 dn_fib_rule_default_pref(void)
 {
-	struct net_device *dev = ptr;
-
-	switch(event) {
-		case NETDEV_UNREGISTER:
-			dn_fib_rules_detach(dev);
-			dn_fib_sync_down(0, dev, 1);
-		case NETDEV_REGISTER:
-			dn_fib_rules_attach(dev);
-			dn_fib_sync_up(dev);
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&dn_fib_rules)) {
+		pos = dn_fib_rules.next;
+		if (pos->next != &dn_fib_rules) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
 	}
 
-	return NOTIFY_DONE;
-}
-
-
-static struct notifier_block dn_fib_rules_notifier = {
-	.notifier_call =	dn_fib_rules_event,
-};
-
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
-			    struct netlink_callback *cb, unsigned int flags)
-{
-	struct rtmsg *rtm;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-
-
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
-	rtm->rtm_family = AF_DECnet;
-	rtm->rtm_dst_len = r->r_dst_len;
-	rtm->rtm_src_len = r->r_src_len;
-	rtm->rtm_tos = 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (r->r_fwmark)
-		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
-#endif
-	rtm->rtm_table = r->r_table;
-	rtm->rtm_protocol = 0;
-	rtm->rtm_scope = 0;
-	rtm->rtm_type = r->r_action;
-	rtm->rtm_flags = r->r_flags;
-
-	if (r->r_dst_len)
-		RTA_PUT(skb, RTA_DST, 2, &r->r_dst);
-	if (r->r_src_len)
-		RTA_PUT(skb, RTA_SRC, 2, &r->r_src);
-	if (r->r_ifname[0])
-		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
-	if (r->r_preference)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
-	if (r->r_srcmap)
-		RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return 0;
 }
 
 int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx = 0;
-	int s_idx = cb->args[0];
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-
-	rcu_read_lock();
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (idx < s_idx)
-			goto next;
-		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
-			break;
-next:
-		idx++;
-	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
-
-	return skb->len;
+	return fib_rules_dump(skb, cb, AF_DECnet);
 }
 
+static struct fib_rules_ops dn_fib_rules_ops = {
+	.family		= AF_DECnet,
+	.rule_size	= sizeof(struct dn_fib_rule),
+	.action		= dn_fib_rule_action,
+	.match		= dn_fib_rule_match,
+	.configure	= dn_fib_rule_configure,
+	.compare	= dn_fib_rule_compare,
+	.fill		= dn_fib_rule_fill,
+	.default_pref	= dn_fib_rule_default_pref,
+	.nlgroup	= RTNLGRP_DECnet_RULE,
+	.policy		= dn_fib_rule_policy,
+	.rules_list	= &dn_fib_rules,
+	.owner		= THIS_MODULE,
+};
+
 void __init dn_fib_rules_init(void)
 {
-	INIT_HLIST_HEAD(&dn_fib_rules);
-	hlist_add_head(&default_rule.r_hlist, &dn_fib_rules);
-	register_netdevice_notifier(&dn_fib_rules_notifier);
+	list_add_tail(&default_rule.common.list, &dn_fib_rules);
+	fib_rules_register(&dn_fib_rules_ops);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
-	unregister_netdevice_notifier(&dn_fib_rules_notifier);
+	fib_rules_unregister(&dn_fib_rules_ops);
 }
 
 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index e926c952e3632..2e01b67398c8a 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -30,6 +30,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
-- 
GitLab


From a22ec367b08455f95fa0096ce1999950b6f6911c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Wed, 9 Aug 2006 16:00:57 -0700
Subject: [PATCH 0434/1063] [DECNET]: Convert rwlock to spinlock

As per Stephen Hemminger's recent patch to ipv4/fib_semantics.c this
is the same change but for DECnet.

Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_fib.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 846df3954a633..ed5fb5c3eab59 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -59,7 +59,7 @@ extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 static DEFINE_SPINLOCK(dn_fib_multipath_lock);
 static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_RWLOCK(dn_fib_info_lock);
+static DEFINE_SPINLOCK(dn_fib_info_lock);
 
 static struct
 {
@@ -97,7 +97,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
 
 void dn_fib_release_info(struct dn_fib_info *fi)
 {
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		if (fi->fib_next)
 			fi->fib_next->fib_prev = fi->fib_prev;
@@ -108,7 +108,7 @@ void dn_fib_release_info(struct dn_fib_info *fi)
 		fi->fib_dead = 1;
 		dn_fib_info_put(fi);
 	}
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 }
 
 static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
@@ -379,13 +379,13 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	fi->fib_next = dn_fib_info_list;
 	fi->fib_prev = NULL;
 	if (dn_fib_info_list)
 		dn_fib_info_list->fib_prev = fi;
 	dn_fib_info_list = fi;
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 	return fi;
 
 err_inval:
-- 
GitLab


From 53fad3cbff120d8987f377eff374cf4db4ecb177 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Wed, 9 Aug 2006 17:03:17 -0700
Subject: [PATCH 0435/1063] [SUNRPC]: Remove the unnecessary check for highmem
 in xs_sendpages().

Just call kernel_sendpage() directly.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/xprtsock.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8b319e375049b..897bdd9823155 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -174,7 +174,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
 	struct page **ppage = xdr->pages;
 	unsigned int len, pglen = xdr->page_len;
 	int err, ret = 0;
-	ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
 
 	if (unlikely(!sock))
 		return -ENOTCONN;
@@ -207,7 +206,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
 		base &= ~PAGE_CACHE_MASK;
 	}
 
-	sendpage = kernel_sendpage;
 	do {
 		int flags = XS_SENDMSG_FLAGS;
 
@@ -220,10 +218,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
 		if (pglen != len || xdr->tail[0].iov_len != 0)
 			flags |= MSG_MORE;
 
-		/* Hmm... We might be dealing with highmem pages */
-		if (PageHighMem(*ppage))
-			sendpage = sock_no_sendpage;
-		err = sendpage(sock, *ppage, base, len, flags);
+		err = kernel_sendpage(sock, *ppage, base, len, flags);
 		if (ret == 0)
 			ret = err;
 		else if (err > 0)
-- 
GitLab


From 89bddce58e85bb18b13f5077e8349ba9a3ee2597 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 1 Sep 2006 00:19:31 -0700
Subject: [PATCH 0436/1063] [NET] socket: code style cleanup

Make socket.c conform to current style:
	* run through Lindent
	* get rid of unneeded casts
	* split assignment and comparison where possible

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 729 +++++++++++++++++++++++++++------------------------
 1 file changed, 388 insertions(+), 341 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 2eaebf934a1a5..156f2efa4e4a8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -42,7 +42,7 @@
  *		Andi Kleen	:	Some small cleanups, optimizations,
  *					and fixed a copy_from_user() bug.
  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
- *		Tigran Aivazian	:	Made listen(2) backlog sanity checks 
+ *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
  *					protocol-independent
  *
  *
@@ -53,7 +53,7 @@
  *
  *
  *	This module is effectively the top level interface to the BSD socket
- *	paradigm. 
+ *	paradigm.
  *
  *	Based upon Swansea University Computer Society NET3.039
  */
@@ -96,25 +96,24 @@
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
-			 size_t size, loff_t pos);
+			     size_t size, loff_t pos);
 static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
-			  size_t size, loff_t pos);
-static int sock_mmap(struct file *file, struct vm_area_struct * vma);
+			      size_t size, loff_t pos);
+static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 
 static int sock_close(struct inode *inode, struct file *file);
 static unsigned int sock_poll(struct file *file,
 			      struct poll_table_struct *wait);
-static long sock_ioctl(struct file *file,
-		      unsigned int cmd, unsigned long arg);
+static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file,
-		      unsigned int cmd, unsigned long arg);
+			      unsigned int cmd, unsigned long arg);
 #endif
 static int sock_fasync(int fd, struct file *filp, int on);
 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 			  unsigned long count, loff_t *ppos);
 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
-			  unsigned long count, loff_t *ppos);
+			   unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
 
@@ -193,7 +192,6 @@ static __inline__ void net_family_read_unlock(void)
 #define net_family_read_unlock() do { } while(0)
 #endif
 
-
 /*
  *	Statistics counters of the socket lists
  */
@@ -201,19 +199,20 @@ static __inline__ void net_family_read_unlock(void)
 static DEFINE_PER_CPU(int, sockets_in_use) = 0;
 
 /*
- *	Support routines. Move socket addresses back and forth across the kernel/user
- *	divide and look after the messy bits.
+ * Support routines.
+ * Move socket addresses back and forth across the kernel/user
+ * divide and look after the messy bits.
  */
 
-#define MAX_SOCK_ADDR	128		/* 108 for Unix domain - 
+#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
 					   16 for IP, 16 for IPX,
 					   24 for IPv6,
-					   about 80 for AX.25 
+					   about 80 for AX.25
 					   must be at least one bigger than
 					   the AF_UNIX size (see net/unix/af_unix.c
-					   :unix_mkname()).  
+					   :unix_mkname()).
 					 */
-					 
+
 /**
  *	move_addr_to_kernel	-	copy a socket address into kernel space
  *	@uaddr: Address in user space
@@ -227,11 +226,11 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0;
 
 int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
 {
-	if(ulen<0||ulen>MAX_SOCK_ADDR)
+	if (ulen < 0 || ulen > MAX_SOCK_ADDR)
 		return -EINVAL;
-	if(ulen==0)
+	if (ulen == 0)
 		return 0;
-	if(copy_from_user(kaddr,uaddr,ulen))
+	if (copy_from_user(kaddr, uaddr, ulen))
 		return -EFAULT;
 	return audit_sockaddr(ulen, kaddr);
 }
@@ -252,44 +251,46 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
  *	length of the data is written over the length limit the user
  *	specified. Zero is returned for a success.
  */
- 
-int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen)
+
+int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
+		      int __user *ulen)
 {
 	int err;
 	int len;
 
-	if((err=get_user(len, ulen)))
+	err = get_user(len, ulen);
+	if (err)
 		return err;
-	if(len>klen)
-		len=klen;
-	if(len<0 || len> MAX_SOCK_ADDR)
+	if (len > klen)
+		len = klen;
+	if (len < 0 || len > MAX_SOCK_ADDR)
 		return -EINVAL;
-	if(len)
-	{
+	if (len) {
 		if (audit_sockaddr(klen, kaddr))
 			return -ENOMEM;
-		if(copy_to_user(uaddr,kaddr,len))
+		if (copy_to_user(uaddr, kaddr, len))
 			return -EFAULT;
 	}
 	/*
-	 *	"fromlen shall refer to the value before truncation.."
-	 *			1003.1g
+	 *      "fromlen shall refer to the value before truncation.."
+	 *                      1003.1g
 	 */
 	return __put_user(klen, ulen);
 }
 
 #define SOCKFS_MAGIC 0x534F434B
 
-static kmem_cache_t * sock_inode_cachep __read_mostly;
+static kmem_cache_t *sock_inode_cachep __read_mostly;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
 	struct socket_alloc *ei;
-	ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
+
+	ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
 	if (!ei)
 		return NULL;
 	init_waitqueue_head(&ei->socket.wait);
-	
+
 	ei->socket.fasync_list = NULL;
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
@@ -307,22 +308,25 @@ static void sock_destroy_inode(struct inode *inode)
 			container_of(inode, struct socket_alloc, vfs_inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
-	struct socket_alloc *ei = (struct socket_alloc *) foo;
+	struct socket_alloc *ei = (struct socket_alloc *)foo;
 
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
+	    == SLAB_CTOR_CONSTRUCTOR)
 		inode_init_once(&ei->vfs_inode);
 }
- 
+
 static int init_inodecache(void)
 {
 	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
-				sizeof(struct socket_alloc),
-				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
-					SLAB_MEM_SPREAD),
-				init_once, NULL);
+					      sizeof(struct socket_alloc),
+					      0,
+					      (SLAB_HWCACHE_ALIGN |
+					       SLAB_RECLAIM_ACCOUNT |
+					       SLAB_MEM_SPREAD),
+					      init_once,
+					      NULL);
 	if (sock_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
@@ -335,7 +339,8 @@ static struct super_operations sockfs_ops = {
 };
 
 static int sockfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+			 int flags, const char *dev_name, void *data,
+			 struct vfsmount *mnt)
 {
 	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
 			     mnt);
@@ -348,12 +353,13 @@ static struct file_system_type sock_fs_type = {
 	.get_sb =	sockfs_get_sb,
 	.kill_sb =	kill_anon_super,
 };
+
 static int sockfs_delete_dentry(struct dentry *dentry)
 {
 	return 1;
 }
 static struct dentry_operations sockfs_dentry_operations = {
-	.d_delete =	sockfs_delete_dentry,
+	.d_delete = sockfs_delete_dentry,
 };
 
 /*
@@ -477,10 +483,12 @@ struct socket *sockfd_lookup(int fd, int *err)
 	struct file *file;
 	struct socket *sock;
 
-	if (!(file = fget(fd))) {
+	file = fget(fd);
+	if (!file) {
 		*err = -EBADF;
 		return NULL;
 	}
+
 	sock = sock_from_file(file, err);
 	if (!sock)
 		fput(file);
@@ -505,7 +513,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 
 /**
  *	sock_alloc	-	allocate a socket
- *	
+ *
  *	Allocate a new inode and socket object. The two are bound together
  *	and initialised. The socket is then returned. If we are out of inodes
  *	NULL is returned.
@@ -513,8 +521,8 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 
 static struct socket *sock_alloc(void)
 {
-	struct inode * inode;
-	struct socket * sock;
+	struct inode *inode;
+	struct socket *sock;
 
 	inode = new_inode(sock_mnt->mnt_sb);
 	if (!inode)
@@ -522,7 +530,7 @@ static struct socket *sock_alloc(void)
 
 	sock = SOCKET_I(inode);
 
-	inode->i_mode = S_IFSOCK|S_IRWXUGO;
+	inode->i_mode = S_IFSOCK | S_IRWXUGO;
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 
@@ -536,7 +544,7 @@ static struct socket *sock_alloc(void)
  *	a back door. Remember to keep it shut otherwise you'll let the
  *	creepy crawlies in.
  */
-  
+
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 {
 	return -ENXIO;
@@ -553,9 +561,9 @@ const struct file_operations bad_sock_fops = {
  *
  *	The socket is released from the protocol stack if it has a release
  *	callback, and the inode is then released if the socket is bound to
- *	an inode not a file. 
+ *	an inode not a file.
  */
- 
+
 void sock_release(struct socket *sock)
 {
 	if (sock->ops) {
@@ -575,10 +583,10 @@ void sock_release(struct socket *sock)
 		iput(SOCK_INODE(sock));
 		return;
 	}
-	sock->file=NULL;
+	sock->file = NULL;
 }
 
-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 
+static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size)
 {
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
@@ -621,14 +629,14 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec,
+	msg->msg_iov = (struct iovec *)vec;
 	msg->msg_iovlen = num;
 	result = sock_sendmsg(sock, msg, size);
 	set_fs(oldfs);
 	return result;
 }
 
-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
+static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {
 	int err;
@@ -647,14 +655,14 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
 }
 
-int sock_recvmsg(struct socket *sock, struct msghdr *msg, 
+int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
 {
 	struct kiocb iocb;
 	struct sock_iocb siocb;
 	int ret;
 
-        init_sync_kiocb(&iocb, NULL);
+	init_sync_kiocb(&iocb, NULL);
 	iocb.private = &siocb;
 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 	if (-EIOCBQUEUED == ret)
@@ -662,9 +670,8 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 	return ret;
 }
 
-int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 
-		   struct kvec *vec, size_t num,
-		   size_t size, int flags)
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
+		   struct kvec *vec, size_t num, size_t size, int flags)
 {
 	mm_segment_t oldfs = get_fs();
 	int result;
@@ -674,8 +681,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec,
-	msg->msg_iovlen = num;
+	msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
 	result = sock_recvmsg(sock, msg, size, flags);
 	set_fs(oldfs);
 	return result;
@@ -702,7 +708,8 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
 }
 
 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
-		char __user *ubuf, size_t size, struct sock_iocb *siocb)
+					 char __user *ubuf, size_t size,
+					 struct sock_iocb *siocb)
 {
 	if (!is_sync_kiocb(iocb)) {
 		siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -720,20 +727,21 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
 }
 
 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
-		struct file *file, struct iovec *iov, unsigned long nr_segs)
+			    struct file *file, struct iovec *iov,
+			    unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
 	size_t size = 0;
 	int i;
 
-        for (i = 0 ; i < nr_segs ; i++)
-                size += iov[i].iov_len;
+	for (i = 0; i < nr_segs; i++)
+		size += iov[i].iov_len;
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iov = (struct iovec *)iov;
 	msg->msg_iovlen = nr_segs;
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 
@@ -748,7 +756,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov,
 	struct msghdr msg;
 	int ret;
 
-        init_sync_kiocb(&iocb, NULL);
+	init_sync_kiocb(&iocb, NULL);
 	iocb.private = &siocb;
 
 	ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
@@ -758,7 +766,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov,
 }
 
 static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
-			 size_t count, loff_t pos)
+			     size_t count, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
@@ -771,24 +779,25 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
 	if (!x)
 		return -ENOMEM;
 	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+			    &x->async_iov, 1);
 }
 
 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
-		struct file *file, struct iovec *iov, unsigned long nr_segs)
+			     struct file *file, struct iovec *iov,
+			     unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
 	size_t size = 0;
 	int i;
 
-        for (i = 0 ; i < nr_segs ; i++)
-                size += iov[i].iov_len;
+	for (i = 0; i < nr_segs; i++)
+		size += iov[i].iov_len;
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iov = (struct iovec *)iov;
 	msg->msg_iovlen = nr_segs;
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 	if (sock->type == SOCK_SEQPACKET)
@@ -815,7 +824,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov,
 }
 
 static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
-			  size_t count, loff_t pos)
+			      size_t count, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
@@ -829,46 +838,48 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
 		return -ENOMEM;
 
 	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+			     &x->async_iov, 1);
 }
 
-
 /*
  * Atomic setting of ioctl hooks to avoid race
  * with module unload.
  */
 
 static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL;
+static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
 
-void brioctl_set(int (*hook)(unsigned int, void __user *))
+void brioctl_set(int (*hook) (unsigned int, void __user *))
 {
 	mutex_lock(&br_ioctl_mutex);
 	br_ioctl_hook = hook;
 	mutex_unlock(&br_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(brioctl_set);
 
 static DEFINE_MUTEX(vlan_ioctl_mutex);
-static int (*vlan_ioctl_hook)(void __user *arg);
+static int (*vlan_ioctl_hook) (void __user *arg);
 
-void vlan_ioctl_set(int (*hook)(void __user *))
+void vlan_ioctl_set(int (*hook) (void __user *))
 {
 	mutex_lock(&vlan_ioctl_mutex);
 	vlan_ioctl_hook = hook;
 	mutex_unlock(&vlan_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(vlan_ioctl_set);
 
 static DEFINE_MUTEX(dlci_ioctl_mutex);
-static int (*dlci_ioctl_hook)(unsigned int, void __user *);
+static int (*dlci_ioctl_hook) (unsigned int, void __user *);
 
-void dlci_ioctl_set(int (*hook)(unsigned int, void __user *))
+void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 {
 	mutex_lock(&dlci_ioctl_mutex);
 	dlci_ioctl_hook = hook;
 	mutex_unlock(&dlci_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 /*
@@ -890,8 +901,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
 		err = dev_ioctl(cmd, argp);
 	} else
-#endif	/* CONFIG_WIRELESS_EXT */
-	switch (cmd) {
+#endif				/* CONFIG_WIRELESS_EXT */
+		switch (cmd) {
 		case FIOSETOWN:
 		case SIOCSPGRP:
 			err = -EFAULT;
@@ -901,7 +912,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			break;
 		case FIOGETOWN:
 		case SIOCGPGRP:
-			err = put_user(sock->file->f_owner.pid, (int __user *)argp);
+			err = put_user(sock->file->f_owner.pid,
+				       (int __user *)argp);
 			break;
 		case SIOCGIFBR:
 		case SIOCSIFBR:
@@ -912,7 +924,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 				request_module("bridge");
 
 			mutex_lock(&br_ioctl_mutex);
-			if (br_ioctl_hook) 
+			if (br_ioctl_hook)
 				err = br_ioctl_hook(cmd, argp);
 			mutex_unlock(&br_ioctl_mutex);
 			break;
@@ -929,7 +941,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			break;
 		case SIOCGIFDIVERT:
 		case SIOCSIFDIVERT:
-		/* Convert this to call through a hook */
+			/* Convert this to call through a hook */
 			err = divert_ioctl(cmd, argp);
 			break;
 		case SIOCADDDLCI:
@@ -954,7 +966,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			if (err == -ENOIOCTLCMD)
 				err = dev_ioctl(cmd, argp);
 			break;
-	}
+		}
 	return err;
 }
 
@@ -962,7 +974,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res)
 {
 	int err;
 	struct socket *sock = NULL;
-	
+
 	err = security_socket_create(family, type, protocol, 1);
 	if (err)
 		goto out;
@@ -988,18 +1000,18 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res)
 }
 
 /* No kernel lock held - perfect */
-static unsigned int sock_poll(struct file *file, poll_table * wait)
+static unsigned int sock_poll(struct file *file, poll_table *wait)
 {
 	struct socket *sock;
 
 	/*
-	 *	We can't return errors to poll, so it's either yes or no. 
+	 *      We can't return errors to poll, so it's either yes or no.
 	 */
 	sock = file->private_data;
 	return sock->ops->poll(file, sock, wait);
 }
 
-static int sock_mmap(struct file * file, struct vm_area_struct * vma)
+static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct socket *sock = file->private_data;
 
@@ -1009,12 +1021,11 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma)
 static int sock_close(struct inode *inode, struct file *filp)
 {
 	/*
-	 *	It was possible the inode is NULL we were 
-	 *	closing an unfinished socket. 
+	 *      It was possible the inode is NULL we were
+	 *      closing an unfinished socket.
 	 */
 
-	if (!inode)
-	{
+	if (!inode) {
 		printk(KERN_DEBUG "sock_close: NULL inode\n");
 		return 0;
 	}
@@ -1040,57 +1051,52 @@ static int sock_close(struct inode *inode, struct file *filp)
 
 static int sock_fasync(int fd, struct file *filp, int on)
 {
-	struct fasync_struct *fa, *fna=NULL, **prev;
+	struct fasync_struct *fa, *fna = NULL, **prev;
 	struct socket *sock;
 	struct sock *sk;
 
-	if (on)
-	{
+	if (on) {
 		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
-		if(fna==NULL)
+		if (fna == NULL)
 			return -ENOMEM;
 	}
 
 	sock = filp->private_data;
 
-	if ((sk=sock->sk) == NULL) {
+	sk = sock->sk;
+	if (sk == NULL) {
 		kfree(fna);
 		return -EINVAL;
 	}
 
 	lock_sock(sk);
 
-	prev=&(sock->fasync_list);
+	prev = &(sock->fasync_list);
 
-	for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
-		if (fa->fa_file==filp)
+	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
+		if (fa->fa_file == filp)
 			break;
 
-	if(on)
-	{
-		if(fa!=NULL)
-		{
+	if (on) {
+		if (fa != NULL) {
 			write_lock_bh(&sk->sk_callback_lock);
-			fa->fa_fd=fd;
+			fa->fa_fd = fd;
 			write_unlock_bh(&sk->sk_callback_lock);
 
 			kfree(fna);
 			goto out;
 		}
-		fna->fa_file=filp;
-		fna->fa_fd=fd;
-		fna->magic=FASYNC_MAGIC;
-		fna->fa_next=sock->fasync_list;
+		fna->fa_file = filp;
+		fna->fa_fd = fd;
+		fna->magic = FASYNC_MAGIC;
+		fna->fa_next = sock->fasync_list;
 		write_lock_bh(&sk->sk_callback_lock);
-		sock->fasync_list=fna;
+		sock->fasync_list = fna;
 		write_unlock_bh(&sk->sk_callback_lock);
-	}
-	else
-	{
-		if (fa!=NULL)
-		{
+	} else {
+		if (fa != NULL) {
 			write_lock_bh(&sk->sk_callback_lock);
-			*prev=fa->fa_next;
+			*prev = fa->fa_next;
 			write_unlock_bh(&sk->sk_callback_lock);
 			kfree(fa);
 		}
@@ -1107,10 +1113,9 @@ int sock_wake_async(struct socket *sock, int how, int band)
 {
 	if (!sock || !sock->fasync_list)
 		return -1;
-	switch (how)
-	{
+	switch (how) {
 	case 1:
-		
+
 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 			break;
 		goto call_kill;
@@ -1119,7 +1124,7 @@ int sock_wake_async(struct socket *sock, int how, int band)
 			break;
 		/* fall through */
 	case 0:
-	call_kill:
+call_kill:
 		__kill_fasync(sock->fasync_list, SIGIO, band);
 		break;
 	case 3:
@@ -1128,13 +1133,14 @@ int sock_wake_async(struct socket *sock, int how, int band)
 	return 0;
 }
 
-static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
+static int __sock_create(int family, int type, int protocol,
+			 struct socket **res, int kern)
 {
 	int err;
 	struct socket *sock;
 
 	/*
-	 *	Check protocol is in range
+	 *      Check protocol is in range
 	 */
 	if (family < 0 || family >= NPROTO)
 		return -EAFNOSUPPORT;
@@ -1147,10 +1153,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
 	   deadlock in module load.
 	 */
 	if (family == PF_INET && type == SOCK_PACKET) {
-		static int warned; 
+		static int warned;
 		if (!warned) {
 			warned = 1;
-			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
+			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
+			       current->comm);
 		}
 		family = PF_PACKET;
 	}
@@ -1158,17 +1165,16 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
 	err = security_socket_create(family, type, protocol, kern);
 	if (err)
 		return err;
-		
+
 #if defined(CONFIG_KMOD)
-	/* Attempt to load a protocol module if the find failed. 
-	 * 
-	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
+	/* Attempt to load a protocol module if the find failed.
+	 *
+	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
 	 * requested real, full-featured networking support upon configuration.
 	 * Otherwise module support will break!
 	 */
-	if (net_families[family]==NULL)
-	{
-		request_module("net-pf-%d",family);
+	if (net_families[family] == NULL) {
+		request_module("net-pf-%d", family);
 	}
 #endif
 
@@ -1187,12 +1193,12 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
 	if (!(sock = sock_alloc())) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "socket: no more sockets\n");
-		err = -ENFILE;		/* Not exactly a match, but its the
-					   closest posix thing */
+		err = -ENFILE;	/* Not exactly a match, but its the
+				   closest posix thing */
 		goto out;
 	}
 
-	sock->type  = type;
+	sock->type = type;
 
 	/*
 	 * We will call the ->create function, that possibly is in a loadable
@@ -1271,7 +1277,8 @@ asmlinkage long sys_socket(int family, int type, int protocol)
  *	Create a pair of connected sockets.
  */
 
-asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec)
+asmlinkage long sys_socketpair(int family, int type, int protocol,
+			       int __user *usockvec)
 {
 	struct socket *sock1, *sock2;
 	int fd1, fd2, err;
@@ -1290,7 +1297,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
 		goto out_release_1;
 
 	err = sock1->ops->socketpair(sock1, sock2);
-	if (err < 0) 
+	if (err < 0)
 		goto out_release_both;
 
 	fd1 = fd2 = -1;
@@ -1309,7 +1316,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
 	 * Not kernel problem.
 	 */
 
-	err = put_user(fd1, &usockvec[0]); 
+	err = put_user(fd1, &usockvec[0]);
 	if (!err)
 		err = put_user(fd2, &usockvec[1]);
 	if (!err)
@@ -1320,19 +1327,18 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
 	return err;
 
 out_close_1:
-        sock_release(sock2);
+	sock_release(sock2);
 	sys_close(fd1);
 	return err;
 
 out_release_both:
-        sock_release(sock2);
+	sock_release(sock2);
 out_release_1:
-        sock_release(sock1);
+	sock_release(sock1);
 out:
 	return err;
 }
 
-
 /*
  *	Bind a name to a socket. Nothing much to do here since it's
  *	the protocol's responsibility to handle the local address.
@@ -1347,20 +1353,23 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
 	char address[MAX_SOCK_ADDR];
 	int err, fput_needed;
 
-	if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
-	{
-		if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
-			err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if(sock) {
+		err = move_addr_to_kernel(umyaddr, addrlen, address);
+		if (err >= 0) {
+			err = security_socket_bind(sock,
+						   (struct sockaddr *)address,
+						   addrlen);
 			if (!err)
 				err = sock->ops->bind(sock,
-					(struct sockaddr *)address, addrlen);
+						      (struct sockaddr *)
+						      address, addrlen);
 		}
 		fput_light(sock->file, fput_needed);
-	}			
+	}
 	return err;
 }
 
-
 /*
  *	Perform a listen. Basically, we allow the protocol to do anything
  *	necessary for a listen, and if that works, we mark the socket as
@@ -1373,9 +1382,10 @@ asmlinkage long sys_listen(int fd, int backlog)
 {
 	struct socket *sock;
 	int err, fput_needed;
-	
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
-		if ((unsigned) backlog > sysctl_somaxconn)
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock) {
+		if ((unsigned)backlog > sysctl_somaxconn)
 			backlog = sysctl_somaxconn;
 
 		err = security_socket_listen(sock, backlog);
@@ -1387,7 +1397,6 @@ asmlinkage long sys_listen(int fd, int backlog)
 	return err;
 }
 
-
 /*
  *	For accept, we attempt to create a new socket, set up the link
  *	with the client, wake up the client, then return the new
@@ -1400,7 +1409,8 @@ asmlinkage long sys_listen(int fd, int backlog)
  *	clean when we restucture accept also.
  */
 
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			   int __user *upeer_addrlen)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
@@ -1412,7 +1422,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
 		goto out;
 
 	err = -ENFILE;
-	if (!(newsock = sock_alloc())) 
+	if (!(newsock = sock_alloc()))
 		goto out_put;
 
 	newsock->type = sock->type;
@@ -1444,11 +1454,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
 		goto out_fd;
 
 	if (upeer_sockaddr) {
-		if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
+		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
+					  &len, 2) < 0) {
 			err = -ECONNABORTED;
 			goto out_fd;
 		}
-		err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
+		err = move_addr_to_user(address, len, upeer_sockaddr,
+					upeer_addrlen);
 		if (err < 0)
 			goto out_fd;
 	}
@@ -1470,7 +1482,6 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
 	goto out_put;
 }
 
-
 /*
  *	Attempt to connect to a socket with the server address.  The address
  *	is in user space so we verify it is OK and move it to kernel space.
@@ -1483,7 +1494,8 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
  *	include the -EINPROGRESS status for such sockets.
  */
 
-asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
+asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
+			    int addrlen)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
@@ -1496,11 +1508,12 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrl
 	if (err < 0)
 		goto out_put;
 
-	err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+	err =
+	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
 	if (err)
 		goto out_put;
 
-	err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
 				 sock->file->f_flags);
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1513,12 +1526,13 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrl
  *	name to user space.
  */
 
-asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+				int __user *usockaddr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	int len, err, fput_needed;
-	
+
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1543,22 +1557,27 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int _
  *	name to user space.
  */
 
-asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+				int __user *usockaddr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	int len, err, fput_needed;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_getpeername(sock);
 		if (err) {
 			fput_light(sock->file, fput_needed);
 			return err;
 		}
 
-		err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
+		err =
+		    sock->ops->getname(sock, (struct sockaddr *)address, &len,
+				       1);
 		if (!err)
-			err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
+			err = move_addr_to_user(address, len, usockaddr,
+						usockaddr_len);
 		fput_light(sock->file, fput_needed);
 	}
 	return err;
@@ -1570,8 +1589,9 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int _
  *	the protocol.
  */
 
-asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags,
-			   struct sockaddr __user *addr, int addr_len)
+asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
+			   unsigned flags, struct sockaddr __user *addr,
+			   int addr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
@@ -1588,54 +1608,55 @@ asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flag
 	sock = sock_from_file(sock_file, &err);
 	if (!sock)
 		goto out_put;
-	iov.iov_base=buff;
-	iov.iov_len=len;
-	msg.msg_name=NULL;
-	msg.msg_iov=&iov;
-	msg.msg_iovlen=1;
-	msg.msg_control=NULL;
-	msg.msg_controllen=0;
-	msg.msg_namelen=0;
+	iov.iov_base = buff;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
 	if (addr) {
 		err = move_addr_to_kernel(addr, addr_len, address);
 		if (err < 0)
 			goto out_put;
-		msg.msg_name=address;
-		msg.msg_namelen=addr_len;
+		msg.msg_name = address;
+		msg.msg_namelen = addr_len;
 	}
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	msg.msg_flags = flags;
 	err = sock_sendmsg(sock, &msg, len);
 
-out_put:		
+out_put:
 	fput_light(sock_file, fput_needed);
 	return err;
 }
 
 /*
- *	Send a datagram down a socket. 
+ *	Send a datagram down a socket.
  */
 
-asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags)
+asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
 {
 	return sys_sendto(fd, buff, len, flags, NULL, 0);
 }
 
 /*
- *	Receive a frame from the socket and optionally record the address of the 
+ *	Receive a frame from the socket and optionally record the address of the
  *	sender. We verify the buffers are writable and if needed move the
  *	sender address from kernel to user space.
  */
 
-asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags,
-			     struct sockaddr __user *addr, int __user *addr_len)
+asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
+			     unsigned flags, struct sockaddr __user *addr,
+			     int __user *addr_len)
 {
 	struct socket *sock;
 	struct iovec iov;
 	struct msghdr msg;
 	char address[MAX_SOCK_ADDR];
-	int err,err2;
+	int err, err2;
 	struct file *sock_file;
 	int fput_needed;
 
@@ -1647,23 +1668,22 @@ asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned f
 	if (!sock)
 		goto out;
 
-	msg.msg_control=NULL;
-	msg.msg_controllen=0;
-	msg.msg_iovlen=1;
-	msg.msg_iov=&iov;
-	iov.iov_len=size;
-	iov.iov_base=ubuf;
-	msg.msg_name=address;
-	msg.msg_namelen=MAX_SOCK_ADDR;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_iov = &iov;
+	iov.iov_len = size;
+	iov.iov_base = ubuf;
+	msg.msg_name = address;
+	msg.msg_namelen = MAX_SOCK_ADDR;
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
-	err=sock_recvmsg(sock, &msg, size, flags);
+	err = sock_recvmsg(sock, &msg, size, flags);
 
-	if(err >= 0 && addr != NULL)
-	{
-		err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
-		if(err2<0)
-			err=err2;
+	if (err >= 0 && addr != NULL) {
+		err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+		if (err2 < 0)
+			err = err2;
 	}
 out:
 	fput_light(sock_file, fput_needed);
@@ -1671,10 +1691,11 @@ asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned f
 }
 
 /*
- *	Receive a datagram from a socket. 
+ *	Receive a datagram from a socket.
  */
 
-asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags)
+asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
+			 unsigned flags)
 {
 	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
 }
@@ -1684,24 +1705,29 @@ asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags
  *	to pass the user mode parameter for the protocols to sort out.
  */
 
-asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen)
+asmlinkage long sys_setsockopt(int fd, int level, int optname,
+			       char __user *optval, int optlen)
 {
 	int err, fput_needed;
 	struct socket *sock;
 
 	if (optlen < 0)
 		return -EINVAL;
-			
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL)
-	{
-		err = security_socket_setsockopt(sock,level,optname);
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
+		err = security_socket_setsockopt(sock, level, optname);
 		if (err)
 			goto out_put;
 
 		if (level == SOL_SOCKET)
-			err=sock_setsockopt(sock,level,optname,optval,optlen);
+			err =
+			    sock_setsockopt(sock, level, optname, optval,
+					    optlen);
 		else
-			err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
+			err =
+			    sock->ops->setsockopt(sock, level, optname, optval,
+						  optlen);
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
@@ -1713,27 +1739,32 @@ asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optv
  *	to pass a user mode parameter for the protocols to sort out.
  */
 
-asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen)
+asmlinkage long sys_getsockopt(int fd, int level, int optname,
+			       char __user *optval, int __user *optlen)
 {
 	int err, fput_needed;
 	struct socket *sock;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_getsockopt(sock, level, optname);
 		if (err)
 			goto out_put;
 
 		if (level == SOL_SOCKET)
-			err=sock_getsockopt(sock,level,optname,optval,optlen);
+			err =
+			    sock_getsockopt(sock, level, optname, optval,
+					    optlen);
 		else
-			err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
+			err =
+			    sock->ops->getsockopt(sock, level, optname, optval,
+						  optlen);
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
 	return err;
 }
 
-
 /*
  *	Shutdown a socket.
  */
@@ -1743,8 +1774,8 @@ asmlinkage long sys_shutdown(int fd, int how)
 	int err, fput_needed;
 	struct socket *sock;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
-	{
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_shutdown(sock, how);
 		if (!err)
 			err = sock->ops->shutdown(sock, how);
@@ -1753,41 +1784,42 @@ asmlinkage long sys_shutdown(int fd, int how)
 	return err;
 }
 
-/* A couple of helpful macros for getting the address of the 32/64 bit 
+/* A couple of helpful macros for getting the address of the 32/64 bit
  * fields which are the same type (int / unsigned) on our platforms.
  */
 #define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
 #define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
 #define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
 
-
 /*
  *	BSD sendmsg interface
  */
 
 asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 {
-	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct compat_msghdr __user *msg_compat =
+	    (struct compat_msghdr __user *)msg;
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
-			__attribute__ ((aligned (sizeof(__kernel_size_t))));
-			/* 20 is size of ipv6_pktinfo */
+	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
+	/* 20 is size of ipv6_pktinfo */
 	unsigned char *ctl_buf = ctl;
 	struct msghdr msg_sys;
 	int err, ctl_len, iov_size, total_len;
 	int fput_needed;
-	
+
 	err = -EFAULT;
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+	}
+	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
-	if (!sock) 
+	if (!sock)
 		goto out;
 
 	/* do not move before msg_sys is valid */
@@ -1795,7 +1827,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
 		goto out_put;
 
-	/* Check whether to allocate the iovec area*/
+	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1809,7 +1841,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 		err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
 	} else
 		err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
-	if (err < 0) 
+	if (err < 0)
 		goto out_freeiov;
 	total_len = err;
 
@@ -1817,18 +1849,19 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 
 	if (msg_sys.msg_controllen > INT_MAX)
 		goto out_freeiov;
-	ctl_len = msg_sys.msg_controllen; 
+	ctl_len = msg_sys.msg_controllen;
 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
-		err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl));
+		err =
+		    cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
+						     sizeof(ctl));
 		if (err)
 			goto out_freeiov;
 		ctl_buf = msg_sys.msg_control;
 		ctl_len = msg_sys.msg_controllen;
 	} else if (ctl_len) {
-		if (ctl_len > sizeof(ctl))
-		{
+		if (ctl_len > sizeof(ctl)) {
 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
-			if (ctl_buf == NULL) 
+			if (ctl_buf == NULL)
 				goto out_freeiov;
 		}
 		err = -EFAULT;
@@ -1837,7 +1870,8 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
 		 * checking falls down on this.
 		 */
-		if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len))
+		if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
+				   ctl_len))
 			goto out_freectl;
 		msg_sys.msg_control = ctl_buf;
 	}
@@ -1848,14 +1882,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 	err = sock_sendmsg(sock, &msg_sys, total_len);
 
 out_freectl:
-	if (ctl_buf != ctl)    
+	if (ctl_buf != ctl)
 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
 out_freeiov:
 	if (iov != iovstack)
 		sock_kfree_s(sock->sk, iov, iov_size);
 out_put:
 	fput_light(sock->file, fput_needed);
-out:       
+out:
 	return err;
 }
 
@@ -1863,12 +1897,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
  *	BSD recvmsg interface
  */
 
-asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags)
+asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
+			    unsigned int flags)
 {
-	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct compat_msghdr __user *msg_compat =
+	    (struct compat_msghdr __user *)msg;
 	struct socket *sock;
 	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov=iovstack;
+	struct iovec *iov = iovstack;
 	struct msghdr msg_sys;
 	unsigned long cmsg_ptr;
 	int err, iov_size, total_len, len;
@@ -1880,13 +1916,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 	/* user mode address pointers */
 	struct sockaddr __user *uaddr;
 	int __user *uaddr_len;
-	
+
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	} else
-		if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
-			return -EFAULT;
+	}
+	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+		return -EFAULT;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
@@ -1895,8 +1931,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 	err = -EMSGSIZE;
 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
 		goto out_put;
-	
-	/* Check whether to allocate the iovec area*/
+
+	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1906,11 +1942,11 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 	}
 
 	/*
-	 *	Save the user-mode address (verify_iovec will change the
-	 *	kernel msghdr to use the kernel address space)
+	 *      Save the user-mode address (verify_iovec will change the
+	 *      kernel msghdr to use the kernel address space)
 	 */
-	 
-	uaddr = (void __user *) msg_sys.msg_name;
+
+	uaddr = (void __user *)msg_sys.msg_name;
 	uaddr_len = COMPAT_NAMELEN(msg);
 	if (MSG_CMSG_COMPAT & flags) {
 		err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
@@ -1918,13 +1954,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 		err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
 	if (err < 0)
 		goto out_freeiov;
-	total_len=err;
+	total_len = err;
 
 	cmsg_ptr = (unsigned long)msg_sys.msg_control;
 	msg_sys.msg_flags = 0;
 	if (MSG_CMSG_COMPAT & flags)
 		msg_sys.msg_flags = MSG_CMSG_COMPAT;
-	
+
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
@@ -1933,7 +1969,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 	len = err;
 
 	if (uaddr != NULL) {
-		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
+		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
+					uaddr_len);
 		if (err < 0)
 			goto out_freeiov;
 	}
@@ -1942,10 +1979,10 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 	if (err)
 		goto out_freeiov;
 	if (MSG_CMSG_COMPAT & flags)
-		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
 				 &msg_compat->msg_controllen);
 	else
-		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
 				 &msg->msg_controllen);
 	if (err)
 		goto out_freeiov;
@@ -1964,102 +2001,113 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+static const unsigned char nargs[18]={
+	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+};
+
 #undef AL
 
 /*
- *	System call vectors. 
+ *	System call vectors.
  *
  *	Argument checking cleaned up. Saved 20% in size.
  *  This function doesn't need to set the kernel lock because
- *  it is set by the callees. 
+ *  it is set by the callees.
  */
 
 asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 {
 	unsigned long a[6];
-	unsigned long a0,a1;
+	unsigned long a0, a1;
 	int err;
 
-	if(call<1||call>SYS_RECVMSG)
+	if (call < 1 || call > SYS_RECVMSG)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
 	if (copy_from_user(a, args, nargs[call]))
 		return -EFAULT;
 
-	err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
+	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
 	if (err)
 		return err;
 
-	a0=a[0];
-	a1=a[1];
-	
-	switch(call) 
-	{
-		case SYS_SOCKET:
-			err = sys_socket(a0,a1,a[2]);
-			break;
-		case SYS_BIND:
-			err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
-			break;
-		case SYS_CONNECT:
-			err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
-			break;
-		case SYS_LISTEN:
-			err = sys_listen(a0,a1);
-			break;
-		case SYS_ACCEPT:
-			err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_GETSOCKNAME:
-			err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_GETPEERNAME:
-			err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_SOCKETPAIR:
-			err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
-			break;
-		case SYS_SEND:
-			err = sys_send(a0, (void __user *)a1, a[2], a[3]);
-			break;
-		case SYS_SENDTO:
-			err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
-					 (struct sockaddr __user *)a[4], a[5]);
-			break;
-		case SYS_RECV:
-			err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
-			break;
-		case SYS_RECVFROM:
-			err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
-					   (struct sockaddr __user *)a[4], (int __user *)a[5]);
-			break;
-		case SYS_SHUTDOWN:
-			err = sys_shutdown(a0,a1);
-			break;
-		case SYS_SETSOCKOPT:
-			err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
-			break;
-		case SYS_GETSOCKOPT:
-			err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);
-			break;
-		case SYS_SENDMSG:
-			err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
-			break;
-		case SYS_RECVMSG:
-			err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
-			break;
-		default:
-			err = -EINVAL;
-			break;
+	a0 = a[0];
+	a1 = a[1];
+
+	switch (call) {
+	case SYS_SOCKET:
+		err = sys_socket(a0, a1, a[2]);
+		break;
+	case SYS_BIND:
+		err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
+		break;
+	case SYS_CONNECT:
+		err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+		break;
+	case SYS_LISTEN:
+		err = sys_listen(a0, a1);
+		break;
+	case SYS_ACCEPT:
+		err =
+		    sys_accept(a0, (struct sockaddr __user *)a1,
+			       (int __user *)a[2]);
+		break;
+	case SYS_GETSOCKNAME:
+		err =
+		    sys_getsockname(a0, (struct sockaddr __user *)a1,
+				    (int __user *)a[2]);
+		break;
+	case SYS_GETPEERNAME:
+		err =
+		    sys_getpeername(a0, (struct sockaddr __user *)a1,
+				    (int __user *)a[2]);
+		break;
+	case SYS_SOCKETPAIR:
+		err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
+		break;
+	case SYS_SEND:
+		err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+		break;
+	case SYS_SENDTO:
+		err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
+				 (struct sockaddr __user *)a[4], a[5]);
+		break;
+	case SYS_RECV:
+		err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+		break;
+	case SYS_RECVFROM:
+		err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+				   (struct sockaddr __user *)a[4],
+				   (int __user *)a[5]);
+		break;
+	case SYS_SHUTDOWN:
+		err = sys_shutdown(a0, a1);
+		break;
+	case SYS_SETSOCKOPT:
+		err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+		break;
+	case SYS_GETSOCKOPT:
+		err =
+		    sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
+				   (int __user *)a[4]);
+		break;
+	case SYS_SENDMSG:
+		err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+		break;
+	case SYS_RECVMSG:
+		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+		break;
+	default:
+		err = -EINVAL;
+		break;
 	}
 	return err;
 }
 
-#endif /* __ARCH_WANT_SYS_SOCKETCALL */
+#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
 
 /*
  *	This function is called by a protocol handler that wants to
@@ -2072,18 +2120,18 @@ int sock_register(struct net_proto_family *ops)
 	int err;
 
 	if (ops->family >= NPROTO) {
-		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
+		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
+		       NPROTO);
 		return -ENOBUFS;
 	}
 	net_family_write_lock();
 	err = -EEXIST;
 	if (net_families[ops->family] == NULL) {
-		net_families[ops->family]=ops;
+		net_families[ops->family] = ops;
 		err = 0;
 	}
 	net_family_write_unlock();
-	printk(KERN_INFO "NET: Registered protocol family %d\n",
-	       ops->family);
+	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
 	return err;
 }
 
@@ -2099,28 +2147,27 @@ int sock_unregister(int family)
 		return -1;
 
 	net_family_write_lock();
-	net_families[family]=NULL;
+	net_families[family] = NULL;
 	net_family_write_unlock();
-	printk(KERN_INFO "NET: Unregistered protocol family %d\n",
-	       family);
+	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
 	return 0;
 }
 
 static int __init sock_init(void)
 {
 	/*
-	 *	Initialize sock SLAB cache.
+	 *      Initialize sock SLAB cache.
 	 */
-	 
+
 	sk_init();
 
 	/*
-	 *	Initialize skbuff SLAB cache 
+	 *      Initialize skbuff SLAB cache
 	 */
 	skb_init();
 
 	/*
-	 *	Initialize the protocols module. 
+	 *      Initialize the protocols module.
 	 */
 
 	init_inodecache();
@@ -2146,7 +2193,7 @@ void socket_seq_show(struct seq_file *seq)
 	int counter = 0;
 
 	for_each_possible_cpu(cpu)
-		counter += per_cpu(sockets_in_use, cpu);
+	    counter += per_cpu(sockets_in_use, cpu);
 
 	/* It can be negative, by the way. 8) */
 	if (counter < 0)
@@ -2154,11 +2201,11 @@ void socket_seq_show(struct seq_file *seq)
 
 	seq_printf(seq, "sockets: used %d\n", counter);
 }
-#endif /* CONFIG_PROC_FS */
+#endif				/* CONFIG_PROC_FS */
 
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file, unsigned cmd,
-				unsigned long arg)
+			      unsigned long arg)
 {
 	struct socket *sock = file->private_data;
 	int ret = -ENOIOCTLCMD;
-- 
GitLab


From 757dbb494be3309fe41ce4c62f8057d8b41d8897 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 9 Aug 2006 20:50:00 -0700
Subject: [PATCH 0437/1063] [NET]: drop unused elements from net_proto_family

Three values in net_proto_family are defined but never used.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 19da2c08d7b6b..1bd76327ee2b1 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -169,11 +169,6 @@ struct proto_ops {
 struct net_proto_family {
 	int		family;
 	int		(*create)(struct socket *sock, int protocol);
-	/* These are counters for the number of different methods of
-	   each we support */
-	short		authentication;
-	short		encryption;
-	short		encrypt_net;
 	struct module	*owner;
 };
 
-- 
GitLab


From 55737fda0bc73cb20f702301d8b52938a5a43630 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 1 Sep 2006 00:23:39 -0700
Subject: [PATCH 0438/1063] [NET]: socket family using RCU

Replace the gross custom locking done in socket code for net_families[]
with simple RCU usage. Some reordering is necessary to avoid sleep
issues with sock_alloc.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 174 ++++++++++++++++++++++-----------------------------
 1 file changed, 76 insertions(+), 98 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 156f2efa4e4a8..b5a3fcb9ed6da 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -59,11 +59,11 @@
  */
 
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/file.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
+#include <linux/rcupdate.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -146,51 +146,8 @@ static struct file_operations socket_file_ops = {
  *	The protocol list. Each protocol is registered in here.
  */
 
-static struct net_proto_family *net_families[NPROTO];
-
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-static atomic_t net_family_lockct = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(net_family_lock);
-
-/* The strategy is: modifications net_family vector are short, do not
-   sleep and veeery rare, but read access should be free of any exclusive
-   locks.
- */
-
-static void net_family_write_lock(void)
-{
-	spin_lock(&net_family_lock);
-	while (atomic_read(&net_family_lockct) != 0) {
-		spin_unlock(&net_family_lock);
-
-		yield();
-
-		spin_lock(&net_family_lock);
-	}
-}
-
-static __inline__ void net_family_write_unlock(void)
-{
-	spin_unlock(&net_family_lock);
-}
-
-static __inline__ void net_family_read_lock(void)
-{
-	atomic_inc(&net_family_lockct);
-	spin_unlock_wait(&net_family_lock);
-}
-
-static __inline__ void net_family_read_unlock(void)
-{
-	atomic_dec(&net_family_lockct);
-}
-
-#else
-#define net_family_write_lock() do { } while(0)
-#define net_family_write_unlock() do { } while(0)
-#define net_family_read_lock() do { } while(0)
-#define net_family_read_unlock() do { } while(0)
-#endif
+static const struct net_proto_family *net_families[NPROTO];
 
 /*
  *	Statistics counters of the socket lists
@@ -1138,6 +1095,7 @@ static int __sock_create(int family, int type, int protocol,
 {
 	int err;
 	struct socket *sock;
+	const struct net_proto_family *pf;
 
 	/*
 	 *      Check protocol is in range
@@ -1166,6 +1124,21 @@ static int __sock_create(int family, int type, int protocol,
 	if (err)
 		return err;
 
+	/*
+	 *	Allocate the socket and allow the family to set things up. if
+	 *	the protocol is 0, the family is instructed to select an appropriate
+	 *	default.
+	 */
+	sock = sock_alloc();
+	if (!sock) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "socket: no more sockets\n");
+		return -ENFILE;	/* Not exactly a match, but its the
+				   closest posix thing */
+	}
+
+	sock->type = type;
+
 #if defined(CONFIG_KMOD)
 	/* Attempt to load a protocol module if the find failed.
 	 *
@@ -1173,72 +1146,61 @@ static int __sock_create(int family, int type, int protocol,
 	 * requested real, full-featured networking support upon configuration.
 	 * Otherwise module support will break!
 	 */
-	if (net_families[family] == NULL) {
+	if (net_families[family] == NULL)
 		request_module("net-pf-%d", family);
-	}
 #endif
 
-	net_family_read_lock();
-	if (net_families[family] == NULL) {
-		err = -EAFNOSUPPORT;
-		goto out;
-	}
-
-/*
- *	Allocate the socket and allow the family to set things up. if
- *	the protocol is 0, the family is instructed to select an appropriate
- *	default.
- */
-
-	if (!(sock = sock_alloc())) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "socket: no more sockets\n");
-		err = -ENFILE;	/* Not exactly a match, but its the
-				   closest posix thing */
-		goto out;
-	}
-
-	sock->type = type;
+	rcu_read_lock();
+	pf = rcu_dereference(net_families[family]);
+	err = -EAFNOSUPPORT;
+	if (!pf)
+		goto out_release;
 
 	/*
 	 * We will call the ->create function, that possibly is in a loadable
 	 * module, so we have to bump that loadable module refcnt first.
 	 */
-	err = -EAFNOSUPPORT;
-	if (!try_module_get(net_families[family]->owner))
+	if (!try_module_get(pf->owner))
 		goto out_release;
 
-	if ((err = net_families[family]->create(sock, protocol)) < 0) {
-		sock->ops = NULL;
+	/* Now protected by module ref count */
+	rcu_read_unlock();
+
+	err = pf->create(sock, protocol);
+	if (err < 0)
 		goto out_module_put;
-	}
 
 	/*
 	 * Now to bump the refcnt of the [loadable] module that owns this
 	 * socket at sock_release time we decrement its refcnt.
 	 */
-	if (!try_module_get(sock->ops->owner)) {
-		sock->ops = NULL;
-		goto out_module_put;
-	}
+	if (!try_module_get(sock->ops->owner))
+		goto out_module_busy;
+
 	/*
 	 * Now that we're done with the ->create function, the [loadable]
 	 * module can have its refcnt decremented
 	 */
-	module_put(net_families[family]->owner);
-	*res = sock;
+	module_put(pf->owner);
 	err = security_socket_post_create(sock, family, type, protocol, kern);
 	if (err)
 		goto out_release;
+	*res = sock;
 
-out:
-	net_family_read_unlock();
-	return err;
+	return 0;
+
+out_module_busy:
+	err = -EAFNOSUPPORT;
 out_module_put:
-	module_put(net_families[family]->owner);
-out_release:
+	sock->ops = NULL;
+	module_put(pf->owner);
+out_sock_release:
 	sock_release(sock);
-	goto out;
+	return err;
+
+out_release:
+	rcu_read_unlock();
+	goto out_sock_release;
 }
 
 int sock_create(int family, int type, int protocol, struct socket **res)
@@ -2109,12 +2071,15 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 
 #endif				/* __ARCH_WANT_SYS_SOCKETCALL */
 
-/*
+/**
+ *	sock_register - add a socket protocol handler
+ *	@ops: description of protocol
+ *
  *	This function is called by a protocol handler that wants to
  *	advertise its address family, and have it linked into the
- *	SOCKET module.
+ *	socket interface. The value ops->family coresponds to the
+ *	socket system call protocol family.
  */
-
 int sock_register(struct net_proto_family *ops)
 {
 	int err;
@@ -2124,31 +2089,44 @@ int sock_register(struct net_proto_family *ops)
 		       NPROTO);
 		return -ENOBUFS;
 	}
-	net_family_write_lock();
-	err = -EEXIST;
-	if (net_families[ops->family] == NULL) {
+
+	spin_lock(&net_family_lock);
+	if (net_families[ops->family])
+		err = -EEXIST;
+	else {
 		net_families[ops->family] = ops;
 		err = 0;
 	}
-	net_family_write_unlock();
+	spin_unlock(&net_family_lock);
+
 	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
 	return err;
 }
 
-/*
+/**
+ *	sock_unregister - remove a protocol handler
+ *	@family: protocol family to remove
+ *
  *	This function is called by a protocol handler that wants to
  *	remove its address family, and have it unlinked from the
- *	SOCKET module.
+ *	new socket creation.
+ *
+ *	If protocol handler is a module, then it can use module reference
+ *	counts to protect against new references. If protocol handler is not
+ *	a module then it needs to provide its own protection in
+ *	the ops->create routine.
  */
-
 int sock_unregister(int family)
 {
 	if (family < 0 || family >= NPROTO)
-		return -1;
+		return -EINVAL;
 
-	net_family_write_lock();
+	spin_lock(&net_family_lock);
 	net_families[family] = NULL;
-	net_family_write_unlock();
+	spin_unlock(&net_family_lock);
+
+	synchronize_rcu();
+
 	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
 	return 0;
 }
-- 
GitLab


From f0fd27d42e39b91f85e1840ec49b072fd6c545b8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 9 Aug 2006 21:03:17 -0700
Subject: [PATCH 0439/1063] [NET]: sock_register interface changes

sock_register() doesn't change the family, so the protocols can
define it read-only.  No caller ever checks the return value from
sock_unregister(), so make it return void.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  4 ++--
 net/socket.c        | 10 ++++------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 1bd76327ee2b1..c257f716e00f0 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -176,8 +176,8 @@ struct iovec;
 struct kvec;
 
 extern int	     sock_wake_async(struct socket *sk, int how, int band);
-extern int	     sock_register(struct net_proto_family *fam);
-extern int	     sock_unregister(int family);
+extern int	     sock_register(const struct net_proto_family *fam);
+extern void	     sock_unregister(int family);
 extern int	     sock_create(int family, int type, int proto,
 				 struct socket **res);
 extern int	     sock_create_kern(int family, int type, int proto,
diff --git a/net/socket.c b/net/socket.c
index b5a3fcb9ed6da..4147fe4bf41d1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -147,7 +147,7 @@ static struct file_operations socket_file_ops = {
  */
 
 static DEFINE_SPINLOCK(net_family_lock);
-static const struct net_proto_family *net_families[NPROTO];
+static const struct net_proto_family *net_families[NPROTO] __read_mostly;
 
 /*
  *	Statistics counters of the socket lists
@@ -2080,7 +2080,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
  *	socket interface. The value ops->family coresponds to the
  *	socket system call protocol family.
  */
-int sock_register(struct net_proto_family *ops)
+int sock_register(const struct net_proto_family *ops)
 {
 	int err;
 
@@ -2116,10 +2116,9 @@ int sock_register(struct net_proto_family *ops)
  *	a module then it needs to provide its own protection in
  *	the ops->create routine.
  */
-int sock_unregister(int family)
+void sock_unregister(int family)
 {
-	if (family < 0 || family >= NPROTO)
-		return -EINVAL;
+	BUG_ON(family < 0 || family >= NPROTO);
 
 	spin_lock(&net_family_lock);
 	net_families[family] = NULL;
@@ -2128,7 +2127,6 @@ int sock_unregister(int family)
 	synchronize_rcu();
 
 	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
-	return 0;
 }
 
 static int __init sock_init(void)
-- 
GitLab


From bf0d52492d82ad70684e17c8a46942c13d0e140e Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 22 Sep 2006 14:00:29 -0700
Subject: [PATCH 0440/1063] [NET]: Remove unnecessary config.h includes from
 net/

config.h is automatically included by kbuild these days.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/atm_sysfs.c                   | 1 -
 net/core/dev_mcast.c                  | 3 +--
 net/core/wireless.c                   | 1 -
 net/ipv4/af_inet.c                    | 1 -
 net/ipv4/ipconfig.c                   | 1 -
 net/ipv4/netfilter/ip_conntrack_sip.c | 1 -
 net/ipv4/raw.c                        | 3 +--
 net/ipv4/tcp_lp.c                     | 1 -
 net/ipv4/tcp_veno.c                   | 1 -
 9 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5df4b9a068bb2..c0a4ae28fcfab 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -1,6 +1,5 @@
 /* ATM driver model support. */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index c57d887da2ef2..b22648d04d365 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -21,8 +21,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h> 
-#include <linux/module.h> 
+#include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
diff --git a/net/core/wireless.c b/net/core/wireless.c
index de0bde4b51dd5..348b9da73cc4f 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -72,7 +72,6 @@
 
 /***************************** INCLUDES *****************************/
 
-#include <linux/config.h>		/* Not needed ??? */
 #include <linux/module.h>
 #include <linux/types.h>		/* off_t */
 #include <linux/netdevice.h>		/* struct ifreq, dev_get_by_name() */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fc40da3b6d399..36c38bffb4748 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -67,7 +67,6 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/types.h>
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cb8a92f18ef6e..1fbb38415b193 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -31,7 +31,6 @@
  *              --  Josef Siemes <jsiemes@web.de>, Aug 2002
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index 4f222d6be009a..2893e9c748506 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -8,7 +8,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/skbuff.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index fe44cb50a1c52..0e935b4c87411 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -38,8 +38,7 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  */
- 
-#include <linux/config.h> 
+
 #include <linux/types.h>
 #include <asm/atomic.h>
 #include <asm/byteorder.h>
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 48f28d617ce60..649ebaed1df1d 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -35,7 +35,6 @@
  * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <net/tcp.h>
 
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 11b42a7135c19..5b2fe6d2aba98 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -9,7 +9,6 @@
  * 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
  */
 
-#include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
-- 
GitLab


From 1e38bb3a38d08129d08c904b10ea3ba08e22d297 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 10 Aug 2006 00:22:41 -0700
Subject: [PATCH 0441/1063] [NET]: Kill double initialization in
 sock_alloc_inode.

No need to set ei->socket.flags to zero twice.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index 4147fe4bf41d1..d6f27ed9ba6c4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -254,7 +254,6 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	ei->socket.ops = NULL;
 	ei->socket.sk = NULL;
 	ei->socket.file = NULL;
-	ei->socket.flags = 0;
 
 	return &ei->vfs_inode;
 }
-- 
GitLab


From d924424aaed116b362c6d0e667d912b77e655085 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 10 Aug 2006 23:03:23 -0700
Subject: [PATCH 0442/1063] [NEIGHBOUR]: Use ALIGN() macro.

Rather than open-coding the mask, it looks better to use the ALIGN()
macro from kernel.h.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 74c4b6ff8a5c5..bd187daffdb9f 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -101,7 +101,7 @@ struct neighbour
 	__u8			dead;
 	atomic_t		probes;
 	rwlock_t		lock;
-	unsigned char		ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)];
+	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
 	struct hh_cache		*hh;
 	atomic_t		refcnt;
 	int			(*output)(struct sk_buff *skb);
-- 
GitLab


From 2dfe55b47e3d66ded5a84caf71e0da5710edf48b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Aug 2006 23:08:33 -0700
Subject: [PATCH 0443/1063] [NET]: Use u32 for routing table IDs

Use u32 for routing table IDs in net/ipv4 and net/decnet in preparation for
support for a larger number of routing tables. net/ipv6 already uses u32
everywhere and needs no further changes. No functional changes are made by
this patch.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_fib.h     |  4 ++--
 include/net/ip_fib.h     | 14 +++++++-------
 net/decnet/dn_fib.c      |  6 +++---
 net/decnet/dn_table.c    | 10 +++++-----
 net/ipv4/fib_frontend.c  |  8 ++++----
 net/ipv4/fib_hash.c      |  4 ++--
 net/ipv4/fib_lookup.h    |  4 ++--
 net/ipv4/fib_rules.c     |  2 +-
 net/ipv4/fib_semantics.c |  4 ++--
 net/ipv4/fib_trie.c      |  6 +++---
 10 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index 32bc8ce5c5ce8..cd9c3782f8384 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -94,7 +94,7 @@ struct dn_fib_node {
 
 
 struct dn_fib_table {
-	int n;
+	u32 n;
 
 	int (*insert)(struct dn_fib_table *t, struct rtmsg *r, 
 			struct dn_kern_rta *rta, struct nlmsghdr *n, 
@@ -137,7 +137,7 @@ extern int dn_fib_sync_up(struct net_device *dev);
 /*
  * dn_tables.c
  */
-extern struct dn_fib_table *dn_fib_get_table(int n, int creat);
+extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat);
 extern struct dn_fib_table *dn_fib_empty_table(void);
 extern void dn_fib_table_init(void);
 extern void dn_fib_table_cleanup(void);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index adf73586bc050..0dcbf166eb949 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -150,7 +150,7 @@ struct fib_result_nl {
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
 
 struct fib_table {
-	unsigned char	tb_id;
+	u32		tb_id;
 	unsigned	tb_stamp;
 	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
 	int		(*tb_insert)(struct fib_table *table, struct rtmsg *r,
@@ -173,14 +173,14 @@ struct fib_table {
 extern struct fib_table *ip_fib_local_table;
 extern struct fib_table *ip_fib_main_table;
 
-static inline struct fib_table *fib_get_table(int id)
+static inline struct fib_table *fib_get_table(u32 id)
 {
 	if (id != RT_TABLE_LOCAL)
 		return ip_fib_main_table;
 	return ip_fib_local_table;
 }
 
-static inline struct fib_table *fib_new_table(int id)
+static inline struct fib_table *fib_new_table(u32 id)
 {
 	return fib_get_table(id);
 }
@@ -205,9 +205,9 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result
 
 extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
 extern int fib_lookup(struct flowi *flp, struct fib_result *res);
-extern struct fib_table *__fib_new_table(int id);
+extern struct fib_table *__fib_new_table(u32 id);
 
-static inline struct fib_table *fib_get_table(int id)
+static inline struct fib_table *fib_get_table(u32 id)
 {
 	if (id == 0)
 		id = RT_TABLE_MAIN;
@@ -215,7 +215,7 @@ static inline struct fib_table *fib_get_table(int id)
 	return fib_tables[id];
 }
 
-static inline struct fib_table *fib_new_table(int id)
+static inline struct fib_table *fib_new_table(u32 id)
 {
 	if (id == 0)
 		id = RT_TABLE_MAIN;
@@ -248,7 +248,7 @@ extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
 extern u32  __fib_res_prefsrc(struct fib_result *res);
 
 /* Exported by fib_hash.c */
-extern struct fib_table *fib_hash_init(int id);
+extern struct fib_table *fib_hash_init(u32 id);
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index ed5fb5c3eab59..7b3bf5c3d720f 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -534,8 +534,8 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int t;
-	int s_t;
+	u32 t;
+	u32 s_t;
 	struct dn_fib_table *tb;
 
 	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
@@ -765,7 +765,7 @@ void dn_fib_flush(void)
 {
         int flushed = 0;
         struct dn_fib_table *tb;
-        int id;
+        u32 id;
 
         for(id = RT_TABLE_MAX; id > 0; id--) {
                 if ((tb = dn_fib_get_table(id, 0)) == NULL)
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 2e01b67398c8a..1601ee5406a84 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -264,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
 }
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-                        u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
+                        u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
                         struct dn_fib_info *fi, unsigned int flags)
 {
         struct rtmsg *rtm;
@@ -327,7 +327,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 }
 
 
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
+static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
                         struct nlmsghdr *nlh, struct netlink_skb_parms *req)
 {
         struct sk_buff *skb;
@@ -740,7 +740,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp,
 }
 
 
-struct dn_fib_table *dn_fib_get_table(int n, int create)
+struct dn_fib_table *dn_fib_get_table(u32 n, int create)
 {
         struct dn_fib_table *t;
 
@@ -777,7 +777,7 @@ struct dn_fib_table *dn_fib_get_table(int n, int create)
         return t;
 }
 
-static void dn_fib_del_tree(int n)
+static void dn_fib_del_tree(u32 n)
 {
 	struct dn_fib_table *t;
 
@@ -791,7 +791,7 @@ static void dn_fib_del_tree(int n)
 
 struct dn_fib_table *dn_fib_empty_table(void)
 {
-        int id;
+        u32 id;
 
         for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
                 if (dn_fib_tables[id] == NULL)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a83f1aa8034e3..06f4b23f6f57d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -62,7 +62,7 @@ struct fib_table *ip_fib_main_table;
 
 struct fib_table *fib_tables[RT_TABLE_MAX+1];
 
-struct fib_table *__fib_new_table(int id)
+struct fib_table *__fib_new_table(u32 id)
 {
 	struct fib_table *tb;
 
@@ -82,7 +82,7 @@ static void fib_flush(void)
 	int flushed = 0;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_table *tb;
-	int id;
+	u32 id;
 
 	for (id = RT_TABLE_MAX; id>0; id--) {
 		if ((tb = fib_get_table(id))==NULL)
@@ -333,8 +333,8 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int t;
-	int s_t;
+	u32 t;
+	u32 s_t;
 	struct fib_table *tb;
 
 	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 72c633b357cf3..f8d5c8024ccb7 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -765,9 +765,9 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 }
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
 #else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
 #endif
 {
 	struct fib_table *tb;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ef6609ea0eb75..ddd52496b4511 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,11 +30,11 @@ extern struct fib_info *fib_create_info(const struct rtmsg *r,
 extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
 			struct kern_rta *rta, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			 u8 tb_id, u8 type, u8 scope, void *dst,
+			 u32 tb_id, u8 type, u8 scope, void *dst,
 			 int dst_len, u8 tos, struct fib_info *fi,
 			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-		      int z, int tb_id,
+		      int z, u32 tb_id,
 		      struct nlmsghdr *n, struct netlink_skb_parms *req);
 extern struct fib_alias *fib_find_alias(struct list_head *fah,
 					u8 tos, u32 prio);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d242e5291fccc..58fb91b00fdf4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -169,7 +169,7 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 
 static struct fib_table *fib_empty_table(void)
 {
-	int id;
+	u32 id;
 
 	for (id = 1; id <= RT_TABLE_MAX; id++)
 		if (fib_tables[id] == NULL)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 38bca473c7e29..c7a112b5a185a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -273,7 +273,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
 }
 
 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-	       int z, int tb_id,
+	       int z, u32 tb_id,
 	       struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
@@ -939,7 +939,7 @@ u32 __fib_res_prefsrc(struct fib_result *res)
 
 int
 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
+	      u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
 	      struct fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 01801c0f885d1..4a27b2d573a39 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1148,7 +1148,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 
 	key = ntohl(key);
 
-	pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
+	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
 
 	mask = ntohl(inet_make_mask(plen));
 
@@ -1943,9 +1943,9 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 /* Fix more generic FIB names for init later */
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
 #else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
 #endif
 {
 	struct fib_table *tb;
-- 
GitLab


From 9e762a4a89b302cb3b26a1f9bb33eff459eaeca9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Aug 2006 23:09:48 -0700
Subject: [PATCH 0444/1063] [NET]: Introduce RTA_TABLE/FRA_TABLE attributes

Introduce RTA_TABLE route attribute and FRA_TABLE routing rule attribute
to hold 32 bit routing table IDs. Userspace compatibility is provided by
continuing to accept and send the rtm_table field, but because of its
limited size it can only carry the low 8 bits of the table ID. This
implies that if larger IDs are used, _all_ userspace programs using them
need to use RTA_TABLE.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  4 ++++
 include/linux/rtnetlink.h |  8 ++++++++
 include/net/fib_rules.h   |  7 +++++++
 net/core/fib_rules.c      |  5 +++--
 net/decnet/dn_fib.c       |  7 ++++---
 net/decnet/dn_route.c     |  1 +
 net/decnet/dn_table.c     |  1 +
 net/ipv4/fib_frontend.c   |  7 ++++---
 net/ipv4/fib_rules.c      |  1 +
 net/ipv4/fib_semantics.c  |  1 +
 net/ipv4/route.c          |  1 +
 net/ipv6/fib6_rules.c     |  1 +
 net/ipv6/route.c          | 13 +++++++++----
 13 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 5e503f0ca6e46..19a82b6c1c1f5 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -36,6 +36,10 @@ enum
 	FRA_UNUSED5,
 	FRA_FWMARK,	/* netfilter mark (IPv4) */
 	FRA_FLOW,	/* flow/class id */
+	FRA_UNUSED6,
+	FRA_UNUSED7,
+	FRA_UNUSED8,
+	FRA_TABLE,	/* Extended table id */
 	__FRA_MAX
 };
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 0aaffa2ae666e..ea422a539a03c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -264,6 +264,7 @@ enum rtattr_type_t
 	RTA_CACHEINFO,
 	RTA_SESSION,
 	RTA_MP_ALGO,
+	RTA_TABLE,
 	__RTA_MAX
 };
 
@@ -717,6 +718,13 @@ extern void __rtnl_unlock(void);
 	} \
 } while(0)
 
+static inline u32 rtm_get_table(struct rtattr **rta, u8 table)
+{
+	return RTA_GET_U32(rta[RTA_TABLE-1]);
+rtattr_failure:
+	return table;
+}
+
 #endif /* __KERNEL__ */
 
 
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 61375d9e53f82..8e2f473d3e828 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -74,6 +74,13 @@ static inline void fib_rule_put(struct fib_rule *rule)
 		call_rcu(&rule->rcu, fib_rule_put_rcu);
 }
 
+static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
+{
+	if (nla[FRA_TABLE])
+		return nla_get_u32(nla[FRA_TABLE]);
+	return frh->table;
+}
+
 extern int			fib_rules_register(struct fib_rules_ops *);
 extern int			fib_rules_unregister(struct fib_rules_ops *);
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 6cdad24038e26..873b04d5df812 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -187,7 +187,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	rule->action = frh->action;
 	rule->flags = frh->flags;
-	rule->table = frh->table;
+	rule->table = frh_get_table(frh, tb);
 
 	if (!rule->pref && ops->default_pref)
 		rule->pref = ops->default_pref();
@@ -245,7 +245,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		if (frh->action && (frh->action != rule->action))
 			continue;
 
-		if (frh->table && (frh->table != rule->table))
+		if (frh->table && (frh_get_table(frh, tb) != rule->table))
 			continue;
 
 		if (tb[FRA_PRIORITY] &&
@@ -291,6 +291,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 
 	frh = nlmsg_data(nlh);
 	frh->table = rule->table;
+	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
 	frh->res1 = 0;
 	frh->res2 = 0;
 	frh->action = rule->action;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 7b3bf5c3d720f..fb596373daa89 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -491,7 +491,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
 		if (attr) {
 			if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
 				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS)
+			if (i != RTA_MULTIPATH && i != RTA_METRICS &&
+			    i != RTA_TABLE)
 				rta[i-1] = (struct rtattr *)RTA_DATA(attr);
 		}
 	}
@@ -508,7 +509,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 0);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0);
 	if (tb)
 		return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
@@ -524,7 +525,7 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 1);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1);
 	if (tb) 
 		return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5e6f4616ca10a..4c963213fba51 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1486,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	r->rtm_src_len = 0;
 	r->rtm_tos = 0;
 	r->rtm_table = RT_TABLE_MAIN;
+	RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type = rt->rt_type;
 	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	r->rtm_scope = RT_SCOPE_UNIVERSE;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 1601ee5406a84..eca7c1e10c809 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -278,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
         rtm->rtm_src_len = 0;
         rtm->rtm_tos = 0;
         rtm->rtm_table = tb_id;
+	RTA_PUT_U32(skb, RTA_TABLE, tb_id);
         rtm->rtm_flags = fi->fib_flags;
         rtm->rtm_scope = scope;
 	rtm->rtm_type  = type;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 06f4b23f6f57d..2696ede52de25 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -294,7 +294,8 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
 		if (attr) {
 			if (RTA_PAYLOAD(attr) < 4)
 				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS)
+			if (i != RTA_MULTIPATH && i != RTA_METRICS &&
+			    i != RTA_TABLE)
 				*rta = (struct rtattr*)RTA_DATA(attr);
 		}
 	}
@@ -310,7 +311,7 @@ int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (inet_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = fib_get_table(r->rtm_table);
+	tb = fib_get_table(rtm_get_table(rta, r->rtm_table));
 	if (tb)
 		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
 	return -ESRCH;
@@ -325,7 +326,7 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (inet_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = fib_new_table(r->rtm_table);
+	tb = fib_new_table(rtm_get_table(rta, r->rtm_table));
 	if (tb)
 		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
 	return -ENOBUFS;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 58fb91b00fdf4..0330b9cc4b584 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -184,6 +184,7 @@ static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
 	[FRA_DST]	= { .type = NLA_U32 },
 	[FRA_FWMARK]	= { .type = NLA_U32 },
 	[FRA_FLOW]	= { .type = NLA_U32 },
+	[FRA_TABLE]	= { .type = NLA_U32 },
 };
 
 static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c7a112b5a185a..ab753df20a39a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -953,6 +953,7 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
 	rtm->rtm_table = tb_id;
+	RTA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = scope;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b873cbcdd0b8f..12128b82c9dcb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2652,6 +2652,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= rt->fl.fl4_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
+	RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
 	r->rtm_protocol = RTPROT_UNSPEC;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 22a2fdb098319..2c4fbc855e6c7 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -129,6 +129,7 @@ static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = {
 	[FRA_PRIORITY]	= { .type = NLA_U32 },
 	[FRA_SRC]	= { .minlen = sizeof(struct in6_addr) },
 	[FRA_DST]	= { .minlen = sizeof(struct in6_addr) },
+	[FRA_TABLE]	= { .type = NLA_U32 },
 };
 
 static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e08d84063c1fd..843c5509fcede 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1859,7 +1859,8 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
+	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb),
+			     rtm_get_table(arg, r->rtm_table));
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -1869,7 +1870,8 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
+	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb),
+			     rtm_get_table(arg, r->rtm_table));
 }
 
 struct rt6_rtnl_dump_arg
@@ -1887,6 +1889,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 	struct rta_cacheinfo ci;
+	u32 table;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -1902,9 +1905,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	rtm->rtm_src_len = rt->rt6i_src.plen;
 	rtm->rtm_tos = 0;
 	if (rt->rt6i_table)
-		rtm->rtm_table = rt->rt6i_table->tb6_id;
+		table = rt->rt6i_table->tb6_id;
 	else
-		rtm->rtm_table = RT6_TABLE_UNSPEC;
+		table = RT6_TABLE_UNSPEC;
+	rtm->rtm_table = table;
+	RTA_PUT_U32(skb, RTA_TABLE, table);
 	if (rt->rt6i_flags&RTF_REJECT)
 		rtm->rtm_type = RTN_UNREACHABLE;
 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
-- 
GitLab


From 1af5a8c4a11cfed0c9a7f30fcfb689981750599c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Aug 2006 23:10:46 -0700
Subject: [PATCH 0445/1063] [IPV4]: Increase number of possible routing tables
 to 2^32

Increase the number of possible routing tables to 2^32 by replacing the
fixed sized array of pointers by a hash table and replacing iterations
over all possible table IDs by hash table walking.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h    |  25 ++--------
 net/ipv4/fib_frontend.c | 102 ++++++++++++++++++++++++++--------------
 net/ipv4/fib_hash.c     |  26 +++++-----
 net/ipv4/fib_rules.c    |   4 +-
 net/ipv4/fib_trie.c     |  26 +++++-----
 5 files changed, 101 insertions(+), 82 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 0dcbf166eb949..8e9ba563d3422 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -150,6 +150,7 @@ struct fib_result_nl {
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
 
 struct fib_table {
+	struct hlist_node tb_hlist;
 	u32		tb_id;
 	unsigned	tb_stamp;
 	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
@@ -200,29 +201,13 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result
 }
 
 #else /* CONFIG_IP_MULTIPLE_TABLES */
-#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
-#define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
+#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL)
+#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN)
 
-extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
 extern int fib_lookup(struct flowi *flp, struct fib_result *res);
-extern struct fib_table *__fib_new_table(u32 id);
-
-static inline struct fib_table *fib_get_table(u32 id)
-{
-	if (id == 0)
-		id = RT_TABLE_MAIN;
-
-	return fib_tables[id];
-}
-
-static inline struct fib_table *fib_new_table(u32 id)
-{
-	if (id == 0)
-		id = RT_TABLE_MAIN;
-
-	return fib_tables[id] ? : __fib_new_table(id);
-}
 
+extern struct fib_table *fib_new_table(u32 id);
+extern struct fib_table *fib_get_table(u32 id);
 extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 2696ede52de25..ad4c14f968a13 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -37,6 +37,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/list.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -51,48 +52,67 @@
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
-#define RT_TABLE_MIN RT_TABLE_MAIN
-
 struct fib_table *ip_fib_local_table;
 struct fib_table *ip_fib_main_table;
 
-#else
+#define FIB_TABLE_HASHSZ 1
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
-#define RT_TABLE_MIN 1
+#else
 
-struct fib_table *fib_tables[RT_TABLE_MAX+1];
+#define FIB_TABLE_HASHSZ 256
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
-struct fib_table *__fib_new_table(u32 id)
+struct fib_table *fib_new_table(u32 id)
 {
 	struct fib_table *tb;
+	unsigned int h;
 
+	if (id == 0)
+		id = RT_TABLE_MAIN;
+	tb = fib_get_table(id);
+	if (tb)
+		return tb;
 	tb = fib_hash_init(id);
 	if (!tb)
 		return NULL;
-	fib_tables[id] = tb;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
 	return tb;
 }
 
+struct fib_table *fib_get_table(u32 id)
+{
+	struct fib_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
 
+	if (id == 0)
+		id = RT_TABLE_MAIN;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
+		if (tb->tb_id == id) {
+			rcu_read_unlock();
+			return tb;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
+}
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-
 static void fib_flush(void)
 {
 	int flushed = 0;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_table *tb;
-	u32 id;
+	struct hlist_node *node;
+	unsigned int h;
 
-	for (id = RT_TABLE_MAX; id>0; id--) {
-		if ((tb = fib_get_table(id))==NULL)
-			continue;
-		flushed += tb->tb_flush(tb);
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
+			flushed += tb->tb_flush(tb);
 	}
-#else /* CONFIG_IP_MULTIPLE_TABLES */
-	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
-	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
-#endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 	if (flushed)
 		rt_cache_flush(-1);
@@ -334,29 +354,37 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	u32 t;
-	u32 s_t;
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
 	struct fib_table *tb;
+	struct hlist_node *node;
+	int dumped = 0;
 
 	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
 	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
 		return ip_rt_dump(skb, cb);
 
-	s_t = cb->args[0];
-	if (s_t == 0)
-		s_t = cb->args[0] = RT_TABLE_MIN;
-
-	for (t=s_t; t<=RT_TABLE_MAX; t++) {
-		if (t < s_t) continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
-		if ((tb = fib_get_table(t))==NULL)
-			continue;
-		if (tb->tb_dump(tb, skb, cb) < 0) 
-			break;
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
+	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
+			if (e < s_e)
+				goto next;
+			if (dumped)
+				memset(&cb->args[2], 0, sizeof(cb->args) -
+				                 2 * sizeof(cb->args[0]));
+			if (tb->tb_dump(tb, skb, cb) < 0)
+				goto out;
+			dumped = 1;
+next:
+			e++;
+		}
 	}
-
-	cb->args[0] = t;
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
 
 	return skb->len;
 }
@@ -654,9 +682,15 @@ static struct notifier_block fib_netdev_notifier = {
 
 void __init ip_fib_init(void)
 {
+	unsigned int i;
+
+	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
+		INIT_HLIST_HEAD(&fib_table_hash[i]);
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
+	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
 #else
 	fib4_rules_init();
 #endif
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index f8d5c8024ccb7..b5bee1a71e5cb 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -684,7 +684,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 	struct fib_node *f;
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 	hlist_for_each_entry(f, node, head, fn_hash) {
 		struct fib_alias *fa;
@@ -704,14 +704,14 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 					  fa->fa_tos,
 					  fa->fa_info,
 					  NLM_F_MULTI) < 0) {
-				cb->args[3] = i;
+				cb->args[4] = i;
 				return -1;
 			}
 		next:
 			i++;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -722,21 +722,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
 {
 	int h, s_h;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 	for (h=0; h < fz->fz_divisor; h++) {
 		if (h < s_h) continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0,
+			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 		if (fz->fz_hash == NULL ||
 		    hlist_empty(&fz->fz_hash[h]))
 			continue;
 		if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -746,21 +746,21 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 	struct fn_zone *fz;
 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 	read_lock(&fib_hash_lock);
 	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
 		if (m < s_m) continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0,
-			       sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0,
+			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			read_unlock(&fib_hash_lock);
 			return -1;
 		}
 	}
 	read_unlock(&fib_hash_lock);
-	cb->args[1] = m;
+	cb->args[2] = m;
 	return skb->len;
 }
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0330b9cc4b584..ce185ac6f2606 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -172,8 +172,8 @@ static struct fib_table *fib_empty_table(void)
 	u32 id;
 
 	for (id = 1; id <= RT_TABLE_MAX; id++)
-		if (fib_tables[id] == NULL)
-			return __fib_new_table(id);
+		if (fib_get_table(id) == NULL)
+			return fib_new_table(id);
 	return NULL;
 }
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4a27b2d573a39..2a580eb2579bd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1848,7 +1848,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 
 	u32 xkey = htonl(key);
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
@@ -1870,12 +1870,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 				  plen,
 				  fa->fa_tos,
 				  fa->fa_info, 0) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 		i++;
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -1886,14 +1886,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
 	struct list_head *fa_head;
 	struct leaf *l = NULL;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 
 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0,
+			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 
 		fa_head = get_fa_head(l, plen);
 
@@ -1904,11 +1904,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
 			continue;
 
 		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -1917,23 +1917,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 	int m, s_m;
 	struct trie *t = (struct trie *) tb->tb_data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 
 	rcu_read_lock();
 	for (m = 0; m <= 32; m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0,
-				sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0,
+				sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			goto out;
 		}
 	}
 	rcu_read_unlock();
-	cb->args[1] = m;
+	cb->args[2] = m;
 	return skb->len;
 out:
 	rcu_read_unlock();
-- 
GitLab


From 1b43af5480c351dbcb2eef478bafe179cbeb6e83 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Aug 2006 23:11:17 -0700
Subject: [PATCH 0446/1063] [IPV6]: Increase number of possible routing tables
 to 2^32

Increase number of possible routing tables to 2^32 by replacing iterations
over all possible table IDs by hash table walking.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |   7 ++
 net/ipv6/ip6_fib.c      | 171 +++++++++++++++++++++++++++++++++++-----
 net/ipv6/route.c        | 128 +-----------------------------
 3 files changed, 159 insertions(+), 147 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9bfa3cc6cedb9..01bfe404784f5 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -137,6 +137,13 @@ extern int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
 extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 
+struct rt6_rtnl_dump_arg
+{
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+extern int rt6_dump_route(struct rt6_info *rt, void *p_arg);
 extern void rt6_ifdown(struct net_device *dev);
 extern void rt6_mtu_change(struct net_device *dev, unsigned mtu);
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1f2316187ca40..bececbe9dd2cc 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -158,7 +158,26 @@ static struct fib6_table fib6_main_tbl = {
 };
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#define FIB_TABLE_HASHSZ 256
+#else
+#define FIB_TABLE_HASHSZ 1
+#endif
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
+static void fib6_link_table(struct fib6_table *tb)
+{
+	unsigned int h;
+
+	h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
 
+	/*
+	 * No protection necessary, this is the only list mutatation
+	 * operation, tables never disappear once they exist.
+	 */
+	hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
+}
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 static struct fib6_table fib6_local_tbl = {
 	.tb6_id		= RT6_TABLE_LOCAL,
 	.tb6_lock	= RW_LOCK_UNLOCKED,
@@ -168,9 +187,6 @@ static struct fib6_table fib6_local_tbl = {
 	},
 };
 
-#define FIB_TABLE_HASHSZ 256
-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-
 static struct fib6_table *fib6_alloc_table(u32 id)
 {
 	struct fib6_table *table;
@@ -186,19 +202,6 @@ static struct fib6_table *fib6_alloc_table(u32 id)
 	return table;
 }
 
-static void fib6_link_table(struct fib6_table *tb)
-{
-	unsigned int h;
-
-	h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
-
-	/*
-	 * No protection necessary, this is the only list mutatation
-	 * operation, tables never disappear once they exist.
-	 */
-	hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
-}
-
 struct fib6_table *fib6_new_table(u32 id)
 {
 	struct fib6_table *tb;
@@ -263,10 +266,135 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
 
 static void __init fib6_tables_init(void)
 {
+	fib6_link_table(&fib6_main_tbl);
 }
 
 #endif
 
+static int fib6_dump_node(struct fib6_walker_t *w)
+{
+	int res;
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->u.next) {
+		res = rt6_dump_route(rt, w->args);
+		if (res < 0) {
+			/* Frame is full, suspend walking */
+			w->leaf = rt;
+			return 1;
+		}
+		BUG_TRAP(res!=0);
+	}
+	w->leaf = NULL;
+	return 0;
+}
+
+static void fib6_dump_end(struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w = (void*)cb->args[2];
+
+	if (w) {
+		cb->args[2] = 0;
+		kfree(w);
+	}
+	cb->done = (void*)cb->args[3];
+	cb->args[1] = 3;
+}
+
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+	fib6_dump_end(cb);
+	return cb->done ? cb->done(cb) : 0;
+}
+
+static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+			   struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w;
+	int res;
+
+	w = (void *)cb->args[2];
+	w->root = &table->tb6_root;
+
+	if (cb->args[4] == 0) {
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res > 0)
+			cb->args[4] = 1;
+	} else {
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk_continue(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res != 0) {
+			if (res < 0)
+				fib6_walker_unlink(w);
+			goto end;
+		}
+		fib6_walker_unlink(w);
+		cb->args[4] = 0;
+	}
+end:
+	return res;
+}
+
+int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct rt6_rtnl_dump_arg arg;
+	struct fib6_walker_t *w;
+	struct fib6_table *tb;
+	struct hlist_node *node;
+	int res = 0;
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
+	w = (void *)cb->args[2];
+	if (w == NULL) {
+		/* New dump:
+		 *
+		 * 1. hook callback destructor.
+		 */
+		cb->args[3] = (long)cb->done;
+		cb->done = fib6_dump_done;
+
+		/*
+		 * 2. allocate and initialize walker.
+		 */
+		w = kzalloc(sizeof(*w), GFP_ATOMIC);
+		if (w == NULL)
+			return -ENOMEM;
+		w->func = fib6_dump_node;
+		cb->args[2] = (long)w;
+	}
+
+	arg.skb = skb;
+	arg.cb = cb;
+	w->args = &arg;
+
+	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) {
+			if (e < s_e)
+				goto next;
+			res = fib6_dump_table(tb, skb, cb);
+			if (res != 0)
+				goto out;
+next:
+			e++;
+		}
+	}
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
+
+	res = res < 0 ? res : skb->len;
+	if (res <= 0)
+		fib6_dump_end(cb);
+	return res;
+}
 
 /*
  *	Routing Table
@@ -1187,17 +1315,20 @@ static void fib6_clean_tree(struct fib6_node *root,
 void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
 		    int prune, void *arg)
 {
-	int i;
 	struct fib6_table *table;
+	struct hlist_node *node;
+	unsigned int h;
 
-	for (i = FIB6_TABLE_MIN; i <= FIB6_TABLE_MAX; i++) {
-		table = fib6_get_table(i);
-		if (table != NULL) {
+	rcu_read_lock();
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
+					 tb6_hlist) {
 			write_lock_bh(&table->tb6_lock);
 			fib6_clean_tree(&table->tb6_root, func, prune, arg);
 			write_unlock_bh(&table->tb6_lock);
 		}
 	}
+	rcu_read_unlock();
 }
 
 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 843c5509fcede..9ce28277f47fd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1874,12 +1874,6 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 			     rtm_get_table(arg, r->rtm_table));
 }
 
-struct rt6_rtnl_dump_arg
-{
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-};
-
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
 			 int iif, int type, u32 pid, u32 seq,
@@ -1976,7 +1970,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	return -1;
 }
 
-static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	int prefix;
@@ -1992,126 +1986,6 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		     prefix, NLM_F_MULTI);
 }
 
-static int fib6_dump_node(struct fib6_walker_t *w)
-{
-	int res;
-	struct rt6_info *rt;
-
-	for (rt = w->leaf; rt; rt = rt->u.next) {
-		res = rt6_dump_route(rt, w->args);
-		if (res < 0) {
-			/* Frame is full, suspend walking */
-			w->leaf = rt;
-			return 1;
-		}
-		BUG_TRAP(res!=0);
-	}
-	w->leaf = NULL;
-	return 0;
-}
-
-static void fib6_dump_end(struct netlink_callback *cb)
-{
-	struct fib6_walker_t *w = (void*)cb->args[0];
-
-	if (w) {
-		cb->args[0] = 0;
-		kfree(w);
-	}
-	cb->done = (void*)cb->args[1];
-	cb->args[1] = 0;
-}
-
-static int fib6_dump_done(struct netlink_callback *cb)
-{
-	fib6_dump_end(cb);
-	return cb->done ? cb->done(cb) : 0;
-}
-
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct fib6_table *table;
-	struct rt6_rtnl_dump_arg arg;
-	struct fib6_walker_t *w;
-	int i, res = 0;
-
-	arg.skb = skb;
-	arg.cb = cb;
-
-	/*
-	 * cb->args[0] = pointer to walker structure
-	 * cb->args[1] = saved cb->done() pointer
-	 * cb->args[2] = current table being dumped
-	 */
-
-	w = (void*)cb->args[0];
-	if (w == NULL) {
-		/* New dump:
-		 * 
-		 * 1. hook callback destructor.
-		 */
-		cb->args[1] = (long)cb->done;
-		cb->done = fib6_dump_done;
-
-		/*
-		 * 2. allocate and initialize walker.
-		 */
-		w = kzalloc(sizeof(*w), GFP_ATOMIC);
-		if (w == NULL)
-			return -ENOMEM;
-		w->func = fib6_dump_node;
-		w->args = &arg;
-		cb->args[0] = (long)w;
-		cb->args[2] = FIB6_TABLE_MIN;
-	} else {
-		w->args = &arg;
-		i = cb->args[2];
-		if (i > FIB6_TABLE_MAX)
-			goto end;
-
-		table = fib6_get_table(i);
-		if (table != NULL) {
-			read_lock_bh(&table->tb6_lock);
-			w->root = &table->tb6_root;
-			res = fib6_walk_continue(w);
-			read_unlock_bh(&table->tb6_lock);
-			if (res != 0) {
-				if (res < 0)
-					fib6_walker_unlink(w);
-				goto end;
-			}
-		}
-
-		fib6_walker_unlink(w);
-		cb->args[2] = ++i;
-	}
-
-	for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
-		table = fib6_get_table(i);
-		if (table == NULL)
-			continue;
-
-		read_lock_bh(&table->tb6_lock);
-		w->root = &table->tb6_root;
-		res = fib6_walk(w);
-		read_unlock_bh(&table->tb6_lock);
-		if (res)
-			break;
-	}
-end:
-	cb->args[2] = i;
-
-	res = res < 0 ? res : skb->len;
-	/* res < 0 is an error. (really, impossible)
-	   res == 0 means that dump is complete, but skb still can contain data.
-	   res > 0 dump is not complete, but frame is full.
-	 */
-	/* Destroy walker, if dump of this table is complete. */
-	if (res <= 0)
-		fib6_dump_end(cb);
-	return res;
-}
-
 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtattr **rta = arg;
-- 
GitLab


From abcab268303c22d24fc89fedd35d82271d20f5da Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Aug 2006 23:11:47 -0700
Subject: [PATCH 0447/1063] [DECNET]: Increase number of possible routing
 tables to 2^32

Increase the number of possible routing tables to 2^32 by replacing the
fixed sized array of pointers by a hash table and replacing iterations
over all possible table IDs by hash table walking.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_fib.h  |   3 +-
 net/decnet/dn_fib.c   |  49 -----------------
 net/decnet/dn_rules.c |   2 +-
 net/decnet/dn_table.c | 125 ++++++++++++++++++++++++++++++------------
 4 files changed, 93 insertions(+), 86 deletions(-)

diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index cd9c3782f8384..d97aa10c463fa 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -94,6 +94,7 @@ struct dn_fib_node {
 
 
 struct dn_fib_table {
+	struct hlist_node hlist;
 	u32 n;
 
 	int (*insert)(struct dn_fib_table *t, struct rtmsg *r, 
@@ -177,8 +178,6 @@ static inline void dn_fib_res_put(struct dn_fib_res *res)
 		fib_rule_put(res->r);
 }
 
-extern struct dn_fib_table *dn_fib_tables[];
-
 #else /* Endnode */
 
 #define dn_fib_init()  do { } while(0)
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index fb596373daa89..5ccca3ed53bd7 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -532,39 +532,6 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return -ENOBUFS;
 }
 
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	u32 t;
-	u32 s_t;
-	struct dn_fib_table *tb;
-
-	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
-		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
-			return dn_cache_dump(skb, cb);
-
-	s_t = cb->args[0];
-	if (s_t == 0)
-		s_t = cb->args[0] = RT_MIN_TABLE;
-
-	for(t = s_t; t <= RT_TABLE_MAX; t++) {
-		if (t < s_t)
-			continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0,
-			       sizeof(cb->args) - sizeof(cb->args[0]));
-		tb = dn_fib_get_table(t, 0);
-		if (tb == NULL)
-			continue;
-		if (tb->dump(tb, skb, cb) < 0)
-			break;
-	}
-
-	cb->args[0] = t;
-
-	return skb->len;
-}
-
 static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
 {
 	struct dn_fib_table *tb;
@@ -762,22 +729,6 @@ int dn_fib_sync_up(struct net_device *dev)
         return ret;
 }
 
-void dn_fib_flush(void)
-{
-        int flushed = 0;
-        struct dn_fib_table *tb;
-        u32 id;
-
-        for(id = RT_TABLE_MAX; id > 0; id--) {
-                if ((tb = dn_fib_get_table(id, 0)) == NULL)
-                        continue;
-                flushed += tb->flush(tb);
-        }
-
-        if (flushed)
-                dn_rt_cache_flush(-1);
-}
-
 static struct notifier_block dn_fib_dnaddr_notifier = {
 	.notifier_call = dn_fib_dnaddr_event,
 };
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 096f1273e714f..878312ff34ec7 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -210,7 +210,7 @@ unsigned dnet_addr_type(__le16 addr)
 	struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
 	struct dn_fib_res res;
 	unsigned ret = RTN_UNICAST;
-	struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL];
+	struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
 
 	res.r = NULL;
 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index eca7c1e10c809..10e87262b6fb9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -75,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
 for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
 
 #define RT_TABLE_MIN 1
-
+#define DN_FIB_TABLE_HASHSZ 256
+static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
 static DEFINE_RWLOCK(dn_fib_tables_lock);
-struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
 
 static kmem_cache_t *dn_hash_kmem __read_mostly;
 static int dn_fib_hash_zombies;
@@ -361,7 +361,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 {
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	for(i = 0; f; i++, f = f->fn_next) {
 		if (i < s_i)
 			continue;
@@ -374,11 +374,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order, 
 				f->fn_info, NLM_F_MULTI) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -389,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb,
 {
 	int h, s_h;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 	for(h = 0; h < dz->dz_divisor; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
 		if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
 			continue;
 		if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -413,26 +413,63 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
 	struct dn_zone *dz;
 	struct dn_hash *table = (struct dn_hash *)tb->data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 	read_lock(&dn_fib_tables_lock);
 	for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			read_unlock(&dn_fib_tables_lock);
 			return -1;
 		}
 	}
 	read_unlock(&dn_fib_tables_lock);
-	cb->args[1] = m;
+	cb->args[2] = m;
 
         return skb->len;
 }
 
+int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct dn_fib_table *tb;
+	struct hlist_node *node;
+	int dumped = 0;
+
+	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+			return dn_cache_dump(skb, cb);
+
+	s_h = cb->args[0];	/* hash bucket to resume from */
+	s_e = cb->args[1];	/* entry index to resume from in that bucket */
+	/* s_e applies to bucket s_h only; later buckets start at entry 0 */
+	for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) {
+			if (e < s_e)
+				goto next;
+			if (dumped)	/* new table: drop per-table resume state */
+				memset(&cb->args[2], 0, sizeof(cb->args) -
+				                 2 * sizeof(cb->args[0]));
+			if (tb->dump(tb, skb, cb) < 0)
+				goto out;
+			dumped = 1;
+next:
+			e++;
+		}
+	}
+out:
+	cb->args[1] = e;	/* saved for the next invocation */
+	cb->args[0] = h;
+
+	return skb->len;
+}
+
 static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct dn_hash *table = (struct dn_hash *)tb->data;
@@ -744,6 +781,8 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp,
 struct dn_fib_table *dn_fib_get_table(u32 n, int create)
 {
         struct dn_fib_table *t;
+	struct hlist_node *node;
+	unsigned int h;
 
         if (n < RT_TABLE_MIN)
                 return NULL;
@@ -751,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create)
         if (n > RT_TABLE_MAX)
                 return NULL;
 
-        if (dn_fib_tables[n]) 
-                return dn_fib_tables[n];
+	h = n & (DN_FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) {
+		if (t->n == n) {
+			rcu_read_unlock();
+			return t;
+		}
+	}
+	rcu_read_unlock();
 
         if (!create)
                 return NULL;
@@ -773,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create)
         t->flush  = dn_fib_table_flush;
         t->dump = dn_fib_table_dump;
 	memset(t->data, 0, sizeof(struct dn_hash));
-        dn_fib_tables[n] = t;
+	hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
 
         return t;
 }
 
-static void dn_fib_del_tree(u32 n)
-{
-	struct dn_fib_table *t;
-
-	write_lock(&dn_fib_tables_lock);
-	t = dn_fib_tables[n];
-	dn_fib_tables[n] = NULL;
-	write_unlock(&dn_fib_tables_lock);
-
-	kfree(t);
-}
-
 struct dn_fib_table *dn_fib_empty_table(void)
 {
         u32 id;
 
         for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
-                if (dn_fib_tables[id] == NULL)
+		if (dn_fib_get_table(id, 0) == NULL)
                         return dn_fib_get_table(id, 1);
         return NULL;
 }
 
+void dn_fib_flush(void)
+{
+        int flushed = 0;
+        struct dn_fib_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
+
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist)
+	                flushed += tb->flush(tb);
+        }
+
+        if (flushed)
+                dn_rt_cache_flush(-1);
+}
+
 void __init dn_fib_table_init(void)
 {
 	dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
@@ -810,10 +860,17 @@ void __init dn_fib_table_init(void)
 
 void __exit dn_fib_table_cleanup(void)
 {
-	int i;
-
-	for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i)
-		dn_fib_del_tree(i);
+	struct dn_fib_table *t;
+	struct hlist_node *node, *next;
+	unsigned int h;
 
-	return;
+	write_lock(&dn_fib_tables_lock);
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h],
+		                          hlist) {
+			hlist_del(&t->hlist);
+			kfree(t);
+		}
+	}
+	write_unlock(&dn_fib_tables_lock);
 }
-- 
GitLab


From b801f54917b7c6e8540f877ee562cd0725e62ebd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Aug 2006 23:12:34 -0700
Subject: [PATCH 0448/1063] [NET]: Increase RT_TABLE_MAX to 2^32

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ea422a539a03c..7e4aa48680a73 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -239,10 +239,8 @@ enum rt_class_t
 	RT_TABLE_DEFAULT=253,
 	RT_TABLE_MAIN=254,
 	RT_TABLE_LOCAL=255,
-	__RT_TABLE_MAX
+	RT_TABLE_MAX=0xFFFFFFFF
 };
-#define RT_TABLE_MAX (__RT_TABLE_MAX - 1)
-
 
 
 /* Routing message attributes */
-- 
GitLab


From 3bf72957d2a553c343e4285463ef0a88139bdec4 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 10 Aug 2006 23:31:08 -0700
Subject: [PATCH 0449/1063] [HTB]: Remove broken debug code.

The HTB network scheduler had debug code that wouldn't compile
and that confused and obfuscated the code; remove it.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 302 +++++---------------------------------------
 1 file changed, 34 insertions(+), 268 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 880a3394a51fb..73094e7f41691 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -70,7 +70,6 @@
 
 #define HTB_HSIZE 16	/* classid hash size */
 #define HTB_EWMAC 2	/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#undef HTB_DEBUG	/* compile debugging support (activated by tc tool) */
 #define HTB_RATECM 1    /* whether to use rate computer */
 #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
 #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
@@ -81,51 +80,6 @@
 #error "Mismatched sch_htb.c and pkt_sch.h"
 #endif
 
-/* debugging support; S is subsystem, these are defined:
-  0 - netlink messages
-  1 - enqueue
-  2 - drop & requeue
-  3 - dequeue main
-  4 - dequeue one prio DRR part
-  5 - dequeue class accounting
-  6 - class overlimit status computation
-  7 - hint tree
-  8 - event queue
- 10 - rate estimator
- 11 - classifier 
- 12 - fast dequeue cache
-
- L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
- q->debug uint32 contains 16 2-bit fields one for subsystem starting
- from LSB
- */
-#ifdef HTB_DEBUG
-#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
-#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
-	printk(KERN_DEBUG FMT,##ARG)
-#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
-#define HTB_PASSQ q,
-#define HTB_ARGQ struct htb_sched *q,
-#define static
-#undef __inline__
-#define __inline__
-#undef inline
-#define inline
-#define HTB_CMAGIC 0xFEFAFEF1
-#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
-		if ((N)->rb_color == -1) break; \
-		rb_erase(N,R); \
-		(N)->rb_color = -1; } while (0)
-#else
-#define HTB_DBG_COND(S,L) (0)
-#define HTB_DBG(S,L,FMT,ARG...)
-#define HTB_PASSQ
-#define HTB_ARGQ
-#define HTB_CHCL(cl)
-#define htb_safe_rb_erase(N,R) rb_erase(N,R)
-#endif
-
-
 /* used internaly to keep status of single class */
 enum htb_cmode {
     HTB_CANT_SEND,		/* class can't send and can't borrow */
@@ -136,9 +90,6 @@ enum htb_cmode {
 /* interior & leaf nodes; props specific to leaves are marked L: */
 struct htb_class
 {
-#ifdef HTB_DEBUG
-	unsigned magic;
-#endif
     /* general class parameters */
     u32 classid;
     struct gnet_stats_basic bstats;
@@ -238,7 +189,6 @@ struct htb_sched
     int nwc_hit;	/* this to disable mindelay complaint in dequeue */
 
     int defcls;		/* class where unclassified flows go to */
-    u32 debug;		/* subsystem debug levels */
 
     /* filters for qdisc itself */
     struct tcf_proto *filter_list;
@@ -354,75 +304,21 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
 	return cl;
 }
 
-#ifdef HTB_DEBUG
-static void htb_next_rb_node(struct rb_node **n);
-#define HTB_DUMTREE(root,memb) if(root) { \
-	struct rb_node *n = (root)->rb_node; \
-	while (n->rb_left) n = n->rb_left; \
-	while (n) { \
-		struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
-		printk(" %x",cl->classid); htb_next_rb_node (&n); \
-	} }
-
-static void htb_debug_dump (struct htb_sched *q)
-{
-	int i,p;
-	printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
-	/* rows */
-	for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
-		printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
-		for (p=0;p<TC_HTB_NUMPRIO;p++) {
-			if (!q->row[i][p].rb_node) continue;
-			printk(" p%d:",p);
-			HTB_DUMTREE(q->row[i]+p,node[p]);
-		}
-		printk("\n");
-	}
-	/* classes */
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *l;
-		list_for_each (l,q->hash+i) {
-			struct htb_class *cl = list_entry(l,struct htb_class,hlist);
-			long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-			printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
-					"pa=%x f:",
-				cl->classid,cl->cmode,cl->tokens,cl->ctokens,
-				cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
-				cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
-			if (cl->level)
-			for (p=0;p<TC_HTB_NUMPRIO;p++) {
-				if (!cl->un.inner.feed[p].rb_node) continue;
-				printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
-				HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
-			}
-			printk("\n");
-		}
-	}
-}
-#endif
 /**
  * htb_add_to_id_tree - adds class to the round robin list
  *
  * Routine adds class to the list (actually tree) sorted by classid.
  * Make sure that class is not already on such list for given prio.
  */
-static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
+static void htb_add_to_id_tree (struct rb_root *root,
 		struct htb_class *cl,int prio)
 {
 	struct rb_node **p = &root->rb_node, *parent = NULL;
-	HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
-#ifdef HTB_DEBUG
-	if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
-	HTB_CHCL(cl);
-	if (*p) {
-		struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
-		HTB_CHCL(x);
-	}
-#endif
+
 	while (*p) {
 		struct htb_class *c; parent = *p;
 		c = rb_entry(parent, struct htb_class, node[prio]);
-		HTB_CHCL(c);
+
 		if (cl->classid > c->classid)
 			p = &parent->rb_right;
 		else 
@@ -440,16 +336,10 @@ static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
  * already in the queue.
  */
 static void htb_add_to_wait_tree (struct htb_sched *q,
-		struct htb_class *cl,long delay,int debug_hint)
+				  struct htb_class *cl,long delay)
 {
 	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
-	HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
-#ifdef HTB_DEBUG
-	if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
-	HTB_CHCL(cl);
-	if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
-		printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
-#endif
+
 	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
 	if (cl->pq_key == q->jiffies)
 		cl->pq_key++;
@@ -490,14 +380,11 @@ static void htb_next_rb_node(struct rb_node **n)
 static inline void htb_add_class_to_row(struct htb_sched *q, 
 		struct htb_class *cl,int mask)
 {
-	HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
-			cl->classid,mask,q->row_mask[cl->level]);
-	HTB_CHCL(cl);
 	q->row_mask[cl->level] |= mask;
 	while (mask) {
 		int prio = ffz(~mask);
 		mask &= ~(1 << prio);
-		htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
+		htb_add_to_id_tree(q->row[cl->level]+prio,cl,prio);
 	}
 }
 
@@ -511,18 +398,16 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
 		struct htb_class *cl,int mask)
 {
 	int m = 0;
-	HTB_CHCL(cl);
+
 	while (mask) {
 		int prio = ffz(~mask);
 		mask &= ~(1 << prio);
 		if (q->ptr[cl->level][prio] == cl->node+prio)
 			htb_next_rb_node(q->ptr[cl->level]+prio);
-		htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
+		rb_erase(cl->node + prio,q->row[cl->level]+prio);
 		if (!q->row[cl->level][prio].rb_node) 
 			m |= 1 << prio;
 	}
-	HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
-			cl->classid,mask,q->row_mask[cl->level],m);
 	q->row_mask[cl->level] &= ~m;
 }
 
@@ -537,11 +422,9 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
 	long m,mask = cl->prio_activity;
-	HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-	HTB_CHCL(cl);
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-		HTB_CHCL(p);
+
 		m = mask; while (m) {
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
@@ -551,13 +434,11 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
 				   reset bit in mask as parent is already ok */
 				mask &= ~(1 << prio);
 			
-			htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
+			htb_add_to_id_tree(p->un.inner.feed+prio,cl,prio);
 		}
-		HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-				p->classid,p->prio_activity,mask,p->cmode);
 		p->prio_activity |= mask;
 		cl = p; p = cl->parent;
-		HTB_CHCL(cl);
+
 	}
 	if (cl->cmode == HTB_CAN_SEND && mask)
 		htb_add_class_to_row(q,cl,mask);
@@ -574,8 +455,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
 	long m,mask = cl->prio_activity;
-	HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-	HTB_CHCL(cl);
+
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 		m = mask; mask = 0; 
@@ -591,16 +471,15 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 				p->un.inner.ptr[prio] = NULL;
 			}
 			
-			htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
+			rb_erase(cl->node + prio,p->un.inner.feed + prio);
 			
 			if (!p->un.inner.feed[prio].rb_node) 
 				mask |= 1 << prio;
 		}
-		HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-				p->classid,p->prio_activity,mask,p->cmode);
+
 		p->prio_activity &= ~mask;
 		cl = p; p = cl->parent;
-		HTB_CHCL(cl);
+
 	}
 	if (cl->cmode == HTB_CAN_SEND && mask) 
 		htb_remove_class_from_row(q,cl,mask);
@@ -655,8 +534,6 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
 { 
 	enum htb_cmode new_mode = htb_class_mode(cl,diff);
 	
-	HTB_CHCL(cl);
-	HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
 
 	if (new_mode == cl->cmode)
 		return;	
@@ -681,7 +558,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
 static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
 {
 	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
-	HTB_CHCL(cl);
+
 	if (!cl->prio_activity) {
 		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
 		htb_activate_prios(q,cl);
@@ -699,7 +576,7 @@ static __inline__ void
 htb_deactivate(struct htb_sched *q,struct htb_class *cl)
 {
 	BUG_TRAP(cl->prio_activity);
-	HTB_CHCL(cl);
+
 	htb_deactivate_prios(q,cl);
 	cl->prio_activity = 0;
 	list_del_init(&cl->un.leaf.drop_list);
@@ -739,7 +616,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
     sch->q.qlen++;
     sch->bstats.packets++; sch->bstats.bytes += skb->len;
-    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
     return NET_XMIT_SUCCESS;
 }
 
@@ -771,7 +647,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 
     sch->q.qlen++;
     sch->qstats.requeues++;
-    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
     return NET_XMIT_SUCCESS;
 }
 
@@ -793,7 +668,6 @@ static void htb_rate_timer(unsigned long arg)
 
 	/* lock queue so that we can muck with it */
 	HTB_QLOCK(sch);
-	HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
 
 	q->rttim.expires = jiffies + HZ;
 	add_timer(&q->rttim);
@@ -803,8 +677,7 @@ static void htb_rate_timer(unsigned long arg)
 		q->recmp_bucket = 0;
 	list_for_each (p,q->hash+q->recmp_bucket) {
 		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
-		HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
-				cl->classid,cl->sum_bytes,cl->sum_packets);
+
 		RT_GEN (cl->sum_bytes,cl->rate_bytes);
 		RT_GEN (cl->sum_packets,cl->rate_packets);
 	}
@@ -828,7 +701,6 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
 {	
 	long toks,diff;
 	enum htb_cmode old_mode;
-	HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
 
 #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
 	if (toks > cl->B) toks = cl->B; \
@@ -837,24 +709,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
 	cl->T = toks
 
 	while (cl) {
-		HTB_CHCL(cl);
 		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-			if (net_ratelimit())
-				printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-				       cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-				       (unsigned long long) q->now,
-				       (unsigned long long) cl->t_c,
-#endif
-				       q->jiffies);
-			diff = 1000;
-		}
-#endif
 		if (cl->level >= level) {
 			if (cl->level == level) cl->xstats.lends++;
 			HTB_ACCNT (tokens,buffer,rate);
@@ -864,15 +719,14 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
 		}
 		HTB_ACCNT (ctokens,cbuffer,ceil);
 		cl->t_c = q->now;
-		HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
 
 		old_mode = cl->cmode; diff = 0;
 		htb_change_class_mode(q,cl,&diff);
 		if (old_mode != cl->cmode) {
 			if (old_mode != HTB_CAN_SEND)
-				htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+				rb_erase(&cl->pq_node,q->wait_pq+cl->level);
 			if (cl->cmode != HTB_CAN_SEND)
-				htb_add_to_wait_tree (q,cl,diff,1);
+				htb_add_to_wait_tree (q,cl,diff);
 		}
 		
 #ifdef HTB_RATECM
@@ -899,8 +753,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
 static long htb_do_events(struct htb_sched *q,int level)
 {
 	int i;
-	HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
-			level,q->wait_pq[level].rb_node,q->row_mask[level]);
+
 	for (i = 0; i < 500; i++) {
 		struct htb_class *cl;
 		long diff;
@@ -910,30 +763,13 @@ static long htb_do_events(struct htb_sched *q,int level)
 
 		cl = rb_entry(p, struct htb_class, pq_node);
 		if (time_after(cl->pq_key, q->jiffies)) {
-			HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
 			return cl->pq_key - q->jiffies;
 		}
-		htb_safe_rb_erase(p,q->wait_pq+level);
+		rb_erase(p,q->wait_pq+level);
 		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-			if (net_ratelimit())
-				printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-				       cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-				       (unsigned long long) q->now,
-				       (unsigned long long) cl->t_c,
-#endif
-				       q->jiffies);
-			diff = 1000;
-		}
-#endif
 		htb_change_class_mode(q,cl,&diff);
 		if (cl->cmode != HTB_CAN_SEND)
-			htb_add_to_wait_tree (q,cl,diff,2);
+			htb_add_to_wait_tree (q,cl,diff);
 	}
 	if (net_ratelimit())
 		printk(KERN_WARNING "htb: too many events !\n");
@@ -966,7 +802,7 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
  * Find leaf where current feed pointers points to.
  */
 static struct htb_class *
-htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
+htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
 {
 	int i;
 	struct {
@@ -981,8 +817,6 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32
 	sp->pid = pid;
 
 	for (i = 0; i < 65535; i++) {
-		HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);
-		
 		if (!*sp->pptr && *sp->pid) { 
 			/* ptr was invalidated but id is valid - try to recover 
 			   the original or next ptr */
@@ -1002,7 +836,6 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32
 		} else {
 			struct htb_class *cl;
 			cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
-			HTB_CHCL(cl);
 			if (!cl->level) 
 				return cl;
 			(++sp)->root = cl->un.inner.feed[prio].rb_node;
@@ -1022,15 +855,13 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level)
 	struct sk_buff *skb = NULL;
 	struct htb_class *cl,*start;
 	/* look initial class up in the row */
-	start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
+	start = cl = htb_lookup_leaf (q->row[level]+prio,prio,
 			q->ptr[level]+prio,q->last_ptr_id[level]+prio);
 	
 	do {
 next:
 		BUG_TRAP(cl); 
 		if (!cl) return NULL;
-		HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
-				prio,level,cl->classid,cl->un.leaf.deficit[level]);
 
 		/* class can be empty - it is unlikely but can be true if leaf
 		   qdisc drops packets in enqueue routine or if someone used
@@ -1044,7 +875,7 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level)
 			if ((q->row_mask[level] & (1 << prio)) == 0)
 				return NULL; 
 			
-			next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
+			next = htb_lookup_leaf (q->row[level]+prio,
 					prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
 
 			if (cl == start) /* fix start if we just deleted it */
@@ -1061,15 +892,13 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level)
 		}
 		q->nwc_hit++;
 		htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
-		cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
+		cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio,
 				q->last_ptr_id[level]+prio);
 
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
 		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
-			HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
-				level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
 			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
 			htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
 		}
@@ -1095,7 +924,6 @@ static void htb_delay_by(struct Qdisc *sch,long delay)
 	mod_timer(&q->timer, q->jiffies + delay);
 	sch->flags |= TCQ_F_THROTTLED;
 	sch->qstats.overlimits++;
-	HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
 }
 
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
@@ -1104,13 +932,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 	struct htb_sched *q = qdisc_priv(sch);
 	int level;
 	long min_delay;
-#ifdef HTB_DEBUG
-	int evs_used = 0;
-#endif
 
 	q->jiffies = jiffies;
-	HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
-			sch->q.qlen);
 
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
 	if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
@@ -1131,9 +954,6 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
 			delay = htb_do_events(q,level);
 			q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
-#ifdef HTB_DEBUG
-			evs_used++;
-#endif
 		} else
 			delay = q->near_ev_cache[level] - q->jiffies;	
 		
@@ -1151,20 +971,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 			}
 		}
 	}
-#ifdef HTB_DEBUG
-	if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
-		if (min_delay == LONG_MAX) {
-			printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
-					evs_used,q->jiffies,jiffies);
-			htb_debug_dump(q);
-		} else 
-			printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
-					"too small rate\n",min_delay);
-	}
-#endif
 	htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
 fin:
-	HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
 	return skb;
 }
 
@@ -1198,7 +1006,6 @@ static void htb_reset(struct Qdisc* sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int i;
-	HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
 
 	for (i = 0; i < HTB_HSIZE; i++) {
 		struct list_head *p;
@@ -1213,10 +1020,6 @@ static void htb_reset(struct Qdisc* sch)
 			}
 			cl->prio_activity = 0;
 			cl->cmode = HTB_CAN_SEND;
-#ifdef HTB_DEBUG
-			cl->pq_node.rb_color = -1;
-			memset(cl->node,255,sizeof(cl->node));
-#endif
 
 		}
 	}
@@ -1238,10 +1041,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 	struct rtattr *tb[TCA_HTB_INIT];
 	struct tc_htb_glob *gopt;
 	int i;
-#ifdef HTB_DEBUG
-	printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
-			  HTB_VER >> 16,HTB_VER & 0xffff);
-#endif
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
 			tb[TCA_HTB_INIT-1] == NULL ||
 			RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
@@ -1254,8 +1053,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 				HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
 		return -EINVAL;
 	}
-	q->debug = gopt->debug;
-	HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
 
 	INIT_LIST_HEAD(&q->root);
 	for (i = 0; i < HTB_HSIZE; i++)
@@ -1292,18 +1089,13 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	unsigned char	 *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_glob gopt;
-	HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
 	HTB_QLOCK(sch);
 	gopt.direct_pkts = q->direct_pkts;
 
-#ifdef HTB_DEBUG
-	if (HTB_DBG_COND(0,2))
-		htb_debug_dump(q);
-#endif
 	gopt.version = HTB_VER;
 	gopt.rate2quantum = q->rate2quantum;
 	gopt.defcls = q->defcls;
-	gopt.debug = q->debug;
+	gopt.debug = 0;
 	rta = (struct rtattr*)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
@@ -1319,16 +1111,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	struct sk_buff *skb, struct tcmsg *tcm)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
 	struct htb_class *cl = (struct htb_class*)arg;
 	unsigned char	 *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_opt opt;
 
-	HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);
-
 	HTB_QLOCK(sch);
 	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->classid;
@@ -1410,11 +1197,7 @@ static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
 
 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
 	struct htb_class *cl = htb_find(classid,sch);
-	HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
 	if (cl) 
 		cl->refcnt++;
 	return (unsigned long)cl;
@@ -1433,7 +1216,6 @@ static void htb_destroy_filters(struct tcf_proto **fl)
 static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
 	if (!cl->level) {
 		BUG_TRAP(cl->un.leaf.q);
 		sch->q.qlen -= cl->un.leaf.q->q.qlen;
@@ -1456,7 +1238,7 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
 		htb_deactivate (q,cl);
 	
 	if (cl->cmode != HTB_CAN_SEND)
-		htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+		rb_erase(&cl->pq_node,q->wait_pq+cl->level);
 	
 	kfree(cl);
 }
@@ -1465,7 +1247,6 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
 static void htb_destroy(struct Qdisc* sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	HTB_DBG(0,1,"htb_destroy q=%p\n",q);
 
 	del_timer_sync (&q->timer);
 #ifdef HTB_RATECM
@@ -1488,7 +1269,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class*)arg;
-	HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
 
 	// TODO: why don't allow to delete subtree ? references ? does
 	// tc subsys quarantee us that in htb_destroy it holds no class
@@ -1512,11 +1292,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 static void htb_put(struct Qdisc *sch, unsigned long arg)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
 	struct htb_class *cl = (struct htb_class*)arg;
-	HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
 
 	if (--cl->refcnt == 0)
 		htb_destroy_class(sch,cl);
@@ -1542,7 +1318,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
 
 	hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
-	HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
+
 	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
 	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
 	if (!rtab || !ctab) goto failure;
@@ -1567,9 +1343,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		INIT_LIST_HEAD(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
-#ifdef HTB_DEBUG
-		cl->magic = HTB_CMAGIC;
-#endif
 
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 		   so that can't be used inside of sch_tree_lock
@@ -1585,7 +1358,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 			/* remove from evt list because of level change */
 			if (parent->cmode != HTB_CAN_SEND) {
-				htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
+				rb_erase(&parent->pq_node,q->wait_pq);
 				parent->cmode = HTB_CAN_SEND;
 			}
 			parent->level = (parent->parent ? parent->parent->level
@@ -1607,13 +1380,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		/* attach to the hash list and parent's family */
 		list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
 		list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
-#ifdef HTB_DEBUG
-		{ 
-			int i;
-			for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
-			cl->pq_node.rb_color = -1;
-		}
-#endif
 	} else sch_tree_lock(sch);
 
 	/* it used to be a nasty bug here, we have to check that node
@@ -1654,7 +1420,7 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
 	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
-	HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
+
 	return fl;
 }
 
@@ -1663,7 +1429,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = htb_find (classid,sch);
-	HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
+
 	/*if (cl && !cl->level) return 0;
 	  The line above used to be there to prevent attaching filters to 
 	  leaves. But at least tc_index filter uses this just to get class 
@@ -1684,7 +1450,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
-	HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
+
 	if (cl) 
 		cl->filter_cnt--; 
 	else 
-- 
GitLab


From 9ac961ee05bfc837e5271be34ad7158e90dce7d9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 10 Aug 2006 23:33:16 -0700
Subject: [PATCH 0450/1063] [HTB]: Remove lock macro.

Get rid of the macros being used to obscure the locking.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 73094e7f41691..c0b80b75cdff1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -72,8 +72,6 @@
 #define HTB_EWMAC 2	/* rate average over HTB_EWMAC*HTB_HSIZE sec */
 #define HTB_RATECM 1    /* whether to use rate computer */
 #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
-#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
-#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
 #define HTB_VER 0x30011	/* major must be matched with number suplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
@@ -667,7 +665,7 @@ static void htb_rate_timer(unsigned long arg)
 	struct list_head *p;
 
 	/* lock queue so that we can muck with it */
-	HTB_QLOCK(sch);
+	spin_lock_bh(&sch->dev->queue_lock);
 
 	q->rttim.expires = jiffies + HZ;
 	add_timer(&q->rttim);
@@ -681,7 +679,7 @@ static void htb_rate_timer(unsigned long arg)
 		RT_GEN (cl->sum_bytes,cl->rate_bytes);
 		RT_GEN (cl->sum_packets,cl->rate_packets);
 	}
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 }
 #endif
 
@@ -1089,7 +1087,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	unsigned char	 *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_glob gopt;
-	HTB_QLOCK(sch);
+	spin_lock_bh(&sch->dev->queue_lock);
 	gopt.direct_pkts = q->direct_pkts;
 
 	gopt.version = HTB_VER;
@@ -1100,10 +1098,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
 	rta->rta_len = skb->tail - b;
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	skb_trim(skb, skb->tail - skb->data);
 	return -1;
 }
@@ -1116,7 +1114,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	struct rtattr *rta;
 	struct tc_htb_opt opt;
 
-	HTB_QLOCK(sch);
+	spin_lock_bh(&sch->dev->queue_lock);
 	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->classid;
 	if (!cl->level && cl->un.leaf.q)
@@ -1133,10 +1131,10 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	opt.level = cl->level; 
 	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 	rta->rta_len = skb->tail - b;
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	skb_trim(skb, b - skb->data);
 	return -1;
 }
-- 
GitLab


From 18a63e868b04cf949643cc9d2c8a51d8cb5da9c4 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 10 Aug 2006 23:34:02 -0700
Subject: [PATCH 0451/1063] [HTB]: HTB_HYSTERESIS cleanup

Change the conditional compilation around HTB_HYSTERESIS,
since the code was being split mid-expression.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c0b80b75cdff1..d8c1a6b0def1a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -483,6 +483,20 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 		htb_remove_class_from_row(q,cl,mask);
 }
 
+#if HTB_HYSTERESIS
+static inline long htb_lowater(const struct htb_class *cl)
+{
+	return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
+}
+static inline long htb_hiwater(const struct htb_class *cl)
+{
+	return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
+}
+#else
+#define htb_lowater(cl)	(0)
+#define htb_hiwater(cl)	(0)
+#endif
+
 /**
  * htb_class_mode - computes and returns current class mode
  *
@@ -499,19 +513,12 @@ htb_class_mode(struct htb_class *cl,long *diff)
 {
     long toks;
 
-    if ((toks = (cl->ctokens + *diff)) < (
-#if HTB_HYSTERESIS
-	    cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
-#endif
-       	    0)) {
+    if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 	    *diff = -toks;
 	    return HTB_CANT_SEND;
     }
-    if ((toks = (cl->tokens + *diff)) >= (
-#if HTB_HYSTERESIS
-	    cl->cmode == HTB_CAN_SEND ? -cl->buffer :
-#endif
-	    0))
+
+    if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 	    return HTB_CAN_SEND;
 
     *diff = -toks;
-- 
GitLab


From 87990467d387f922103db31678034785d8f21cb7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 10 Aug 2006 23:35:16 -0700
Subject: [PATCH 0452/1063] [HTB]: Lindent

Code was a mess in terms of indentation.  Run it through the Lindent
script, and clean up the damage. Also, don't use the vim magic
comment, and substitute inline for __inline__.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 1001 +++++++++++++++++++++++--------------------
 1 file changed, 526 insertions(+), 475 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d8c1a6b0def1a..6c6cac65255f7 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1,4 +1,4 @@
-/* vim: ts=8 sw=8
+/*
  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
  *
  *		This program is free software; you can redistribute it and/or
@@ -68,11 +68,11 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16	/* classid hash size */
-#define HTB_EWMAC 2	/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#define HTB_RATECM 1    /* whether to use rate computer */
-#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
-#define HTB_VER 0x30011	/* major must be matched with number suplied by TC as version */
+#define HTB_HSIZE 16		/* classid hash size */
+#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
+#define HTB_RATECM 1		/* whether to use rate computer */
+#define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
+#define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
@@ -80,154 +80,152 @@
 
 /* used internaly to keep status of single class */
 enum htb_cmode {
-    HTB_CANT_SEND,		/* class can't send and can't borrow */
-    HTB_MAY_BORROW,		/* class can't send but may borrow */
-    HTB_CAN_SEND		/* class can send */
+	HTB_CANT_SEND,		/* class can't send and can't borrow */
+	HTB_MAY_BORROW,		/* class can't send but may borrow */
+	HTB_CAN_SEND		/* class can send */
 };
 
 /* interior & leaf nodes; props specific to leaves are marked L: */
-struct htb_class
-{
-    /* general class parameters */
-    u32 classid;
-    struct gnet_stats_basic bstats;
-    struct gnet_stats_queue qstats;
-    struct gnet_stats_rate_est rate_est;
-    struct tc_htb_xstats xstats;/* our special stats */
-    int refcnt;			/* usage count of this class */
+struct htb_class {
+	/* general class parameters */
+	u32 classid;
+	struct gnet_stats_basic bstats;
+	struct gnet_stats_queue qstats;
+	struct gnet_stats_rate_est rate_est;
+	struct tc_htb_xstats xstats;	/* our special stats */
+	int refcnt;		/* usage count of this class */
 
 #ifdef HTB_RATECM
-    /* rate measurement counters */
-    unsigned long rate_bytes,sum_bytes;
-    unsigned long rate_packets,sum_packets;
+	/* rate measurement counters */
+	unsigned long rate_bytes, sum_bytes;
+	unsigned long rate_packets, sum_packets;
 #endif
 
-    /* topology */
-    int level;			/* our level (see above) */
-    struct htb_class *parent;	/* parent class */
-    struct list_head hlist;	/* classid hash list item */
-    struct list_head sibling;	/* sibling list item */
-    struct list_head children;	/* children list */
-
-    union {
-	    struct htb_class_leaf {
-		    struct Qdisc *q;
-		    int prio;
-		    int aprio;	
-		    int quantum;
-		    int deficit[TC_HTB_MAXDEPTH];
-		    struct list_head drop_list;
-	    } leaf;
-	    struct htb_class_inner {
-		    struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
-		    struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
-            /* When class changes from state 1->2 and disconnects from 
-               parent's feed then we lost ptr value and start from the
-              first child again. Here we store classid of the
-              last valid ptr (used when ptr is NULL). */
-              u32 last_ptr_id[TC_HTB_NUMPRIO];
-	    } inner;
-    } un;
-    struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
-    struct rb_node pq_node;		 /* node for event queue */
-    unsigned long pq_key;	/* the same type as jiffies global */
-    
-    int prio_activity;		/* for which prios are we active */
-    enum htb_cmode cmode;	/* current mode of the class */
-
-    /* class attached filters */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
-
-    int warned;		/* only one warning about non work conserving .. */
-
-    /* token bucket parameters */
-    struct qdisc_rate_table *rate;	/* rate table of the class itself */
-    struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
-    long buffer,cbuffer;		/* token bucket depth/rate */
-    psched_tdiff_t mbuffer;		/* max wait time */
-    long tokens,ctokens;		/* current number of tokens */
-    psched_time_t t_c;			/* checkpoint time */
+	/* topology */
+	int level;		/* our level (see above) */
+	struct htb_class *parent;	/* parent class */
+	struct list_head hlist;	/* classid hash list item */
+	struct list_head sibling;	/* sibling list item */
+	struct list_head children;	/* children list */
+
+	union {
+		struct htb_class_leaf {
+			struct Qdisc *q;
+			int prio;
+			int aprio;
+			int quantum;
+			int deficit[TC_HTB_MAXDEPTH];
+			struct list_head drop_list;
+		} leaf;
+		struct htb_class_inner {
+			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
+			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
+			/* When class changes from state 1->2 and disconnects from
+			   parent's feed then we lost ptr value and start from the
+			   first child again. Here we store classid of the
+			   last valid ptr (used when ptr is NULL). */
+			u32 last_ptr_id[TC_HTB_NUMPRIO];
+		} inner;
+	} un;
+	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
+	struct rb_node pq_node;	/* node for event queue */
+	unsigned long pq_key;	/* the same type as jiffies global */
+
+	int prio_activity;	/* for which prios are we active */
+	enum htb_cmode cmode;	/* current mode of the class */
+
+	/* class attached filters */
+	struct tcf_proto *filter_list;
+	int filter_cnt;
+
+	int warned;		/* only one warning about non work conserving .. */
+
+	/* token bucket parameters */
+	struct qdisc_rate_table *rate;	/* rate table of the class itself */
+	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
+	long buffer, cbuffer;	/* token bucket depth/rate */
+	psched_tdiff_t mbuffer;	/* max wait time */
+	long tokens, ctokens;	/* current number of tokens */
+	psched_time_t t_c;	/* checkpoint time */
 };
 
 /* TODO: maybe compute rate when size is too large .. or drop ? */
-static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
-	int size)
-{ 
-    int slot = size >> rate->rate.cell_log;
-    if (slot > 255) {
-	cl->xstats.giants++;
-	slot = 255;
-    }
-    return rate->data[slot];
+static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
+			   int size)
+{
+	int slot = size >> rate->rate.cell_log;
+	if (slot > 255) {
+		cl->xstats.giants++;
+		slot = 255;
+	}
+	return rate->data[slot];
 }
 
-struct htb_sched
-{
-    struct list_head root;			/* root classes list */
-    struct list_head hash[HTB_HSIZE];		/* hashed by classid */
-    struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */
-    
-    /* self list - roots of self generating tree */
-    struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    int row_mask[TC_HTB_MAXDEPTH];
-    struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+struct htb_sched {
+	struct list_head root;	/* root classes list */
+	struct list_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */
+
+	/* self list - roots of self generating tree */
+	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+	int row_mask[TC_HTB_MAXDEPTH];
+	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 
-    /* self wait list - roots of wait PQs per row */
-    struct rb_root wait_pq[TC_HTB_MAXDEPTH];
+	/* self wait list - roots of wait PQs per row */
+	struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 
-    /* time of nearest event per level (row) */
-    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
+	/* time of nearest event per level (row) */
+	unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
 
-    /* cached value of jiffies in dequeue */
-    unsigned long jiffies;
+	/* cached value of jiffies in dequeue */
+	unsigned long jiffies;
 
-    /* whether we hit non-work conserving class during this dequeue; we use */
-    int nwc_hit;	/* this to disable mindelay complaint in dequeue */
+	/* whether we hit non-work conserving class during this dequeue; we use */
+	int nwc_hit;		/* this to disable mindelay complaint in dequeue */
 
-    int defcls;		/* class where unclassified flows go to */
+	int defcls;		/* class where unclassified flows go to */
 
-    /* filters for qdisc itself */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
+	/* filters for qdisc itself */
+	struct tcf_proto *filter_list;
+	int filter_cnt;
 
-    int rate2quantum;		/* quant = rate / rate2quantum */
-    psched_time_t now;		/* cached dequeue time */
-    struct timer_list timer;	/* send delay timer */
+	int rate2quantum;	/* quant = rate / rate2quantum */
+	psched_time_t now;	/* cached dequeue time */
+	struct timer_list timer;	/* send delay timer */
 #ifdef HTB_RATECM
-    struct timer_list rttim;	/* rate computer timer */
-    int recmp_bucket;		/* which hash bucket to recompute next */
+	struct timer_list rttim;	/* rate computer timer */
+	int recmp_bucket;	/* which hash bucket to recompute next */
 #endif
-    
-    /* non shaped skbs; let them go directly thru */
-    struct sk_buff_head direct_queue;
-    int direct_qlen;  /* max qlen of above */
 
-    long direct_pkts;
+	/* non shaped skbs; let them go directly thru */
+	struct sk_buff_head direct_queue;
+	int direct_qlen;	/* max qlen of above */
+
+	long direct_pkts;
 };
 
 /* compute hash of size HTB_HSIZE for given handle */
-static __inline__ int htb_hash(u32 h) 
+static inline int htb_hash(u32 h)
 {
 #if HTB_HSIZE != 16
- #error "Declare new hash for your HTB_HSIZE"
+#error "Declare new hash for your HTB_HSIZE"
 #endif
-    h ^= h>>8;	/* stolen from cbq_hash */
-    h ^= h>>4;
-    return h & 0xf;
+	h ^= h >> 8;		/* stolen from cbq_hash */
+	h ^= h >> 4;
+	return h & 0xf;
 }
 
 /* find class in global hash table using given handle */
-static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
+static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct list_head *p;
-	if (TC_H_MAJ(handle) != sch->handle) 
+	if (TC_H_MAJ(handle) != sch->handle)
 		return NULL;
-	
-	list_for_each (p,q->hash+htb_hash(handle)) {
-		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+
+	list_for_each(p, q->hash + htb_hash(handle)) {
+		struct htb_class *cl = list_entry(p, struct htb_class, hlist);
 		if (cl->classid == handle)
 			return cl;
 	}
@@ -252,7 +250,8 @@ static inline u32 htb_classid(struct htb_class *cl)
 	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
 }
 
-static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
+				      int *qerr)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl;
@@ -264,8 +263,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
 	   note that nfmark can be used too by attaching filter fw with no
 	   rules in it */
 	if (skb->priority == sch->handle)
-		return HTB_DIRECT;  /* X:0 (direct flow) selected */
-	if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 
+		return HTB_DIRECT;	/* X:0 (direct flow) selected */
+	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 		return cl;
 
 	*qerr = NET_XMIT_BYPASS;
@@ -274,7 +273,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
-		case TC_ACT_STOLEN: 
+		case TC_ACT_STOLEN:
 			*qerr = NET_XMIT_SUCCESS;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -283,22 +282,22 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
 		if (result == TC_POLICE_SHOT)
 			return HTB_DIRECT;
 #endif
-		if ((cl = (void*)res.class) == NULL) {
+		if ((cl = (void *)res.class) == NULL) {
 			if (res.classid == sch->handle)
-				return HTB_DIRECT;  /* X:0 (direct flow) */
-			if ((cl = htb_find(res.classid,sch)) == NULL)
-				break; /* filter selected invalid classid */
+				return HTB_DIRECT;	/* X:0 (direct flow) */
+			if ((cl = htb_find(res.classid, sch)) == NULL)
+				break;	/* filter selected invalid classid */
 		}
 		if (!cl->level)
-			return cl; /* we hit leaf; return it */
+			return cl;	/* we hit leaf; return it */
 
 		/* we have got inner class; apply inner filter chain */
 		tcf = cl->filter_list;
 	}
 	/* classification failed; try to use default class */
-	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
+	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 	if (!cl || cl->level)
-		return HTB_DIRECT; /* bad default .. this is safe bet */
+		return HTB_DIRECT;	/* bad default .. this is safe bet */
 	return cl;
 }
 
@@ -308,18 +307,19 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
  * Routine adds class to the list (actually tree) sorted by classid.
  * Make sure that class is not already on such list for given prio.
  */
-static void htb_add_to_id_tree (struct rb_root *root,
-		struct htb_class *cl,int prio)
+static void htb_add_to_id_tree(struct rb_root *root,
+			       struct htb_class *cl, int prio)
 {
 	struct rb_node **p = &root->rb_node, *parent = NULL;
 
 	while (*p) {
-		struct htb_class *c; parent = *p;
+		struct htb_class *c;
+		parent = *p;
 		c = rb_entry(parent, struct htb_class, node[prio]);
 
 		if (cl->classid > c->classid)
 			p = &parent->rb_right;
-		else 
+		else
 			p = &parent->rb_left;
 	}
 	rb_link_node(&cl->node[prio], parent, p);
@@ -333,8 +333,8 @@ static void htb_add_to_id_tree (struct rb_root *root,
  * change its mode in cl->pq_key microseconds. Make sure that class is not
  * already in the queue.
  */
-static void htb_add_to_wait_tree (struct htb_sched *q,
-				  struct htb_class *cl,long delay)
+static void htb_add_to_wait_tree(struct htb_sched *q,
+				 struct htb_class *cl, long delay)
 {
 	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 
@@ -345,13 +345,14 @@ static void htb_add_to_wait_tree (struct htb_sched *q,
 	/* update the nearest event cache */
 	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
 		q->near_ev_cache[cl->level] = cl->pq_key;
-	
+
 	while (*p) {
-		struct htb_class *c; parent = *p;
+		struct htb_class *c;
+		parent = *p;
 		c = rb_entry(parent, struct htb_class, pq_node);
 		if (time_after_eq(cl->pq_key, c->pq_key))
 			p = &parent->rb_right;
-		else 
+		else
 			p = &parent->rb_left;
 	}
 	rb_link_node(&cl->pq_node, parent, p);
@@ -375,14 +376,14 @@ static void htb_next_rb_node(struct rb_node **n)
  * The class is added to row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static inline void htb_add_class_to_row(struct htb_sched *q, 
-		struct htb_class *cl,int mask)
+static inline void htb_add_class_to_row(struct htb_sched *q,
+					struct htb_class *cl, int mask)
 {
 	q->row_mask[cl->level] |= mask;
 	while (mask) {
 		int prio = ffz(~mask);
 		mask &= ~(1 << prio);
-		htb_add_to_id_tree(q->row[cl->level]+prio,cl,prio);
+		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 	}
 }
 
@@ -392,18 +393,18 @@ static inline void htb_add_class_to_row(struct htb_sched *q,
  * The class is removed from row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
-		struct htb_class *cl,int mask)
+static inline void htb_remove_class_from_row(struct htb_sched *q,
+						 struct htb_class *cl, int mask)
 {
 	int m = 0;
 
 	while (mask) {
 		int prio = ffz(~mask);
 		mask &= ~(1 << prio);
-		if (q->ptr[cl->level][prio] == cl->node+prio)
-			htb_next_rb_node(q->ptr[cl->level]+prio);
-		rb_erase(cl->node + prio,q->row[cl->level]+prio);
-		if (!q->row[cl->level][prio].rb_node) 
+		if (q->ptr[cl->level][prio] == cl->node + prio)
+			htb_next_rb_node(q->ptr[cl->level] + prio);
+		rb_erase(cl->node + prio, q->row[cl->level] + prio);
+		if (!q->row[cl->level][prio].rb_node)
 			m |= 1 << prio;
 	}
 	q->row_mask[cl->level] &= ~m;
@@ -416,30 +417,31 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
  * for priorities it is participating on. cl->cmode must be new 
  * (activated) mode. It does nothing if cl->prio_activity == 0.
  */
-static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
+static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
-	long m,mask = cl->prio_activity;
+	long m, mask = cl->prio_activity;
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-
-		m = mask; while (m) {
+		m = mask;
+		while (m) {
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
-			
+
 			if (p->un.inner.feed[prio].rb_node)
 				/* parent already has its feed in use so that
 				   reset bit in mask as parent is already ok */
 				mask &= ~(1 << prio);
-			
-			htb_add_to_id_tree(p->un.inner.feed+prio,cl,prio);
+
+			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 		}
 		p->prio_activity |= mask;
-		cl = p; p = cl->parent;
+		cl = p;
+		p = cl->parent;
 
 	}
 	if (cl->cmode == HTB_CAN_SEND && mask)
-		htb_add_class_to_row(q,cl,mask);
+		htb_add_class_to_row(q, cl, mask);
 }
 
 /**
@@ -452,35 +454,36 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
-	long m,mask = cl->prio_activity;
-
+	long m, mask = cl->prio_activity;
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-		m = mask; mask = 0; 
+		m = mask;
+		mask = 0;
 		while (m) {
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
-			
-			if (p->un.inner.ptr[prio] == cl->node+prio) {
+
+			if (p->un.inner.ptr[prio] == cl->node + prio) {
 				/* we are removing child which is pointed to from
 				   parent feed - forget the pointer but remember
 				   classid */
 				p->un.inner.last_ptr_id[prio] = cl->classid;
 				p->un.inner.ptr[prio] = NULL;
 			}
-			
-			rb_erase(cl->node + prio,p->un.inner.feed + prio);
-			
-			if (!p->un.inner.feed[prio].rb_node) 
+
+			rb_erase(cl->node + prio, p->un.inner.feed + prio);
+
+			if (!p->un.inner.feed[prio].rb_node)
 				mask |= 1 << prio;
 		}
 
 		p->prio_activity &= ~mask;
-		cl = p; p = cl->parent;
+		cl = p;
+		p = cl->parent;
 
 	}
-	if (cl->cmode == HTB_CAN_SEND && mask) 
-		htb_remove_class_from_row(q,cl,mask);
+	if (cl->cmode == HTB_CAN_SEND && mask)
+		htb_remove_class_from_row(q, cl, mask);
 }
 
 #if HTB_HYSTERESIS
@@ -508,21 +511,21 @@ static inline long htb_hiwater(const struct htb_class *cl)
  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
  * mode transitions per time unit. The speed gain is about 1/6.
  */
-static __inline__ enum htb_cmode 
-htb_class_mode(struct htb_class *cl,long *diff)
+static inline enum htb_cmode
+htb_class_mode(struct htb_class *cl, long *diff)
 {
-    long toks;
+	long toks;
 
-    if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
-	    *diff = -toks;
-	    return HTB_CANT_SEND;
-    }
+	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
+		*diff = -toks;
+		return HTB_CANT_SEND;
+	}
 
-    if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
-	    return HTB_CAN_SEND;
+	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
+		return HTB_CAN_SEND;
 
-    *diff = -toks;
-    return HTB_MAY_BORROW;
+	*diff = -toks;
+	return HTB_MAY_BORROW;
 }
 
 /**
@@ -534,22 +537,21 @@ htb_class_mode(struct htb_class *cl,long *diff)
  * be different from old one and cl->pq_key has to be valid if changing
  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
  */
-static void 
+static void
 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
-{ 
-	enum htb_cmode new_mode = htb_class_mode(cl,diff);
-	
+{
+	enum htb_cmode new_mode = htb_class_mode(cl, diff);
 
 	if (new_mode == cl->cmode)
-		return;	
-	
-	if (cl->prio_activity) { /* not necessary: speed optimization */
-		if (cl->cmode != HTB_CANT_SEND) 
-			htb_deactivate_prios(q,cl);
+		return;
+
+	if (cl->prio_activity) {	/* not necessary: speed optimization */
+		if (cl->cmode != HTB_CANT_SEND)
+			htb_deactivate_prios(q, cl);
 		cl->cmode = new_mode;
-		if (new_mode != HTB_CANT_SEND) 
-			htb_activate_prios(q,cl);
-	} else 
+		if (new_mode != HTB_CANT_SEND)
+			htb_activate_prios(q, cl);
+	} else
 		cl->cmode = new_mode;
 }
 
@@ -560,14 +562,15 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
  * for the prio. It can be called on already active leaf safely.
  * It also adds leaf into droplist.
  */
-static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 {
 	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
 
 	if (!cl->prio_activity) {
 		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
-		htb_activate_prios(q,cl);
-		list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
+		htb_activate_prios(q, cl);
+		list_add_tail(&cl->un.leaf.drop_list,
+			      q->drops + cl->un.leaf.aprio);
 	}
 }
 
@@ -577,97 +580,100 @@ static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
  * Make sure that leaf is active. In the other words it can't be called
  * with non-active leaf. It also removes class from the drop list.
  */
-static __inline__ void 
-htb_deactivate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 {
 	BUG_TRAP(cl->prio_activity);
 
-	htb_deactivate_prios(q,cl);
+	htb_deactivate_prios(q, cl);
 	cl->prio_activity = 0;
 	list_del_init(&cl->un.leaf.drop_list);
 }
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-    int ret;
-    struct htb_sched *q = qdisc_priv(sch);
-    struct htb_class *cl = htb_classify(skb,sch,&ret);
-
-    if (cl == HTB_DIRECT) {
-	/* enqueue to helper queue */
-	if (q->direct_queue.qlen < q->direct_qlen) {
-	    __skb_queue_tail(&q->direct_queue, skb);
-	    q->direct_pkts++;
-	} else {
-	    kfree_skb(skb);
-	    sch->qstats.drops++;
-	    return NET_XMIT_DROP;
-	}
+	int ret;
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = htb_classify(skb, sch, &ret);
+
+	if (cl == HTB_DIRECT) {
+		/* enqueue to helper queue */
+		if (q->direct_queue.qlen < q->direct_qlen) {
+			__skb_queue_tail(&q->direct_queue, skb);
+			q->direct_pkts++;
+		} else {
+			kfree_skb(skb);
+			sch->qstats.drops++;
+			return NET_XMIT_DROP;
+		}
 #ifdef CONFIG_NET_CLS_ACT
-    } else if (!cl) {
-	if (ret == NET_XMIT_BYPASS)
-		sch->qstats.drops++;
-	kfree_skb (skb);
-	return ret;
+	} else if (!cl) {
+		if (ret == NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
 #endif
-    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-	sch->qstats.drops++;
-	cl->qstats.drops++;
-	return NET_XMIT_DROP;
-    } else {
-	cl->bstats.packets++; cl->bstats.bytes += skb->len;
-	htb_activate (q,cl);
-    }
-
-    sch->q.qlen++;
-    sch->bstats.packets++; sch->bstats.bytes += skb->len;
-    return NET_XMIT_SUCCESS;
+	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
+		   NET_XMIT_SUCCESS) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+		return NET_XMIT_DROP;
+	} else {
+		cl->bstats.packets++;
+		cl->bstats.bytes += skb->len;
+		htb_activate(q, cl);
+	}
+
+	sch->q.qlen++;
+	sch->bstats.packets++;
+	sch->bstats.bytes += skb->len;
+	return NET_XMIT_SUCCESS;
 }
 
 /* TODO: requeuing packet charges it to policers again !! */
 static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
-    struct htb_sched *q = qdisc_priv(sch);
-    int ret =  NET_XMIT_SUCCESS;
-    struct htb_class *cl = htb_classify(skb,sch, &ret);
-    struct sk_buff *tskb;
-
-    if (cl == HTB_DIRECT || !cl) {
-	/* enqueue to helper queue */
-	if (q->direct_queue.qlen < q->direct_qlen && cl) {
-	    __skb_queue_head(&q->direct_queue, skb);
-	} else {
-            __skb_queue_head(&q->direct_queue, skb);
-            tskb = __skb_dequeue_tail(&q->direct_queue);
-            kfree_skb (tskb);
-            sch->qstats.drops++;
-            return NET_XMIT_CN;	
-	}
-    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-	sch->qstats.drops++;
-	cl->qstats.drops++;
-	return NET_XMIT_DROP;
-    } else 
-	    htb_activate (q,cl);
-
-    sch->q.qlen++;
-    sch->qstats.requeues++;
-    return NET_XMIT_SUCCESS;
+	struct htb_sched *q = qdisc_priv(sch);
+	int ret = NET_XMIT_SUCCESS;
+	struct htb_class *cl = htb_classify(skb, sch, &ret);
+	struct sk_buff *tskb;
+
+	if (cl == HTB_DIRECT || !cl) {
+		/* enqueue to helper queue */
+		if (q->direct_queue.qlen < q->direct_qlen && cl) {
+			__skb_queue_head(&q->direct_queue, skb);
+		} else {
+			__skb_queue_head(&q->direct_queue, skb);
+			tskb = __skb_dequeue_tail(&q->direct_queue);
+			kfree_skb(tskb);
+			sch->qstats.drops++;
+			return NET_XMIT_CN;
+		}
+	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+		   NET_XMIT_SUCCESS) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+		return NET_XMIT_DROP;
+	} else
+		htb_activate(q, cl);
+
+	sch->q.qlen++;
+	sch->qstats.requeues++;
+	return NET_XMIT_SUCCESS;
 }
 
 static void htb_timer(unsigned long arg)
 {
-    struct Qdisc *sch = (struct Qdisc*)arg;
-    sch->flags &= ~TCQ_F_THROTTLED;
-    wmb();
-    netif_schedule(sch->dev);
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	sch->flags &= ~TCQ_F_THROTTLED;
+	wmb();
+	netif_schedule(sch->dev);
 }
 
 #ifdef HTB_RATECM
 #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
 static void htb_rate_timer(unsigned long arg)
 {
-	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct htb_sched *q = qdisc_priv(sch);
 	struct list_head *p;
 
@@ -678,13 +684,13 @@ static void htb_rate_timer(unsigned long arg)
 	add_timer(&q->rttim);
 
 	/* scan and recompute one bucket at time */
-	if (++q->recmp_bucket >= HTB_HSIZE) 
+	if (++q->recmp_bucket >= HTB_HSIZE)
 		q->recmp_bucket = 0;
-	list_for_each (p,q->hash+q->recmp_bucket) {
-		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+	list_for_each(p, q->hash + q->recmp_bucket) {
+		struct htb_class *cl = list_entry(p, struct htb_class, hlist);
 
-		RT_GEN (cl->sum_bytes,cl->rate_bytes);
-		RT_GEN (cl->sum_packets,cl->rate_packets);
+		RT_GEN(cl->sum_bytes, cl->rate_bytes);
+		RT_GEN(cl->sum_packets, cl->rate_packets);
 	}
 	spin_unlock_bh(&sch->dev->queue_lock);
 }
@@ -701,10 +707,10 @@ static void htb_rate_timer(unsigned long arg)
  * CAN_SEND) because we can use more precise clock that event queue here.
  * In such case we remove class from event queue first.
  */
-static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
-		int level,int bytes)
-{	
-	long toks,diff;
+static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
+			     int level, int bytes)
+{
+	long toks, diff;
 	enum htb_cmode old_mode;
 
 #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
@@ -714,29 +720,31 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
 	cl->T = toks
 
 	while (cl) {
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
 		if (cl->level >= level) {
-			if (cl->level == level) cl->xstats.lends++;
-			HTB_ACCNT (tokens,buffer,rate);
+			if (cl->level == level)
+				cl->xstats.lends++;
+			HTB_ACCNT(tokens, buffer, rate);
 		} else {
 			cl->xstats.borrows++;
-			cl->tokens += diff; /* we moved t_c; update tokens */
+			cl->tokens += diff;	/* we moved t_c; update tokens */
 		}
-		HTB_ACCNT (ctokens,cbuffer,ceil);
+		HTB_ACCNT(ctokens, cbuffer, ceil);
 		cl->t_c = q->now;
 
-		old_mode = cl->cmode; diff = 0;
-		htb_change_class_mode(q,cl,&diff);
+		old_mode = cl->cmode;
+		diff = 0;
+		htb_change_class_mode(q, cl, &diff);
 		if (old_mode != cl->cmode) {
 			if (old_mode != HTB_CAN_SEND)
-				rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+				rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 			if (cl->cmode != HTB_CAN_SEND)
-				htb_add_to_wait_tree (q,cl,diff);
+				htb_add_to_wait_tree(q, cl, diff);
 		}
-		
 #ifdef HTB_RATECM
 		/* update rate counters */
-		cl->sum_bytes += bytes; cl->sum_packets++;
+		cl->sum_bytes += bytes;
+		cl->sum_packets++;
 #endif
 
 		/* update byte stats except for leaves which are already updated */
@@ -755,7 +763,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
  * next pending event (0 for no event in pq).
  * Note: Aplied are events whose have cl->pq_key <= jiffies.
  */
-static long htb_do_events(struct htb_sched *q,int level)
+static long htb_do_events(struct htb_sched *q, int level)
 {
 	int i;
 
@@ -763,34 +771,38 @@ static long htb_do_events(struct htb_sched *q,int level)
 		struct htb_class *cl;
 		long diff;
 		struct rb_node *p = q->wait_pq[level].rb_node;
-		if (!p) return 0;
-		while (p->rb_left) p = p->rb_left;
+		if (!p)
+			return 0;
+		while (p->rb_left)
+			p = p->rb_left;
 
 		cl = rb_entry(p, struct htb_class, pq_node);
 		if (time_after(cl->pq_key, q->jiffies)) {
 			return cl->pq_key - q->jiffies;
 		}
-		rb_erase(p,q->wait_pq+level);
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-		htb_change_class_mode(q,cl,&diff);
+		rb_erase(p, q->wait_pq + level);
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+		htb_change_class_mode(q, cl, &diff);
 		if (cl->cmode != HTB_CAN_SEND)
-			htb_add_to_wait_tree (q,cl,diff);
+			htb_add_to_wait_tree(q, cl, diff);
 	}
 	if (net_ratelimit())
 		printk(KERN_WARNING "htb: too many events !\n");
-	return HZ/10;
+	return HZ / 10;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
    is no such one exists. */
-static struct rb_node *
-htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
+static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
+					      u32 id)
 {
 	struct rb_node *r = NULL;
 	while (n) {
-		struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
-		if (id == cl->classid) return n;
-		
+		struct htb_class *cl =
+		    rb_entry(n, struct htb_class, node[prio]);
+		if (id == cl->classid)
+			return n;
+
 		if (id > cl->classid) {
 			n = n->rb_right;
 		} else {
@@ -806,46 +818,49 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
  *
  * Find leaf where current feed pointers points to.
  */
-static struct htb_class *
-htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
+static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
+					 struct rb_node **pptr, u32 * pid)
 {
 	int i;
 	struct {
 		struct rb_node *root;
 		struct rb_node **pptr;
 		u32 *pid;
-	} stk[TC_HTB_MAXDEPTH],*sp = stk;
-	
+	} stk[TC_HTB_MAXDEPTH], *sp = stk;
+
 	BUG_TRAP(tree->rb_node);
 	sp->root = tree->rb_node;
 	sp->pptr = pptr;
 	sp->pid = pid;
 
 	for (i = 0; i < 65535; i++) {
-		if (!*sp->pptr && *sp->pid) { 
+		if (!*sp->pptr && *sp->pid) {
 			/* ptr was invalidated but id is valid - try to recover 
 			   the original or next ptr */
-			*sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
+			*sp->pptr =
+			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
 		}
-		*sp->pid = 0; /* ptr is valid now so that remove this hint as it
-			         can become out of date quickly */
-		if (!*sp->pptr) { /* we are at right end; rewind & go up */
+		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
+				   can become out of date quickly */
+		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
 			*sp->pptr = sp->root;
-			while ((*sp->pptr)->rb_left) 
+			while ((*sp->pptr)->rb_left)
 				*sp->pptr = (*sp->pptr)->rb_left;
 			if (sp > stk) {
 				sp--;
-				BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
-				htb_next_rb_node (sp->pptr);
+				BUG_TRAP(*sp->pptr);
+				if (!*sp->pptr)
+					return NULL;
+				htb_next_rb_node(sp->pptr);
 			}
 		} else {
 			struct htb_class *cl;
-			cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
-			if (!cl->level) 
+			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
+			if (!cl->level)
 				return cl;
 			(++sp)->root = cl->un.inner.feed[prio].rb_node;
-			sp->pptr = cl->un.inner.ptr+prio;
-			sp->pid = cl->un.inner.last_ptr_id+prio;
+			sp->pptr = cl->un.inner.ptr + prio;
+			sp->pid = cl->un.inner.last_ptr_id + prio;
 		}
 	}
 	BUG_TRAP(0);
@@ -854,19 +869,21 @@ htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
 
 /* dequeues packet at given priority and level; call only if
    you are sure that there is active class at prio/level */
-static struct sk_buff *
-htb_dequeue_tree(struct htb_sched *q,int prio,int level)
+static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
+					int level)
 {
 	struct sk_buff *skb = NULL;
-	struct htb_class *cl,*start;
+	struct htb_class *cl, *start;
 	/* look initial class up in the row */
-	start = cl = htb_lookup_leaf (q->row[level]+prio,prio,
-			q->ptr[level]+prio,q->last_ptr_id[level]+prio);
-	
+	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
+				     q->ptr[level] + prio,
+				     q->last_ptr_id[level] + prio);
+
 	do {
 next:
-		BUG_TRAP(cl); 
-		if (!cl) return NULL;
+		BUG_TRAP(cl);
+		if (!cl)
+			return NULL;
 
 		/* class can be empty - it is unlikely but can be true if leaf
 		   qdisc drops packets in enqueue routine or if someone used
@@ -874,56 +891,64 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level)
 		   simply deactivate and skip such class */
 		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 			struct htb_class *next;
-			htb_deactivate(q,cl);
+			htb_deactivate(q, cl);
 
 			/* row/level might become empty */
 			if ((q->row_mask[level] & (1 << prio)) == 0)
-				return NULL; 
-			
-			next = htb_lookup_leaf (q->row[level]+prio,
-					prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+				return NULL;
 
-			if (cl == start) /* fix start if we just deleted it */
+			next = htb_lookup_leaf(q->row[level] + prio,
+					       prio, q->ptr[level] + prio,
+					       q->last_ptr_id[level] + prio);
+
+			if (cl == start)	/* fix start if we just deleted it */
 				start = next;
 			cl = next;
 			goto next;
 		}
-	
-		if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) 
+
+		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+		if (likely(skb != NULL))
 			break;
 		if (!cl->warned) {
-			printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
+			printk(KERN_WARNING
+			       "htb: class %X isn't work conserving ?!\n",
+			       cl->classid);
 			cl->warned = 1;
 		}
 		q->nwc_hit++;
-		htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
-		cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio,
-				q->last_ptr_id[level]+prio);
+		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+				  ptr[0]) + prio);
+		cl = htb_lookup_leaf(q->row[level] + prio, prio,
+				     q->ptr[level] + prio,
+				     q->last_ptr_id[level] + prio);
 
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
 		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
 			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
-			htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
+			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+					  ptr[0]) + prio);
 		}
 		/* this used to be after charge_class but this constelation
 		   gives us slightly better performance */
 		if (!cl->un.leaf.q->q.qlen)
-			htb_deactivate (q,cl);
-		htb_charge_class (q,cl,level,skb->len);
+			htb_deactivate(q, cl);
+		htb_charge_class(q, cl, level, skb->len);
 	}
 	return skb;
 }
 
-static void htb_delay_by(struct Qdisc *sch,long delay)
+static void htb_delay_by(struct Qdisc *sch, long delay)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	if (delay <= 0) delay = 1;
-	if (unlikely(delay > 5*HZ)) {
+	if (delay <= 0)
+		delay = 1;
+	if (unlikely(delay > 5 * HZ)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
-		delay = 5*HZ;
+		delay = 5 * HZ;
 	}
 	/* why don't use jiffies here ? because expires can be in past */
 	mod_timer(&q->timer, q->jiffies + delay);
@@ -941,13 +966,15 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 	q->jiffies = jiffies;
 
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
-	if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
+	skb = __skb_dequeue(&q->direct_queue);
+	if (skb != NULL) {
 		sch->flags &= ~TCQ_F_THROTTLED;
 		sch->q.qlen--;
 		return skb;
 	}
 
-	if (!sch->q.qlen) goto fin;
+	if (!sch->q.qlen)
+		goto fin;
 	PSCHED_GET_TIME(q->now);
 
 	min_delay = LONG_MAX;
@@ -957,18 +984,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 		int m;
 		long delay;
 		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
-			delay = htb_do_events(q,level);
-			q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
+			delay = htb_do_events(q, level);
+			q->near_ev_cache[level] =
+			    q->jiffies + (delay ? delay : HZ);
 		} else
-			delay = q->near_ev_cache[level] - q->jiffies;	
-		
-		if (delay && min_delay > delay) 
+			delay = q->near_ev_cache[level] - q->jiffies;
+
+		if (delay && min_delay > delay)
 			min_delay = delay;
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
-			int prio = ffz (m);
+			int prio = ffz(m);
 			m |= 1 << prio;
-			skb = htb_dequeue_tree(q,prio,level);
+			skb = htb_dequeue_tree(q, prio, level);
 			if (likely(skb != NULL)) {
 				sch->q.qlen--;
 				sch->flags &= ~TCQ_F_THROTTLED;
@@ -976,28 +1004,28 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 			}
 		}
 	}
-	htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
+	htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
 fin:
 	return skb;
 }
 
 /* try to drop from each class (by prio) until one succeed */
-static unsigned int htb_drop(struct Qdisc* sch)
+static unsigned int htb_drop(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int prio;
 
 	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 		struct list_head *p;
-		list_for_each (p,q->drops+prio) {
+		list_for_each(p, q->drops + prio) {
 			struct htb_class *cl = list_entry(p, struct htb_class,
 							  un.leaf.drop_list);
 			unsigned int len;
-			if (cl->un.leaf.q->ops->drop && 
-				(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+			if (cl->un.leaf.q->ops->drop &&
+			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 				sch->q.qlen--;
 				if (!cl->un.leaf.q->q.qlen)
-					htb_deactivate (q,cl);
+					htb_deactivate(q, cl);
 				return len;
 			}
 		}
@@ -1007,19 +1035,20 @@ static unsigned int htb_drop(struct Qdisc* sch)
 
 /* reset all classes */
 /* always caled under BH & queue lock */
-static void htb_reset(struct Qdisc* sch)
+static void htb_reset(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int i;
 
 	for (i = 0; i < HTB_HSIZE; i++) {
 		struct list_head *p;
-		list_for_each (p,q->hash+i) {
-			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		list_for_each(p, q->hash + i) {
+			struct htb_class *cl =
+			    list_entry(p, struct htb_class, hlist);
 			if (cl->level)
-				memset(&cl->un.inner,0,sizeof(cl->un.inner));
+				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 			else {
-				if (cl->un.leaf.q) 
+				if (cl->un.leaf.q)
 					qdisc_reset(cl->un.leaf.q);
 				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 			}
@@ -1032,12 +1061,12 @@ static void htb_reset(struct Qdisc* sch)
 	del_timer(&q->timer);
 	__skb_queue_purge(&q->direct_queue);
 	sch->q.qlen = 0;
-	memset(q->row,0,sizeof(q->row));
-	memset(q->row_mask,0,sizeof(q->row_mask));
-	memset(q->wait_pq,0,sizeof(q->wait_pq));
-	memset(q->ptr,0,sizeof(q->ptr));
+	memset(q->row, 0, sizeof(q->row));
+	memset(q->row_mask, 0, sizeof(q->row_mask));
+	memset(q->wait_pq, 0, sizeof(q->wait_pq));
+	memset(q->ptr, 0, sizeof(q->ptr));
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops+i);
+		INIT_LIST_HEAD(q->drops + i);
 }
 
 static int htb_init(struct Qdisc *sch, struct rtattr *opt)
@@ -1047,29 +1076,30 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 	struct tc_htb_glob *gopt;
 	int i;
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
-			tb[TCA_HTB_INIT-1] == NULL ||
-			RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
+	    tb[TCA_HTB_INIT - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
 		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
 	}
-	gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
+	gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
 	if (gopt->version != HTB_VER >> 16) {
-		printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
-				HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
+		printk(KERN_ERR
+		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
 		return -EINVAL;
 	}
 
 	INIT_LIST_HEAD(&q->root);
 	for (i = 0; i < HTB_HSIZE; i++)
-		INIT_LIST_HEAD(q->hash+i);
+		INIT_LIST_HEAD(q->hash + i);
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops+i);
+		INIT_LIST_HEAD(q->drops + i);
 
 	init_timer(&q->timer);
 	skb_queue_head_init(&q->direct_queue);
 
 	q->direct_qlen = sch->dev->tx_queue_len;
-	if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
 		q->direct_qlen = 2;
 	q->timer.function = htb_timer;
 	q->timer.data = (unsigned long)sch;
@@ -1091,7 +1121,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_glob gopt;
 	spin_lock_bh(&sch->dev->queue_lock);
@@ -1101,7 +1131,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	gopt.rate2quantum = q->rate2quantum;
 	gopt.defcls = q->defcls;
 	gopt.debug = 0;
-	rta = (struct rtattr*)b;
+	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
 	rta->rta_len = skb->tail - b;
@@ -1114,10 +1144,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 }
 
 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
-	struct sk_buff *skb, struct tcmsg *tcm)
+			  struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
-	unsigned char	 *b = skb->tail;
+	struct htb_class *cl = (struct htb_class *)arg;
+	unsigned char *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_opt opt;
 
@@ -1127,15 +1157,18 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	if (!cl->level && cl->un.leaf.q)
 		tcm->tcm_info = cl->un.leaf.q->handle;
 
-	rta = (struct rtattr*)b;
+	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
-	memset (&opt,0,sizeof(opt));
+	memset(&opt, 0, sizeof(opt));
 
-	opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
-	opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
-	opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
-	opt.level = cl->level; 
+	opt.rate = cl->rate->rate;
+	opt.buffer = cl->buffer;
+	opt.ceil = cl->ceil->rate;
+	opt.cbuffer = cl->cbuffer;
+	opt.quantum = cl->un.leaf.quantum;
+	opt.prio = cl->un.leaf.prio;
+	opt.level = cl->level;
 	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 	rta->rta_len = skb->tail - b;
 	spin_unlock_bh(&sch->dev->queue_lock);
@@ -1147,14 +1180,13 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 }
 
 static int
-htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
-	struct gnet_dump *d)
+htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 #ifdef HTB_RATECM
-	cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
-	cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
+	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
+	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
 #endif
 
 	if (!cl->level && cl->un.leaf.q)
@@ -1171,21 +1203,22 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 }
 
 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
-	struct Qdisc **old)
+		     struct Qdisc **old)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	if (cl && !cl->level) {
-		if (new == NULL && (new = qdisc_create_dflt(sch->dev, 
-					&pfifo_qdisc_ops)) == NULL)
-					return -ENOBUFS;
+		if (new == NULL && (new = qdisc_create_dflt(sch->dev,
+							    &pfifo_qdisc_ops))
+		    == NULL)
+			return -ENOBUFS;
 		sch_tree_lock(sch);
 		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
 			if (cl->prio_activity)
-				htb_deactivate (qdisc_priv(sch),cl);
+				htb_deactivate(qdisc_priv(sch), cl);
 
 			/* TODO: is it correct ? Why CBQ doesn't do it ? */
-			sch->q.qlen -= (*old)->q.qlen;	
+			sch->q.qlen -= (*old)->q.qlen;
 			qdisc_reset(*old);
 		}
 		sch_tree_unlock(sch);
@@ -1194,16 +1227,16 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	return -ENOENT;
 }
 
-static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
 }
 
 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 {
-	struct htb_class *cl = htb_find(classid,sch);
-	if (cl) 
+	struct htb_class *cl = htb_find(classid, sch);
+	if (cl)
 		cl->refcnt++;
 	return (unsigned long)cl;
 }
@@ -1218,7 +1251,7 @@ static void htb_destroy_filters(struct tcf_proto **fl)
 	}
 }
 
-static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
+static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	if (!cl->level) {
@@ -1228,44 +1261,44 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
 	}
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
-	
-	htb_destroy_filters (&cl->filter_list);
-	
-	while (!list_empty(&cl->children)) 
-		htb_destroy_class (sch,list_entry(cl->children.next,
-					struct htb_class,sibling));
+
+	htb_destroy_filters(&cl->filter_list);
+
+	while (!list_empty(&cl->children))
+		htb_destroy_class(sch, list_entry(cl->children.next,
+						  struct htb_class, sibling));
 
 	/* note: this delete may happen twice (see htb_delete) */
 	list_del(&cl->hlist);
 	list_del(&cl->sibling);
-	
+
 	if (cl->prio_activity)
-		htb_deactivate (q,cl);
-	
+		htb_deactivate(q, cl);
+
 	if (cl->cmode != HTB_CAN_SEND)
-		rb_erase(&cl->pq_node,q->wait_pq+cl->level);
-	
+		rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
 	kfree(cl);
 }
 
 /* always caled under BH & queue lock */
-static void htb_destroy(struct Qdisc* sch)
+static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 
-	del_timer_sync (&q->timer);
+	del_timer_sync(&q->timer);
 #ifdef HTB_RATECM
-	del_timer_sync (&q->rttim);
+	del_timer_sync(&q->rttim);
 #endif
 	/* This line used to be after htb_destroy_class call below
 	   and surprisingly it worked in 2.4. But it must precede it 
 	   because filter need its target class alive to be able to call
 	   unbind_filter on it (without Oops). */
 	htb_destroy_filters(&q->filter_list);
-	
-	while (!list_empty(&q->root)) 
-		htb_destroy_class (sch,list_entry(q->root.next,
-					struct htb_class,sibling));
+
+	while (!list_empty(&q->root))
+		htb_destroy_class(sch, list_entry(q->root.next,
+						  struct htb_class, sibling));
 
 	__skb_queue_purge(&q->direct_queue);
 }
@@ -1273,23 +1306,23 @@ static void htb_destroy(struct Qdisc* sch)
 static int htb_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	// TODO: why don't allow to delete subtree ? references ? does
 	// tc subsys quarantee us that in htb_destroy it holds no class
 	// refs so that we can remove children safely there ?
 	if (!list_empty(&cl->children) || cl->filter_cnt)
 		return -EBUSY;
-	
+
 	sch_tree_lock(sch);
-	
+
 	/* delete from hash and active; remainder in destroy_class */
 	list_del_init(&cl->hlist);
 	if (cl->prio_activity)
-		htb_deactivate (q,cl);
+		htb_deactivate(q, cl);
 
 	if (--cl->refcnt == 0)
-		htb_destroy_class(sch,cl);
+		htb_destroy_class(sch, cl);
 
 	sch_tree_unlock(sch);
 	return 0;
@@ -1297,41 +1330,44 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 static void htb_put(struct Qdisc *sch, unsigned long arg)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	if (--cl->refcnt == 0)
-		htb_destroy_class(sch,cl);
+		htb_destroy_class(sch, cl);
 }
 
-static int htb_change_class(struct Qdisc *sch, u32 classid, 
-		u32 parentid, struct rtattr **tca, unsigned long *arg)
+static int htb_change_class(struct Qdisc *sch, u32 classid,
+			    u32 parentid, struct rtattr **tca,
+			    unsigned long *arg)
 {
 	int err = -EINVAL;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = (struct htb_class*)*arg,*parent;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct htb_class *cl = (struct htb_class *)*arg, *parent;
+	struct rtattr *opt = tca[TCA_OPTIONS - 1];
 	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct rtattr *tb[TCA_HTB_RTAB];
 	struct tc_htb_opt *hopt;
 
 	/* extract all subattrs from opt attr */
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
-			tb[TCA_HTB_PARMS-1] == NULL ||
-			RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
+	    tb[TCA_HTB_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
 		goto failure;
-	
-	parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
 
-	hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
+	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
+
+	hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
 
-	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
-	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
-	if (!rtab || !ctab) goto failure;
+	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
+	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
+	if (!rtab || !ctab)
+		goto failure;
 
-	if (!cl) { /* new class */
+	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
 		/* check for valid classid */
-		if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
+		if (!classid || TC_H_MAJ(classid ^ sch->handle)
+		    || htb_find(classid, sch))
 			goto failure;
 
 		/* check maximal depth */
@@ -1342,7 +1378,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		err = -ENOBUFS;
 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
-		
+
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
 		INIT_LIST_HEAD(&cl->hlist);
@@ -1357,46 +1393,53 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (parent && !parent->level) {
 			/* turn parent into inner node */
 			sch->q.qlen -= parent->un.leaf.q->q.qlen;
-			qdisc_destroy (parent->un.leaf.q);
-			if (parent->prio_activity) 
-				htb_deactivate (q,parent);
+			qdisc_destroy(parent->un.leaf.q);
+			if (parent->prio_activity)
+				htb_deactivate(q, parent);
 
 			/* remove from evt list because of level change */
 			if (parent->cmode != HTB_CAN_SEND) {
-				rb_erase(&parent->pq_node,q->wait_pq);
+				rb_erase(&parent->pq_node, q->wait_pq);
 				parent->cmode = HTB_CAN_SEND;
 			}
 			parent->level = (parent->parent ? parent->parent->level
-					: TC_HTB_MAXDEPTH) - 1;
-			memset (&parent->un.inner,0,sizeof(parent->un.inner));
+					 : TC_HTB_MAXDEPTH) - 1;
+			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
 		}
 		/* leaf (we) needs elementary qdisc */
 		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
 
-		cl->classid = classid; cl->parent = parent;
+		cl->classid = classid;
+		cl->parent = parent;
 
 		/* set class to be in HTB_CAN_SEND state */
 		cl->tokens = hopt->buffer;
 		cl->ctokens = hopt->cbuffer;
-		cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */
+		cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60);	/* 1min */
 		PSCHED_GET_TIME(cl->t_c);
 		cl->cmode = HTB_CAN_SEND;
 
 		/* attach to the hash list and parent's family */
-		list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
-		list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
-	} else sch_tree_lock(sch);
+		list_add_tail(&cl->hlist, q->hash + htb_hash(classid));
+		list_add_tail(&cl->sibling,
+			      parent ? &parent->children : &q->root);
+	} else
+		sch_tree_lock(sch);
 
 	/* it used to be a nasty bug here, we have to check that node
-           is really leaf before changing cl->un.leaf ! */
+	   is really leaf before changing cl->un.leaf ! */
 	if (!cl->level) {
 		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
 		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
-			printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
+			printk(KERN_WARNING
+			       "HTB: quantum of class %X is small. Consider r2q change.\n",
+			       cl->classid);
 			cl->un.leaf.quantum = 1000;
 		}
 		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
-			printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
+			printk(KERN_WARNING
+			       "HTB: quantum of class %X is big. Consider r2q change.\n",
+			       cl->classid);
 			cl->un.leaf.quantum = 200000;
 		}
 		if (hopt->quantum)
@@ -1407,16 +1450,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 	cl->buffer = hopt->buffer;
 	cl->cbuffer = hopt->cbuffer;
-	if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
-	if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
+	if (cl->rate)
+		qdisc_put_rtab(cl->rate);
+	cl->rate = rtab;
+	if (cl->ceil)
+		qdisc_put_rtab(cl->ceil);
+	cl->ceil = ctab;
 	sch_tree_unlock(sch);
 
 	*arg = (unsigned long)cl;
 	return 0;
 
 failure:
-	if (rtab) qdisc_put_rtab(rtab);
-	if (ctab) qdisc_put_rtab(ctab);
+	if (rtab)
+		qdisc_put_rtab(rtab);
+	if (ctab)
+		qdisc_put_rtab(ctab);
 	return err;
 }
 
@@ -1430,23 +1479,23 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
 }
 
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
-	u32 classid)
+				     u32 classid)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = htb_find (classid,sch);
+	struct htb_class *cl = htb_find(classid, sch);
 
 	/*if (cl && !cl->level) return 0;
-	  The line above used to be there to prevent attaching filters to 
-	  leaves. But at least tc_index filter uses this just to get class 
-	  for other reasons so that we have to allow for it.
-	  ----
-	  19.6.2002 As Werner explained it is ok - bind filter is just
-	  another way to "lock" the class - unlike "get" this lock can
-	  be broken by class during destroy IIUC.
+	   The line above used to be there to prevent attaching filters to
+	   leaves. But at least tc_index filter uses this just to get class
+	   for other reasons so that we have to allow for it.
+	   ----
+	   19.6.2002 As Werner explained it is ok - bind filter is just
+	   another way to "lock" the class - unlike "get" this lock can
+	   be broken by class during destroy IIUC.
 	 */
-	if (cl) 
-		cl->filter_cnt++; 
-	else 
+	if (cl)
+		cl->filter_cnt++;
+	else
 		q->filter_cnt++;
 	return (unsigned long)cl;
 }
@@ -1456,9 +1505,9 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
 
-	if (cl) 
-		cl->filter_cnt--; 
-	else 
+	if (cl)
+		cl->filter_cnt--;
+	else
 		q->filter_cnt--;
 }
 
@@ -1472,8 +1521,9 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 	for (i = 0; i < HTB_HSIZE; i++) {
 		struct list_head *p;
-		list_for_each (p,q->hash+i) {
-			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		list_for_each(p, q->hash + i) {
+			struct htb_class *cl =
+			    list_entry(p, struct htb_class, hlist);
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
@@ -1521,12 +1571,13 @@ static struct Qdisc_ops htb_qdisc_ops = {
 
 static int __init htb_module_init(void)
 {
-    return register_qdisc(&htb_qdisc_ops);
+	return register_qdisc(&htb_qdisc_ops);
 }
-static void __exit htb_module_exit(void) 
+static void __exit htb_module_exit(void)
 {
-    unregister_qdisc(&htb_qdisc_ops);
+	unregister_qdisc(&htb_qdisc_ops);
 }
+
 module_init(htb_module_init)
 module_exit(htb_module_exit)
 MODULE_LICENSE("GPL");
-- 
GitLab


From 0cef296da9331e871401076b8c0688b2b31fcadd Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 10 Aug 2006 23:35:38 -0700
Subject: [PATCH 0453/1063] [HTB]: Use hlist for hash lists.

Use hlist instead of list for the hash list. This saves
space, and we can check for double delete better.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 49 +++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6c6cac65255f7..a686b9511b054 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -104,7 +104,7 @@ struct htb_class {
 	/* topology */
 	int level;		/* our level (see above) */
 	struct htb_class *parent;	/* parent class */
-	struct list_head hlist;	/* classid hash list item */
+	struct hlist_node hlist;	/* classid hash list item */
 	struct list_head sibling;	/* sibling list item */
 	struct list_head children;	/* children list */
 
@@ -163,8 +163,8 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
 
 struct htb_sched {
 	struct list_head root;	/* root classes list */
-	struct list_head hash[HTB_HSIZE];	/* hashed by classid */
-	struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */
+	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
 	/* self list - roots of self generating tree */
 	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
@@ -220,12 +220,13 @@ static inline int htb_hash(u32 h)
 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct list_head *p;
+	struct hlist_node *p;
+	struct htb_class *cl;
+
 	if (TC_H_MAJ(handle) != sch->handle)
 		return NULL;
 
-	list_for_each(p, q->hash + htb_hash(handle)) {
-		struct htb_class *cl = list_entry(p, struct htb_class, hlist);
+	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
 		if (cl->classid == handle)
 			return cl;
 	}
@@ -675,7 +676,9 @@ static void htb_rate_timer(unsigned long arg)
 {
 	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct list_head *p;
+	struct hlist_node *p;
+	struct htb_class *cl;
+
 
 	/* lock queue so that we can muck with it */
 	spin_lock_bh(&sch->dev->queue_lock);
@@ -686,9 +689,8 @@ static void htb_rate_timer(unsigned long arg)
 	/* scan and recompute one bucket at time */
 	if (++q->recmp_bucket >= HTB_HSIZE)
 		q->recmp_bucket = 0;
-	list_for_each(p, q->hash + q->recmp_bucket) {
-		struct htb_class *cl = list_entry(p, struct htb_class, hlist);
 
+	hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
 		RT_GEN(cl->sum_bytes, cl->rate_bytes);
 		RT_GEN(cl->sum_packets, cl->rate_packets);
 	}
@@ -1041,10 +1043,10 @@ static void htb_reset(struct Qdisc *sch)
 	int i;
 
 	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *p;
-		list_for_each(p, q->hash + i) {
-			struct htb_class *cl =
-			    list_entry(p, struct htb_class, hlist);
+		struct hlist_node *p;
+		struct htb_class *cl;
+
+		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
 			if (cl->level)
 				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 			else {
@@ -1091,7 +1093,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 
 	INIT_LIST_HEAD(&q->root);
 	for (i = 0; i < HTB_HSIZE; i++)
-		INIT_LIST_HEAD(q->hash + i);
+		INIT_HLIST_HEAD(q->hash + i);
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
 		INIT_LIST_HEAD(q->drops + i);
 
@@ -1269,7 +1271,8 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 						  struct htb_class, sibling));
 
 	/* note: this delete may happen twice (see htb_delete) */
-	list_del(&cl->hlist);
+	if (!hlist_unhashed(&cl->hlist))
+		hlist_del(&cl->hlist);
 	list_del(&cl->sibling);
 
 	if (cl->prio_activity)
@@ -1317,7 +1320,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 	sch_tree_lock(sch);
 
 	/* delete from hash and active; remainder in destroy_class */
-	list_del_init(&cl->hlist);
+	if (!hlist_unhashed(&cl->hlist))
+		hlist_del(&cl->hlist);
+
 	if (cl->prio_activity)
 		htb_deactivate(q, cl);
 
@@ -1381,7 +1386,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
-		INIT_LIST_HEAD(&cl->hlist);
+		INIT_HLIST_NODE(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 
@@ -1420,7 +1425,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		cl->cmode = HTB_CAN_SEND;
 
 		/* attach to the hash list and parent's family */
-		list_add_tail(&cl->hlist, q->hash + htb_hash(classid));
+		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
 		list_add_tail(&cl->sibling,
 			      parent ? &parent->children : &q->root);
 	} else
@@ -1520,10 +1525,10 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *p;
-		list_for_each(p, q->hash + i) {
-			struct htb_class *cl =
-			    list_entry(p, struct htb_class, hlist);
+		struct hlist_node *p;
+		struct htb_class *cl;
+
+		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
-- 
GitLab


From 3696f625e2efa1f1b228b276788274e1eb86fcfa Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 10 Aug 2006 23:36:01 -0700
Subject: [PATCH 0454/1063] [HTB]: rbtree cleanup

Add code to initialize rb tree nodes, and check for double deletion.
This is not a real fix, but with it I can make the bug trap sometimes,
and it may be a band-aid for: http://bugzilla.kernel.org/show_bug.cgi?id=6681

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index a686b9511b054..bb3ddd4784b1c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -366,7 +366,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
  * When we are past last key we return NULL.
  * Average complexity is 2 steps per call.
  */
-static void htb_next_rb_node(struct rb_node **n)
+static inline void htb_next_rb_node(struct rb_node **n)
 {
 	*n = rb_next(*n);
 }
@@ -388,6 +388,18 @@ static inline void htb_add_class_to_row(struct htb_sched *q,
 	}
 }
 
+/* If this triggers, it is a bug in this code, but it need not be fatal */
+static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
+{
+	if (RB_EMPTY_NODE(rb)) {
+		WARN_ON(1);
+	} else {
+		rb_erase(rb, root);
+		RB_CLEAR_NODE(rb);
+	}
+}
+
+
 /**
  * htb_remove_class_from_row - removes class from its row
  *
@@ -401,10 +413,12 @@ static inline void htb_remove_class_from_row(struct htb_sched *q,
 
 	while (mask) {
 		int prio = ffz(~mask);
+
 		mask &= ~(1 << prio);
 		if (q->ptr[cl->level][prio] == cl->node + prio)
 			htb_next_rb_node(q->ptr[cl->level] + prio);
-		rb_erase(cl->node + prio, q->row[cl->level] + prio);
+
+		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
 		if (!q->row[cl->level][prio].rb_node)
 			m |= 1 << prio;
 	}
@@ -472,7 +486,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 				p->un.inner.ptr[prio] = NULL;
 			}
 
-			rb_erase(cl->node + prio, p->un.inner.feed + prio);
+			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
 
 			if (!p->un.inner.feed[prio].rb_node)
 				mask |= 1 << prio;
@@ -739,7 +753,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 		htb_change_class_mode(q, cl, &diff);
 		if (old_mode != cl->cmode) {
 			if (old_mode != HTB_CAN_SEND)
-				rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 			if (cl->cmode != HTB_CAN_SEND)
 				htb_add_to_wait_tree(q, cl, diff);
 		}
@@ -782,7 +796,7 @@ static long htb_do_events(struct htb_sched *q, int level)
 		if (time_after(cl->pq_key, q->jiffies)) {
 			return cl->pq_key - q->jiffies;
 		}
-		rb_erase(p, q->wait_pq + level);
+		htb_safe_rb_erase(p, q->wait_pq + level);
 		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
 		htb_change_class_mode(q, cl, &diff);
 		if (cl->cmode != HTB_CAN_SEND)
@@ -1279,7 +1293,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 		htb_deactivate(q, cl);
 
 	if (cl->cmode != HTB_CAN_SEND)
-		rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 
 	kfree(cl);
 }
@@ -1370,6 +1384,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
+		int prio;
+
 		/* check for valid classid */
 		if (!classid || TC_H_MAJ(classid ^ sch->handle)
 		    || htb_find(classid, sch))
@@ -1389,6 +1405,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		INIT_HLIST_NODE(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
+		RB_CLEAR_NODE(&cl->pq_node);
+
+		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
+			RB_CLEAR_NODE(&cl->node[prio]);
 
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 		   so that can't be used inside of sch_tree_lock
@@ -1404,7 +1424,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 			/* remove from evt list because of level change */
 			if (parent->cmode != HTB_CAN_SEND) {
-				rb_erase(&parent->pq_node, q->wait_pq);
+				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
 				parent->cmode = HTB_CAN_SEND;
 			}
 			parent->level = (parent->parent ? parent->parent->level
-- 
GitLab


From b6fe17d6cc5d570b72f8e4da351b593c5a680355 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 29 Aug 2006 17:06:13 -0700
Subject: [PATCH 0455/1063] [NET] netdev: Check name length

Some improvements to make the name interface more robust.  These APIs
are safe now by convention, but it is worth providing some safety checks
against future bugs.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index fc82f6f6e1c17..14de297d024d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -640,6 +640,8 @@ int dev_valid_name(const char *name)
 {
 	if (*name == '\0')
 		return 0;
+	if (strlen(name) >= IFNAMSIZ)
+		return 0;
 	if (!strcmp(name, ".") || !strcmp(name, ".."))
 		return 0;
 
@@ -3191,13 +3193,15 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 	struct net_device *dev;
 	int alloc_size;
 
+	BUG_ON(strlen(name) >= sizeof(dev->name));
+
 	/* ensure 32-byte alignment of both the device and private area */
 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
-		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
 		return NULL;
 	}
 
-- 
GitLab


From d880309ae17783c27016bf4f903782d322d0a2a1 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Fri, 11 Aug 2006 16:43:41 -0700
Subject: [PATCH 0456/1063] [DECNET] Fix to multiple tables routing

Here is a fix to Patrick McHardy's "increase number of routing tables"
patch for DECnet. I have just tested this and it appears to be working
fine with this patch.

Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_rules.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 878312ff34ec7..c8d9411e5943f 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -116,6 +116,7 @@ static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
 	[FRA_SRC]	= { .type = NLA_U16 },
 	[FRA_DST]	= { .type = NLA_U16 },
 	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_TABLE]     = { .type = NLA_U32 },
 };
 
 static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
-- 
GitLab


From d1aa62f15b511457af2233150c960dc1fd02769b Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Fri, 11 Aug 2006 16:44:18 -0700
Subject: [PATCH 0457/1063] [DECNET] Fix to decnet rules compare function

Here is a fix to the DECnet rules compare function where we used 32bit
values rather than 16bit values. Spotted by Patrick McHardy.

Signed-off-by: Steven Whitehouse <steve@chygwyn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_rules.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index c8d9411e5943f..977bb56c3ce49 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -197,10 +197,10 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 		return 0;
 #endif
 
-	if (tb[FRA_SRC] && (r->src != nla_get_u32(tb[FRA_SRC])))
+	if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
 		return 0;
 
-	if (tb[FRA_DST] && (r->dst != nla_get_u32(tb[FRA_DST])))
+	if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST])))
 		return 0;
 
 	return 1;
-- 
GitLab


From 90d41122f79c8c3687d965dde4c6d30a6e0cac4c Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 14 Aug 2006 23:49:16 -0700
Subject: [PATCH 0458/1063] [IPV6] ip6_fib.c: make code static

Make the following needlessly global code static:
- fib6_walker_lock
- struct fib6_walker_list
- fib6_walk_continue()
- fib6_walk()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 25 -------------------------
 net/ipv6/ip6_fib.c    | 29 ++++++++++++++++++++++++-----
 2 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c0660cea9a2f6..69c444209781e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -92,28 +92,6 @@ struct fib6_walker_t
 	void *args;
 };
 
-extern struct fib6_walker_t fib6_walker_list;
-extern rwlock_t fib6_walker_lock;
-
-static inline void fib6_walker_link(struct fib6_walker_t *w)
-{
-	write_lock_bh(&fib6_walker_lock);
-	w->next = fib6_walker_list.next;
-	w->prev = &fib6_walker_list;
-	w->next->prev = w;
-	w->prev->next = w;
-	write_unlock_bh(&fib6_walker_lock);
-}
-
-static inline void fib6_walker_unlink(struct fib6_walker_t *w)
-{
-	write_lock_bh(&fib6_walker_lock);
-	w->next->prev = w->prev;
-	w->prev->next = w->next;
-	w->prev = w->next = w;
-	write_unlock_bh(&fib6_walker_lock);
-}
-
 struct rt6_statistics {
 	__u32		fib_nodes;
 	__u32		fib_route_nodes;
@@ -195,9 +173,6 @@ struct fib6_node		*fib6_locate(struct fib6_node *root,
 extern void			fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
 					       int prune, void *arg);
 
-extern int			fib6_walk(struct fib6_walker_t *w);
-extern int			fib6_walk_continue(struct fib6_walker_t *w);
-
 extern int			fib6_add(struct fib6_node *root,
 					 struct rt6_info *rt,
 					 struct nlmsghdr *nlh,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bececbe9dd2cc..be36f4acda944 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -69,8 +69,7 @@ struct fib6_cleaner_t
 	void *arg;
 };
 
-DEFINE_RWLOCK(fib6_walker_lock);
-
+static DEFINE_RWLOCK(fib6_walker_lock);
 
 #ifdef CONFIG_IPV6_SUBTREES
 #define FWS_INIT FWS_S
@@ -82,6 +81,8 @@ DEFINE_RWLOCK(fib6_walker_lock);
 
 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+static int fib6_walk(struct fib6_walker_t *w);
+static int fib6_walk_continue(struct fib6_walker_t *w);
 
 /*
  *	A routing update causes an increase of the serial number on the
@@ -94,13 +95,31 @@ static __u32 rt_sernum;
 
 static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
 
-struct fib6_walker_t fib6_walker_list = {
+static struct fib6_walker_t fib6_walker_list = {
 	.prev	= &fib6_walker_list,
 	.next	= &fib6_walker_list, 
 };
 
 #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
 
+static inline void fib6_walker_link(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next = fib6_walker_list.next;
+	w->prev = &fib6_walker_list;
+	w->next->prev = w;
+	w->prev->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
+
+static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next->prev = w->prev;
+	w->prev->next = w->next;
+	w->prev = w->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
 static __inline__ u32 fib6_new_sernum(void)
 {
 	u32 n = ++rt_sernum;
@@ -1173,7 +1192,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
  *	<0  -> walk is terminated by an error.
  */
 
-int fib6_walk_continue(struct fib6_walker_t *w)
+static int fib6_walk_continue(struct fib6_walker_t *w)
 {
 	struct fib6_node *fn, *pn;
 
@@ -1247,7 +1266,7 @@ int fib6_walk_continue(struct fib6_walker_t *w)
 	}
 }
 
-int fib6_walk(struct fib6_walker_t *w)
+static int fib6_walk(struct fib6_walker_t *w)
 {
 	int res;
 
-- 
GitLab


From 50da859d4e566fba90ebda87b843970d902c903e Mon Sep 17 00:00:00 2001
From: Andreas Mohr <andi@lisas.de>
Date: Mon, 14 Aug 2006 23:54:30 -0700
Subject: [PATCH 0459/1063] [TG3]: Constify firmware structs

Constify largish areas of firmware data in Tigon3 ethernet driver.

non-const:

lsmod:
tg3                   101404  0

objdump -x:
.rodata 000003e8
.data 00004a0c

ls -l:
-rw-r--r-- 1 root root 114404 2006-08-19 21:36 drivers/net/tg3.ko

const:

lsmod:
tg3                   101404  0

objdump -x:
.rodata 000042c8
.data 00000b4c

ls -l:
-rw-r--r-- 1 root root 114532 2006-08-19 21:06 drivers/net/tg3.ko

Signed-off-by: Andreas Mohr <andi@lisas.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 6f5d3a38c5822..34078a7c1a843 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -264,7 +264,7 @@ static struct pci_device_id tg3_pci_tbl[] = {
 
 MODULE_DEVICE_TABLE(pci, tg3_pci_tbl);
 
-static struct {
+static const struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_stats_keys[TG3_NUM_STATS] = {
 	{ "rx_octets" },
@@ -345,7 +345,7 @@ static struct {
 	{ "nic_tx_threshold_hit" }
 };
 
-static struct {
+static const struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_test_keys[TG3_NUM_TEST] = {
 	{ "nvram test     (online) " },
@@ -4969,7 +4969,7 @@ static int tg3_halt(struct tg3 *tp, int kind, int silent)
 #define TG3_FW_BSS_ADDR		0x08000a70
 #define TG3_FW_BSS_LEN		0x10
 
-static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
+static const u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
 	0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
 	0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000,
 	0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034,
@@ -5063,7 +5063,7 @@ static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
 	0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000
 };
 
-static u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
+static const u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
 	0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430,
 	0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
@@ -5128,13 +5128,13 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset)
 struct fw_info {
 	unsigned int text_base;
 	unsigned int text_len;
-	u32 *text_data;
+	const u32 *text_data;
 	unsigned int rodata_base;
 	unsigned int rodata_len;
-	u32 *rodata_data;
+	const u32 *rodata_data;
 	unsigned int data_base;
 	unsigned int data_len;
-	u32 *data_data;
+	const u32 *data_data;
 };
 
 /* tp->lock is held. */
@@ -5266,7 +5266,7 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
 #define TG3_TSO_FW_BSS_ADDR		0x08001b80
 #define TG3_TSO_FW_BSS_LEN		0x894
 
-static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
+static const u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
 	0x0e000003, 0x00000000, 0x08001b24, 0x00000000, 0x10000003, 0x00000000,
 	0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd4000, 0x03a0f021, 0x3c100800,
 	0x26100000, 0x0e000010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
@@ -5553,7 +5553,7 @@ static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
 	0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c, 0x00000000, 0x00000000,
 };
 
-static u32 tg3TsoFwRodata[] = {
+static const u32 tg3TsoFwRodata[] = {
 	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
 	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x496e0000, 0x73746b6f,
 	0x66662a2a, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000,
@@ -5561,7 +5561,7 @@ static u32 tg3TsoFwRodata[] = {
 	0x00000000,
 };
 
-static u32 tg3TsoFwData[] = {
+static const u32 tg3TsoFwData[] = {
 	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x362e3000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000,
@@ -5583,7 +5583,7 @@ static u32 tg3TsoFwData[] = {
 #define TG3_TSO5_FW_BSS_ADDR		0x00010f50
 #define TG3_TSO5_FW_BSS_LEN		0x88
 
-static u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
 	0x0c004003, 0x00000000, 0x00010f04, 0x00000000, 0x10000003, 0x00000000,
 	0x0000000d, 0x0000000d, 0x3c1d0001, 0x37bde000, 0x03a0f021, 0x3c100001,
 	0x26100000, 0x0c004010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
@@ -5742,14 +5742,14 @@ static u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
 	0x00000000, 0x00000000, 0x00000000,
 };
 
-static u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
 	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
 	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000,
 	0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
 	0x00000000, 0x00000000, 0x00000000,
 };
 
-static u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
 	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x322e3000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000,
 };
-- 
GitLab


From 2aa7f36cdb332a32849afbf25fcbf35dce5b1940 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 14 Aug 2006 23:55:20 -0700
Subject: [PATCH 0460/1063] [DECNET]: cleanups

- make the following needlessly global functions static:
  - dn_fib.c: dn_fib_sync_down()
  - dn_fib.c: dn_fib_sync_up()
  - dn_rules.c: dn_fib_rule_action()
- remove the following unneeded prototype:
  - dn_fib.c: dn_cache_dump()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_fib.h  | 3 ---
 net/decnet/dn_fib.c   | 9 +++++----
 net/decnet/dn_rules.c | 4 ++--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index d97aa10c463fa..f01626cbbed6f 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -131,9 +131,6 @@ extern __le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type);
 extern void dn_fib_flush(void);
 extern void dn_fib_select_multipath(const struct flowi *fl,
 					struct dn_fib_res *res);
-extern int dn_fib_sync_down(__le16 local, struct net_device *dev,
-				int force);
-extern int dn_fib_sync_up(struct net_device *dev);
 
 /*
  * dn_tables.c
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 5ccca3ed53bd7..1cf010124ec5a 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -55,8 +55,6 @@
 
 #define endfor_nexthops(fi) }
 
-extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
 static DEFINE_SPINLOCK(dn_fib_multipath_lock);
 static struct dn_fib_info *dn_fib_info_list;
 static DEFINE_SPINLOCK(dn_fib_info_lock);
@@ -80,6 +78,9 @@ static struct
 	[RTN_XRESOLVE] =    { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
 };
 
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
+static int dn_fib_sync_up(struct net_device *dev);
+
 void dn_fib_free_info(struct dn_fib_info *fi)
 {
 	if (fi->fib_dead == 0) {
@@ -651,7 +652,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event,
 	return NOTIFY_DONE;
 }
 
-int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
 {
         int ret = 0;
         int scope = RT_SCOPE_NOWHERE;
@@ -695,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
 }
 
 
-int dn_fib_sync_up(struct net_device *dev)
+static int dn_fib_sync_up(struct net_device *dev)
 {
         int ret = 0;
 
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 977bb56c3ce49..50e819edf8c72 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -75,8 +75,8 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
 	return err;
 }
 
-int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags,
-		       struct fib_lookup_arg *arg)
+static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
+			      int flags, struct fib_lookup_arg *arg)
 {
 	int err = -EAGAIN;
 	struct dn_fib_table *tbl;
-- 
GitLab


From 81aa646cc4df3779bcbf9d18cc2c0813ee9b3262 Mon Sep 17 00:00:00 2001
From: Martin Bligh <mbligh@google.com>
Date: Mon, 14 Aug 2006 23:57:10 -0700
Subject: [PATCH 0461/1063] [IPV4]: add the UdpSndbufErrors and UdpRcvbufErrors
 MIBs

Signed-off-by: Martin Bligh <mbligh@google.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 include/linux/snmp.h |  2 ++
 net/ipv4/proc.c      |  2 ++
 net/ipv4/udp.c       | 16 +++++++++++++++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 4db25d5c7cd17..30156556f78d7 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -155,6 +155,8 @@ enum
 	UDP_MIB_NOPORTS,			/* NoPorts */
 	UDP_MIB_INERRORS,			/* InErrors */
 	UDP_MIB_OUTDATAGRAMS,			/* OutDatagrams */
+	UDP_MIB_RCVBUFERRORS,			/* RcvbufErrors */
+	UDP_MIB_SNDBUFERRORS,			/* SndbufErrors */
 	__UDP_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d61e2a9d394d2..9c6cbe3d9fb80 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = {
 	SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
 	SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
 	SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+	SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 87152510980c8..514c1e9ae8103 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -662,6 +662,16 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
 		return len;
 	}
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+	}
 	return err;
 
 do_confirm:
@@ -981,6 +991,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
+	int rc;
 
 	/*
 	 *	Charge it to the socket, dropping if the queue is full.
@@ -1027,7 +1038,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
+	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return -1;
-- 
GitLab


From a18135eb9389c26d36ef5c05bd8bc526e0cbe883 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 15 Aug 2006 00:00:09 -0700
Subject: [PATCH 0462/1063] [IPV6]: Add UDP_MIB_{SND,RCV}BUFERRORS handling.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 780b89f6dfcc3..c813381020bc8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -345,6 +345,8 @@ static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
+	int rc;
+
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
 		kfree_skb(skb);
 		return -1;
@@ -356,7 +358,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		return 0;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
+	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return 0;
@@ -857,6 +862,16 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
 		return len;
 	}
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+	}
 	return err;
 
 do_confirm:
-- 
GitLab


From 97a4f3e7110619568aa239fe19143d9ec42dede5 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Tue, 15 Aug 2006 00:01:05 -0700
Subject: [PATCH 0463/1063] [NETFILTER]: Make unused signal code go away so
 nobody copies its brokenness

This code is wrong on so many levels, please lose it so it isn't
replicated anywhere else.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebtables.c | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3a13ed643459b..d06a5075b5f69 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -37,30 +37,9 @@
 #include <linux/netfilter_ipv4/listhelp.h>
 #include <linux/mutex.h>
 
-#if 0
-/* use this for remote debugging
- * Copyright (C) 1998 by Ori Pomerantz
- * Print the string to the appropriate tty, the one
- * the current task uses
- */
-static void print_string(char *str)
-{
-	struct tty_struct *my_tty;
-
-	/* The tty for the current task */
-	my_tty = current->signal->tty;
-	if (my_tty != NULL) {
-		my_tty->driver->write(my_tty, 0, str, strlen(str));
-		my_tty->driver->write(my_tty, 0, "\015\012", 2);
-	}
-}
-
-#define BUGPRINT(args) print_string(args);
-#else
 #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
                                          "report to author: "format, ## args)
 /* #define BUGPRINT(format, args...) */
-#endif
 #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\
                                          ": out of memory: "format, ## args)
 /* #define MEMPRINT(format, args...) */
-- 
GitLab


From 9a673e563e543a5c8a6f9824562e55e807b8a56c Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 15 Aug 2006 00:03:53 -0700
Subject: [PATCH 0464/1063] [SELINUX]: security/selinux/hooks.c: Make 4
 functions static.

This patch makes four needlessly global functions static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/hooks.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2a6bbb921e1ed..180b26b97d2d6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3598,7 +3598,7 @@ static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
 	}
 }
 
-void selinux_sock_graft(struct sock* sk, struct socket *parent)
+static void selinux_sock_graft(struct sock* sk, struct socket *parent)
 {
 	struct inode_security_struct *isec = SOCK_INODE(parent)->i_security;
 	struct sk_security_struct *sksec = sk->sk_security;
@@ -3608,8 +3608,8 @@ void selinux_sock_graft(struct sock* sk, struct socket *parent)
 	selinux_netlbl_sock_graft(sk, parent);
 }
 
-int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
-					   struct request_sock *req)
+static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+				     struct request_sock *req)
 {
 	struct sk_security_struct *sksec = sk->sk_security;
 	int err;
@@ -3638,7 +3638,8 @@ int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 	return 0;
 }
 
-void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req)
+static void selinux_inet_csk_clone(struct sock *newsk,
+				   const struct request_sock *req)
 {
 	struct sk_security_struct *newsksec = newsk->sk_security;
 
@@ -3649,7 +3650,8 @@ void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req)
 	   time it will have been created and available. */
 }
 
-void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+static void selinux_req_classify_flow(const struct request_sock *req,
+				      struct flowi *fl)
 {
 	fl->secid = req->secid;
 }
-- 
GitLab


From 9c3bd6833a4df1abd9ecd3b51492b8949bf9cd11 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 15 Aug 2006 00:05:38 -0700
Subject: [PATCH 0465/1063] [IRDA]: Replace hard-coded dev_self[] array sizes
 with ARRAY_SIZE()

Several IR drivers used "for (i = 0; i < 4; i++)" to walk their
dev_self[] table.  Better to use ARRAY_SIZE().  And fix ali-ircc so it
won't run off the end if we find too many adapters.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/ali-ircc.c    | 8 +++++++-
 drivers/net/irda/irport.c      | 4 ++--
 drivers/net/irda/via-ircc.c    | 5 ++++-
 drivers/net/irda/w83977af_ir.c | 4 ++--
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index e3c8cd5eca676..68d4c418cb984 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -249,7 +249,7 @@ static void __exit ali_ircc_cleanup(void)
 
 	IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__);	
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			ali_ircc_close(dev_self[i]);
 	}
@@ -273,6 +273,12 @@ static int ali_ircc_open(int i, chipio_t *info)
 	int err;
 			
 	IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__);	
+
+	if (i >= ARRAY_SIZE(dev_self)) {
+		IRDA_ERROR("%s(), maximum number of supported chips reached!\n",
+			   __FUNCTION__);
+		return -ENOMEM;
+	}
 	
 	/* Set FIR FIFO and DMA Threshold */
 	if ((ali_ircc_setup(info)) == -1)
diff --git a/drivers/net/irda/irport.c b/drivers/net/irda/irport.c
index 44efd49bf4a9f..ba4f3eb988b39 100644
--- a/drivers/net/irda/irport.c
+++ b/drivers/net/irda/irport.c
@@ -1090,7 +1090,7 @@ static int __init irport_init(void)
 {
  	int i;
 
- 	for (i=0; (io[i] < 2000) && (i < 4); i++) {
+ 	for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) {
  		if (irport_open(i, io[i], irq[i]) != NULL)
  			return 0;
  	}
@@ -1112,7 +1112,7 @@ static void __exit irport_cleanup(void)
 
         IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
  		if (dev_self[i])
  			irport_close(dev_self[i]);
  	}
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 8bafb455c102d..79b85f327500c 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -279,7 +279,7 @@ static void via_ircc_clean(void)
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			via_ircc_close(dev_self[i]);
 	}
@@ -327,6 +327,9 @@ static __devinit int via_ircc_open(int i, chipio_t * info, unsigned int id)
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
+	if (i >= ARRAY_SIZE(dev_self))
+		return -ENOMEM;
+
 	/* Allocate new instance of the driver */
 	dev = alloc_irdadev(sizeof(struct via_ircc_cb));
 	if (dev == NULL) 
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 0ea65c4c6f854..8421597072a74 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -117,7 +117,7 @@ static int __init w83977af_init(void)
 
 	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
 
-	for (i=0; (io[i] < 2000) && (i < 4); i++) { 
+	for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) {
 		if (w83977af_open(i, io[i], irq[i], dma[i]) == 0)
 			return 0;
 	}
@@ -136,7 +136,7 @@ static void __exit w83977af_cleanup(void)
 
         IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			w83977af_close(dev_self[i]);
 	}
-- 
GitLab


From 62872e2dcb3127b20a49e3b4b1d93523cf476cc4 Mon Sep 17 00:00:00 2001
From: Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
Date: Tue, 15 Aug 2006 00:09:17 -0700
Subject: [PATCH 0466/1063] [ARCNET]: SoHard PCI support

Add support for a SoHard PCI ARCnet card.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/com20020-pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 979a33df0a8c3..96d8a694d433c 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -161,6 +161,7 @@ static struct pci_device_id com20020pci_id_table[] = {
 	{ 0x1571, 0xa204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x1571, 0xa205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x1571, 0xa206, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
+	{ 0x10B5, 0x9030, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x10B5, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{0,}
 };
-- 
GitLab


From f8d8fda54a1bfcf8cf829e44c494b2b4582819aa Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 15 Aug 2006 00:15:41 -0700
Subject: [PATCH 0467/1063] [IPV6] udp: Fix typo in previous change.

UDPv6 stats are UDP6_foo not UDP_foo.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c813381020bc8..eb9e1b39c8f84 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -361,7 +361,7 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM)
-			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
+			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return 0;
@@ -870,7 +870,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	 * seems like overkill.
 	 */
 	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-		UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+		UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
 	}
 	return err;
 
-- 
GitLab


From 2942e90050569525628a9f34e0daaa9b661b49cc Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:30:25 -0700
Subject: [PATCH 0468/1063] [RTNETLINK]: Use rtnl_unicast() for rtnetlink
 unicasts

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  1 +
 net/core/rtnetlink.c      | 10 +++++++---
 net/decnet/dn_route.c     |  4 +---
 net/ipv4/ipmr.c           |  7 ++++---
 net/ipv4/route.c          |  7 +++----
 net/ipv6/addrconf.c       |  4 +---
 net/ipv6/route.c          |  4 +---
 net/sched/act_api.c       |  7 ++-----
 8 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 7e4aa48680a73..0e4f478e2cb56 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -584,6 +584,7 @@ struct rtnetlink_link
 
 extern struct rtnetlink_link * rtnetlink_links[NPROTO];
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
+extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
 extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 
 extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a1b783a6afc61..e02fa6a33f426 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -166,6 +166,11 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
 	return err;
 }
 
+int rtnl_unicast(struct sk_buff *skb, u32 pid)
+{
+	return nlmsg_unicast(rtnl, skb, pid);
+}
+
 int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
 	struct rtattr *mx = (struct rtattr*)skb->tail;
@@ -574,9 +579,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		goto errout;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
+	err = rtnl_unicast(skb, NETLINK_CB(skb).pid);
 errout:
 	kfree(iw_buf);
 	dev_put(dev);
@@ -825,3 +828,4 @@ EXPORT_SYMBOL(rtnl);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
+EXPORT_SYMBOL(rtnl_unicast);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 4c963213fba51..c5daf3557c1f1 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1611,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-
-	return err;
+	return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 
 out_free:
 	kfree_skb(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 85893eef6b16c..98f0aa0d42160 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 			e = NLMSG_DATA(nlh);
 			e->error = -ETIMEDOUT;
 			memset(&e->msg, 0, sizeof(e->msg));
-			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+			rtnl_unicast(skb, NETLINK_CB(skb).pid);
 		} else
 			kfree_skb(skb);
 	}
@@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 
 	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (skb->nh.iph->version == 0) {
-			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 				e->error = -EMSGSIZE;
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
-			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+			rtnl_unicast(skb, NETLINK_CB(skb).pid);
 		} else
 			ip_mr_forward(skb, c, 0);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 12128b82c9dcb..b8f6cadc5b3a9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2809,10 +2809,9 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:	return err;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+out:
+	return err;
 
 out_free:
 	kfree_skb(skb);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9ba1e811ba502..4f991a2234d0b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3268,9 +3268,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb,
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 out:
 	in6_ifa_put(ifa);
 	return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9ce28277f47fd..024c8e26c2ecc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2044,9 +2044,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 out:
 	return err;
 out_free:
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a2587b52e531d..6990747d6d5ac 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -459,7 +459,6 @@ static int
 act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 {
 	struct sk_buff *skb;
-	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
@@ -468,10 +467,8 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 		kfree_skb(skb);
 		return -EINVAL;
 	}
-	err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-	return err;
+
+	return rtnl_unicast(skb, pid);
 }
 
 static struct tc_action *
-- 
GitLab


From d387f6ad10764fc2174373b4a1cca443adee36e3 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:31:06 -0700
Subject: [PATCH 0469/1063] [NETLINK]: Add notification message sending
 interface

Adds nlmsg_notify() implementing proper notification logic. The
message is multicast to all listeners in the group. The
application the request originates from can request a unicast
report back, in which case said socket will be excluded from the
multicast to avoid duplicate notifications.

nlmsg_multicast() is extended to take allocation flags to
allow notification in atomic contexts.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h      |  5 +++--
 include/net/netlink.h        |  6 ++++--
 net/netlabel/netlabel_user.c |  2 +-
 net/netlink/af_netlink.c     | 34 +++++++++++++++++++++++++++++++++-
 net/netlink/genetlink.c      |  2 +-
 5 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 8c22872642667..97d6d3aba9d2c 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -133,11 +133,12 @@ static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr)
  * @skb: netlink message as socket buffer
  * @pid: own netlink pid to avoid sending to yourself
  * @group: multicast group id
+ * @flags: allocation flags
  */
 static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid,
-				    unsigned int group)
+				    unsigned int group, gfp_t flags)
 {
-	return nlmsg_multicast(genl_sock, skb, pid, group);
+	return nlmsg_multicast(genl_sock, skb, pid, group, flags);
 }
 
 /**
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 3a5e40b1e0450..b154b81d9a7a8 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -43,6 +43,7 @@
  * Message Sending:
  *   nlmsg_multicast()			multicast message to several groups
  *   nlmsg_unicast()			unicast a message to a single socket
+ *   nlmsg_notify()			send notification message
  *
  * Message Length Calculations:
  *   nlmsg_msg_size(payload)		length of message w/o padding
@@ -545,15 +546,16 @@ static inline void nlmsg_free(struct sk_buff *skb)
  * @skb: netlink message as socket buffer
  * @pid: own netlink pid to avoid sending to yourself
  * @group: multicast group id
+ * @flags: allocation flags
  */
 static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
-				  u32 pid, unsigned int group)
+				  u32 pid, unsigned int group, gfp_t flags)
 {
 	int err;
 
 	NETLINK_CB(skb).dst_group = group;
 
-	err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL);
+	err = netlink_broadcast(sk, skb, pid, group, flags);
 	if (err > 0)
 		err = 0;
 
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 80022221b0a75..73cbe66e42ffe 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -154,5 +154,5 @@ int netlbl_netlink_snd(struct sk_buff *skb, u32 pid)
  */
 int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group)
 {
-	return genlmsg_multicast(skb, pid, group);
+	return genlmsg_multicast(skb, pid, group, GFP_KERNEL);
 }
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0f36ddc0b72d7..a80e4456e204b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1549,6 +1549,38 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
 	skb_pull(skb, msglen);
 }
 
+/**
+ * nlmsg_notify - send a notification netlink message
+ * @sk: netlink socket to use
+ * @skb: notification message
+ * @pid: destination netlink pid for reports or 0
+ * @group: destination multicast group or 0
+ * @report: 1 to report back, 0 to disable
+ * @flags: allocation flags
+ */
+int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
+		 unsigned int group, int report, gfp_t flags)
+{
+	int err = 0;
+
+	if (group) {
+		int exclude_pid = 0;
+
+		if (report) {
+			atomic_inc(&skb->users);
+			exclude_pid = pid;
+		}
+
+		/* errors reported via destination sk->sk_err */
+		nlmsg_multicast(sk, skb, exclude_pid, group, flags);
+	}
+
+	if (report)
+		err = nlmsg_unicast(sk, skb, pid);
+
+	return err;
+}
+
 #ifdef CONFIG_PROC_FS
 struct nl_seq_iter {
 	int link;
@@ -1802,4 +1834,4 @@ EXPORT_SYMBOL(netlink_set_err);
 EXPORT_SYMBOL(netlink_set_nonroot);
 EXPORT_SYMBOL(netlink_unicast);
 EXPORT_SYMBOL(netlink_unregister_notifier);
-
+EXPORT_SYMBOL(nlmsg_notify);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 75bb47a898dd7..d32599116c567 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -510,7 +510,7 @@ static int genl_ctrl_event(int event, void *data)
 		if (IS_ERR(msg))
 			return PTR_ERR(msg);
 
-		genlmsg_multicast(msg, 0, GENL_ID_CTRL);
+		genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL);
 		break;
 	}
 
-- 
GitLab


From 97676b6b5538b3e059d33b8338e7d5cc41c5f1f1 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:31:41 -0700
Subject: [PATCH 0470/1063] [RTNETLINK]: Add rtnetlink notification interface

Adds rtnl_notify() to send rtnetlink notification messages and
rtnl_set_sk_err() to report notification errors as socket
errors in order to indicate the need for a resync due to loss
of events.

nlmsg_report() is added to properly document the meaning of
NLM_F_ECHO.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  3 +++
 include/net/netlink.h     | 17 +++++++++++++++++
 net/core/rtnetlink.c      | 18 ++++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 0e4f478e2cb56..ecbe0349060c7 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -585,6 +585,9 @@ struct rtnetlink_link
 extern struct rtnetlink_link * rtnetlink_links[NPROTO];
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
 extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
+extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+		       struct nlmsghdr *nlh, gfp_t flags);
+extern void rtnl_set_sk_err(u32 group, int error);
 extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 
 extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index b154b81d9a7a8..bf593eb59e1bc 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -65,6 +65,9 @@
  *   nlmsg_validate()			validate netlink message incl. attrs
  *   nlmsg_for_each_attr()		loop over all attributes
  *
+ * Misc:
+ *   nlmsg_report()			report back to application?
+ *
  * ------------------------------------------------------------------------
  *                          Attributes Interface
  * ------------------------------------------------------------------------
@@ -194,6 +197,9 @@ extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
 						    struct nlmsghdr *, int *));
 extern void		netlink_queue_skip(struct nlmsghdr *nlh,
 					   struct sk_buff *skb);
+extern int		nlmsg_notify(struct sock *sk, struct sk_buff *skb,
+				     u32 pid, unsigned int group, int report,
+				     gfp_t flags);
 
 extern int		nla_validate(struct nlattr *head, int len, int maxtype,
 				     struct nla_policy *policy);
@@ -375,6 +381,17 @@ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype,
 			    nlmsg_attrlen(nlh, hdrlen), maxtype, policy);
 }
 
+/**
+ * nlmsg_report - need to report back to application?
+ * @nlh: netlink message header
+ *
+ * Returns 1 if a report back to the application is requested.
+ */
+static inline int nlmsg_report(struct nlmsghdr *nlh)
+{
+	return !!(nlh->nlmsg_flags & NLM_F_ECHO);
+}
+
 /**
  * nlmsg_for_each_attr - iterate over a stream of attributes
  * @pos: loop counter, set to current attribute
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e02fa6a33f426..2b1af17e63898 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -171,6 +171,22 @@ int rtnl_unicast(struct sk_buff *skb, u32 pid)
 	return nlmsg_unicast(rtnl, skb, pid);
 }
 
+int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+		struct nlmsghdr *nlh, gfp_t flags)
+{
+	int report = 0;
+
+	if (nlh)
+		report = nlmsg_report(nlh);
+
+	return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+}
+
+void rtnl_set_sk_err(u32 group, int error)
+{
+	netlink_set_err(rtnl, 0, group, error);
+}
+
 int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
 	struct rtattr *mx = (struct rtattr*)skb->tail;
@@ -829,3 +845,5 @@ EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
 EXPORT_SYMBOL(rtnl_unicast);
+EXPORT_SYMBOL(rtnl_notify);
+EXPORT_SYMBOL(rtnl_set_sk_err);
-- 
GitLab


From c17084d21c18497b506bd28b82d964bc9e6c424b Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:32:48 -0700
Subject: [PATCH 0471/1063] [NET] fib_rules: Convert fib rule notification to
 use rtnl_notify()

Adds support for NLM_F_ECHO to simplify the process of identifying
inserted rules with an auto generated priority.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 873b04d5df812..7b2e9bb1a605d 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -18,7 +18,8 @@ static LIST_HEAD(rules_ops);
 static DEFINE_SPINLOCK(rules_mod_lock);
 
 static void notify_rule_change(int event, struct fib_rule *rule,
-			       struct fib_rules_ops *ops);
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid);
 
 static struct fib_rules_ops *lookup_rules_ops(int family)
 {
@@ -209,7 +210,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	else
 		list_add_rcu(&rule->list, ops->rules_list);
 
-	notify_rule_change(RTM_NEWRULE, rule, ops);
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
 	rules_ops_put(ops);
 	return 0;
 
@@ -266,7 +267,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 		list_del_rcu(&rule->list);
 		synchronize_rcu();
-		notify_rule_change(RTM_DELRULE, rule, ops);
+		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);
 		rules_ops_put(ops);
 		return 0;
@@ -344,18 +346,26 @@ int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
 EXPORT_SYMBOL_GPL(fib_rules_dump);
 
 static void notify_rule_change(int event, struct fib_rule *rule,
-			       struct fib_rules_ops *ops)
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid)
 {
-	int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128);
-	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
 
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
-		netlink_set_err(rtnl, 0, ops->nlgroup, ENOBUFS);
-	else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) {
+		goto errout;
+
+	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, ops->nlgroup, EINVAL);
-	} else
-		netlink_broadcast(rtnl, skb, 0, ops->nlgroup, GFP_KERNEL);
+		goto errout;
+	}
+
+	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(ops->nlgroup, err);
 }
 
 static void attach_rules(struct list_head *rules, struct net_device *dev)
-- 
GitLab


From b8673311804ca29680dd584bd08352001fcbe2f8 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:33:14 -0700
Subject: [PATCH 0472/1063] [NEIGH]: Convert neighbour notifications to use
 rtnl_notify()

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2f4e06a134572..23ae5e5426db8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2409,36 +2409,35 @@ static struct file_operations neigh_stat_seq_fops = {
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_ARPD
-void neigh_app_ns(struct neighbour *n)
+static void __neigh_notify(struct neighbour *n, int type, int flags)
 {
 	struct sk_buff *skb;
+	int err = -ENOBUFS;
 
 	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
 	if (skb == NULL)
-		return;
+		goto errout;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, NLM_F_REQUEST) <= 0)
+	err = neigh_fill_info(skb, n, 0, 0, type, flags);
+	if (err < 0) {
 		kfree_skb(skb);
-	else {
-		NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
+		goto errout;
 	}
+
+	err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_NEIGH, err);
 }
 
-static void neigh_app_notify(struct neighbour *n)
+void neigh_app_ns(struct neighbour *n)
 {
-	struct sk_buff *skb;
-
-	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
-	if (skb == NULL)
-		return;
+	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+}
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) <= 0)
-		kfree_skb(skb);
-	else {
-		NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
-	}
+static void neigh_app_notify(struct neighbour *n)
+{
+	__neigh_notify(n, RTM_NEWNEIGH, 0);
 }
 
 #endif /* CONFIG_ARPD */
-- 
GitLab


From dc738dd83e88c3c5de55431f8cfb758de5d4df48 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:33:35 -0700
Subject: [PATCH 0473/1063] [DECNET]: Convert DECnet notifications to use
 rtnl_notify()

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_dev.c   | 25 ++++++++++++++-----------
 net/decnet/dn_table.c | 28 ++++++++++++++--------------
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 88ea7a13bb242..01861feb608db 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -746,20 +746,23 @@ static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
 static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+	int payload = sizeof(struct ifaddrmsg) + 128;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS);
-		return;
-	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
 }
 
 static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 10e87262b6fb9..317904bb58964 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -333,24 +333,24 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
 {
         struct sk_buff *skb;
         u32 pid = req ? req->pid : 0;
-        int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256);
+	int err = -ENOBUFS;
 
-        skb = alloc_skb(size, GFP_KERNEL);
-        if (!skb)
-                return;
+        skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+        if (skb == NULL)
+		goto errout;
 
-        if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, 
-                                f->fn_type, f->fn_scope, &f->fn_key, z, 
-                                DN_FIB_INFO(f), 0) < 0) {
+        err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
+			       f->fn_type, f->fn_scope, &f->fn_key, z,
+			       DN_FIB_INFO(f), 0);
+	if (err < 0) {
                 kfree_skb(skb);
-                return;
+		goto errout;
         }
-        NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE;
-        if (nlh->nlmsg_flags & NLM_F_ECHO)
-                atomic_inc(&skb->users);
-        netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL);
-        if (nlh->nlmsg_flags & NLM_F_ECHO)
-                netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
 }
 
 static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, 
-- 
GitLab


From d6062cbbd1f5e92c94e5eae9ef1a280ed48d56d5 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:33:59 -0700
Subject: [PATCH 0474/1063] [IPv4] address: Convert address notification to use
 rtnl_notify()

Adds support for NLM_F_ECHO allowing applications to easily
see which addresses have been deleted, added, or promoted.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 53 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 398e7b9ca66b4..0487677729cf1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -88,7 +88,7 @@ static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
 	[IFA_LABEL]     	= { .type = NLA_STRING },
 };
 
-static void rtmsg_ifa(int event, struct in_ifaddr *);
+static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -239,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
 	return 0;
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
-			 int destroy)
+static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy, struct nlmsghdr *nlh, u32 pid)
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -273,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			if (!do_promote) {
 				*ifap1 = ifa->ifa_next;
 
-				rtmsg_ifa(RTM_DELADDR, ifa);
+				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
 				blocking_notifier_call_chain(&inetaddr_chain,
 						NETDEV_DOWN, ifa);
 				inet_free_ifa(ifa);
@@ -298,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	   is valid, it will try to restore deleted routes... Grr.
 	   So that, this order is correct.
 	 */
-	rtmsg_ifa(RTM_DELADDR, ifa1);
+	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
@@ -310,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
-		rtmsg_ifa(RTM_NEWADDR, promote);
+		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
@@ -329,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	}
 }
 
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy)
+{
+	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
+}
+
+static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
+			     u32 pid)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -374,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
-	rtmsg_ifa(RTM_NEWADDR, ifa);
+	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 
 	return 0;
 }
 
+static int inet_insert_ifa(struct in_ifaddr *ifa)
+{
+	return __inet_insert_ifa(ifa, NULL, 0);
+}
+
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -466,7 +478,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 		    !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa)))
 			continue;
 
-		inet_del_ifa(in_dev, ifap, 1);
+		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
 		return 0;
 	}
 
@@ -558,7 +570,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
 
-	return inet_insert_ifa(ifa);
+	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
 }
 
 /*
@@ -1189,18 +1201,27 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
+static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
+		      u32 pid)
 {
 	struct sk_buff *skb;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
 
 	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
-	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+		goto errout;
+
+	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
-	} else
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
+		goto errout;
+	}
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
 }
 
 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
-- 
GitLab


From f21c7bc5f6a0a5bd03988886ff46656bc3f255b7 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:34:17 -0700
Subject: [PATCH 0475/1063] [IPv4] route: Convert route notifications to use
 rtnl_notify()

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ab753df20a39a..5dfdad5cbcd43 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -33,7 +33,6 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/init.h>
 
 #include <net/arp.h>
@@ -44,6 +43,7 @@
 #include <net/sock.h>
 #include <net/ip_fib.h>
 #include <net/ip_mp_alg.h>
+#include <net/netlink.h>
 
 #include "fib_lookup.h"
 
@@ -278,25 +278,25 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 {
 	struct sk_buff *skb;
 	u32 pid = req ? req->pid : n->nlmsg_pid;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+	int payload = sizeof(struct rtmsg) + 256;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb)
-		return;
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
 
-	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
-			  fa->fa_type, fa->fa_scope, &key, z,
-			  fa->fa_tos,
-			  fa->fa_info, 0) < 0) {
+	err = fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
+			    fa->fa_type, fa->fa_scope, &key, z, fa->fa_tos,
+			    fa->fa_info, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		atomic_inc(&skb->users);
-	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_ROUTE, n, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
 }
 
 /* Return the first fib alias matching TOS with
-- 
GitLab


From 5d620266431c03d1dac66287367c6da26c64a069 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:35:02 -0700
Subject: [PATCH 0476/1063] [IPv6] address: Convert address notification to use
 rtnl_notify()

Fixes a wrong use of current->pid as netlink pid.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4f991a2234d0b..81e9ef14676f8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -73,6 +73,7 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 
@@ -3280,20 +3281,23 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb,
 static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
+	int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
 }
 
 static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
-- 
GitLab


From 21713ebc4f119950e87d21c4637d5a750eea20e8 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:35:24 -0700
Subject: [PATCH 0477/1063] [IPv6] route: Convert route notifications to use
 rtnl_notify()

Fixes a wrong use of current->pid as netlink pid.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 024c8e26c2ecc..1aca787ead85b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -35,7 +35,6 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
-#include <linux/netlink.h>
 #include <linux/if_arp.h>
 
 #ifdef 	CONFIG_PROC_FS
@@ -54,6 +53,7 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 
 #include <asm/uaccess.h>
 
@@ -2056,27 +2056,25 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
 			struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-	u32 pid = current->pid;
-	u32 seq = 0;
-
-	if (req)
-		pid = req->pid;
-	if (nlh)
-		seq = nlh->nlmsg_seq;
-	
-	skb = alloc_skb(size, gfp_any());
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
-		return;
-	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
+	u32 pid = req ? req->pid : 0;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	int payload = sizeof(struct rtmsg) + 256;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+	if (skb == NULL)
+		goto errout;
+
+	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
 }
 
 /*
-- 
GitLab


From 8d7a76c9b17866f426fcbb531c81af7a1f53e071 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:35:47 -0700
Subject: [PATCH 0478/1063] [IPv6] link: Convert link notifications to use
 rtnl_notify()

Fixes a wrong use of current->pid as netlink pid.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 81e9ef14676f8..2a3be0f1c5169 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3438,20 +3438,23 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE);
+	int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE;
+	int err = -ENOBUFS;
 	
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_IFINFO, err);
 }
 
 /* Maximum length of prefix_cacheinfo attributes */
-- 
GitLab


From 8c384bfa36b1dbeba8154da20d49167ce3e275c4 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:36:07 -0700
Subject: [PATCH 0479/1063] [IPv6] prefix: Convert prefix notifications to use
 rtnl_notify()

Fixes a wrong use of current->pid as netlink pid.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2a3be0f1c5169..4af741ef8d6b3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3506,20 +3506,23 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 			 struct prefix_info *pinfo)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE);
+	int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
 }
 
 static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
-- 
GitLab


From 280a306c539389156477cc9c07028d43fe4fbf86 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:36:28 -0700
Subject: [PATCH 0480/1063] [BRIDGE]: Convert notifications to use
 rtnl_notify()

Fixes a wrong use of current->pid as netlink pid.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 53086fb750893..8f661195d09d8 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -12,6 +12,7 @@
 
 #include <linux/kernel.h>
 #include <linux/rtnetlink.h>
+#include <net/netlink.h>
 #include "br_private.h"
 
 /*
@@ -76,26 +77,24 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
 	struct sk_buff *skb;
-	int err = -ENOMEM;
+	int payload = sizeof(struct ifinfomsg) + 128;
+	int err = -ENOBUFS;
 
 	pr_debug("bridge notify event=%d\n", event);
-	skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128),
-			GFP_ATOMIC);
-	if (!skb)
-		goto err_out;
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
 
-	err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0);
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+errout:
 	if (err < 0)
-		goto err_kfree;
-
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
-	return;
-
-err_kfree:
-	kfree_skb(skb);
-err_out:
-	netlink_set_err(rtnl, 0, RTNLGRP_LINK, err);
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /*
-- 
GitLab


From bd5785ba3ac1c89aa4c351ceb2acd96686424d8c Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:36:49 -0700
Subject: [PATCH 0481/1063] [WIRELESS]: Convert notifications to use
 rtnl_notify()

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/wireless.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/wireless.c b/net/core/wireless.c
index 348b9da73cc4f..3168fca312f7c 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -85,6 +85,7 @@
 
 #include <linux/wireless.h>		/* Pretty obvious */
 #include <net/iw_handler.h>		/* New driver API */
+#include <net/netlink.h>
 
 #include <asm/uaccess.h>		/* copy_to_user() */
 
@@ -1849,7 +1850,7 @@ static void wireless_nlevent_process(unsigned long data)
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
+		rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 }
 
 static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
-- 
GitLab


From 0ec6d3f467faeec5dd3b617959eb90e9d520113d Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:37:09 -0700
Subject: [PATCH 0482/1063] [NET] link: Convert notifications to use
 rtnl_notify()

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2b1af17e63898..f5300b5dd0fdc 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -630,20 +630,22 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
-			       sizeof(struct rtnl_link_ifmap) +
-			       sizeof(struct rtnl_link_stats) + 128);
+	int err = -ENOBUFS;
 
-	skb = nlmsg_new(size, GFP_KERNEL);
-	if (!skb)
-		return;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
 
-	if (rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0) < 0) {
+	err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /* Protected by RTNL sempahore.  */
-- 
GitLab


From 56fc85ac961e2c20dcb5ef07e2628b3f93de2e49 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 15 Aug 2006 00:37:29 -0700
Subject: [PATCH 0483/1063] [RTNETLINK]: Unexport rtnl socket

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 2 --
 net/core/rtnetlink.c      | 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ecbe0349060c7..9c92dc8b9a082 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -574,8 +574,6 @@ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, in
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
 
-extern struct sock *rtnl;
-
 struct rtnetlink_link
 {
 	int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f5300b5dd0fdc..dfc58269240a6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -58,6 +58,7 @@
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
 static DEFINE_MUTEX(rtnl_mutex);
+static struct sock *rtnl;
 
 void rtnl_lock(void)
 {
@@ -95,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 	return 0;
 }
 
-struct sock *rtnl;
-
 struct rtnetlink_link * rtnetlink_links[NPROTO];
 
 static const int rtm_min[RTM_NR_FAMILIES] =
@@ -842,7 +841,6 @@ EXPORT_SYMBOL(rtattr_strlcpy);
 EXPORT_SYMBOL(rtattr_parse);
 EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
-EXPORT_SYMBOL(rtnl);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
-- 
GitLab


From ab32ea5d8a760e7dd4339634e95d7be24ee5b842 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Fri, 22 Sep 2006 14:15:41 -0700
Subject: [PATCH 0484/1063] [NET/IPV4/IPV6]: Change some sysctl variables to
 __read_mostly

Change net/core, ipv4 and ipv6 sysctl variables to __read_mostly.

Couldn't actually measure any performance increase while testing (.3%
I consider noise), but seems like the right thing to do.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c     |  2 +-
 net/core/sock.c          | 10 +++++-----
 net/ipv4/af_inet.c       |  4 ++--
 net/ipv4/icmp.c          | 12 ++++++------
 net/ipv4/igmp.c          |  4 ++--
 net/ipv4/ip_fragment.c   | 10 +++++-----
 net/ipv4/ip_output.c     |  2 +-
 net/ipv4/tcp.c           |  2 +-
 net/ipv4/tcp_input.c     | 36 ++++++++++++++++++------------------
 net/ipv4/tcp_ipv4.c      |  4 ++--
 net/ipv4/tcp_minisocks.c |  4 ++--
 net/ipv4/tcp_output.c    | 12 ++++++------
 net/ipv4/tcp_timer.c     | 16 ++++++++--------
 net/ipv6/addrconf.c      |  6 +++---
 net/ipv6/af_inet6.c      |  2 +-
 net/ipv6/icmp.c          |  2 +-
 net/ipv6/mcast.c         |  2 +-
 net/ipv6/reassembly.c    |  8 ++++----
 18 files changed, 69 insertions(+), 69 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 23ae5e5426db8..c7e653ff5ed03 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2451,7 +2451,7 @@ static struct neigh_sysctl_table {
 	ctl_table		neigh_neigh_dir[2];
 	ctl_table		neigh_proto_dir[2];
 	ctl_table		neigh_root_dir[2];
-} neigh_sysctl_template = {
+} neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
 		{
 			.ctl_name	= NET_NEIGH_MCAST_SOLICIT,
diff --git a/net/core/sock.c b/net/core/sock.c
index b67d868649cdb..cfaf09039b023 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -187,13 +187,13 @@ static struct lock_class_key af_callback_keys[AF_MAX];
 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
 /* Run time adjustable parameters. */
-__u32 sysctl_wmem_max = SK_WMEM_MAX;
-__u32 sysctl_rmem_max = SK_RMEM_MAX;
-__u32 sysctl_wmem_default = SK_WMEM_MAX;
-__u32 sysctl_rmem_default = SK_RMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
 /* Maximal space eaten by iovec or ancilliary data plus some space */
-int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 36c38bffb4748..f2e8927f45960 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -391,7 +391,7 @@ int inet_release(struct socket *sock)
 }
 
 /* It is off by default, see below. */
-int sysctl_ip_nonlocal_bind;
+int sysctl_ip_nonlocal_bind __read_mostly;
 
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -987,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
  *      Shall we try to damage output packets if routing dev changes?
  */
 
-int sysctl_ip_dynaddr;
+int sysctl_ip_dynaddr __read_mostly;
 
 static int inet_sk_reselect_saddr(struct sock *sk)
 {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6d223e5c67410..c2ad07e48ab4f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = {
 };
 
 /* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts = 1;
+int sysctl_icmp_echo_ignore_all __read_mostly;
+int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
 
 /* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses = 1;
+int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
 
 /*
  * 	Configurable global rate limit.
@@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1;
  *	time exceeded (11), parameter problem (12)
  */
 
-int sysctl_icmp_ratelimit = 1 * HZ;
-int sysctl_icmp_ratemask = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr;
+int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
+int sysctl_icmp_ratemask __read_mostly = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
 
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7003e763d9700..58be8227b0cb9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 /*
  *	Join a socket to a group
  */
-int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
-int sysctl_igmp_max_msf = IP_MAX_MSF;
+int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
+int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
 
 
 static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8d7f107c2eefd..165d72859ddf4 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,15 +54,15 @@
  * even the most extreme cases without allowing an attacker to measurably
  * harm machine performance.
  */
-int sysctl_ipfrag_high_thresh = 256*1024;
-int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ipfrag_max_dist = 64;
+int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
-int sysctl_ipfrag_time = IP_FRAG_TIME;
+int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
 
 struct ipfrag_skb_cb
 {
@@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
 }
 
 static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ipfrag_secret_rebuild(unsigned long dummy)
 {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1b9b6742ef772..81b2795a4c205 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,7 +83,7 @@
 #include <linux/netlink.h>
 #include <linux/tcp.h>
 
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
 
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0124e69ab383..e570db4d33c84 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -268,7 +268,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
-int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 159fa3f1ba677..caf3c41dcc8c7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,24 +72,24 @@
 #include <asm/unaligned.h>
 #include <net/netdma.h>
 
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-int sysctl_tcp_fack = 1;
-int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn;
-int sysctl_tcp_dsack = 1;
-int sysctl_tcp_app_win = 31;
-int sysctl_tcp_adv_win_scale = 2;
-
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_frto;
-int sysctl_tcp_nometrics_save;
-
-int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc;
+int sysctl_tcp_timestamps __read_mostly = 1;
+int sysctl_tcp_window_scaling __read_mostly = 1;
+int sysctl_tcp_sack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_dsack __read_mostly = 1;
+int sysctl_tcp_app_win __read_mostly = 31;
+int sysctl_tcp_adv_win_scale __read_mostly = 2;
+
+int sysctl_tcp_stdurg __read_mostly;
+int sysctl_tcp_rfc1337 __read_mostly;
+int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_nometrics_save __read_mostly;
+
+int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
+int sysctl_tcp_abc __read_mostly;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2973dee0a489e..23b46e36b1471 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -78,8 +78,8 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-int sysctl_tcp_tw_reuse;
-int sysctl_tcp_low_latency;
+int sysctl_tcp_tw_reuse __read_mostly;
+int sysctl_tcp_low_latency __read_mostly;
 
 /* Check TCP sequence numbers in ICMP packets. */
 #define ICMP_MIN_LENGTH 8
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624e2b2c7f53a..0163d98269074 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -34,8 +34,8 @@
 #define SYNC_INIT 1
 #endif
 
-int sysctl_tcp_syncookies = SYNC_INIT; 
-int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+int sysctl_tcp_abort_on_overflow __read_mostly;
 
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9252a50c4b490..061edfae0c29d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -43,24 +43,24 @@
 #include <linux/smp_lock.h>
 
 /* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse = 1;
+int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
 /* People can turn this on to  work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
-int sysctl_tcp_workaround_signed_windows = 0;
+int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 
 /* This limits the percentage of the congestion window which we
  * will allow a single TSO frame to consume.  Building TSO frames
  * which are too large can cause TCP streams to be bursty.
  */
-int sysctl_tcp_tso_win_divisor = 3;
+int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing = 0;
-int sysctl_tcp_base_mss = 512;
+int sysctl_tcp_mtu_probing __read_mostly = 0;
+int sysctl_tcp_base_mss __read_mostly = 512;
 
 /* By default, RFC2861 behavior.  */
-int sysctl_tcp_slow_start_after_idle = 1;
+int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
 static void update_send_head(struct sock *sk, struct tcp_sock *tp,
 			     struct sk_buff *skb)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7c1bde3cd6cb7..fb09ade5897b3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -23,14 +23,14 @@
 #include <linux/module.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
-int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
-int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
-int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
-int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
-int sysctl_tcp_retries1 = TCP_RETR1;
-int sysctl_tcp_retries2 = TCP_RETR2;
-int sysctl_tcp_orphan_retries;
+int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
+int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
+int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
+int sysctl_tcp_orphan_retries __read_mostly;
 
 static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4af741ef8d6b3..f1ede90048870 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -146,7 +146,7 @@ static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *de
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -177,7 +177,7 @@ struct ipv6_devconf ipv6_devconf = {
 #endif
 };
 
-static struct ipv6_devconf ipv6_devconf_dflt = {
+static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -3665,7 +3665,7 @@ static struct addrconf_sysctl_table
 	ctl_table addrconf_conf_dir[2];
 	ctl_table addrconf_proto_dir[2];
 	ctl_table addrconf_root_dir[2];
-} addrconf_sysctl = {
+} addrconf_sysctl __read_mostly = {
 	.sysctl_header = NULL,
 	.addrconf_vars = {
         	{
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 82a1b1a328dbf..2ff600cfe3a47 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -67,7 +67,7 @@ MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
 MODULE_LICENSE("GPL");
 
-int sysctl_ipv6_bindv6only;
+int sysctl_ipv6_bindv6only __read_mostly;
 
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1030551076741..e3a8e27af950c 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb)
 	return 0;
 }
 
-static int sysctl_icmpv6_time = 1*HZ; 
+static int sysctl_icmpv6_time __read_mostly = 1*HZ;
 
 /* 
  * Check the ICMP output rate limit 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 639eb20c9f1fd..3b114e3fa2f8d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -171,7 +171,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 
 #define IPV6_MLD_MAX_MSF	64
 
-int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
 
 /*
  *	socket join on multicast group
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a8623d2b0879c..f39bbedd1327c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -53,10 +53,10 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 
-int sysctl_ip6frag_high_thresh = 256*1024;
-int sysctl_ip6frag_low_thresh = 192*1024;
+int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
+int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
 
 struct ip6frag_skb_cb
 {
@@ -152,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
 }
 
 static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
+int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ip6_frag_secret_rebuild(unsigned long dummy)
 {
-- 
GitLab


From 4e902c57417c4c285b98ba2722468d1c3ed83d1b Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 17 Aug 2006 18:14:52 -0700
Subject: [PATCH 0485/1063] [IPv4]: FIB configuration using struct fib_config

Introduces struct fib_config, replacing the ugly struct kern_rta, which
was prone to ordering issues. Avoids creating fake netlink messages
for auto-generated routes or requests via ioctl.

A new interface net/nexthop.h is added to help navigate through
nexthop configuration arrays.

A new struct nl_info will be used to carry the necessary netlink
information to be used for notifications later on.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     |  55 +++---
 include/net/netlink.h    |  10 +
 include/net/nexthop.h    |  33 ++++
 net/ipv4/fib_frontend.c  | 364 +++++++++++++++++++++++++++++-------
 net/ipv4/fib_hash.c      |  94 +++++-----
 net/ipv4/fib_lookup.h    |  11 +-
 net/ipv4/fib_semantics.c | 385 ++++++++++++---------------------------
 net/ipv4/fib_trie.c      |  76 ++++----
 8 files changed, 560 insertions(+), 468 deletions(-)
 create mode 100644 include/net/nexthop.h

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 8e9ba563d3422..42ed96fab3f58 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -20,25 +20,32 @@
 #include <linux/seq_file.h>
 #include <net/fib_rules.h>
 
-/* WARNING: The ordering of these elements must match ordering
- *          of RTA_* rtnetlink attribute numbers.
- */
-struct kern_rta {
-	void		*rta_dst;
-	void		*rta_src;
-	int		*rta_iif;
-	int		*rta_oif;
-	void		*rta_gw;
-	u32		*rta_priority;
-	void		*rta_prefsrc;
-	struct rtattr	*rta_mx;
-	struct rtattr	*rta_mp;
-	unsigned char	*rta_protoinfo;
-	u32		*rta_flow;
-	struct rta_cacheinfo *rta_ci;
-	struct rta_session *rta_sess;
-	u32		*rta_mp_alg;
-};
+struct fib_config {
+	u8			fc_family;
+	u8			fc_dst_len;
+	u8			fc_src_len;
+	u8			fc_tos;
+	u8			fc_protocol;
+	u8			fc_scope;
+	u8			fc_type;
+	/* 1 byte unused */
+	u32			fc_table;
+	u32			fc_dst;
+	u32			fc_src;
+	u32			fc_gw;
+	int			fc_oif;
+	u32			fc_flags;
+	u32			fc_priority;
+	u32			fc_prefsrc;
+	struct nlattr		*fc_mx;
+	struct rtnexthop	*fc_mp;
+	int			fc_mx_len;
+	int			fc_mp_len;
+	u32			fc_flow;
+	u32			fc_mp_alg;
+	u32			fc_nlflags;
+	struct nl_info		fc_nlinfo;
+ };
 
 struct fib_info;
 
@@ -154,12 +161,8 @@ struct fib_table {
 	u32		tb_id;
 	unsigned	tb_stamp;
 	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
-	int		(*tb_insert)(struct fib_table *table, struct rtmsg *r,
-				     struct kern_rta *rta, struct nlmsghdr *n,
-				     struct netlink_skb_parms *req);
-	int		(*tb_delete)(struct fib_table *table, struct rtmsg *r,
-				     struct kern_rta *rta, struct nlmsghdr *n,
-				     struct netlink_skb_parms *req);
+	int		(*tb_insert)(struct fib_table *, struct fib_config *);
+	int		(*tb_delete)(struct fib_table *, struct fib_config *);
 	int		(*tb_dump)(struct fib_table *table, struct sk_buff *skb,
 				     struct netlink_callback *cb);
 	int		(*tb_flush)(struct fib_table *table);
@@ -228,8 +231,6 @@ struct rtentry;
 extern int ip_fib_check_default(u32 gw, struct net_device *dev);
 extern int fib_sync_down(u32 local, struct net_device *dev, int force);
 extern int fib_sync_up(struct net_device *dev);
-extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
-			       struct kern_rta *rta, struct rtentry *r);
 extern u32  __fib_res_prefsrc(struct fib_result *res);
 
 /* Exported by fib_hash.c */
diff --git a/include/net/netlink.h b/include/net/netlink.h
index bf593eb59e1bc..47044da167c50 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -192,6 +192,16 @@ struct nla_policy {
 	u16		minlen;
 };
 
+/**
+ * struct nl_info - netlink source information
+ * @nlh: Netlink message header of original request
+ * @pid: Netlink PID of requesting application
+ */
+struct nl_info {
+	struct nlmsghdr		*nlh;
+	u32			pid;
+};
+
 extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
 					  int (*cb)(struct sk_buff *,
 						    struct nlmsghdr *, int *));
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
new file mode 100644
index 0000000000000..3334dbfa5aa4b
--- /dev/null
+++ b/include/net/nexthop.h
@@ -0,0 +1,33 @@
+#ifndef __NET_NEXTHOP_H
+#define __NET_NEXTHOP_H
+
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+
+static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining)
+{
+	return remaining >= sizeof(*rtnh) &&
+	       rtnh->rtnh_len >= sizeof(*rtnh) &&
+	       rtnh->rtnh_len <= remaining;
+}
+
+static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh,
+                                         int *remaining)
+{
+	int totlen = NLA_ALIGN(rtnh->rtnh_len);
+
+	*remaining -= totlen;
+	return (struct rtnexthop *) ((char *) rtnh + totlen);
+}
+
+static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh)
+{
+	return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh)));
+}
+
+static inline int rtnh_attrlen(const struct rtnexthop *rtnh)
+{
+	return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh));
+}
+
+#endif
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ad4c14f968a13..acc18bdf2dee5 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -253,42 +253,190 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
 
 #ifndef CONFIG_IP_NOSIOCRT
 
+static inline u32 sk_extract_addr(struct sockaddr *addr)
+{
+	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+}
+
+static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
+{
+	struct nlattr *nla;
+
+	nla = (struct nlattr *) ((char *) mx + len);
+	nla->nla_type = type;
+	nla->nla_len = nla_attr_size(4);
+	*(u32 *) nla_data(nla) = value;
+
+	return len + nla_total_size(4);
+}
+
+static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
+				 struct fib_config *cfg)
+{
+	u32 addr;
+	int plen;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	if (rt->rt_dst.sa_family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	/*
+	 * Check mask for validity:
+	 * a) it must be contiguous.
+	 * b) destination must have all host bits clear.
+	 * c) if application forgot to set correct family (AF_INET),
+	 *    reject request unless it is absolutely clear i.e.
+	 *    both family and mask are zero.
+	 */
+	plen = 32;
+	addr = sk_extract_addr(&rt->rt_dst);
+	if (!(rt->rt_flags & RTF_HOST)) {
+		u32 mask = sk_extract_addr(&rt->rt_genmask);
+
+		if (rt->rt_genmask.sa_family != AF_INET) {
+			if (mask || rt->rt_genmask.sa_family)
+				return -EAFNOSUPPORT;
+		}
+
+		if (bad_mask(mask, addr))
+			return -EINVAL;
+
+		plen = inet_mask_len(mask);
+	}
+
+	cfg->fc_dst_len = plen;
+	cfg->fc_dst = addr;
+
+	if (cmd != SIOCDELRT) {
+		cfg->fc_nlflags = NLM_F_CREATE;
+		cfg->fc_protocol = RTPROT_BOOT;
+	}
+
+	if (rt->rt_metric)
+		cfg->fc_priority = rt->rt_metric - 1;
+
+	if (rt->rt_flags & RTF_REJECT) {
+		cfg->fc_scope = RT_SCOPE_HOST;
+		cfg->fc_type = RTN_UNREACHABLE;
+		return 0;
+	}
+
+	cfg->fc_scope = RT_SCOPE_NOWHERE;
+	cfg->fc_type = RTN_UNICAST;
+
+	if (rt->rt_dev) {
+		char *colon;
+		struct net_device *dev;
+		char devname[IFNAMSIZ];
+
+		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
+			return -EFAULT;
+
+		devname[IFNAMSIZ-1] = 0;
+		colon = strchr(devname, ':');
+		if (colon)
+			*colon = 0;
+		dev = __dev_get_by_name(devname);
+		if (!dev)
+			return -ENODEV;
+		cfg->fc_oif = dev->ifindex;
+		if (colon) {
+			struct in_ifaddr *ifa;
+			struct in_device *in_dev = __in_dev_get_rtnl(dev);
+			if (!in_dev)
+				return -ENODEV;
+			*colon = ':';
+			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+				if (strcmp(ifa->ifa_label, devname) == 0)
+					break;
+			if (ifa == NULL)
+				return -ENODEV;
+			cfg->fc_prefsrc = ifa->ifa_local;
+		}
+	}
+
+	addr = sk_extract_addr(&rt->rt_gateway);
+	if (rt->rt_gateway.sa_family == AF_INET && addr) {
+		cfg->fc_gw = addr;
+		if (rt->rt_flags & RTF_GATEWAY &&
+		    inet_addr_type(addr) == RTN_UNICAST)
+			cfg->fc_scope = RT_SCOPE_UNIVERSE;
+	}
+
+	if (cmd == SIOCDELRT)
+		return 0;
+
+	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+		return -EINVAL;
+
+	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
+		cfg->fc_scope = RT_SCOPE_LINK;
+
+	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
+		struct nlattr *mx;
+		int len = 0;
+
+		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
+ 		if (mx == NULL)
+			return -ENOMEM;
+
+		if (rt->rt_flags & RTF_MTU)
+			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
+
+		if (rt->rt_flags & RTF_WINDOW)
+			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
+
+		if (rt->rt_flags & RTF_IRTT)
+			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
+
+		cfg->fc_mx = mx;
+		cfg->fc_mx_len = len;
+	}
+
+	return 0;
+}
+
 /*
  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
  */
  
 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 {
+	struct fib_config cfg;
+	struct rtentry rt;
 	int err;
-	struct kern_rta rta;
-	struct rtentry  r;
-	struct {
-		struct nlmsghdr nlh;
-		struct rtmsg	rtm;
-	} req;
 
 	switch (cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+
+		if (copy_from_user(&rt, arg, sizeof(rt)))
 			return -EFAULT;
+
 		rtnl_lock();
-		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
+		err = rtentry_to_fib_config(cmd, &rt, &cfg);
 		if (err == 0) {
+			struct fib_table *tb;
+
 			if (cmd == SIOCDELRT) {
-				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
-				err = -ESRCH;
+				tb = fib_get_table(cfg.fc_table);
 				if (tb)
-					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+					err = tb->tb_delete(tb, &cfg);
+				else
+					err = -ESRCH;
 			} else {
-				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
-				err = -ENOBUFS;
+				tb = fib_new_table(cfg.fc_table);
 				if (tb)
-					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+					err = tb->tb_insert(tb, &cfg);
+				else
+					err = -ENOBUFS;
 			}
-			kfree(rta.rta_mx);
+
+			/* allocated by rtentry_to_fib_config() */
+			kfree(cfg.fc_mx);
 		}
 		rtnl_unlock();
 		return err;
@@ -305,51 +453,134 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
 
 #endif
 
-static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
+static struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+	[RTA_DST]		= { .type = NLA_U32 },
+	[RTA_SRC]		= { .type = NLA_U32 },
+	[RTA_IIF]		= { .type = NLA_U32 },
+	[RTA_OIF]		= { .type = NLA_U32 },
+	[RTA_GATEWAY]		= { .type = NLA_U32 },
+	[RTA_PRIORITY]		= { .type = NLA_U32 },
+	[RTA_PREFSRC]		= { .type = NLA_U32 },
+	[RTA_METRICS]		= { .type = NLA_NESTED },
+	[RTA_MULTIPATH]		= { .minlen = sizeof(struct rtnexthop) },
+	[RTA_PROTOINFO]		= { .type = NLA_U32 },
+	[RTA_FLOW]		= { .type = NLA_U32 },
+	[RTA_MP_ALGO]		= { .type = NLA_U32 },
+};
+
+static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct fib_config *cfg)
 {
-	int i;
-
-	for (i=1; i<=RTA_MAX; i++, rta++) {
-		struct rtattr *attr = *rta;
-		if (attr) {
-			if (RTA_PAYLOAD(attr) < 4)
-				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS &&
-			    i != RTA_TABLE)
-				*rta = (struct rtattr*)RTA_DATA(attr);
+	struct nlattr *attr;
+	int err, remaining;
+	struct rtmsg *rtm;
+
+	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	rtm = nlmsg_data(nlh);
+	cfg->fc_family = rtm->rtm_family;
+	cfg->fc_dst_len = rtm->rtm_dst_len;
+	cfg->fc_src_len = rtm->rtm_src_len;
+	cfg->fc_tos = rtm->rtm_tos;
+	cfg->fc_table = rtm->rtm_table;
+	cfg->fc_protocol = rtm->rtm_protocol;
+	cfg->fc_scope = rtm->rtm_scope;
+	cfg->fc_type = rtm->rtm_type;
+	cfg->fc_flags = rtm->rtm_flags;
+	cfg->fc_nlflags = nlh->nlmsg_flags;
+
+	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+	cfg->fc_nlinfo.nlh = nlh;
+
+	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
+		switch (attr->nla_type) {
+		case RTA_DST:
+			cfg->fc_dst = nla_get_u32(attr);
+			break;
+		case RTA_SRC:
+			cfg->fc_src = nla_get_u32(attr);
+			break;
+		case RTA_OIF:
+			cfg->fc_oif = nla_get_u32(attr);
+			break;
+		case RTA_GATEWAY:
+			cfg->fc_gw = nla_get_u32(attr);
+			break;
+		case RTA_PRIORITY:
+			cfg->fc_priority = nla_get_u32(attr);
+			break;
+		case RTA_PREFSRC:
+			cfg->fc_prefsrc = nla_get_u32(attr);
+			break;
+		case RTA_METRICS:
+			cfg->fc_mx = nla_data(attr);
+			cfg->fc_mx_len = nla_len(attr);
+			break;
+		case RTA_MULTIPATH:
+			cfg->fc_mp = nla_data(attr);
+			cfg->fc_mp_len = nla_len(attr);
+			break;
+		case RTA_FLOW:
+			cfg->fc_flow = nla_get_u32(attr);
+			break;
+		case RTA_MP_ALGO:
+			cfg->fc_mp_alg = nla_get_u32(attr);
+			break;
+		case RTA_TABLE:
+			cfg->fc_table = nla_get_u32(attr);
+			break;
 		}
 	}
+
 	return 0;
+errout:
+	return err;
 }
 
 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct fib_table * tb;
-	struct rtattr **rta = arg;
-	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct fib_config cfg;
+	struct fib_table *tb;
+	int err;
 
-	if (inet_check_attr(r, rta))
-		return -EINVAL;
+	err = rtm_to_fib_config(skb, nlh, &cfg);
+	if (err < 0)
+		goto errout;
 
-	tb = fib_get_table(rtm_get_table(rta, r->rtm_table));
-	if (tb)
-		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
-	return -ESRCH;
+	tb = fib_get_table(cfg.fc_table);
+	if (tb == NULL) {
+		err = -ESRCH;
+		goto errout;
+	}
+
+	err = tb->tb_delete(tb, &cfg);
+errout:
+	return err;
 }
 
 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct fib_table * tb;
-	struct rtattr **rta = arg;
-	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct fib_config cfg;
+	struct fib_table *tb;
+	int err;
 
-	if (inet_check_attr(r, rta))
-		return -EINVAL;
+	err = rtm_to_fib_config(skb, nlh, &cfg);
+	if (err < 0)
+		goto errout;
 
-	tb = fib_new_table(rtm_get_table(rta, r->rtm_table));
-	if (tb)
-		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
-	return -ENOBUFS;
+	tb = fib_new_table(cfg.fc_table);
+	if (tb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	err = tb->tb_insert(tb, &cfg);
+errout:
+	return err;
 }
 
 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
@@ -396,17 +627,19 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
    only when netlink is already locked.
  */
 
-static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, u32 dst, int dst_len,
+		      struct in_ifaddr *ifa)
 {
-	struct fib_table * tb;
-	struct {
-		struct nlmsghdr	nlh;
-		struct rtmsg	rtm;
-	} req;
-	struct kern_rta rta;
-
-	memset(&req.rtm, 0, sizeof(req.rtm));
-	memset(&rta, 0, sizeof(rta));
+	struct fib_table *tb;
+	struct fib_config cfg = {
+		.fc_protocol = RTPROT_KERNEL,
+		.fc_type = type,
+		.fc_dst = dst,
+		.fc_dst_len = dst_len,
+		.fc_prefsrc = ifa->ifa_local,
+		.fc_oif = ifa->ifa_dev->dev->ifindex,
+		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
+	};
 
 	if (type == RTN_UNICAST)
 		tb = fib_new_table(RT_TABLE_MAIN);
@@ -416,26 +649,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
 	if (tb == NULL)
 		return;
 
-	req.nlh.nlmsg_len = sizeof(req);
-	req.nlh.nlmsg_type = cmd;
-	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
-	req.nlh.nlmsg_pid = 0;
-	req.nlh.nlmsg_seq = 0;
+	cfg.fc_table = tb->tb_id;
 
-	req.rtm.rtm_dst_len = dst_len;
-	req.rtm.rtm_table = tb->tb_id;
-	req.rtm.rtm_protocol = RTPROT_KERNEL;
-	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
-	req.rtm.rtm_type = type;
-
-	rta.rta_dst = &dst;
-	rta.rta_prefsrc = &ifa->ifa_local;
-	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+	if (type != RTN_LOCAL)
+		cfg.fc_scope = RT_SCOPE_LINK;
+	else
+		cfg.fc_scope = RT_SCOPE_HOST;
 
 	if (cmd == RTM_NEWROUTE)
-		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+		tb->tb_insert(tb, &cfg);
 	else
-		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+		tb->tb_delete(tb, &cfg);
 }
 
 void fib_add_ifaddr(struct in_ifaddr *ifa)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index b5bee1a71e5cb..357557549ce53 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
 	return NULL;
 }
 
-static int
-fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fib_node *new_f, *f;
 	struct fib_alias *fa, *new_fa;
 	struct fn_zone *fz;
 	struct fib_info *fi;
-	int z = r->rtm_dst_len;
-	int type = r->rtm_type;
-	u8 tos = r->rtm_tos;
+	u8 tos = cfg->fc_tos;
 	u32 key;
 	int err;
 
-	if (z > 32)
+	if (cfg->fc_dst_len > 32)
 		return -EINVAL;
-	fz = table->fn_zones[z];
-	if (!fz && !(fz = fn_new_zone(table, z)))
+
+	fz = table->fn_zones[cfg->fc_dst_len];
+	if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
 		return -ENOBUFS;
 
 	key = 0;
-	if (rta->rta_dst) {
-		u32 dst;
-		memcpy(&dst, rta->rta_dst, 4);
-		if (dst & ~FZ_MASK(fz))
+	if (cfg->fc_dst) {
+		if (cfg->fc_dst & ~FZ_MASK(fz))
 			return -EINVAL;
-		key = fz_key(dst, fz);
+		key = fz_key(cfg->fc_dst, fz);
 	}
 
-	if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
-		return err;
+	fi = fib_create_info(cfg);
+	if (IS_ERR(fi))
+		return PTR_ERR(fi);
 
 	if (fz->fz_nent > (fz->fz_divisor<<1) &&
 	    fz->fz_divisor < FZ_MAX_DIVISOR &&
-	    (z==32 || (1<<z) > fz->fz_divisor))
+	    (cfg->fc_dst_len == 32 ||
+	     (1 << cfg->fc_dst_len) > fz->fz_divisor))
 		fn_rehash_zone(fz);
 
 	f = fib_find_node(fz, key);
@@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		struct fib_alias *fa_orig;
 
 		err = -EEXIST;
-		if (n->nlmsg_flags & NLM_F_EXCL)
+		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
-		if (n->nlmsg_flags & NLM_F_REPLACE) {
+		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
 			write_lock_bh(&fib_hash_lock);
 			fi_drop = fa->fa_info;
 			fa->fa_info = fi;
-			fa->fa_type = type;
-			fa->fa_scope = r->rtm_scope;
+			fa->fa_type = cfg->fc_type;
+			fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			fa->fa_state &= ~FA_S_ACCESSED;
 			fib_hash_genid++;
@@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
-			if (fa->fa_type == type &&
-			    fa->fa_scope == r->rtm_scope &&
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi)
 				goto out;
 		}
-		if (!(n->nlmsg_flags & NLM_F_APPEND))
+		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
 
 	err = -ENOENT;
-	if (!(n->nlmsg_flags&NLM_F_CREATE))
+	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
 	err = -ENOBUFS;
@@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
-	new_fa->fa_type = type;
-	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_type = cfg->fc_type;
+	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 
 	/*
@@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		fz->fz_nent++;
 	rt_cache_flush(-1);
 
-	rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req);
+	rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
+		  &cfg->fc_nlinfo);
 	return 0;
 
 out_free_new_fa:
@@ -537,30 +535,25 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 }
 
 
-static int
-fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 	struct fib_node *f;
 	struct fib_alias *fa, *fa_to_delete;
-	int z = r->rtm_dst_len;
 	struct fn_zone *fz;
 	u32 key;
-	u8 tos = r->rtm_tos;
 
-	if (z > 32)
+	if (cfg->fc_dst_len > 32)
 		return -EINVAL;
-	if ((fz  = table->fn_zones[z]) == NULL)
+
+	if ((fz  = table->fn_zones[cfg->fc_dst_len]) == NULL)
 		return -ESRCH;
 
 	key = 0;
-	if (rta->rta_dst) {
-		u32 dst;
-		memcpy(&dst, rta->rta_dst, 4);
-		if (dst & ~FZ_MASK(fz))
+	if (cfg->fc_dst) {
+		if (cfg->fc_dst & ~FZ_MASK(fz))
 			return -EINVAL;
-		key = fz_key(dst, fz);
+		key = fz_key(cfg->fc_dst, fz);
 	}
 
 	f = fib_find_node(fz, key);
@@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 	if (!f)
 		fa = NULL;
 	else
-		fa = fib_find_alias(&f->fn_alias, tos, 0);
+		fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
 	if (!fa)
 		return -ESRCH;
 
@@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 	list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
-		if (fa->fa_tos != tos)
+		if (fa->fa_tos != cfg->fc_tos)
 			break;
 
-		if ((!r->rtm_type ||
-		     fa->fa_type == r->rtm_type) &&
-		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == r->rtm_scope) &&
-		    (!r->rtm_protocol ||
-		     fi->fib_protocol == r->rtm_protocol) &&
-		    fib_nh_match(r, n, rta, fi) == 0) {
+		if ((!cfg->fc_type ||
+		     fa->fa_type == cfg->fc_type) &&
+		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_protocol ||
+		     fi->fib_protocol == cfg->fc_protocol) &&
+		    fib_nh_match(cfg, fi) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		int kill_fn;
 
 		fa = fa_to_delete;
-		rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
+		rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
+			  tb->tb_id, &cfg->fc_nlinfo);
 
 		kill_fn = 0;
 		write_lock_bh(&fib_hash_lock);
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ddd52496b4511..d6d1a89e40030 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -23,19 +23,14 @@ extern int fib_semantic_match(struct list_head *head,
 			      struct fib_result *res, __u32 zone, __u32 mask,
 				int prefixlen);
 extern void fib_release_info(struct fib_info *);
-extern struct fib_info *fib_create_info(const struct rtmsg *r,
-					struct kern_rta *rta,
-					const struct nlmsghdr *,
-					int *err);
-extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
-			struct kern_rta *rta, struct fib_info *fi);
+extern struct fib_info *fib_create_info(struct fib_config *cfg);
+extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			 u32 tb_id, u8 type, u8 scope, void *dst,
 			 int dst_len, u8 tos, struct fib_info *fi,
 			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-		      int z, u32 tb_id,
-		      struct nlmsghdr *n, struct netlink_skb_parms *req);
+		      int dst_len, u32 tb_id, struct nl_info *info);
 extern struct fib_alias *fib_find_alias(struct list_head *fah,
 					u8 tos, u32 prio);
 extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5dfdad5cbcd43..340f9db389e57 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
 #include <net/ip_fib.h>
 #include <net/ip_mp_alg.h>
 #include <net/netlink.h>
+#include <net/nexthop.h>
 
 #include "fib_lookup.h"
 
@@ -273,27 +274,27 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
 }
 
 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-	       int z, u32 tb_id,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+	       int dst_len, u32 tb_id, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : n->nlmsg_pid;
 	int payload = sizeof(struct rtmsg) + 256;
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
 	int err = -ENOBUFS;
 
 	skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
-	err = fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
-			    fa->fa_type, fa->fa_scope, &key, z, fa->fa_tos,
-			    fa->fa_info, 0);
+	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+			    fa->fa_type, fa->fa_scope, &key, dst_len,
+			    fa->fa_tos, fa->fa_info, 0);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto errout;
 	}
 
-	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_ROUTE, n, GFP_KERNEL);
+	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
+			  info->nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
 		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
@@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
-static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
-{
-	while (RTA_OK(attr,attrlen)) {
-		if (attr->rta_type == type)
-			return *(u32*)RTA_DATA(attr);
-		attr = RTA_NEXT(attr, attrlen);
-	}
-	return 0;
-}
-
-static int
-fib_count_nexthops(struct rtattr *rta)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
 {
 	int nhs = 0;
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
 
-	while (nhlen >= (int)sizeof(struct rtnexthop)) {
-		if ((nhlen -= nhp->rtnh_len) < 0)
-			return 0;
+	while (rtnh_ok(rtnh, remaining)) {
 		nhs++;
-		nhp = RTNH_NEXT(nhp);
-	};
-	return nhs;
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	/* leftover implies invalid nexthop configuration, discard it */
+	return remaining > 0 ? 0 : nhs;
 }
 
-static int
-fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+		       int remaining, struct fib_config *cfg)
 {
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
-
 	change_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		int attrlen;
+
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
-		nh->nh_oif = nhp->rtnh_ifindex;
-		nh->nh_weight = nhp->rtnh_hops + 1;
-		if (attrlen) {
-			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+
+		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+		nh->nh_oif = rtnh->rtnh_ifindex;
+		nh->nh_weight = rtnh->rtnh_hops + 1;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			nh->nh_gw = nla ? nla_get_u32(nla) : 0;
 #ifdef CONFIG_NET_CLS_ROUTE
-			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
+
 	return 0;
 }
 
 #endif
 
-int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
-		 struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	struct rtnexthop *nhp;
-	int nhlen;
+	struct rtnexthop *rtnh;
+	int remaining;
 #endif
 
-	if (rta->rta_priority &&
-	    *rta->rta_priority != fi->fib_priority)
+	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
-	if (rta->rta_oif || rta->rta_gw) {
-		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
-		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+	if (cfg->fc_oif || cfg->fc_gw) {
+		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
+		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
 			return 0;
 		return 1;
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp == NULL)
+	if (cfg->fc_mp == NULL)
 		return 0;
-	nhp = RTA_DATA(rta->rta_mp);
-	nhlen = RTA_PAYLOAD(rta->rta_mp);
+
+	rtnh = cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
 	
 	for_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		u32 gw;
+		int attrlen;
 
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+
+		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
 			return 1;
-		if (attrlen) {
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
-			if (gw && gw != nh->nh_gw)
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla && nla_get_u32(nla) != nh->nh_gw)
 				return 1;
 #ifdef CONFIG_NET_CLS_ROUTE
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
-			if (gw && gw != nh->nh_tclassid)
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 				return 1;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
 #endif
 	return 0;
@@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
 						|-> {local prefix} (terminal node)
  */
 
-static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+			struct fib_nh *nh)
 {
 	int err;
 
@@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
 		if (nh->nh_flags&RTNH_F_ONLINK) {
 			struct net_device *dev;
 
-			if (r->rtm_scope >= RT_SCOPE_LINK)
+			if (cfg->fc_scope >= RT_SCOPE_LINK)
 				return -EINVAL;
 			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
 				return -EINVAL;
@@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
 			return 0;
 		}
 		{
-			struct flowi fl = { .nl_u = { .ip4_u =
-						      { .daddr = nh->nh_gw,
-							.scope = r->rtm_scope + 1 } },
-					    .oif = nh->nh_oif };
+			struct flowi fl = {
+				.nl_u = {
+					.ip4_u = {
+						.daddr = nh->nh_gw,
+						.scope = cfg->fc_scope + 1,
+					},
+				},
+				.oif = nh->nh_oif,
+			};
 
 			/* It is not necessary, but requires a bit of thinking */
 			if (fl.fl4_scope < RT_SCOPE_LINK)
@@ -646,39 +651,28 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	fib_hash_free(old_laddrhash, bytes);
 }
 
-struct fib_info *
-fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
-		const struct nlmsghdr *nlh, int *errp)
+struct fib_info *fib_create_info(struct fib_config *cfg)
 {
 	int err;
 	struct fib_info *fi = NULL;
 	struct fib_info *ofi;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int nhs = 1;
-#else
-	const int nhs = 1;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	u32 mp_alg = IP_MP_ALG_NONE;
-#endif
 
 	/* Fast check to catch the most weird cases */
-	if (fib_props[r->rtm_type].scope > r->rtm_scope)
+	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
 		goto err_inval;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp) {
-		nhs = fib_count_nexthops(rta->rta_mp);
+	if (cfg->fc_mp) {
+		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
 		if (nhs == 0)
 			goto err_inval;
 	}
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rta->rta_mp_alg) {
-		mp_alg = *rta->rta_mp_alg;
-
-		if (mp_alg < IP_MP_ALG_NONE ||
-		    mp_alg > IP_MP_ALG_MAX)
+	if (cfg->fc_mp_alg) {
+		if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+		    cfg->fc_mp_alg > IP_MP_ALG_MAX)
 			goto err_inval;
 	}
 #endif
@@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 		goto failure;
 	fib_info_cnt++;
 
-	fi->fib_protocol = r->rtm_protocol;
+	fi->fib_protocol = cfg->fc_protocol;
+	fi->fib_flags = cfg->fc_flags;
+	fi->fib_priority = cfg->fc_priority;
+	fi->fib_prefsrc = cfg->fc_prefsrc;
 
 	fi->fib_nhs = nhs;
 	change_nexthops(fi) {
 		nh->nh_parent = fi;
 	} endfor_nexthops(fi)
 
-	fi->fib_flags = r->rtm_flags;
-	if (rta->rta_priority)
-		fi->fib_priority = *rta->rta_priority;
-	if (rta->rta_mx) {
-		int attrlen = RTA_PAYLOAD(rta->rta_mx);
-		struct rtattr *attr = RTA_DATA(rta->rta_mx);
-
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX)
+	if (cfg->fc_mx) {
+		struct nlattr *nla;
+		int remaining;
+
+		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+			int type = nla->nla_type;
+
+			if (type) {
+				if (type > RTAX_MAX)
 					goto err_inval;
-				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+				fi->fib_metrics[type - 1] = nla_get_u32(nla);
 			}
-			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
-	if (rta->rta_prefsrc)
-		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
 
-	if (rta->rta_mp) {
+	if (cfg->fc_mp) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+		if (err != 0)
 			goto failure;
-		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
 			goto err_inval;
-		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 			goto err_inval;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
+		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 			goto err_inval;
 #endif
 #else
@@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 #endif
 	} else {
 		struct fib_nh *nh = fi->fib_nh;
-		if (rta->rta_oif)
-			nh->nh_oif = *rta->rta_oif;
-		if (rta->rta_gw)
-			memcpy(&nh->nh_gw, rta->rta_gw, 4);
+
+		nh->nh_oif = cfg->fc_oif;
+		nh->nh_gw = cfg->fc_gw;
+		nh->nh_flags = cfg->fc_flags;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow)
-			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
+		nh->nh_tclassid = cfg->fc_flow;
 #endif
-		nh->nh_flags = r->rtm_flags;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		nh->nh_weight = 1;
 #endif
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	fi->fib_mp_alg = mp_alg;
+	fi->fib_mp_alg = cfg->fc_mp_alg;
 #endif
 
-	if (fib_props[r->rtm_type].error) {
-		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+	if (fib_props[cfg->fc_type].error) {
+		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
 			goto err_inval;
 		goto link_it;
 	}
 
-	if (r->rtm_scope > RT_SCOPE_HOST)
+	if (cfg->fc_scope > RT_SCOPE_HOST)
 		goto err_inval;
 
-	if (r->rtm_scope == RT_SCOPE_HOST) {
+	if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
@@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 			goto failure;
 	} else {
 		change_nexthops(fi) {
-			if ((err = fib_check_nh(r, fi, nh)) != 0)
+			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
 				goto failure;
 		} endfor_nexthops(fi)
 	}
 
 	if (fi->fib_prefsrc) {
-		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
-		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+		    fi->fib_prefsrc != cfg->fc_dst)
 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
 				goto err_inval;
 	}
@@ -846,12 +837,12 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 	err = -EINVAL;
 
 failure:
-        *errp = err;
         if (fi) {
 		fi->fib_dead = 1;
 		free_fib_info(fi);
 	}
-	return NULL;
+
+	return ERR_PTR(err);
 }
 
 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
@@ -1012,150 +1003,6 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	return -1;
 }
 
-#ifndef CONFIG_IP_NOSIOCRT
-
-int
-fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
-		    struct kern_rta *rta, struct rtentry *r)
-{
-	int    plen;
-	u32    *ptr;
-
-	memset(rtm, 0, sizeof(*rtm));
-	memset(rta, 0, sizeof(*rta));
-
-	if (r->rt_dst.sa_family != AF_INET)
-		return -EAFNOSUPPORT;
-
-	/* Check mask for validity:
-	   a) it must be contiguous.
-	   b) destination must have all host bits clear.
-	   c) if application forgot to set correct family (AF_INET),
-	      reject request unless it is absolutely clear i.e.
-	      both family and mask are zero.
-	 */
-	plen = 32;
-	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
-	if (!(r->rt_flags&RTF_HOST)) {
-		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
-		if (r->rt_genmask.sa_family != AF_INET) {
-			if (mask || r->rt_genmask.sa_family)
-				return -EAFNOSUPPORT;
-		}
-		if (bad_mask(mask, *ptr))
-			return -EINVAL;
-		plen = inet_mask_len(mask);
-	}
-
-	nl->nlmsg_flags = NLM_F_REQUEST;
-	nl->nlmsg_pid = 0;
-	nl->nlmsg_seq = 0;
-	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
-	if (cmd == SIOCDELRT) {
-		nl->nlmsg_type = RTM_DELROUTE;
-		nl->nlmsg_flags = 0;
-	} else {
-		nl->nlmsg_type = RTM_NEWROUTE;
-		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
-		rtm->rtm_protocol = RTPROT_BOOT;
-	}
-
-	rtm->rtm_dst_len = plen;
-	rta->rta_dst = ptr;
-
-	if (r->rt_metric) {
-		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
-		rta->rta_priority = (u32*)&r->rt_pad3;
-	}
-	if (r->rt_flags&RTF_REJECT) {
-		rtm->rtm_scope = RT_SCOPE_HOST;
-		rtm->rtm_type = RTN_UNREACHABLE;
-		return 0;
-	}
-	rtm->rtm_scope = RT_SCOPE_NOWHERE;
-	rtm->rtm_type = RTN_UNICAST;
-
-	if (r->rt_dev) {
-		char *colon;
-		struct net_device *dev;
-		char   devname[IFNAMSIZ];
-
-		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
-			return -EFAULT;
-		devname[IFNAMSIZ-1] = 0;
-		colon = strchr(devname, ':');
-		if (colon)
-			*colon = 0;
-		dev = __dev_get_by_name(devname);
-		if (!dev)
-			return -ENODEV;
-		rta->rta_oif = &dev->ifindex;
-		if (colon) {
-			struct in_ifaddr *ifa;
-			struct in_device *in_dev = __in_dev_get_rtnl(dev);
-			if (!in_dev)
-				return -ENODEV;
-			*colon = ':';
-			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
-				if (strcmp(ifa->ifa_label, devname) == 0)
-					break;
-			if (ifa == NULL)
-				return -ENODEV;
-			rta->rta_prefsrc = &ifa->ifa_local;
-		}
-	}
-
-	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
-	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
-		rta->rta_gw = ptr;
-		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
-			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
-	}
-
-	if (cmd == SIOCDELRT)
-		return 0;
-
-	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
-		return -EINVAL;
-
-	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
-		rtm->rtm_scope = RT_SCOPE_LINK;
-
-	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
-		struct rtattr *rec;
-		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
-		if (mx == NULL)
-			return -ENOMEM;
-		rta->rta_mx = mx;
-		mx->rta_type = RTA_METRICS;
-		mx->rta_len  = RTA_LENGTH(0);
-		if (r->rt_flags&RTF_MTU) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_ADVMSS;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
-		}
-		if (r->rt_flags&RTF_WINDOW) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_WINDOW;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_window;
-		}
-		if (r->rt_flags&RTF_IRTT) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_RTT;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
-		}
-	}
-	return 0;
-}
-
-#endif
-
 /*
    Update FIB if:
    - local address disappeared -> we must delete all the entries
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2a580eb2579bd..41bef0a88ab68 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1124,17 +1124,14 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 	return fa_head;
 }
 
-static int
-fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
 	struct list_head *fa_head = NULL;
 	struct fib_info *fi;
-	int plen = r->rtm_dst_len;
-	int type = r->rtm_type;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	u32 key, mask;
 	int err;
 	struct leaf *l;
@@ -1142,11 +1139,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
-
-	key = ntohl(key);
+	key = ntohl(cfg->fc_dst);
 
 	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
 
@@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 
 	key = key & mask;
 
-	fi = fib_create_info(r, rta, nlhdr, &err);
-
-	if (!fi)
+	fi = fib_create_info(cfg);
+	if (IS_ERR(fi)) {
+		err = PTR_ERR(fi);
 		goto err;
+	}
 
 	l = fib_find_node(t, key);
 	fa = NULL;
@@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		struct fib_alias *fa_orig;
 
 		err = -EEXIST;
-		if (nlhdr->nlmsg_flags & NLM_F_EXCL)
+		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
-		if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
+		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
@@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 			fi_drop = fa->fa_info;
 			new_fa->fa_tos = fa->fa_tos;
 			new_fa->fa_info = fi;
-			new_fa->fa_type = type;
-			new_fa->fa_scope = r->rtm_scope;
+			new_fa->fa_type = cfg->fc_type;
+			new_fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			new_fa->fa_state &= ~FA_S_ACCESSED;
 
@@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
-			if (fa->fa_type == type &&
-			    fa->fa_scope == r->rtm_scope &&
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi) {
 				goto out;
 			}
 		}
-		if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
+		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
 	err = -ENOENT;
-	if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
+	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
 	err = -ENOBUFS;
@@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
-	new_fa->fa_type = type;
-	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_type = cfg->fc_type;
+	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 	/*
 	 * Insert new entry to the list.
@@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 			  (fa ? &fa->fa_list : fa_head));
 
 	rt_cache_flush(-1);
-	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 succeeded:
 	return 0;
 
@@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 	return 1;
 }
 
-static int
-fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-		struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	u32 key, mask;
-	int plen = r->rtm_dst_len;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	struct fib_alias *fa, *fa_to_delete;
 	struct list_head *fa_head;
 	struct leaf *l;
 	struct leaf_info *li;
 
-
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
-
-	key = ntohl(key);
+	key = ntohl(cfg->fc_dst);
 	mask = ntohl(inet_make_mask(plen));
 
 	if (key & ~mask)
@@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		if (fa->fa_tos != tos)
 			break;
 
-		if ((!r->rtm_type ||
-		     fa->fa_type == r->rtm_type) &&
-		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == r->rtm_scope) &&
-		    (!r->rtm_protocol ||
-		     fi->fib_protocol == r->rtm_protocol) &&
-		    fib_nh_match(r, nlhdr, rta, fi) == 0) {
+		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
+		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_protocol ||
+		     fi->fib_protocol == cfg->fc_protocol) &&
+		    fib_nh_match(cfg, fi) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		return -ESRCH;
 
 	fa = fa_to_delete;
-	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 
 	l = fib_find_node(t, key);
 	li = find_leaf_info(l, plen);
-- 
GitLab


From be403ea1856f1428b5912b42184acbba808c41d6 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 17 Aug 2006 18:15:17 -0700
Subject: [PATCH 0486/1063] [IPv4]: Convert FIB dumping to use new netlink api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c  |  4 +-
 net/ipv4/fib_hash.c      |  2 +-
 net/ipv4/fib_lookup.h    |  2 +-
 net/ipv4/fib_semantics.c | 88 ++++++++++++++++++++++------------------
 net/ipv4/fib_trie.c      |  2 +-
 net/ipv4/route.c         | 68 +++++++++++++++----------------
 6 files changed, 86 insertions(+), 80 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index acc18bdf2dee5..d537c933abe37 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -591,8 +591,8 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	struct hlist_node *node;
 	int dumped = 0;
 
-	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
-	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
+	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
 		return ip_rt_dump(skb, cb);
 
 	s_h = cb->args[0];
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 357557549ce53..88133b383dc58 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -693,7 +693,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 					  tb->tb_id,
 					  fa->fa_type,
 					  fa->fa_scope,
-					  &f->fn_key,
+					  f->fn_key,
 					  fz->fz_order,
 					  fa->fa_tos,
 					  fa->fa_info,
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index d6d1a89e40030..fd6f7769f8ab7 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -26,7 +26,7 @@ extern void fib_release_info(struct fib_info *);
 extern struct fib_info *fib_create_info(struct fib_config *cfg);
 extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			 u32 tb_id, u8 type, u8 scope, void *dst,
+			 u32 tb_id, u8 type, u8 scope, u32 dst,
 			 int dst_len, u8 tos, struct fib_info *fi,
 			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 340f9db389e57..2ead09543f688 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -286,7 +286,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 		goto errout;
 
 	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
-			    fa->fa_type, fa->fa_scope, &key, dst_len,
+			    fa->fa_type, fa->fa_scope, key, dst_len,
 			    fa->fa_tos, fa->fa_info, 0);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -928,79 +928,87 @@ u32 __fib_res_prefsrc(struct fib_result *res)
 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
 }
 
-int
-fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-	      u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
-	      struct fib_info *fi, unsigned int flags)
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+		  u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
+		  struct fib_info *fi, unsigned int flags)
 {
+	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
 	rtm->rtm_table = tb_id;
-	RTA_PUT_U32(skb, RTA_TABLE, tb_id);
+	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = scope;
-	if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 4, dst);
 	rtm->rtm_protocol = fi->fib_protocol;
+
+	if (rtm->rtm_dst_len)
+		NLA_PUT_U32(skb, RTA_DST, dst);
+
 	if (fi->fib_priority)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
+		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
+
 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	if (fi->fib_prefsrc)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+		NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
+
 	if (fi->fib_nhs == 1) {
 		if (fi->fib_nh->nh_gw)
-			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+			NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
+
 		if (fi->fib_nh->nh_oif)
-			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
 #ifdef CONFIG_NET_CLS_ROUTE
 		if (fi->fib_nh[0].nh_tclassid)
-			RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (fi->fib_nhs > 1) {
-		struct rtnexthop *nhp;
-		struct rtattr *mp_head;
-		if (skb_tailroom(skb) <= RTA_SPACE(0))
-			goto rtattr_failure;
-		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
+		struct rtnexthop *rtnh;
+		struct nlattr *mp;
+
+		mp = nla_nest_start(skb, RTA_MULTIPATH);
+		if (mp == NULL)
+			goto nla_put_failure;
 
 		for_nexthops(fi) {
-			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
-				goto rtattr_failure;
-			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
-			nhp->rtnh_flags = nh->nh_flags & 0xFF;
-			nhp->rtnh_hops = nh->nh_weight-1;
-			nhp->rtnh_ifindex = nh->nh_oif;
+			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+			if (rtnh == NULL)
+				goto nla_put_failure;
+
+			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+			rtnh->rtnh_hops = nh->nh_weight - 1;
+			rtnh->rtnh_ifindex = nh->nh_oif;
+
 			if (nh->nh_gw)
-				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+				NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
 #ifdef CONFIG_NET_CLS_ROUTE
 			if (nh->nh_tclassid)
-				RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
+				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
-			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+			/* length of rtnetlink header + attributes */
+			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
 		} endfor_nexthops(fi);
-		mp_head->rta_type = RTA_MULTIPATH;
-		mp_head->rta_len = skb->tail - (u8*)mp_head;
+
+		nla_nest_end(skb, mp);
 	}
 #endif
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 /*
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 41bef0a88ab68..9c3ff6ba6e218 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1854,7 +1854,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 				  tb->tb_id,
 				  fa->fa_type,
 				  fa->fa_scope,
-				  &xkey,
+				  xkey,
 				  plen,
 				  fa->fa_tos,
 				  fa->fa_info, 0) < 0) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b8f6cadc5b3a9..31b67059ac29c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2639,52 +2639,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 {
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtmsg *r;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	struct nlmsghdr *nlh;
 	struct rta_cacheinfo ci;
-#ifdef CONFIG_IP_MROUTE
-	struct rtattr *eptr;
-#endif
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
+
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	r = nlmsg_data(nlh);
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= rt->fl.fl4_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
-	RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
+	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
 	r->rtm_protocol = RTPROT_UNSPEC;
 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	if (rt->rt_flags & RTCF_NOTIFY)
 		r->rtm_flags |= RTM_F_NOTIFY;
-	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+
+	NLA_PUT_U32(skb, RTA_DST, rt->rt_dst);
+
 	if (rt->fl.fl4_src) {
 		r->rtm_src_len = 32;
-		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+		NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
 	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (rt->u.dst.tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
-		__u32 alg = rt->rt_multipath_alg;
-
-		RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
-	}
+	if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+		NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
 #endif
 	if (rt->fl.iif)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+		NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+		NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src);
+
 	if (rt->rt_dst != rt->rt_gateway)
-		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+		NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway);
+
 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	ci.rta_lastuse	= jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	ci.rta_used	= rt->u.dst.__use;
 	ci.rta_clntref	= atomic_read(&rt->u.dst.__refcnt);
@@ -2701,10 +2703,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
 		}
 	}
-#ifdef CONFIG_IP_MROUTE
-	eptr = (struct rtattr*)skb->tail;
-#endif
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
 	if (rt->fl.iif) {
 #ifdef CONFIG_IP_MROUTE
 		u32 dst = rt->rt_dst;
@@ -2716,25 +2715,24 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 				if (!nowait) {
 					if (err == 0)
 						return 0;
-					goto nlmsg_failure;
+					goto nla_put_failure;
 				} else {
 					if (err == -EMSGSIZE)
-						goto nlmsg_failure;
-					((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
+						goto nla_put_failure;
+					ci.rta_error = err;
 				}
 			}
 		} else
 #endif
-			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
-- 
GitLab


From d889ce3b29e55b91257964b4c9aac70b91fedd91 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 17 Aug 2006 18:15:44 -0700
Subject: [PATCH 0487/1063] [IPv4]: Convert route get to new netlink api

Fixes various unvalidated netlink attributes causing memory
corruptions when left empty by userspace applications.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h    |  1 +
 net/ipv4/fib_frontend.c |  2 +-
 net/ipv4/route.c        | 84 ++++++++++++++++++++++-------------------
 3 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 42ed96fab3f58..fcc159a4ac17f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -216,6 +216,7 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
+extern struct nla_policy rtm_ipv4_policy[];
 extern void		ip_fib_init(void);
 extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d537c933abe37..d0abeab16e663 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -453,7 +453,7 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
 
 #endif
 
-static struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
 	[RTA_DST]		= { .type = NLA_U32 },
 	[RTA_SRC]		= { .type = NLA_U32 },
 	[RTA_IIF]		= { .type = NLA_U32 },
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 31b67059ac29c..a4d4cb85a16c2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2737,18 +2737,24 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
 int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
 	struct rtable *rt = NULL;
-	u32 dst = 0;
-	u32 src = 0;
-	int iif = 0;
-	int err = -ENOBUFS;
+	u32 dst, src, iif;
+	int err;
 	struct sk_buff *skb;
 
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	rtm = nlmsg_data(nlh);
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		goto out;
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
@@ -2759,61 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	skb->nh.iph->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
-	if (rta[RTA_SRC - 1])
-		memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
-	if (rta[RTA_DST - 1])
-		memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
-	if (rta[RTA_IIF - 1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
+	src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0;
+	dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0;
+	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
 
 	if (iif) {
-		struct net_device *dev = __dev_get_by_index(iif);
-		err = -ENODEV;
-		if (!dev)
-			goto out_free;
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(iif);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto errout_free;
+		}
+
 		skb->protocol	= htons(ETH_P_IP);
 		skb->dev	= dev;
 		local_bh_disable();
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
-		rt = (struct rtable*)skb->dst;
-		if (!err && rt->u.dst.error)
+
+		rt = (struct rtable*) skb->dst;
+		if (err == 0 && rt->u.dst.error)
 			err = -rt->u.dst.error;
 	} else {
-		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
-							 .saddr = src,
-							 .tos = rtm->rtm_tos } } };
-		int oif = 0;
-		if (rta[RTA_OIF - 1])
-			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
-		fl.oif = oif;
+		struct flowi fl = {
+			.nl_u = {
+				.ip4_u = {
+					.daddr = dst,
+					.saddr = src,
+					.tos = rtm->rtm_tos,
+				},
+			},
+			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+		};
 		err = ip_route_output_key(&rt, &fl);
 	}
+
 	if (err)
-		goto out_free;
+		goto errout_free;
 
 	skb->dst = &rt->u.dst;
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 				RTM_NEWROUTE, 0, 0);
-	if (!err)
-		goto out_free;
-	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
-	}
+	if (err <= 0)
+		goto errout_free;
 
 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-out:
+errout:
 	return err;
 
-out_free:
+errout_free:
 	kfree_skb(skb);
-	goto out;
+	goto errout;
 }
 
 int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
-- 
GitLab


From e92b43a3455d3e817c13481bb3ea3cd29d0a47f4 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 17 Aug 2006 18:17:37 -0700
Subject: [PATCH 0488/1063] [NET] neighbour: reduce exports

There are several symbols only used by rtnetlink and since it cannot
be a module, there is no reason to export them.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c7e653ff5ed03..c0a27407f4452 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -889,7 +889,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 	return rc;
 }
 
-static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
@@ -2724,7 +2724,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
 #endif	/* CONFIG_SYSCTL */
 
 EXPORT_SYMBOL(__neigh_event_send);
-EXPORT_SYMBOL(neigh_add);
 EXPORT_SYMBOL(neigh_changeaddr);
 EXPORT_SYMBOL(neigh_compat_output);
 EXPORT_SYMBOL(neigh_connected_output);
@@ -2744,11 +2743,8 @@ EXPORT_SYMBOL(neigh_table_clear);
 EXPORT_SYMBOL(neigh_table_init);
 EXPORT_SYMBOL(neigh_table_init_no_netlink);
 EXPORT_SYMBOL(neigh_update);
-EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);
-EXPORT_SYMBOL(neightbl_dump_info);
-EXPORT_SYMBOL(neightbl_set);
 
 #ifdef CONFIG_ARPD
 EXPORT_SYMBOL(neigh_app_ns);
-- 
GitLab


From d3e01f71863da30a2d6bfca069a036168b6c8607 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 17 Aug 2006 18:18:53 -0700
Subject: [PATCH 0489/1063] [ETH]: docbook comments

Add docbook style comments to ethernet support.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/eth.c | 100 +++++++++++++++++++++++++++++++--------------
 1 file changed, 69 insertions(+), 31 deletions(-)

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 387c71c584ee3..72bdb15036ecf 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -64,23 +64,24 @@
 
 __setup("ether=", netdev_boot_setup);
 
-/*
- *	 Create the Ethernet MAC header for an arbitrary protocol layer 
+/**
+ * eth_header - create the Ethernet header
+ * @skb:	buffer to alter
+ * @dev:	source device
+ * @type:	Ethernet type field
+ * @daddr: destination address (NULL leave destination address)
+ * @saddr: source address (NULL use device source address)
+ * @len:   packet length (<= skb->len)
  *
- *	saddr=NULL	means use device source address
- *	daddr=NULL	means leave destination address (eg unresolved arp)
+ *
+ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+ * in here instead. It is up to the 802.2 layer to carry protocol information.
  */
-
 int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 	   void *daddr, void *saddr, unsigned len)
 {
 	struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
 
-	/* 
-	 *	Set the protocol type. For a packet of type ETH_P_802_3 we put the length
-	 *	in here instead. It is up to the 802.2 layer to carry protocol information.
-	 */
-	
 	if(type!=ETH_P_802_3) 
 		eth->h_proto = htons(type);
 	else
@@ -113,16 +114,16 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 	return -ETH_HLEN;
 }
 
-
-/*
- *	Rebuild the Ethernet MAC header. This is called after an ARP
- *	(or in future other address resolution) has completed on this
- *	sk_buff. We now let ARP fill in the other fields.
+/**
+ * eth_rebuild_header- rebuild the Ethernet MAC header.
+ * @skb: socket buffer to update
+ *
+ * This is called after an ARP or IPV6 ndisc it's resolution on this
+ * sk_buff. We now let protocol (ARP) fill in the other fields.
  *
- *	This routine CANNOT use cached dst->neigh!
- *	Really, it is used only when dst->neigh is wrong.
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
  */
-
 int eth_rebuild_header(struct sk_buff *skb)
 {
 	struct ethhdr *eth = (struct ethhdr *)skb->data;
@@ -147,12 +148,15 @@ int eth_rebuild_header(struct sk_buff *skb)
 }
 
 
-/*
- *	Determine the packet's protocol ID. The rule here is that we 
- *	assume 802.3 if the type field is short enough to be a length.
- *	This is normal practice and works for any 'now in use' protocol.
+/**
+ * eth_type_trans - determine the packet's protocol ID.
+ * @skb: received socket data
+ * @dev: receiving network device
+ *
+ * The rule here is that we
+ * assume 802.3 if the type field is short enough to be a length.
+ * This is normal practice and works for any 'now in use' protocol.
  */
- 
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
@@ -202,6 +206,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	return htons(ETH_P_802_2);
 }
 
+/**
+ * eth_header_parse - extract hardware address from packet
+ * @skb: packet to extract header from
+ * @haddr: destination buffer
+ */
 static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
 	struct ethhdr *eth = eth_hdr(skb);
@@ -209,6 +218,12 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
 	return ETH_ALEN;
 }
 
+/**
+ * eth_header_cache - fill cache entry from neighbour
+ * @neigh: source neighbour
+ * @hh: destination cache entry
+ * Create an Ethernet header template from the neighbour.
+ */
 int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 {
 	unsigned short type = hh->hh_type;
@@ -228,10 +243,14 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 	return 0;
 }
 
-/*
+/**
+ * eth_header_cache_update - update cache entry
+ * @hh: destination cache entry
+ * @dev: network device
+ * @haddr: new hardware address
+ *
  * Called by Address Resolution module to notify changes in address.
  */
-
 void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
 {
 	memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
@@ -240,6 +259,15 @@ void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsign
 
 EXPORT_SYMBOL(eth_type_trans);
 
+/**
+ * eth_mac_addr - set new Ethernet hardware address
+ * @dev: network device
+ * @p: socket address
+ * Change hardware address of device.
+ *
+ * This doesn't change hardware matching, so needs to be overridden
+ * for most real devices.
+ */
 static int eth_mac_addr(struct net_device *dev, void *p)
 {
 	struct sockaddr *addr=p;
@@ -249,6 +277,14 @@ static int eth_mac_addr(struct net_device *dev, void *p)
 	return 0;
 }
 
+/**
+ * eth_change_mtu - set new MTU size
+ * @dev: network device
+ * @new_mtu: new Maximum Transfer Unit
+ *
+ * Allow changing MTU size. Needs to be overridden for devices
+ * supporting jumbo frames.
+ */
 static int eth_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
@@ -257,8 +293,10 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-/*
- * Fill in the fields of the device structure with ethernet-generic values.
+/**
+ * ether_setup - setup Ethernet network device
+ * @dev: network device
+ * Fill in the fields of the device structure with Ethernet-generic values.
  */
 void ether_setup(struct net_device *dev)
 {
@@ -283,15 +321,15 @@ void ether_setup(struct net_device *dev)
 EXPORT_SYMBOL(ether_setup);
 
 /**
- * alloc_etherdev - Allocates and sets up an ethernet device
+ * alloc_etherdev - Allocates and sets up an Ethernet device
  * @sizeof_priv: Size of additional driver-private structure to be allocated
- *	for this ethernet device
+ *	for this Ethernet device
  *
- * Fill in the fields of the device structure with ethernet-generic
+ * Fill in the fields of the device structure with Ethernet-generic
  * values. Basically does everything except registering the device.
  *
  * Constructs a new net device, complete with a private data area of
- * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * size (sizeof_priv).  A 32-byte (not bit) alignment is enforced for
  * this private data area.
  */
 
-- 
GitLab


From 2e4ca75b31b6851dcc036c2cdebf3ecfe279a653 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 17 Aug 2006 18:20:18 -0700
Subject: [PATCH 0490/1063] [ETH]: indentation and cleanup

Run ethernet support through Lindent and fix up.
Applies after docbook comments patch

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/eth.c | 96 ++++++++++++++++++++++------------------------
 1 file changed, 46 insertions(+), 50 deletions(-)

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 72bdb15036ecf..43863933f27f9 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -78,39 +78,37 @@ __setup("ether=", netdev_boot_setup);
  * in here instead. It is up to the 802.2 layer to carry protocol information.
  */
 int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
-	   void *daddr, void *saddr, unsigned len)
+	       void *daddr, void *saddr, unsigned len)
 {
-	struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+	struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
 
-	if(type!=ETH_P_802_3) 
+	if (type != ETH_P_802_3)
 		eth->h_proto = htons(type);
 	else
 		eth->h_proto = htons(len);
 
 	/*
-	 *	Set the source hardware address. 
+	 *      Set the source hardware address.
 	 */
-	 
-	if(!saddr)
+
+	if (!saddr)
 		saddr = dev->dev_addr;
-	memcpy(eth->h_source,saddr,dev->addr_len);
+	memcpy(eth->h_source, saddr, dev->addr_len);
 
-	if(daddr)
-	{
-		memcpy(eth->h_dest,daddr,dev->addr_len);
+	if (daddr) {
+		memcpy(eth->h_dest, daddr, dev->addr_len);
 		return ETH_HLEN;
 	}
-	
+
 	/*
-	 *	Anyway, the loopback-device should never use this function... 
+	 *      Anyway, the loopback-device should never use this function...
 	 */
 
-	if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) 
-	{
+	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
 		memset(eth->h_dest, 0, dev->addr_len);
 		return ETH_HLEN;
 	}
-	
+
 	return -ETH_HLEN;
 }
 
@@ -129,17 +127,16 @@ int eth_rebuild_header(struct sk_buff *skb)
 	struct ethhdr *eth = (struct ethhdr *)skb->data;
 	struct net_device *dev = skb->dev;
 
-	switch (eth->h_proto)
-	{
+	switch (eth->h_proto) {
 #ifdef CONFIG_INET
 	case __constant_htons(ETH_P_IP):
- 		return arp_find(eth->h_dest, skb);
-#endif	
+		return arp_find(eth->h_dest, skb);
+#endif
 	default:
 		printk(KERN_DEBUG
-		       "%s: unable to resolve type %X addresses.\n", 
+		       "%s: unable to resolve type %X addresses.\n",
 		       dev->name, (int)eth->h_proto);
-		
+
 		memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
 		break;
 	}
@@ -147,7 +144,6 @@ int eth_rebuild_header(struct sk_buff *skb)
 	return 0;
 }
 
-
 /**
  * eth_type_trans - determine the packet's protocol ID.
  * @skb: received socket data
@@ -161,50 +157,51 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
 	unsigned char *rawp;
-	
+
 	skb->mac.raw = skb->data;
-	skb_pull(skb,ETH_HLEN);
+	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
-	
+
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
 			skb->pkt_type = PACKET_MULTICAST;
 	}
-	
+
 	/*
-	 *	This ALLMULTI check should be redundant by 1.4
-	 *	so don't forget to remove it.
+	 *      This ALLMULTI check should be redundant by 1.4
+	 *      so don't forget to remove it.
 	 *
-	 *	Seems, you forgot to remove it. All silly devices
-	 *	seems to set IFF_PROMISC.
+	 *      Seems, you forgot to remove it. All silly devices
+	 *      seems to set IFF_PROMISC.
 	 */
-	 
-	else if(1 /*dev->flags&IFF_PROMISC*/) {
+
+	else if (1 /*dev->flags&IFF_PROMISC */ ) {
 		if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr)))
 			skb->pkt_type = PACKET_OTHERHOST;
 	}
-	
+
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
-		
+
 	rawp = skb->data;
-	
+
 	/*
-	 *	This is a magic hack to spot IPX packets. Older Novell breaks
-	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
-	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
-	 *	won't work for fault tolerant netware but does for the rest.
+	 *      This is a magic hack to spot IPX packets. Older Novell breaks
+	 *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
+	 *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+	 *      won't work for fault tolerant netware but does for the rest.
 	 */
 	if (*(unsigned short *)rawp == 0xFFFF)
 		return htons(ETH_P_802_3);
-		
+
 	/*
-	 *	Real 802.2 LLC
+	 *      Real 802.2 LLC
 	 */
 	return htons(ETH_P_802_2);
 }
+EXPORT_SYMBOL(eth_type_trans);
 
 /**
  * eth_header_parse - extract hardware address from packet
@@ -230,8 +227,8 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 	struct ethhdr *eth;
 	struct net_device *dev = neigh->dev;
 
-	eth = (struct ethhdr*)
-		(((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
+	eth = (struct ethhdr *)
+	    (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
 
 	if (type == __constant_htons(ETH_P_802_3))
 		return -1;
@@ -251,14 +248,13 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
  *
  * Called by Address Resolution module to notify changes in address.
  */
-void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
+			     unsigned char *haddr)
 {
-	memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
+	memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
 	       haddr, dev->addr_len);
 }
 
-EXPORT_SYMBOL(eth_type_trans);
-
 /**
  * eth_mac_addr - set new Ethernet hardware address
  * @dev: network device
@@ -270,10 +266,10 @@ EXPORT_SYMBOL(eth_type_trans);
  */
 static int eth_mac_addr(struct net_device *dev, void *p)
 {
-	struct sockaddr *addr=p;
+	struct sockaddr *addr = p;
 	if (netif_running(dev))
 		return -EBUSY;
-	memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	return 0;
 }
 
@@ -315,7 +311,7 @@ void ether_setup(struct net_device *dev)
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */	
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
 	
-	memset(dev->broadcast,0xFF, ETH_ALEN);
+	memset(dev->broadcast, 0xFF, ETH_ALEN);
 
 }
 EXPORT_SYMBOL(ether_setup);
-- 
GitLab


From e9ce1cd3cf6cf35b21d0ce990f2e738f35907386 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 21 Aug 2006 23:54:55 -0700
Subject: [PATCH 0491/1063] [PKT_SCHED]: Kill pkt_act.h inlining.

This was simply making templates of functions and mostly causing a lot
of code duplication in the classifier action modules.

We solve this more cleanly by having a common "struct tcf_common" that
the hash worker functions, now contained once in act_api.c, can work with.

Callers work with real action objects that have the common struct
plus their module specific struct members.  You go from a common
object to the higher level one using a "to_foo()" macro which makes
use of container_of() to do the dirty work.

This also kills off act_generic.h which was only used by act_simple.c
and keeping it around was more work than it was worth.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h          | 136 +++++----
 include/net/act_generic.h      | 142 ---------
 include/net/pkt_act.h          | 273 ------------------
 include/net/tc_act/tc_defact.h |  13 +-
 include/net/tc_act/tc_gact.h   |  18 +-
 include/net/tc_act/tc_ipt.h    |  15 +-
 include/net/tc_act/tc_mirred.h |  17 +-
 include/net/tc_act/tc_pedit.h  |  15 +-
 net/sched/act_api.c            | 246 ++++++++++++++--
 net/sched/act_gact.c           | 142 +++++----
 net/sched/act_ipt.c            | 175 +++++-------
 net/sched/act_mirred.c         | 159 +++++------
 net/sched/act_pedit.c          | 166 +++++------
 net/sched/act_police.c         | 508 +++++++++++++++++----------------
 net/sched/act_simple.c         | 183 ++++++++++--
 15 files changed, 1061 insertions(+), 1147 deletions(-)
 delete mode 100644 include/net/act_generic.h
 delete mode 100644 include/net/pkt_act.h

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 11e9eaf79f5ab..8b06c2f3657f3 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -8,70 +8,110 @@
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 
-#define tca_gen(name) \
-struct tcf_##name *next; \
-	u32 index; \
-	int refcnt; \
-	int bindcnt; \
-	u32 capab; \
-	int action; \
-	struct tcf_t tm; \
-	struct gnet_stats_basic bstats; \
-	struct gnet_stats_queue qstats; \
-	struct gnet_stats_rate_est rate_est; \
-	spinlock_t *stats_lock; \
-	spinlock_t lock
-
-struct tcf_police
-{
-	tca_gen(police);
-	int		result;
-	u32		ewma_rate;
-	u32		burst;
-	u32		mtu;
-	u32		toks;
-	u32		ptoks;
-	psched_time_t	t_c;
-	struct qdisc_rate_table *R_tab;
-	struct qdisc_rate_table *P_tab;
+struct tcf_common {
+	struct tcf_common		*tcfc_next;
+	u32				tcfc_index;
+	int				tcfc_refcnt;
+	int				tcfc_bindcnt;
+	u32				tcfc_capab;
+	int				tcfc_action;
+	struct tcf_t			tcfc_tm;
+	struct gnet_stats_basic		tcfc_bstats;
+	struct gnet_stats_queue		tcfc_qstats;
+	struct gnet_stats_rate_est	tcfc_rate_est;
+	spinlock_t			*tcfc_stats_lock;
+	spinlock_t			tcfc_lock;
+};
+#define tcf_next	common.tcfc_next
+#define tcf_index	common.tcfc_index
+#define tcf_refcnt	common.tcfc_refcnt
+#define tcf_bindcnt	common.tcfc_bindcnt
+#define tcf_capab	common.tcfc_capab
+#define tcf_action	common.tcfc_action
+#define tcf_tm		common.tcfc_tm
+#define tcf_bstats	common.tcfc_bstats
+#define tcf_qstats	common.tcfc_qstats
+#define tcf_rate_est	common.tcfc_rate_est
+#define tcf_stats_lock	common.tcfc_stats_lock
+#define tcf_lock	common.tcfc_lock
+
+struct tcf_police {
+	struct tcf_common	common;
+	int			tcfp_result;
+	u32			tcfp_ewma_rate;
+	u32			tcfp_burst;
+	u32			tcfp_mtu;
+	u32			tcfp_toks;
+	u32			tcfp_ptoks;
+	psched_time_t		tcfp_t_c;
+	struct qdisc_rate_table	*tcfp_R_tab;
+	struct qdisc_rate_table	*tcfp_P_tab;
 };
+#define to_police(pc)	\
+	container_of(pc, struct tcf_police, common)
+
+struct tcf_hashinfo {
+	struct tcf_common	**htab;
+	unsigned int		hmask;
+	rwlock_t		*lock;
+};
+
+static inline unsigned int tcf_hash(u32 index, unsigned int hmask)
+{
+	return index & hmask;
+}
 
 #ifdef CONFIG_NET_CLS_ACT
 
 #define ACT_P_CREATED 1
 #define ACT_P_DELETED 1
 
-struct tcf_act_hdr
-{
-	tca_gen(act_hdr);
+struct tcf_act_hdr {
+	struct tcf_common	common;
 };
 
-struct tc_action
-{
-	void *priv;
-	struct tc_action_ops *ops;
-	__u32   type;   /* for backward compat(TCA_OLD_COMPAT) */
-	__u32   order; 
-	struct tc_action *next;
+struct tc_action {
+	void			*priv;
+	struct tc_action_ops	*ops;
+	__u32			type; /* for backward compat(TCA_OLD_COMPAT) */
+	__u32			order;
+	struct tc_action	*next;
 };
 
 #define TCA_CAP_NONE 0
-struct tc_action_ops
-{
+struct tc_action_ops {
 	struct tc_action_ops *next;
+	struct tcf_hashinfo *hinfo;
 	char    kind[IFNAMSIZ];
 	__u32   type; /* TBD to match kind */
 	__u32 	capab;  /* capabilities includes 4 bit version */
 	struct module		*owner;
 	int     (*act)(struct sk_buff *, struct tc_action *, struct tcf_result *);
 	int     (*get_stats)(struct sk_buff *, struct tc_action *);
-	int     (*dump)(struct sk_buff *, struct tc_action *,int , int);
+	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	int     (*cleanup)(struct tc_action *, int bind);
-	int     (*lookup)(struct tc_action *, u32 );
-	int     (*init)(struct rtattr *,struct rtattr *,struct tc_action *, int , int );
-	int     (*walk)(struct sk_buff *, struct netlink_callback *, int , struct tc_action *);
+	int     (*lookup)(struct tc_action *, u32);
+	int     (*init)(struct rtattr *, struct rtattr *, struct tc_action *, int , int);
+	int     (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *);
 };
 
+extern struct tcf_common *tcf_hash_lookup(u32 index,
+					  struct tcf_hashinfo *hinfo);
+extern void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo);
+extern int tcf_hash_release(struct tcf_common *p, int bind,
+			    struct tcf_hashinfo *hinfo);
+extern int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+			      int type, struct tc_action *a);
+extern u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo);
+extern int tcf_hash_search(struct tc_action *a, u32 index);
+extern struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a,
+					 int bind, struct tcf_hashinfo *hinfo);
+extern struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est,
+					  struct tc_action *a, int size,
+					  int bind, u32 *idx_gen,
+					  struct tcf_hashinfo *hinfo);
+extern void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo);
+
 extern int tcf_register_action(struct tc_action_ops *a);
 extern int tcf_unregister_action(struct tc_action_ops *a);
 extern void tcf_action_destroy(struct tc_action *a, int bind);
@@ -96,17 +136,17 @@ tcf_police_release(struct tcf_police *p, int bind)
 	int ret = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	if (p) {
-		if (bind) {
-			 p->bindcnt--;
-		}
-		p->refcnt--;
-		if (p->refcnt <= 0 && !p->bindcnt) {
+		if (bind)
+			p->tcf_bindcnt--;
+
+		p->tcf_refcnt--;
+		if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) {
 			tcf_police_destroy(p);
 			ret = 1;
 		}
 	}
 #else
-	if (p && --p->refcnt == 0)
+	if (p && --p->tcf_refcnt == 0)
 		tcf_police_destroy(p);
 
 #endif /* CONFIG_NET_CLS_ACT */
diff --git a/include/net/act_generic.h b/include/net/act_generic.h
deleted file mode 100644
index c9daa7e52300d..0000000000000
--- a/include/net/act_generic.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * include/net/act_generic.h
- *
-*/
-#ifndef _NET_ACT_GENERIC_H
-#define _NET_ACT_GENERIC_H
-static inline int tcf_defact_release(struct tcf_defact *p, int bind)
-{
-	int ret = 0;
-	if (p) {
-		if (bind) {
-			p->bindcnt--;
-		}
-		p->refcnt--;
-		if (p->bindcnt <= 0 && p->refcnt <= 0) {
-			kfree(p->defdata);
-			tcf_hash_destroy(p);
-			ret = 1;
-		}
-	}
-	return ret;
-}
-
-static inline int
-alloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata)
-{
-	p->defdata = kmalloc(datalen, GFP_KERNEL);
-	if (p->defdata == NULL)
-		return -ENOMEM;
-	p->datalen = datalen;
-	memcpy(p->defdata, defdata, datalen);
-	return 0;
-}
-
-static inline int
-realloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata)
-{
-	/* safer to be just brute force for now */
-	kfree(p->defdata);
-	return alloc_defdata(p, datalen, defdata);
-}
-
-static inline int
-tcf_defact_init(struct rtattr *rta, struct rtattr *est,
-		struct tc_action *a, int ovr, int bind)
-{
-	struct rtattr *tb[TCA_DEF_MAX];
-	struct tc_defact *parm;
-	struct tcf_defact *p;
-	void *defdata;
-	u32 datalen = 0;
-	int ret = 0;
-
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
-		return -EINVAL;
-
-	if (tb[TCA_DEF_PARMS - 1] == NULL || 
-	    RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
-		return -EINVAL;
-
-	parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
-	defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
-	if (defdata == NULL)
-		return -EINVAL;
-
-	datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
-	if (datalen <= 0)
-		return -EINVAL;
-
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
-			return -ENOMEM;
-
-		ret = alloc_defdata(p, datalen, defdata);
-		if (ret < 0) {
-			kfree(p);
-			return ret;
-		}
-		ret = ACT_P_CREATED;
-	} else {
-		if (!ovr) {
-			tcf_defact_release(p, bind);
-			return -EEXIST;
-		}
-		realloc_defdata(p, datalen, defdata);
-	}
-
-	spin_lock_bh(&p->lock);
-	p->action = parm->action;
-	spin_unlock_bh(&p->lock);
-	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
-	return ret;
-}
-
-static inline int tcf_defact_cleanup(struct tc_action *a, int bind)
-{
-	struct tcf_defact *p = PRIV(a, defact);
-
-	if (p != NULL)
-		return tcf_defact_release(p, bind);
-	return 0;
-}
-
-static inline int
-tcf_defact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
-{
-	unsigned char *b = skb->tail;
-	struct tc_defact opt;
-	struct tcf_defact *p = PRIV(a, defact);
-	struct tcf_t t;
-
-	opt.index = p->index;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	opt.action = p->action;
-	RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
-	RTA_PUT(skb, TCA_DEF_DATA, p->datalen, p->defdata);
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
-	RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
-	return skb->len;
-
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#define tca_use_default_ops \
-	.dump           =       tcf_defact_dump, \
-	.cleanup        =       tcf_defact_cleanup, \
-	.init           =       tcf_defact_init, \
-	.walk           =       tcf_generic_walker, \
-
-#define tca_use_default_defines(name) \
-	static u32 idx_gen; \
-	static struct tcf_defact *tcf_##name_ht[MY_TAB_SIZE]; \
-	static DEFINE_RWLOCK(##name_lock);
-#endif /* _NET_ACT_GENERIC_H */
diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h
deleted file mode 100644
index cf5e4d2e4c21c..0000000000000
--- a/include/net/pkt_act.h
+++ /dev/null
@@ -1,273 +0,0 @@
-#ifndef __NET_PKT_ACT_H
-#define __NET_PKT_ACT_H
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
-
-#define tca_st(val) (struct tcf_##val *)
-#define PRIV(a,name) ( tca_st(name) (a)->priv)
-
-#if 0 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-#if 0 /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format,args...)
-#endif
-
-static __inline__ unsigned
-tcf_hash(u32 index)
-{
-	return index & MY_TAB_MASK;
-}
-
-/* probably move this from being inline
- * and put into act_generic
-*/
-static inline void
-tcf_hash_destroy(struct tcf_st *p)
-{
-	unsigned h = tcf_hash(p->index);
-	struct tcf_st **p1p;
-
-	for (p1p = &tcf_ht[h]; *p1p; p1p = &(*p1p)->next) {
-		if (*p1p == p) {
-			write_lock_bh(&tcf_t_lock);
-			*p1p = p->next;
-			write_unlock_bh(&tcf_t_lock);
-#ifdef CONFIG_NET_ESTIMATOR
-			gen_kill_estimator(&p->bstats, &p->rate_est);
-#endif
-			kfree(p);
-			return;
-		}
-	}
-	BUG_TRAP(0);
-}
-
-static inline int
-tcf_hash_release(struct tcf_st *p, int bind )
-{
-	int ret = 0;
-	if (p) {
-		if (bind) {
-			p->bindcnt--;
-		}
-		p->refcnt--;
-	       	if(p->bindcnt <=0 && p->refcnt <= 0) {
-			tcf_hash_destroy(p);
-			ret = 1;
-		}
-	}
-	return ret;
-}
-
-static __inline__ int
-tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
-		struct tc_action *a)
-{
-	struct tcf_st *p;
-	int err =0, index =  -1,i= 0, s_i = 0, n_i = 0;
-	struct rtattr *r ;
-
-	read_lock(&tcf_t_lock);
-
-	s_i = cb->args[0];
-
-	for (i = 0; i < MY_TAB_SIZE; i++) {
-		p = tcf_ht[tcf_hash(i)];
-
-		for (; p; p = p->next) {
-			index++;
-			if (index < s_i)
-				continue;
-			a->priv = p;
-			a->order = n_i;
-			r = (struct rtattr*) skb->tail;
-			RTA_PUT(skb, a->order, 0, NULL);
-			err = tcf_action_dump_1(skb, a, 0, 0);
-			if (0 > err) {
-				index--;
-				skb_trim(skb, (u8*)r - skb->data);
-				goto done;
-			}
-			r->rta_len = skb->tail - (u8*)r;
-			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO) {
-				goto done;
-			}
-		}
-	}
-done:
-	read_unlock(&tcf_t_lock);
-	if (n_i)
-		cb->args[0] += n_i;
-	return n_i;
-
-rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
-	goto done;
-}
-
-static __inline__ int
-tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
-{
-	struct tcf_st *p, *s_p;
-	struct rtattr *r ;
-	int i= 0, n_i = 0;
-
-	r = (struct rtattr*) skb->tail;
-	RTA_PUT(skb, a->order, 0, NULL);
-	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
-	for (i = 0; i < MY_TAB_SIZE; i++) {
-		p = tcf_ht[tcf_hash(i)];
-
-		while (p != NULL) {
-			s_p = p->next;
-			if (ACT_P_DELETED == tcf_hash_release(p, 0)) {
-				 module_put(a->ops->owner);
-			}
-			n_i++;
-			p = s_p;
-		}
-	}
-	RTA_PUT(skb, TCA_FCNT, 4, &n_i);
-	r->rta_len = skb->tail - (u8*)r;
-
-	return n_i;
-rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
-	return -EINVAL;
-}
-
-static __inline__ int
-tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, int type,
-		struct tc_action *a)
-{
-		if (type == RTM_DELACTION) {
-			return tcf_del_walker(skb,a);
-		} else if (type == RTM_GETACTION) {
-			return tcf_dump_walker(skb,cb,a);
-		} else {
-			printk("tcf_generic_walker: unknown action %d\n",type);
-			return -EINVAL;
-		}
-}
-
-static __inline__ struct tcf_st *
-tcf_hash_lookup(u32 index)
-{
-	struct tcf_st *p;
-
-	read_lock(&tcf_t_lock);
-	for (p = tcf_ht[tcf_hash(index)]; p; p = p->next) {
-		if (p->index == index)
-			break;
-	}
-	read_unlock(&tcf_t_lock);
-	return p;
-}
-
-static __inline__ u32
-tcf_hash_new_index(void)
-{
-	do {
-		if (++idx_gen == 0)
-			idx_gen = 1;
-	} while (tcf_hash_lookup(idx_gen));
-
-	return idx_gen;
-}
-
-
-static inline int
-tcf_hash_search(struct tc_action *a, u32 index)
-{
-	struct tcf_st *p = tcf_hash_lookup(index);
-
-	if (p != NULL) {
-		a->priv = p;
-		return 1;
-	}
-	return 0;
-}
-
-#ifdef CONFIG_NET_ACT_INIT
-static inline struct tcf_st *
-tcf_hash_check(u32 index, struct tc_action *a, int ovr, int bind)
-{
-	struct tcf_st *p = NULL;
-	if (index && (p = tcf_hash_lookup(index)) != NULL) {
-		if (bind) {
-			p->bindcnt++;
-			p->refcnt++;
-		}
-		a->priv = p;
-	}
-	return p;
-}
-
-static inline struct tcf_st *
-tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind)
-{
-	struct tcf_st *p = NULL;
-
-	p = kmalloc(size, GFP_KERNEL);
-	if (p == NULL)
-		return p;
-
-	memset(p, 0, size);
-	p->refcnt = 1;
-
-	if (bind) {
-		p->bindcnt = 1;
-	}
-
-	spin_lock_init(&p->lock);
-	p->stats_lock = &p->lock;
-	p->index = index ? : tcf_hash_new_index();
-	p->tm.install = jiffies;
-	p->tm.lastuse = jiffies;
-#ifdef CONFIG_NET_ESTIMATOR
-	if (est)
-		gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
-#endif
-	a->priv = (void *) p;
-	return p;
-}
-
-static inline void tcf_hash_insert(struct tcf_st *p)
-{
-	unsigned h = tcf_hash(p->index);
-
-	write_lock_bh(&tcf_t_lock);
-	p->next = tcf_ht[h];
-	tcf_ht[h] = p;
-	write_unlock_bh(&tcf_t_lock);
-}
-
-#endif
-
-#endif
diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h
index 463aa671f95d2..65f024b809589 100644
--- a/include/net/tc_act/tc_defact.h
+++ b/include/net/tc_act/tc_defact.h
@@ -3,11 +3,12 @@
 
 #include <net/act_api.h>
 
-struct tcf_defact
-{
-	tca_gen(defact);
-	u32     datalen;
-	void    *defdata;
+struct tcf_defact {
+	struct tcf_common	common;
+	u32     		tcfd_datalen;
+	void    		*tcfd_defdata;
 };
+#define to_defact(pc) \
+	container_of(pc, struct tcf_defact, common)
 
-#endif
+#endif /* __NET_TC_DEF_H */
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 59f0d9628ad1c..9e3f6767b80e5 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -3,15 +3,15 @@
 
 #include <net/act_api.h>
 
-struct tcf_gact
-{
-        tca_gen(gact);
+struct tcf_gact {
+	struct tcf_common	common;
 #ifdef CONFIG_GACT_PROB
-        u16                 ptype;
-        u16                 pval;
-        int                 paction;
+        u16			tcfg_ptype;
+        u16			tcfg_pval;
+        int			tcfg_paction;
 #endif
-                                                                                
 };
-                                                                                
-#endif
+#define to_gact(pc) \
+	container_of(pc, struct tcf_gact, common)
+
+#endif /* __NET_TC_GACT_H */
diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h
index cb37ad08427fa..f7d25dfcc4b78 100644
--- a/include/net/tc_act/tc_ipt.h
+++ b/include/net/tc_act/tc_ipt.h
@@ -5,12 +5,13 @@
 
 struct xt_entry_target;
 
-struct tcf_ipt
-{
-	tca_gen(ipt);
-	u32 hook;
-	char *tname;
-	struct xt_entry_target *t;
+struct tcf_ipt {
+	struct tcf_common	common;
+	u32			tcfi_hook;
+	char			*tcfi_tname;
+	struct xt_entry_target	*tcfi_t;
 };
+#define to_ipt(pc) \
+	container_of(pc, struct tcf_ipt, common)
 
-#endif
+#endif /* __NET_TC_IPT_H */
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index b5c32f65c12c8..ceac661cdfd5f 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -3,13 +3,14 @@
 
 #include <net/act_api.h>
 
-struct tcf_mirred
-{
-	tca_gen(mirred);
-	int eaction;
-	int ifindex;
-	int ok_push;
-	struct net_device *dev;
+struct tcf_mirred {
+	struct tcf_common	common;
+	int			tcfm_eaction;
+	int			tcfm_ifindex;
+	int			tcfm_ok_push;
+	struct net_device	*tcfm_dev;
 };
+#define to_mirred(pc) \
+	container_of(pc, struct tcf_mirred, common)
 
-#endif
+#endif /* __NET_TC_MIR_H */
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index eb21689d759dd..e6f6e15956f5f 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -3,12 +3,13 @@
 
 #include <net/act_api.h>
 
-struct tcf_pedit
-{
-	tca_gen(pedit);
-	unsigned char           nkeys;
-	unsigned char           flags;
-	struct tc_pedit_key     *keys;
+struct tcf_pedit {
+	struct tcf_common	common;
+	unsigned char		tcfp_nkeys;
+	unsigned char		tcfp_flags;
+	struct tc_pedit_key	*tcfp_keys;
 };
+#define to_pedit(pc) \
+	container_of(pc, struct tcf_pedit, common)
 
-#endif
+#endif /* __NET_TC_PED_H */
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 6990747d6d5ac..835070e9169cc 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -33,16 +33,230 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
-#if 0 /* control */
-#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
-#else
-#define DPRINTK(format, args...)
+void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+	struct tcf_common **p1p;
+
+	for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+		if (*p1p == p) {
+			write_lock_bh(hinfo->lock);
+			*p1p = p->tcfc_next;
+			write_unlock_bh(hinfo->lock);
+#ifdef CONFIG_NET_ESTIMATOR
+			gen_kill_estimator(&p->tcfc_bstats,
+					   &p->tcfc_rate_est);
 #endif
-#if 0 /* data */
-#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args)
-#else
-#define D2PRINTK(format, args...)
+			kfree(p);
+			return;
+		}
+	}
+	BUG_TRAP(0);
+}
+EXPORT_SYMBOL(tcf_hash_destroy);
+
+int tcf_hash_release(struct tcf_common *p, int bind,
+		     struct tcf_hashinfo *hinfo)
+{
+	int ret = 0;
+
+	if (p) {
+		if (bind)
+			p->tcfc_bindcnt--;
+
+		p->tcfc_refcnt--;
+	       	if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
+			tcf_hash_destroy(p, hinfo);
+			ret = 1;
+		}
+	}
+	return ret;
+}
+EXPORT_SYMBOL(tcf_hash_release);
+
+static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
+			   struct tc_action *a, struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p;
+	int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+	struct rtattr *r ;
+
+	read_lock(hinfo->lock);
+
+	s_i = cb->args[0];
+
+	for (i = 0; i < (hinfo->hmask + 1); i++) {
+		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+		for (; p; p = p->tcfc_next) {
+			index++;
+			if (index < s_i)
+				continue;
+			a->priv = p;
+			a->order = n_i;
+			r = (struct rtattr*) skb->tail;
+			RTA_PUT(skb, a->order, 0, NULL);
+			err = tcf_action_dump_1(skb, a, 0, 0);
+			if (err < 0) {
+				index--;
+				skb_trim(skb, (u8*)r - skb->data);
+				goto done;
+			}
+			r->rta_len = skb->tail - (u8*)r;
+			n_i++;
+			if (n_i >= TCA_ACT_MAX_PRIO)
+				goto done;
+		}
+	}
+done:
+	read_unlock(hinfo->lock);
+	if (n_i)
+		cb->args[0] += n_i;
+	return n_i;
+
+rtattr_failure:
+	skb_trim(skb, (u8*)r - skb->data);
+	goto done;
+}
+
+static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
+			  struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p, *s_p;
+	struct rtattr *r ;
+	int i= 0, n_i = 0;
+
+	r = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, a->order, 0, NULL);
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	for (i = 0; i < (hinfo->hmask + 1); i++) {
+		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+		while (p != NULL) {
+			s_p = p->tcfc_next;
+			if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
+				 module_put(a->ops->owner);
+			n_i++;
+			p = s_p;
+		}
+	}
+	RTA_PUT(skb, TCA_FCNT, 4, &n_i);
+	r->rta_len = skb->tail - (u8*)r;
+
+	return n_i;
+rtattr_failure:
+	skb_trim(skb, (u8*)r - skb->data);
+	return -EINVAL;
+}
+
+int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+		       int type, struct tc_action *a)
+{
+	struct tcf_hashinfo *hinfo = a->ops->hinfo;
+
+	if (type == RTM_DELACTION) {
+		return tcf_del_walker(skb, a, hinfo);
+	} else if (type == RTM_GETACTION) {
+		return tcf_dump_walker(skb, cb, a, hinfo);
+	} else {
+		printk("tcf_generic_walker: unknown action %d\n", type);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(tcf_generic_walker);
+
+struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p;
+
+	read_lock(hinfo->lock);
+	for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
+	     p = p->tcfc_next) {
+		if (p->tcfc_index == index)
+			break;
+	}
+	read_unlock(hinfo->lock);
+
+	return p;
+}
+EXPORT_SYMBOL(tcf_hash_lookup);
+
+u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+	u32 val = *idx_gen;
+
+	do {
+		if (++val == 0)
+			val = 1;
+	} while (tcf_hash_lookup(val, hinfo));
+
+	return (*idx_gen = val);
+}
+EXPORT_SYMBOL(tcf_hash_new_index);
+
+int tcf_hash_search(struct tc_action *a, u32 index)
+{
+	struct tcf_hashinfo *hinfo = a->ops->hinfo;
+	struct tcf_common *p = tcf_hash_lookup(index, hinfo);
+
+	if (p) {
+		a->priv = p;
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(tcf_hash_search);
+
+struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
+				  struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p = NULL;
+	if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+		if (bind) {
+			p->tcfc_bindcnt++;
+			p->tcfc_refcnt++;
+		}
+		a->priv = p;
+	}
+	return p;
+}
+EXPORT_SYMBOL(tcf_hash_check);
+
+struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+
+	if (unlikely(!p))
+		return p;
+	p->tcfc_refcnt = 1;
+	if (bind)
+		p->tcfc_bindcnt = 1;
+
+	spin_lock_init(&p->tcfc_lock);
+	p->tcfc_stats_lock = &p->tcfc_lock;
+	p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
+	p->tcfc_tm.install = jiffies;
+	p->tcfc_tm.lastuse = jiffies;
+#ifdef CONFIG_NET_ESTIMATOR
+	if (est)
+		gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+				  p->tcfc_stats_lock, est);
 #endif
+	a->priv = (void *) p;
+	return p;
+}
+EXPORT_SYMBOL(tcf_hash_create);
+
+void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+
+	write_lock_bh(hinfo->lock);
+	p->tcfc_next = hinfo->htab[h];
+	hinfo->htab[h] = p;
+	write_unlock_bh(hinfo->lock);
+}
+EXPORT_SYMBOL(tcf_hash_insert);
 
 static struct tc_action_ops *act_base = NULL;
 static DEFINE_RWLOCK(act_mod_lock);
@@ -155,9 +369,6 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
 
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
-		D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n",
-		         skb, skb->input_dev ? skb->input_dev->name : "xxx",
-		         skb->dev->name);
 		ret = TC_ACT_OK;
 		goto exec_done;
 	}
@@ -187,8 +398,6 @@ void tcf_action_destroy(struct tc_action *act, int bind)
 
 	for (a = act; a; a = act) {
 		if (a->ops && a->ops->cleanup) {
-			DPRINTK("tcf_action_destroy destroying %p next %p\n",
-			        a, a->next);
 			if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
 				module_put(a->ops->owner);
 			act = act->next;
@@ -331,7 +540,6 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
 	if (*err != ACT_P_CREATED)
 		module_put(a_o->owner);
 	a->ops = a_o;
-	DPRINTK("tcf_action_init_1: successfull %s\n", act_name);
 
 	*err = 0;
 	return a;
@@ -392,12 +600,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
 	if (compat_mode) {
 		if (a->type == TCA_OLD_COMPAT)
 			err = gnet_stats_start_copy_compat(skb, 0,
-				TCA_STATS, TCA_XSTATS, h->stats_lock, &d);
+				TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d);
 		else
 			return 0;
 	} else
 		err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
-			h->stats_lock, &d);
+			h->tcf_stats_lock, &d);
 
 	if (err < 0)
 		goto errout;
@@ -406,11 +614,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
 		if (a->ops->get_stats(skb, a) < 0)
 			goto errout;
 
-	if (gnet_stats_copy_basic(&d, &h->bstats) < 0 ||
+	if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
 #ifdef CONFIG_NET_ESTIMATOR
-	    gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
 #endif
-	    gnet_stats_copy_queue(&d, &h->qstats) < 0)
+	    gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
 		goto errout;
 
 	if (gnet_stats_finish_copy(&d) < 0)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e75a147ad60fc..6cff56696a81d 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -34,48 +34,43 @@
 #include <linux/tc_act/tc_gact.h>
 #include <net/tc_act/tc_gact.h>
 
-/* use generic hash table */
-#define MY_TAB_SIZE	16
-#define MY_TAB_MASK	15
-
-static u32 idx_gen;
-static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE];
+#define GACT_TAB_MASK	15
+static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
+static u32 gact_idx_gen;
 static DEFINE_RWLOCK(gact_lock);
 
-/* ovewrride the defaults */
-#define tcf_st		tcf_gact
-#define tc_st		tc_gact
-#define tcf_t_lock	gact_lock
-#define tcf_ht		tcf_gact_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
+static struct tcf_hashinfo gact_hash_info = {
+	.htab	=	tcf_gact_ht,
+	.hmask	=	GACT_TAB_MASK,
+	.lock	=	&gact_lock,
+};
 
 #ifdef CONFIG_GACT_PROB
-static int gact_net_rand(struct tcf_gact *p)
+static int gact_net_rand(struct tcf_gact *gact)
 {
-	if (net_random()%p->pval)
-		return p->action;
-	return p->paction;
+	if (net_random() % gact->tcfg_pval)
+		return gact->tcf_action;
+	return gact->tcfg_paction;
 }
 
-static int gact_determ(struct tcf_gact *p)
+static int gact_determ(struct tcf_gact *gact)
 {
-	if (p->bstats.packets%p->pval)
-		return p->action;
-	return p->paction;
+	if (gact->tcf_bstats.packets % gact->tcfg_pval)
+		return gact->tcf_action;
+	return gact->tcfg_paction;
 }
 
-typedef int (*g_rand)(struct tcf_gact *p);
+typedef int (*g_rand)(struct tcf_gact *gact);
 static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
-#endif
+#endif /* CONFIG_GACT_PROB */
 
 static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
                          struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_GACT_MAX];
 	struct tc_gact *parm;
-	struct tcf_gact *p;
+	struct tcf_gact *gact;
+	struct tcf_common *pc;
 	int ret = 0;
 
 	if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
@@ -94,105 +89,106 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
 		return -EOPNOTSUPP;
 #endif
 
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+	pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+				     bind, &gact_idx_gen, &gact_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
-			tcf_hash_release(p, bind);
+			tcf_hash_release(pc, bind, &gact_hash_info);
 			return -EEXIST;
 		}
 	}
 
-	spin_lock_bh(&p->lock);
-	p->action = parm->action;
+	gact = to_gact(pc);
+
+	spin_lock_bh(&gact->tcf_lock);
+	gact->tcf_action = parm->action;
 #ifdef CONFIG_GACT_PROB
 	if (tb[TCA_GACT_PROB-1] != NULL) {
 		struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
-		p->paction = p_parm->paction;
-		p->pval    = p_parm->pval;
-		p->ptype   = p_parm->ptype;
+		gact->tcfg_paction = p_parm->paction;
+		gact->tcfg_pval    = p_parm->pval;
+		gact->tcfg_ptype   = p_parm->ptype;
 	}
 #endif
-	spin_unlock_bh(&p->lock);
+	spin_unlock_bh(&gact->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &gact_hash_info);
 	return ret;
 }
 
-static int
-tcf_gact_cleanup(struct tc_action *a, int bind)
+static int tcf_gact_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_gact *gact = a->priv;
 
-	if (p != NULL)
-		return tcf_hash_release(p, bind);
+	if (gact)
+		return tcf_hash_release(&gact->common, bind, &gact_hash_info);
 	return 0;
 }
 
-static int
-tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
-	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_gact *gact = a->priv;
 	int action = TC_ACT_SHOT;
 
-	spin_lock(&p->lock);
+	spin_lock(&gact->tcf_lock);
 #ifdef CONFIG_GACT_PROB
-	if (p->ptype && gact_rand[p->ptype] != NULL)
-		action = gact_rand[p->ptype](p);
+	if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+		action = gact_rand[gact->tcfg_ptype](gact);
 	else
-		action = p->action;
+		action = gact->tcf_action;
 #else
-	action = p->action;
+	action = gact->tcf_action;
 #endif
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	gact->tcf_bstats.bytes += skb->len;
+	gact->tcf_bstats.packets++;
 	if (action == TC_ACT_SHOT)
-		p->qstats.drops++;
-	p->tm.lastuse = jiffies;
-	spin_unlock(&p->lock);
+		gact->tcf_qstats.drops++;
+	gact->tcf_tm.lastuse = jiffies;
+	spin_unlock(&gact->tcf_lock);
 
 	return action;
 }
 
-static int
-tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char *b = skb->tail;
 	struct tc_gact opt;
-	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_gact *gact = a->priv;
 	struct tcf_t t;
 
-	opt.index = p->index;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	opt.action = p->action;
+	opt.index = gact->tcf_index;
+	opt.refcnt = gact->tcf_refcnt - ref;
+	opt.bindcnt = gact->tcf_bindcnt - bind;
+	opt.action = gact->tcf_action;
 	RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
 #ifdef CONFIG_GACT_PROB
-	if (p->ptype) {
+	if (gact->tcfg_ptype) {
 		struct tc_gact_p p_opt;
-		p_opt.paction = p->paction;
-		p_opt.pval = p->pval;
-		p_opt.ptype = p->ptype;
+		p_opt.paction = gact->tcfg_paction;
+		p_opt.pval = gact->tcfg_pval;
+		p_opt.ptype = gact->tcfg_ptype;
 		RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
 	}
 #endif
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
+	t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
 	RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
 	return skb->len;
 
-      rtattr_failure:
+rtattr_failure:
 	skb_trim(skb, b - skb->data);
 	return -1;
 }
 
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
+	.hinfo		=	&gact_hash_info,
 	.type		=	TCA_ACT_GACT,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -208,8 +204,7 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
 MODULE_DESCRIPTION("Generic Classifier actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-gact_init_module(void)
+static int __init gact_init_module(void)
 {
 #ifdef CONFIG_GACT_PROB
 	printk("GACT probability on\n");
@@ -219,8 +214,7 @@ gact_init_module(void)
 	return tcf_register_action(&act_gact_ops);
 }
 
-static void __exit
-gact_cleanup_module(void)
+static void __exit gact_cleanup_module(void)
 {
 	tcf_unregister_action(&act_gact_ops);
 }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d799e01248c4e..224c078a398e7 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -38,25 +38,19 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-/* use generic hash table */
-#define MY_TAB_SIZE     16
-#define MY_TAB_MASK     15
 
-static u32 idx_gen;
-static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE];
-/* ipt hash table lock */
+#define IPT_TAB_MASK     15
+static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
+static u32 ipt_idx_gen;
 static DEFINE_RWLOCK(ipt_lock);
 
-/* ovewrride the defaults */
-#define tcf_st		tcf_ipt
-#define tcf_t_lock	ipt_lock
-#define tcf_ht		tcf_ipt_ht
-
-#define CONFIG_NET_ACT_INIT
-#include <net/pkt_act.h>
+static struct tcf_hashinfo ipt_hash_info = {
+	.htab	=	tcf_ipt_ht,
+	.hmask	=	IPT_TAB_MASK,
+	.lock	=	&ipt_lock,
+};
 
-static int
-ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
+static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 {
 	struct ipt_target *target;
 	int ret = 0;
@@ -65,7 +59,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 	if (!target)
 		return -ENOENT;
 
-	DPRINTK("ipt_init_target: found %s\n", target->name);
 	t->u.kernel.target = target;
 
 	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
@@ -78,8 +71,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 		    			       t->u.kernel.target, t->data,
 					       t->u.target_size - sizeof(*t),
 					       hook)) {
-		DPRINTK("ipt_init_target: check failed for `%s'.\n",
-			t->u.kernel.target->name);
 		module_put(t->u.kernel.target->me);
 		ret = -EINVAL;
 	}
@@ -87,8 +78,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 	return ret;
 }
 
-static void
-ipt_destroy_target(struct ipt_entry_target *t)
+static void ipt_destroy_target(struct ipt_entry_target *t)
 {
 	if (t->u.kernel.target->destroy)
 		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
@@ -96,31 +86,30 @@ ipt_destroy_target(struct ipt_entry_target *t)
         module_put(t->u.kernel.target->me);
 }
 
-static int
-tcf_ipt_release(struct tcf_ipt *p, int bind)
+static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
 {
 	int ret = 0;
-	if (p) {
+	if (ipt) {
 		if (bind)
-			p->bindcnt--;
-		p->refcnt--;
-		if (p->bindcnt <= 0 && p->refcnt <= 0) {
-			ipt_destroy_target(p->t);
-			kfree(p->tname);
-			kfree(p->t);
-			tcf_hash_destroy(p);
+			ipt->tcf_bindcnt--;
+		ipt->tcf_refcnt--;
+		if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
+			ipt_destroy_target(ipt->tcfi_t);
+			kfree(ipt->tcfi_tname);
+			kfree(ipt->tcfi_t);
+			tcf_hash_destroy(&ipt->common, &ipt_hash_info);
 			ret = ACT_P_DELETED;
 		}
 	}
 	return ret;
 }
 
-static int
-tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
-             int ovr, int bind)
+static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
+			struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_IPT_MAX];
-	struct tcf_ipt *p;
+	struct tcf_ipt *ipt;
+	struct tcf_common *pc;
 	struct ipt_entry_target *td, *t;
 	char *tname;
 	int ret = 0, err;
@@ -144,49 +133,51 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
 	    RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
 		index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
 
-	p = tcf_hash_check(index, a, ovr, bind);
-	if (p == NULL) {
-		p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+	pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
+				     &ipt_idx_gen, &ipt_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
-			tcf_ipt_release(p, bind);
+			tcf_ipt_release(to_ipt(pc), bind);
 			return -EEXIST;
 		}
 	}
+	ipt = to_ipt(pc);
 
 	hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
 
 	err = -ENOMEM;
 	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
-	if (tname == NULL)
+	if (unlikely(!tname))
 		goto err1;
 	if (tb[TCA_IPT_TABLE - 1] == NULL ||
 	    rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
 		strcpy(tname, "mangle");
 
 	t = kmalloc(td->u.target_size, GFP_KERNEL);
-	if (t == NULL)
+	if (unlikely(!t))
 		goto err2;
 	memcpy(t, td, td->u.target_size);
 
 	if ((err = ipt_init_target(t, tname, hook)) < 0)
 		goto err3;
 
-	spin_lock_bh(&p->lock);
+	spin_lock_bh(&ipt->tcf_lock);
 	if (ret != ACT_P_CREATED) {
-		ipt_destroy_target(p->t);
-		kfree(p->tname);
-		kfree(p->t);
+		ipt_destroy_target(ipt->tcfi_t);
+		kfree(ipt->tcfi_tname);
+		kfree(ipt->tcfi_t);
 	}
-	p->tname = tname;
-	p->t     = t;
-	p->hook  = hook;
-	spin_unlock_bh(&p->lock);
+	ipt->tcfi_tname = tname;
+	ipt->tcfi_t     = t;
+	ipt->tcfi_hook  = hook;
+	spin_unlock_bh(&ipt->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &ipt_hash_info);
 	return ret;
 
 err3:
@@ -194,33 +185,32 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
 err2:
 	kfree(tname);
 err1:
-	kfree(p);
+	kfree(pc);
 	return err;
 }
 
-static int
-tcf_ipt_cleanup(struct tc_action *a, int bind)
+static int tcf_ipt_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_ipt *p = PRIV(a, ipt);
-	return tcf_ipt_release(p, bind);
+	struct tcf_ipt *ipt = a->priv;
+	return tcf_ipt_release(ipt, bind);
 }
 
-static int
-tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
+		   struct tcf_result *res)
 {
 	int ret = 0, result = 0;
-	struct tcf_ipt *p = PRIV(a, ipt);
+	struct tcf_ipt *ipt = a->priv;
 
 	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 			return TC_ACT_UNSPEC;
 	}
 
-	spin_lock(&p->lock);
+	spin_lock(&ipt->tcf_lock);
 
-	p->tm.lastuse = jiffies;
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	ipt->tcf_tm.lastuse = jiffies;
+	ipt->tcf_bstats.bytes += skb->len;
+	ipt->tcf_bstats.packets++;
 
 	/* yes, we have to worry about both in and out dev
 	 worry later - danger - this API seems to have changed
@@ -229,16 +219,17 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 	/* iptables targets take a double skb pointer in case the skb
 	 * needs to be replaced. We don't own the skb, so this must not
 	 * happen. The pskb_expand_head above should make sure of this */
-	ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook,
-					    p->t->u.kernel.target, p->t->data,
-					    NULL);
+	ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
+						   ipt->tcfi_hook,
+						   ipt->tcfi_t->u.kernel.target,
+						   ipt->tcfi_t->data, NULL);
 	switch (ret) {
 	case NF_ACCEPT:
 		result = TC_ACT_OK;
 		break;
 	case NF_DROP:
 		result = TC_ACT_SHOT;
-		p->qstats.drops++;
+		ipt->tcf_qstats.drops++;
 		break;
 	case IPT_CONTINUE:
 		result = TC_ACT_PIPE;
@@ -249,53 +240,46 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 		result = TC_POLICE_OK;
 		break;
 	}
-	spin_unlock(&p->lock);
+	spin_unlock(&ipt->tcf_lock);
 	return result;
 
 }
 
-static int
-tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
+	unsigned char *b = skb->tail;
+	struct tcf_ipt *ipt = a->priv;
 	struct ipt_entry_target *t;
 	struct tcf_t tm;
 	struct tc_cnt c;
-	unsigned char *b = skb->tail;
-	struct tcf_ipt *p = PRIV(a, ipt);
 
 	/* for simple targets kernel size == user size
 	** user name = target name
 	** for foolproof you need to not assume this
 	*/
 
-	t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC);
-	if (t == NULL)
+	t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
+	if (unlikely(!t))
 		goto rtattr_failure;
 
-	c.bindcnt = p->bindcnt - bind;
-	c.refcnt = p->refcnt - ref;
-	memcpy(t, p->t, p->t->u.user.target_size);
-	strcpy(t->u.user.name, p->t->u.kernel.target->name);
-
-	DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname,
-		strlen(p->tname));
-	DPRINTK("\tdump target name %s size %d size user %d "
-	        "data[0] %x data[1] %x\n", p->t->u.kernel.target->name,
-	        p->t->u.target_size, p->t->u.user.target_size,
-	        p->t->data[0], p->t->data[1]);
-	RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t);
-	RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index);
-	RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook);
+	c.bindcnt = ipt->tcf_bindcnt - bind;
+	c.refcnt = ipt->tcf_refcnt - ref;
+	memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size);
+	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
+
+	RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
+	RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
+	RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
 	RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
-	RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname);
-	tm.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	tm.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
+	tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
+	tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
 	RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
 	kfree(t);
 	return skb->len;
 
-      rtattr_failure:
+rtattr_failure:
 	skb_trim(skb, b - skb->data);
 	kfree(t);
 	return -1;
@@ -303,6 +287,7 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
+	.hinfo		=	&ipt_hash_info,
 	.type		=	TCA_ACT_IPT,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -318,14 +303,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
 MODULE_DESCRIPTION("Iptables target actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-ipt_init_module(void)
+static int __init ipt_init_module(void)
 {
 	return tcf_register_action(&act_ipt_ops);
 }
 
-static void __exit
-ipt_cleanup_module(void)
+static void __exit ipt_cleanup_module(void)
 {
 	tcf_unregister_action(&act_ipt_ops);
 }
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fc562047ecc50..483897271f159 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -39,46 +39,39 @@
 #include <linux/etherdevice.h>
 #include <linux/if_arp.h>
 
-
-/* use generic hash table */
-#define MY_TAB_SIZE     8
-#define MY_TAB_MASK     (MY_TAB_SIZE - 1)
-static u32 idx_gen;
-static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE];
+#define MIRRED_TAB_MASK     7
+static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
+static u32 mirred_idx_gen;
 static DEFINE_RWLOCK(mirred_lock);
 
-/* ovewrride the defaults */
-#define tcf_st		tcf_mirred
-#define tc_st		tc_mirred
-#define tcf_t_lock	mirred_lock
-#define tcf_ht		tcf_mirred_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
+static struct tcf_hashinfo mirred_hash_info = {
+	.htab	=	tcf_mirred_ht,
+	.hmask	=	MIRRED_TAB_MASK,
+	.lock	=	&mirred_lock,
+};
 
-static inline int
-tcf_mirred_release(struct tcf_mirred *p, int bind)
+static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
 {
-	if (p) {
+	if (m) {
 		if (bind)
-			p->bindcnt--;
-		p->refcnt--;
-		if(!p->bindcnt && p->refcnt <= 0) {
-			dev_put(p->dev);
-			tcf_hash_destroy(p);
+			m->tcf_bindcnt--;
+		m->tcf_refcnt--;
+		if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+			dev_put(m->tcfm_dev);
+			tcf_hash_destroy(&m->common, &mirred_hash_info);
 			return 1;
 		}
 	}
 	return 0;
 }
 
-static int
-tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
-                int ovr, int bind)
+static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est,
+			   struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_MIRRED_MAX];
 	struct tc_mirred *parm;
-	struct tcf_mirred *p;
+	struct tcf_mirred *m;
+	struct tcf_common *pc;
 	struct net_device *dev = NULL;
 	int ret = 0;
 	int ok_push = 0;
@@ -110,64 +103,62 @@ tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
 		}
 	}
 
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
+	pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info);
+	if (!pc) {
 		if (!parm->ifindex)
 			return -EINVAL;
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
+				     &mirred_idx_gen, &mirred_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
-			tcf_mirred_release(p, bind);
+			tcf_mirred_release(to_mirred(pc), bind);
 			return -EEXIST;
 		}
 	}
+	m = to_mirred(pc);
 
-	spin_lock_bh(&p->lock);
-	p->action = parm->action;
-	p->eaction = parm->eaction;
+	spin_lock_bh(&m->tcf_lock);
+	m->tcf_action = parm->action;
+	m->tcfm_eaction = parm->eaction;
 	if (parm->ifindex) {
-		p->ifindex = parm->ifindex;
+		m->tcfm_ifindex = parm->ifindex;
 		if (ret != ACT_P_CREATED)
-			dev_put(p->dev);
-		p->dev = dev;
+			dev_put(m->tcfm_dev);
+		m->tcfm_dev = dev;
 		dev_hold(dev);
-		p->ok_push = ok_push;
+		m->tcfm_ok_push = ok_push;
 	}
-	spin_unlock_bh(&p->lock);
+	spin_unlock_bh(&m->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &mirred_hash_info);
 
-	DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s "
-	        "ifindex %d\n", parm->index, parm->action, parm->eaction,
-	        dev->name, parm->ifindex);
 	return ret;
 }
 
-static int
-tcf_mirred_cleanup(struct tc_action *a, int bind)
+static int tcf_mirred_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_mirred *p = PRIV(a, mirred);
+	struct tcf_mirred *m = a->priv;
 
-	if (p != NULL)
-		return tcf_mirred_release(p, bind);
+	if (m)
+		return tcf_mirred_release(m, bind);
 	return 0;
 }
 
-static int
-tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
+		      struct tcf_result *res)
 {
-	struct tcf_mirred *p = PRIV(a, mirred);
+	struct tcf_mirred *m = a->priv;
 	struct net_device *dev;
 	struct sk_buff *skb2 = NULL;
 	u32 at = G_TC_AT(skb->tc_verd);
 
-	spin_lock(&p->lock);
+	spin_lock(&m->tcf_lock);
 
-	dev = p->dev;
-	p->tm.lastuse = jiffies;
+	dev = m->tcfm_dev;
+	m->tcf_tm.lastuse = jiffies;
 
 	if (!(dev->flags&IFF_UP) ) {
 		if (net_ratelimit())
@@ -176,10 +167,10 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 bad_mirred:
 		if (skb2 != NULL)
 			kfree_skb(skb2);
-		p->qstats.overlimits++;
-		p->bstats.bytes += skb->len;
-		p->bstats.packets++;
-		spin_unlock(&p->lock);
+		m->tcf_qstats.overlimits++;
+		m->tcf_bstats.bytes += skb->len;
+		m->tcf_bstats.packets++;
+		spin_unlock(&m->tcf_lock);
 		/* should we be asking for packet to be dropped?
 		 * may make sense for redirect case only
 		*/
@@ -189,59 +180,59 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 	skb2 = skb_clone(skb, GFP_ATOMIC);
 	if (skb2 == NULL)
 		goto bad_mirred;
-	if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) {
+	if (m->tcfm_eaction != TCA_EGRESS_MIRROR &&
+	    m->tcfm_eaction != TCA_EGRESS_REDIR) {
 		if (net_ratelimit())
-			printk("tcf_mirred unknown action %d\n", p->eaction);
+			printk("tcf_mirred unknown action %d\n",
+			       m->tcfm_eaction);
 		goto bad_mirred;
 	}
 
-	p->bstats.bytes += skb2->len;
-	p->bstats.packets++;
+	m->tcf_bstats.bytes += skb2->len;
+	m->tcf_bstats.packets++;
 	if (!(at & AT_EGRESS))
-		if (p->ok_push)
+		if (m->tcfm_ok_push)
 			skb_push(skb2, skb2->dev->hard_header_len);
 
 	/* mirror is always swallowed */
-	if (p->eaction != TCA_EGRESS_MIRROR)
+	if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
 	skb2->dev = dev;
 	skb2->input_dev = skb->dev;
 	dev_queue_xmit(skb2);
-	spin_unlock(&p->lock);
-	return p->action;
+	spin_unlock(&m->tcf_lock);
+	return m->tcf_action;
 }
 
-static int
-tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char *b = skb->tail;
+	struct tcf_mirred *m = a->priv;
 	struct tc_mirred opt;
-	struct tcf_mirred *p = PRIV(a, mirred);
 	struct tcf_t t;
 
-	opt.index = p->index;
-	opt.action = p->action;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	opt.eaction = p->eaction;
-	opt.ifindex = p->ifindex;
-	DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n",
-	         p->index, p->action, p->eaction, p->ifindex);
+	opt.index = m->tcf_index;
+	opt.action = m->tcf_action;
+	opt.refcnt = m->tcf_refcnt - ref;
+	opt.bindcnt = m->tcf_bindcnt - bind;
+	opt.eaction = m->tcfm_eaction;
+	opt.ifindex = m->tcfm_ifindex;
 	RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
+	t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
 	RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
 	return skb->len;
 
-      rtattr_failure:
+rtattr_failure:
 	skb_trim(skb, b - skb->data);
 	return -1;
 }
 
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
+	.hinfo		=	&mirred_hash_info,
 	.type		=	TCA_ACT_MIRRED,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -257,15 +248,13 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)");
 MODULE_DESCRIPTION("Device Mirror/redirect actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-mirred_init_module(void)
+static int __init mirred_init_module(void)
 {
 	printk("Mirror/redirect action on\n");
 	return tcf_register_action(&act_mirred_ops);
 }
 
-static void __exit
-mirred_cleanup_module(void)
+static void __exit mirred_cleanup_module(void)
 {
 	tcf_unregister_action(&act_mirred_ops);
 }
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index f257475e0e0c9..8ac65c219b980 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -33,32 +33,25 @@
 #include <linux/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_pedit.h>
 
-
-#define PEDIT_DEB 1
-
-/* use generic hash table */
-#define MY_TAB_SIZE     16
-#define MY_TAB_MASK     15
-static u32 idx_gen;
-static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE];
+#define PEDIT_TAB_MASK	15
+static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
+static u32 pedit_idx_gen;
 static DEFINE_RWLOCK(pedit_lock);
 
-#define tcf_st		tcf_pedit
-#define tc_st		tc_pedit
-#define tcf_t_lock	pedit_lock
-#define tcf_ht		tcf_pedit_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
+static struct tcf_hashinfo pedit_hash_info = {
+	.htab	=	tcf_pedit_ht,
+	.hmask	=	PEDIT_TAB_MASK,
+	.lock	=	&pedit_lock,
+};
 
-static int
-tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
-               int ovr, int bind)
+static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est,
+			  struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_PEDIT_MAX];
 	struct tc_pedit *parm;
 	int ret = 0;
 	struct tcf_pedit *p;
+	struct tcf_common *pc;
 	struct tc_pedit_key *keys = NULL;
 	int ksize;
 
@@ -73,54 +66,56 @@ tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
 	if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize)
 		return -EINVAL;
 
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
+	pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
+	if (!pc) {
 		if (!parm->nkeys)
 			return -EINVAL;
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
+				     &pedit_idx_gen, &pedit_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
+		p = to_pedit(pc);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
-			kfree(p);
+			kfree(pc);
 			return -ENOMEM;
 		}
 		ret = ACT_P_CREATED;
 	} else {
+		p = to_pedit(pc);
 		if (!ovr) {
-			tcf_hash_release(p, bind);
+			tcf_hash_release(pc, bind, &pedit_hash_info);
 			return -EEXIST;
 		}
-		if (p->nkeys && p->nkeys != parm->nkeys) {
+		if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
 			keys = kmalloc(ksize, GFP_KERNEL);
 			if (keys == NULL)
 				return -ENOMEM;
 		}
 	}
 
-	spin_lock_bh(&p->lock);
-	p->flags = parm->flags;
-	p->action = parm->action;
+	spin_lock_bh(&p->tcf_lock);
+	p->tcfp_flags = parm->flags;
+	p->tcf_action = parm->action;
 	if (keys) {
-		kfree(p->keys);
-		p->keys = keys;
-		p->nkeys = parm->nkeys;
+		kfree(p->tcfp_keys);
+		p->tcfp_keys = keys;
+		p->tcfp_nkeys = parm->nkeys;
 	}
-	memcpy(p->keys, parm->keys, ksize);
-	spin_unlock_bh(&p->lock);
+	memcpy(p->tcfp_keys, parm->keys, ksize);
+	spin_unlock_bh(&p->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &pedit_hash_info);
 	return ret;
 }
 
-static int
-tcf_pedit_cleanup(struct tc_action *a, int bind)
+static int tcf_pedit_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_pedit *p = PRIV(a, pedit);
+	struct tcf_pedit *p = a->priv;
 
-	if (p != NULL) {
-		struct tc_pedit_key *keys = p->keys;
-		if (tcf_hash_release(p, bind)) {
+	if (p) {
+		struct tc_pedit_key *keys = p->tcfp_keys;
+		if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
 			kfree(keys);
 			return 1;
 		}
@@ -128,30 +123,30 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
 	return 0;
 }
 
-static int
-tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
+		     struct tcf_result *res)
 {
-	struct tcf_pedit *p = PRIV(a, pedit);
+	struct tcf_pedit *p = a->priv;
 	int i, munged = 0;
 	u8 *pptr;
 
 	if (!(skb->tc_verd & TC_OK2MUNGE)) {
 		/* should we set skb->cloned? */
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-			return p->action;
+			return p->tcf_action;
 		}
 	}
 
 	pptr = skb->nh.raw;
 
-	spin_lock(&p->lock);
+	spin_lock(&p->tcf_lock);
 
-	p->tm.lastuse = jiffies;
+	p->tcf_tm.lastuse = jiffies;
 
-	if (p->nkeys > 0) {
-		struct tc_pedit_key *tkey = p->keys;
+	if (p->tcfp_nkeys > 0) {
+		struct tc_pedit_key *tkey = p->tcfp_keys;
 
-		for (i = p->nkeys; i > 0; i--, tkey++) {
+		for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
 			u32 *ptr;
 			int offset = tkey->off;
 
@@ -169,7 +164,8 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 				printk("offset must be on 32 bit boundaries\n");
 				goto bad;
 			}
-			if (skb->len < 0 || (offset > 0 && offset > skb->len)) {
+			if (skb->len < 0 ||
+			    (offset > 0 && offset > skb->len)) {
 				printk("offset %d cant exceed pkt length %d\n",
 				       offset, skb->len);
 				goto bad;
@@ -185,63 +181,47 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
 		goto done;
 	} else {
-		printk("pedit BUG: index %d\n",p->index);
+		printk("pedit BUG: index %d\n", p->tcf_index);
 	}
 
 bad:
-	p->qstats.overlimits++;
+	p->tcf_qstats.overlimits++;
 done:
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
-	spin_unlock(&p->lock);
-	return p->action;
+	p->tcf_bstats.bytes += skb->len;
+	p->tcf_bstats.packets++;
+	spin_unlock(&p->tcf_lock);
+	return p->tcf_action;
 }
 
-static int
-tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
+static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
+			  int bind, int ref)
 {
 	unsigned char *b = skb->tail;
+	struct tcf_pedit *p = a->priv;
 	struct tc_pedit *opt;
-	struct tcf_pedit *p = PRIV(a, pedit);
 	struct tcf_t t;
 	int s; 
 		
-	s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key);
+	s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
 
 	/* netlink spinlocks held above us - must use ATOMIC */
 	opt = kzalloc(s, GFP_ATOMIC);
-	if (opt == NULL)
+	if (unlikely(!opt))
 		return -ENOBUFS;
 
-	memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key));
-	opt->index = p->index;
-	opt->nkeys = p->nkeys;
-	opt->flags = p->flags;
-	opt->action = p->action;
-	opt->refcnt = p->refcnt - ref;
-	opt->bindcnt = p->bindcnt - bind;
-
-
-#ifdef PEDIT_DEB
-	{                
-		/* Debug - get rid of later */
-		int i;
-		struct tc_pedit_key *key = opt->keys;
-
-		for (i=0; i<opt->nkeys; i++, key++) {
-			printk( "\n key #%d",i);
-			printk( "  at %d: val %08x mask %08x",
-			(unsigned int)key->off,
-			(unsigned int)key->val,
-			(unsigned int)key->mask);
-		}
-	}
-#endif
+	memcpy(opt->keys, p->tcfp_keys,
+	       p->tcfp_nkeys * sizeof(struct tc_pedit_key));
+	opt->index = p->tcf_index;
+	opt->nkeys = p->tcfp_nkeys;
+	opt->flags = p->tcfp_flags;
+	opt->action = p->tcf_action;
+	opt->refcnt = p->tcf_refcnt - ref;
+	opt->bindcnt = p->tcf_bindcnt - bind;
 
 	RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
+	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
 	RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
 	kfree(opt);
 	return skb->len;
@@ -252,9 +232,9 @@ tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
 	return -1;
 }
 
-static
-struct tc_action_ops act_pedit_ops = {
+static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
+	.hinfo		=	&pedit_hash_info,
 	.type		=	TCA_ACT_PEDIT,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -270,14 +250,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
 MODULE_DESCRIPTION("Generic Packet Editor actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-pedit_init_module(void)
+static int __init pedit_init_module(void)
 {
 	return tcf_register_action(&act_pedit_ops);
 }
 
-static void __exit
-pedit_cleanup_module(void)
+static void __exit pedit_cleanup_module(void)
 {
 	tcf_unregister_action(&act_pedit_ops);
 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index da905d7b4b40f..fed47b658837a 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -32,43 +32,27 @@
 #include <net/sock.h>
 #include <net/act_api.h>
 
-#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
-#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
-#define PRIV(a) ((struct tcf_police *) (a)->priv)
-
-/* use generic hash table */
-#define MY_TAB_SIZE     16
-#define MY_TAB_MASK     15
-static u32 idx_gen;
-static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
-/* Policer hash table lock */
-static DEFINE_RWLOCK(police_lock);
-
-/* Each policer is serialized by its individual spinlock */
+#define L2T(p,L)   ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
+#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
 
-static __inline__ unsigned tcf_police_hash(u32 index)
-{
-	return index&0xF;
-}
+#define POL_TAB_MASK     15
+static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
+static u32 police_idx_gen;
+static DEFINE_RWLOCK(police_lock);
 
-static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
-{
-	struct tcf_police *p;
+static struct tcf_hashinfo police_hash_info = {
+	.htab	=	tcf_police_ht,
+	.hmask	=	POL_TAB_MASK,
+	.lock	=	&police_lock,
+};
 
-	read_lock(&police_lock);
-	for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
-		if (p->index == index)
-			break;
-	}
-	read_unlock(&police_lock);
-	return p;
-}
+/* Each policer is serialized by its individual spinlock */
 
 #ifdef CONFIG_NET_CLS_ACT
 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
                               int type, struct tc_action *a)
 {
-	struct tcf_police *p;
+	struct tcf_common *p;
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	struct rtattr *r;
 
@@ -76,10 +60,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 
 	s_i = cb->args[0];
 
-	for (i = 0; i < MY_TAB_SIZE; i++) {
-		p = tcf_police_ht[tcf_police_hash(i)];
+	for (i = 0; i < (POL_TAB_MASK + 1); i++) {
+		p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
 
-		for (; p; p = p->next) {
+		for (; p; p = p->tcfc_next) {
 			index++;
 			if (index < s_i)
 				continue;
@@ -110,48 +94,26 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 	skb_trim(skb, (u8*)r - skb->data);
 	goto done;
 }
-
-static inline int
-tcf_act_police_hash_search(struct tc_action *a, u32 index)
-{
-	struct tcf_police *p = tcf_police_lookup(index);
-
-	if (p != NULL) {
-		a->priv = p;
-		return 1;
-	} else {
-		return 0;
-	}
-}
 #endif
 
-static inline u32 tcf_police_new_index(void)
-{
-	do {
-		if (++idx_gen == 0)
-			idx_gen = 1;
-	} while (tcf_police_lookup(idx_gen));
-
-	return idx_gen;
-}
-
 void tcf_police_destroy(struct tcf_police *p)
 {
-	unsigned h = tcf_police_hash(p->index);
-	struct tcf_police **p1p;
+	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
+	struct tcf_common **p1p;
 	
-	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
-		if (*p1p == p) {
+	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+		if (*p1p == &p->common) {
 			write_lock_bh(&police_lock);
-			*p1p = p->next;
+			*p1p = p->tcf_next;
 			write_unlock_bh(&police_lock);
 #ifdef CONFIG_NET_ESTIMATOR
-			gen_kill_estimator(&p->bstats, &p->rate_est);
+			gen_kill_estimator(&p->tcf_bstats,
+					   &p->tcf_rate_est);
 #endif
-			if (p->R_tab)
-				qdisc_put_rtab(p->R_tab);
-			if (p->P_tab)
-				qdisc_put_rtab(p->P_tab);
+			if (p->tcfp_R_tab)
+				qdisc_put_rtab(p->tcfp_R_tab);
+			if (p->tcfp_P_tab)
+				qdisc_put_rtab(p->tcfp_P_tab);
 			kfree(p);
 			return;
 		}
@@ -167,7 +129,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
 	int ret = 0, err;
 	struct rtattr *tb[TCA_POLICE_MAX];
 	struct tc_police *parm;
-	struct tcf_police *p;
+	struct tcf_police *police;
 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 
 	if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
@@ -185,27 +147,32 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
 	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
 		return -EINVAL;
 
-	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
-		a->priv = p;
-		if (bind) {
-			p->bindcnt += 1;
-			p->refcnt += 1;
+	if (parm->index) {
+		struct tcf_common *pc;
+
+		pc = tcf_hash_lookup(parm->index, &police_hash_info);
+		if (pc != NULL) {
+			a->priv = pc;
+			police = to_police(pc);
+			if (bind) {
+				police->tcf_bindcnt += 1;
+				police->tcf_refcnt += 1;
+			}
+			if (ovr)
+				goto override;
+			return ret;
 		}
-		if (ovr)
-			goto override;
-		return ret;
 	}
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (p == NULL)
+	police = kzalloc(sizeof(*police), GFP_KERNEL);
+	if (police == NULL)
 		return -ENOMEM;
-
 	ret = ACT_P_CREATED;
-	p->refcnt = 1;
-	spin_lock_init(&p->lock);
-	p->stats_lock = &p->lock;
+	police->tcf_refcnt = 1;
+	spin_lock_init(&police->tcf_lock);
+	police->tcf_stats_lock = &police->tcf_lock;
 	if (bind)
-		p->bindcnt = 1;
+		police->tcf_bindcnt = 1;
 override:
 	if (parm->rate.rate) {
 		err = -ENOMEM;
@@ -215,67 +182,71 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
 					       tb[TCA_POLICE_PEAKRATE-1]);
-			if (p->P_tab == NULL) {
+			if (P_tab == NULL) {
 				qdisc_put_rtab(R_tab);
 				goto failure;
 			}
 		}
 	}
 	/* No failure allowed after this point */
-	spin_lock_bh(&p->lock);
+	spin_lock_bh(&police->tcf_lock);
 	if (R_tab != NULL) {
-		qdisc_put_rtab(p->R_tab);
-		p->R_tab = R_tab;
+		qdisc_put_rtab(police->tcfp_R_tab);
+		police->tcfp_R_tab = R_tab;
 	}
 	if (P_tab != NULL) {
-		qdisc_put_rtab(p->P_tab);
-		p->P_tab = P_tab;
+		qdisc_put_rtab(police->tcfp_P_tab);
+		police->tcfp_P_tab = P_tab;
 	}
 
 	if (tb[TCA_POLICE_RESULT-1])
-		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
-	p->toks = p->burst = parm->burst;
-	p->mtu = parm->mtu;
-	if (p->mtu == 0) {
-		p->mtu = ~0;
-		if (p->R_tab)
-			p->mtu = 255<<p->R_tab->rate.cell_log;
+		police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+	police->tcfp_toks = police->tcfp_burst = parm->burst;
+	police->tcfp_mtu = parm->mtu;
+	if (police->tcfp_mtu == 0) {
+		police->tcfp_mtu = ~0;
+		if (police->tcfp_R_tab)
+			police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
 	}
-	if (p->P_tab)
-		p->ptoks = L2T_P(p, p->mtu);
-	p->action = parm->action;
+	if (police->tcfp_P_tab)
+		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
+	police->tcf_action = parm->action;
 
 #ifdef CONFIG_NET_ESTIMATOR
 	if (tb[TCA_POLICE_AVRATE-1])
-		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+		police->tcfp_ewma_rate =
+			*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
 	if (est)
-		gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+		gen_replace_estimator(&police->tcf_bstats,
+				      &police->tcf_rate_est,
+				      police->tcf_stats_lock, est);
 #endif
 
-	spin_unlock_bh(&p->lock);
+	spin_unlock_bh(&police->tcf_lock);
 	if (ret != ACT_P_CREATED)
 		return ret;
 
-	PSCHED_GET_TIME(p->t_c);
-	p->index = parm->index ? : tcf_police_new_index();
-	h = tcf_police_hash(p->index);
+	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcf_index = parm->index ? parm->index :
+		tcf_hash_new_index(&police_idx_gen, &police_hash_info);
+	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
 	write_lock_bh(&police_lock);
-	p->next = tcf_police_ht[h];
-	tcf_police_ht[h] = p;
+	police->tcf_next = tcf_police_ht[h];
+	tcf_police_ht[h] = &police->common;
 	write_unlock_bh(&police_lock);
 
-	a->priv = p;
+	a->priv = police;
 	return ret;
 
 failure:
 	if (ret == ACT_P_CREATED)
-		kfree(p);
+		kfree(police);
 	return err;
 }
 
 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_police *p = PRIV(a);
+	struct tcf_police *p = a->priv;
 
 	if (p != NULL)
 		return tcf_police_release(p, bind);
@@ -285,86 +256,87 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
                           struct tcf_result *res)
 {
+	struct tcf_police *police = a->priv;
 	psched_time_t now;
-	struct tcf_police *p = PRIV(a);
 	long toks;
 	long ptoks = 0;
 
-	spin_lock(&p->lock);
+	spin_lock(&police->tcf_lock);
 
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	police->tcf_bstats.bytes += skb->len;
+	police->tcf_bstats.packets++;
 
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
-		p->qstats.overlimits++;
-		spin_unlock(&p->lock);
-		return p->action;
+	if (police->tcfp_ewma_rate &&
+	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
+		police->tcf_qstats.overlimits++;
+		spin_unlock(&police->tcf_lock);
+		return police->tcf_action;
 	}
 #endif
 
-	if (skb->len <= p->mtu) {
-		if (p->R_tab == NULL) {
-			spin_unlock(&p->lock);
-			return p->result;
+	if (skb->len <= police->tcfp_mtu) {
+		if (police->tcfp_R_tab == NULL) {
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 
 		PSCHED_GET_TIME(now);
 
-		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
-
-		if (p->P_tab) {
-			ptoks = toks + p->ptoks;
-			if (ptoks > (long)L2T_P(p, p->mtu))
-				ptoks = (long)L2T_P(p, p->mtu);
-			ptoks -= L2T_P(p, skb->len);
+		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
+					 police->tcfp_burst);
+		if (police->tcfp_P_tab) {
+			ptoks = toks + police->tcfp_ptoks;
+			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
+				ptoks = (long)L2T_P(police, police->tcfp_mtu);
+			ptoks -= L2T_P(police, skb->len);
 		}
-		toks += p->toks;
-		if (toks > (long)p->burst)
-			toks = p->burst;
-		toks -= L2T(p, skb->len);
-
+		toks += police->tcfp_toks;
+		if (toks > (long)police->tcfp_burst)
+			toks = police->tcfp_burst;
+		toks -= L2T(police, skb->len);
 		if ((toks|ptoks) >= 0) {
-			p->t_c = now;
-			p->toks = toks;
-			p->ptoks = ptoks;
-			spin_unlock(&p->lock);
-			return p->result;
+			police->tcfp_t_c = now;
+			police->tcfp_toks = toks;
+			police->tcfp_ptoks = ptoks;
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 	}
 
-	p->qstats.overlimits++;
-	spin_unlock(&p->lock);
-	return p->action;
+	police->tcf_qstats.overlimits++;
+	spin_unlock(&police->tcf_lock);
+	return police->tcf_action;
 }
 
 static int
 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char	 *b = skb->tail;
+	struct tcf_police *police = a->priv;
 	struct tc_police opt;
-	struct tcf_police *p = PRIV(a);
-
-	opt.index = p->index;
-	opt.action = p->action;
-	opt.mtu = p->mtu;
-	opt.burst = p->burst;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	if (p->R_tab)
-		opt.rate = p->R_tab->rate;
+
+	opt.index = police->tcf_index;
+	opt.action = police->tcf_action;
+	opt.mtu = police->tcfp_mtu;
+	opt.burst = police->tcfp_burst;
+	opt.refcnt = police->tcf_refcnt - ref;
+	opt.bindcnt = police->tcf_bindcnt - bind;
+	if (police->tcfp_R_tab)
+		opt.rate = police->tcfp_R_tab->rate;
 	else
 		memset(&opt.rate, 0, sizeof(opt.rate));
-	if (p->P_tab)
-		opt.peakrate = p->P_tab->rate;
+	if (police->tcfp_P_tab)
+		opt.peakrate = police->tcfp_P_tab->rate;
 	else
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
 	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
-	if (p->result)
-		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+	if (police->tcfp_result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
+			&police->tcfp_result);
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate)
-		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+	if (police->tcfp_ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
 #endif
 	return skb->len;
 
@@ -379,13 +351,14 @@ MODULE_LICENSE("GPL");
 
 static struct tc_action_ops act_police_ops = {
 	.kind		=	"police",
+	.hinfo		=	&police_hash_info,
 	.type		=	TCA_ID_POLICE,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_act_police,
 	.dump		=	tcf_act_police_dump,
 	.cleanup	=	tcf_act_police_cleanup,
-	.lookup		=	tcf_act_police_hash_search,
+	.lookup		=	tcf_hash_search,
 	.init		=	tcf_act_police_locate,
 	.walk		=	tcf_act_police_walker
 };
@@ -407,10 +380,39 @@ module_exit(police_cleanup_module);
 
 #else /* CONFIG_NET_CLS_ACT */
 
-struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+static struct tcf_common *tcf_police_lookup(u32 index)
 {
-	unsigned h;
-	struct tcf_police *p;
+	struct tcf_hashinfo *hinfo = &police_hash_info;
+	struct tcf_common *p;
+
+	read_lock(hinfo->lock);
+	for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
+	     p = p->tcfc_next) {
+		if (p->tcfc_index == index)
+			break;
+	}
+	read_unlock(hinfo->lock);
+
+	return p;
+}
+
+static u32 tcf_police_new_index(void)
+{
+	u32 *idx_gen = &police_idx_gen;
+	u32 val = *idx_gen;
+
+	do {
+		if (++val == 0)
+			val = 1;
+	} while (tcf_police_lookup(val));
+
+	return (*idx_gen = val);
+}
+
+struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+{
+	unsigned int h;
+	struct tcf_police *police;
 	struct rtattr *tb[TCA_POLICE_MAX];
 	struct tc_police *parm;
 
@@ -423,149 +425,158 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 
 	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
 
-	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
-		p->refcnt++;
-		return p;
-	}
+	if (parm->index) {
+		struct tcf_common *pc;
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (p == NULL)
+		pc = tcf_police_lookup(parm->index);
+		if (pc) {
+			police = to_police(pc);
+			police->tcf_refcnt++;
+			return police;
+		}
+	}
+	police = kzalloc(sizeof(*police), GFP_KERNEL);
+	if (unlikely(!police))
 		return NULL;
 
-	p->refcnt = 1;
-	spin_lock_init(&p->lock);
-	p->stats_lock = &p->lock;
+	police->tcf_refcnt = 1;
+	spin_lock_init(&police->tcf_lock);
+	police->tcf_stats_lock = &police->tcf_lock;
 	if (parm->rate.rate) {
-		p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
-		if (p->R_tab == NULL)
+		police->tcfp_R_tab =
+			qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+		if (police->tcfp_R_tab == NULL)
 			goto failure;
 		if (parm->peakrate.rate) {
-			p->P_tab = qdisc_get_rtab(&parm->peakrate,
-			                          tb[TCA_POLICE_PEAKRATE-1]);
-			if (p->P_tab == NULL)
+			police->tcfp_P_tab =
+				qdisc_get_rtab(&parm->peakrate,
+					       tb[TCA_POLICE_PEAKRATE-1]);
+			if (police->tcfp_P_tab == NULL)
 				goto failure;
 		}
 	}
 	if (tb[TCA_POLICE_RESULT-1]) {
 		if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
 			goto failure;
-		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+		police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
 	}
 #ifdef CONFIG_NET_ESTIMATOR
 	if (tb[TCA_POLICE_AVRATE-1]) {
 		if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
 			goto failure;
-		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+		police->tcfp_ewma_rate =
+			*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
 	}
 #endif
-	p->toks = p->burst = parm->burst;
-	p->mtu = parm->mtu;
-	if (p->mtu == 0) {
-		p->mtu = ~0;
-		if (p->R_tab)
-			p->mtu = 255<<p->R_tab->rate.cell_log;
+	police->tcfp_toks = police->tcfp_burst = parm->burst;
+	police->tcfp_mtu = parm->mtu;
+	if (police->tcfp_mtu == 0) {
+		police->tcfp_mtu = ~0;
+		if (police->tcfp_R_tab)
+			police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
 	}
-	if (p->P_tab)
-		p->ptoks = L2T_P(p, p->mtu);
-	PSCHED_GET_TIME(p->t_c);
-	p->index = parm->index ? : tcf_police_new_index();
-	p->action = parm->action;
+	if (police->tcfp_P_tab)
+		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
+	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcf_index = parm->index ? parm->index :
+		tcf_police_new_index();
+	police->tcf_action = parm->action;
 #ifdef CONFIG_NET_ESTIMATOR
 	if (est)
-		gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+		gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
+				  police->tcf_stats_lock, est);
 #endif
-	h = tcf_police_hash(p->index);
+	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
 	write_lock_bh(&police_lock);
-	p->next = tcf_police_ht[h];
-	tcf_police_ht[h] = p;
+	police->tcf_next = tcf_police_ht[h];
+	tcf_police_ht[h] = &police->common;
 	write_unlock_bh(&police_lock);
-	return p;
+	return police;
 
 failure:
-	if (p->R_tab)
-		qdisc_put_rtab(p->R_tab);
-	kfree(p);
+	if (police->tcfp_R_tab)
+		qdisc_put_rtab(police->tcfp_R_tab);
+	kfree(police);
 	return NULL;
 }
 
-int tcf_police(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police(struct sk_buff *skb, struct tcf_police *police)
 {
 	psched_time_t now;
 	long toks;
 	long ptoks = 0;
 
-	spin_lock(&p->lock);
+	spin_lock(&police->tcf_lock);
 
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	police->tcf_bstats.bytes += skb->len;
+	police->tcf_bstats.packets++;
 
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
-		p->qstats.overlimits++;
-		spin_unlock(&p->lock);
-		return p->action;
+	if (police->tcfp_ewma_rate &&
+	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
+		police->tcf_qstats.overlimits++;
+		spin_unlock(&police->tcf_lock);
+		return police->tcf_action;
 	}
 #endif
-
-	if (skb->len <= p->mtu) {
-		if (p->R_tab == NULL) {
-			spin_unlock(&p->lock);
-			return p->result;
+	if (skb->len <= police->tcfp_mtu) {
+		if (police->tcfp_R_tab == NULL) {
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 
 		PSCHED_GET_TIME(now);
-
-		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
-
-		if (p->P_tab) {
-			ptoks = toks + p->ptoks;
-			if (ptoks > (long)L2T_P(p, p->mtu))
-				ptoks = (long)L2T_P(p, p->mtu);
-			ptoks -= L2T_P(p, skb->len);
+		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
+					 police->tcfp_burst);
+		if (police->tcfp_P_tab) {
+			ptoks = toks + police->tcfp_ptoks;
+			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
+				ptoks = (long)L2T_P(police, police->tcfp_mtu);
+			ptoks -= L2T_P(police, skb->len);
 		}
-		toks += p->toks;
-		if (toks > (long)p->burst)
-			toks = p->burst;
-		toks -= L2T(p, skb->len);
-
+		toks += police->tcfp_toks;
+		if (toks > (long)police->tcfp_burst)
+			toks = police->tcfp_burst;
+		toks -= L2T(police, skb->len);
 		if ((toks|ptoks) >= 0) {
-			p->t_c = now;
-			p->toks = toks;
-			p->ptoks = ptoks;
-			spin_unlock(&p->lock);
-			return p->result;
+			police->tcfp_t_c = now;
+			police->tcfp_toks = toks;
+			police->tcfp_ptoks = ptoks;
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 	}
 
-	p->qstats.overlimits++;
-	spin_unlock(&p->lock);
-	return p->action;
+	police->tcf_qstats.overlimits++;
+	spin_unlock(&police->tcf_lock);
+	return police->tcf_action;
 }
 EXPORT_SYMBOL(tcf_police);
 
-int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb->tail;
 	struct tc_police opt;
 
-	opt.index = p->index;
-	opt.action = p->action;
-	opt.mtu = p->mtu;
-	opt.burst = p->burst;
-	if (p->R_tab)
-		opt.rate = p->R_tab->rate;
+	opt.index = police->tcf_index;
+	opt.action = police->tcf_action;
+	opt.mtu = police->tcfp_mtu;
+	opt.burst = police->tcfp_burst;
+	if (police->tcfp_R_tab)
+		opt.rate = police->tcfp_R_tab->rate;
 	else
 		memset(&opt.rate, 0, sizeof(opt.rate));
-	if (p->P_tab)
-		opt.peakrate = p->P_tab->rate;
+	if (police->tcfp_P_tab)
+		opt.peakrate = police->tcfp_P_tab->rate;
 	else
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
 	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
-	if (p->result)
-		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+	if (police->tcfp_result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
+			&police->tcfp_result);
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate)
-		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+	if (police->tcfp_ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
 #endif
 	return skb->len;
 
@@ -574,19 +585,20 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
 	return -1;
 }
 
-int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
 {
 	struct gnet_dump d;
 	
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-			TCA_XSTATS, p->stats_lock, &d) < 0)
+					 TCA_XSTATS, police->tcf_stats_lock,
+					 &d) < 0)
 		goto errout;
 	
-	if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
+	if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 ||
 #ifdef CONFIG_NET_ESTIMATOR
-	    gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 ||
 #endif
-	    gnet_stats_copy_queue(&d, &p->qstats) < 0)
+	    gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0)
 		goto errout;
 
 	if (gnet_stats_finish_copy(&d) < 0)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 17105c82537fa..8c1ab8ad8fa6d 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -20,54 +20,175 @@
 
 #define TCA_ACT_SIMP 22
 
-/* XXX: Hide all these common elements under some macro 
- * probably
-*/
 #include <linux/tc_act/tc_defact.h>
 #include <net/tc_act/tc_defact.h>
 
-/* use generic hash table with 8 buckets */
-#define MY_TAB_SIZE     8
-#define MY_TAB_MASK     (MY_TAB_SIZE - 1)
-static u32 idx_gen;
-static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE];
+#define SIMP_TAB_MASK     7
+static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
+static u32 simp_idx_gen;
 static DEFINE_RWLOCK(simp_lock);
 
-/* override the defaults */
-#define tcf_st		tcf_defact
-#define tc_st		tc_defact
-#define tcf_t_lock	simp_lock
-#define tcf_ht		tcf_simp_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
-#include <net/act_generic.h>
+struct tcf_hashinfo simp_hash_info = {
+	.htab	=	tcf_simp_ht,
+	.hmask	=	SIMP_TAB_MASK,
+	.lock	=	&simp_lock,
+};
 
 static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
-	struct tcf_defact *p = PRIV(a, defact);
+	struct tcf_defact *d = a->priv;
 
-	spin_lock(&p->lock);
-	p->tm.lastuse = jiffies;
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	spin_lock(&d->tcf_lock);
+	d->tcf_tm.lastuse = jiffies;
+	d->tcf_bstats.bytes += skb->len;
+	d->tcf_bstats.packets++;
 
 	/* print policy string followed by _ then packet count 
 	 * Example if this was the 3rd packet and the string was "hello" 
 	 * then it would look like "hello_3" (without quotes) 
 	 **/
-	printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets);
-	spin_unlock(&p->lock);
-	return p->action;
+	printk("simple: %s_%d\n",
+	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
+	spin_unlock(&d->tcf_lock);
+	return d->tcf_action;
+}
+
+static int tcf_simp_release(struct tcf_defact *d, int bind)
+{
+	int ret = 0;
+	if (d) {
+		if (bind)
+			d->tcf_bindcnt--;
+		d->tcf_refcnt--;
+		if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
+			kfree(d->tcfd_defdata);
+			tcf_hash_destroy(&d->common, &simp_hash_info);
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
+{
+	d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL);
+	if (unlikely(!d->tcfd_defdata))
+		return -ENOMEM;
+	d->tcfd_datalen = datalen;
+	memcpy(d->tcfd_defdata, defdata, datalen);
+	return 0;
+}
+
+static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
+{
+	kfree(d->tcfd_defdata);
+	return alloc_defdata(d, datalen, defdata);
+}
+
+static int tcf_simp_init(struct rtattr *rta, struct rtattr *est,
+			 struct tc_action *a, int ovr, int bind)
+{
+	struct rtattr *tb[TCA_DEF_MAX];
+	struct tc_defact *parm;
+	struct tcf_defact *d;
+	struct tcf_common *pc;
+	void *defdata;
+	u32 datalen = 0;
+	int ret = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_DEF_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
+		return -EINVAL;
+
+	parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
+	defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
+	if (defdata == NULL)
+		return -EINVAL;
+
+	datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
+	if (datalen <= 0)
+		return -EINVAL;
+
+	pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+				     &simp_idx_gen, &simp_hash_info);
+		if (unlikely(!pc))
+			return -ENOMEM;
+
+		d = to_defact(pc);
+		ret = alloc_defdata(d, datalen, defdata);
+		if (ret < 0) {
+			kfree(pc);
+			return ret;
+		}
+		ret = ACT_P_CREATED;
+	} else {
+		d = to_defact(pc);
+		if (!ovr) {
+			tcf_simp_release(d, bind);
+			return -EEXIST;
+		}
+		realloc_defdata(d, datalen, defdata);
+	}
+
+	spin_lock_bh(&d->tcf_lock);
+	d->tcf_action = parm->action;
+	spin_unlock_bh(&d->tcf_lock);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(pc, &simp_hash_info);
+	return ret;
+}
+
+static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_defact *d = a->priv;
+
+	if (d)
+		return tcf_simp_release(d, bind);
+	return 0;
+}
+
+static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+				int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tcf_defact *d = a->priv;
+	struct tc_defact opt;
+	struct tcf_t t;
+
+	opt.index = d->tcf_index;
+	opt.refcnt = d->tcf_refcnt - ref;
+	opt.bindcnt = d->tcf_bindcnt - bind;
+	opt.action = d->tcf_action;
+	RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
+	RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
+	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+	RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
 }
 
 static struct tc_action_ops act_simp_ops = {
-	.kind = "simple",
-	.type = TCA_ACT_SIMP,
-	.capab = TCA_CAP_NONE,
-	.owner = THIS_MODULE,
-	.act = tcf_simp,
-	tca_use_default_ops
+	.kind		=	"simple",
+	.hinfo		=	&simp_hash_info,
+	.type		=	TCA_ACT_SIMP,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_simp,
+	.dump		=	tcf_simp_dump,
+	.cleanup	=	tcf_simp_cleanup,
+	.init		=	tcf_simp_init,
+	.walk		=	tcf_generic_walker,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2005)");
-- 
GitLab


From e0a1ad73d34fd6dfdb630479400511e9879069c0 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 22 Aug 2006 00:00:21 -0700
Subject: [PATCH 0492/1063] [IPv6] route: Simplify ip6_del_rt()

Provide a simple ip6_del_rt() for the majority of users and
an alternative for the exception via netlink. Avoids code
obfuscation.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  5 +----
 net/ipv6/addrconf.c     |  6 +++---
 net/ipv6/ndisc.c        |  4 ++--
 net/ipv6/route.c        | 18 ++++++++++++------
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 01bfe404784f5..a7e6086a2bd42 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -69,10 +69,7 @@ extern int			ip6_ins_rt(struct rt6_info *,
 					   struct nlmsghdr *,
 					   void *rtattr,
 					   struct netlink_skb_parms *req);
-extern int			ip6_del_rt(struct rt6_info *,
-					   struct nlmsghdr *,
-					   void *rtattr,
-					   struct netlink_skb_parms *req);
+extern int			ip6_del_rt(struct rt6_info *);
 
 extern int			ip6_rt_addr_add(struct in6_addr *addr,
 						struct net_device *dev,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f1ede90048870..27f2e3309598d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -736,7 +736,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
 			if (onlink == 0) {
-				ip6_del_rt(rt, NULL, NULL, NULL);
+				ip6_del_rt(rt);
 				rt = NULL;
 			} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
 				rt->rt6i_expires = expires;
@@ -1662,7 +1662,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
 			if (rt->rt6i_flags&RTF_EXPIRES) {
 				if (valid_lft == 0) {
-					ip6_del_rt(rt, NULL, NULL, NULL);
+					ip6_del_rt(rt);
 					rt = NULL;
 				} else {
 					rt->rt6i_expires = jiffies + rt_expires;
@@ -3557,7 +3557,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		dst_hold(&ifp->rt->u.dst);
-		if (ip6_del_rt(ifp->rt, NULL, NULL, NULL))
+		if (ip6_del_rt(ifp->rt))
 			dst_free(&ifp->rt->u.dst);
 		break;
 	}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 5743e8bffefd5..419d65163819e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -961,7 +961,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 			struct rt6_info *rt;
 			rt = rt6_get_dflt_router(saddr, dev);
 			if (rt)
-				ip6_del_rt(rt, NULL, NULL, NULL);
+				ip6_del_rt(rt);
 		}
 
 out:
@@ -1114,7 +1114,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 
 	if (rt && lifetime == 0) {
 		neigh_clone(neigh);
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		rt = NULL;
 	}
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1aca787ead85b..8d511de0db1b8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -457,7 +457,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
 
 	if (rt && !lifetime) {
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		rt = NULL;
 	}
 
@@ -813,7 +813,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE)
-			ip6_del_rt(rt, NULL, NULL, NULL);
+			ip6_del_rt(rt);
 		else
 			dst_release(dst);
 	}
@@ -1218,7 +1218,8 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
+			void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
 	struct fib6_table *table;
@@ -1237,6 +1238,11 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct
 	return err;
 }
 
+int ip6_del_rt(struct rt6_info *rt)
+{
+	return __ip6_del_rt(rt, NULL, NULL, NULL);
+}
+
 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			 void *_rtattr, struct netlink_skb_parms *req,
 			 u32 table_id)
@@ -1271,7 +1277,7 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			dst_hold(&rt->u.dst);
 			read_unlock_bh(&table->tb6_lock);
 
-			return ip6_del_rt(rt, nlh, _rtattr, req);
+			return __ip6_del_rt(rt, nlh, _rtattr, req);
 		}
 	}
 	read_unlock_bh(&table->tb6_lock);
@@ -1395,7 +1401,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags&RTF_CACHE) {
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		return;
 	}
 
@@ -1631,7 +1637,7 @@ void rt6_purge_dflt_routers(void)
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
 			dst_hold(&rt->u.dst);
 			read_unlock_bh(&table->tb6_lock);
-			ip6_del_rt(rt, NULL, NULL, NULL);
+			ip6_del_rt(rt);
 			goto restart;
 		}
 	}
-- 
GitLab


From 40e22e8f3d4d4f1ff68fb03683f007c53ee8b348 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 22 Aug 2006 00:00:45 -0700
Subject: [PATCH 0493/1063] [IPv6] route: Simplify ip6_ins_rt()

Provide a simple ip6_ins_rt() for the majority of users and
an alternative for the exception via netlink. Avoids code
obfuscation.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  5 +----
 net/ipv6/addrconf.c     |  2 +-
 net/ipv6/anycast.c      |  2 +-
 net/ipv6/route.c        | 19 ++++++++++++-------
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index a7e6086a2bd42..172c4761e2bf3 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -65,10 +65,7 @@ extern int			ip6_route_add(struct in6_rtmsg *rtmsg,
 					      void *rtattr,
 					      struct netlink_skb_parms *req,
 					      u32 table_id);
-extern int			ip6_ins_rt(struct rt6_info *,
-					   struct nlmsghdr *,
-					   void *rtattr,
-					   struct netlink_skb_parms *req);
+extern int			ip6_ins_rt(struct rt6_info *);
 extern int			ip6_del_rt(struct rt6_info *);
 
 extern int			ip6_rt_addr_add(struct in6_addr *addr,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 27f2e3309598d..aafba9ea9cb6f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3548,7 +3548,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 
 	switch (event) {
 	case RTM_NEWADDR:
-		ip6_ins_rt(ifp->rt, NULL, NULL, NULL);
+		ip6_ins_rt(ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		break;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f6881d7a03859..abbc35a13e088 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -335,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 
 	dst_hold(&rt->u.dst);
-	if (ip6_ins_rt(rt, NULL, NULL, NULL))
+	if (ip6_ins_rt(rt))
 		dst_release(&rt->u.dst);
 
 	addrconf_join_solict(dev, &aca->aca_addr);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8d511de0db1b8..9ec348a72a959 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -546,8 +546,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
    be destroyed.
  */
 
-int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
-		void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
+			void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
 	struct fib6_table *table;
@@ -560,6 +560,11 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
 	return err;
 }
 
+int ip6_ins_rt(struct rt6_info *rt)
+{
+	return __ip6_ins_rt(rt, NULL, NULL, NULL);
+}
+
 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
 				      struct in6_addr *saddr)
 {
@@ -657,7 +662,7 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
 
 	dst_hold(&rt->u.dst);
 	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
+		err = ip6_ins_rt(nrt);
 		if (!err)
 			goto out2;
 	}
@@ -752,7 +757,7 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
 
 	dst_hold(&rt->u.dst);
 	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
+		err = ip6_ins_rt(nrt);
 		if (!err)
 			goto out2;
 	}
@@ -1206,7 +1211,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	rt->u.dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
-	return ip6_ins_rt(rt, nlh, _rtattr, req);
+	return __ip6_ins_rt(rt, nlh, _rtattr, req);
 
 out:
 	if (dev)
@@ -1393,7 +1398,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
 
-	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
+	if (ip6_ins_rt(nrt))
 		goto out;
 
 	netevent.old = &rt->u.dst;
@@ -1483,7 +1488,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
-		ip6_ins_rt(nrt, NULL, NULL, NULL);
+		ip6_ins_rt(nrt);
 	}
 out:
 	dst_release(&rt->u.dst);
-- 
GitLab


From 86872cb57925c46a6499887d77afb880a892c0ec Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 22 Aug 2006 00:01:08 -0700
Subject: [PATCH 0494/1063] [IPv6] route: FIB6 configuration using struct
 fib6_config

Replaces the struct in6_rtmsg based interface originating from
the ioctl interface with a struct fib6_config based one. Allows
changing the interface without breaking the ioctl interface
and avoids passing on tons of parameters.

The recently introduced struct nl_info is used to pass on
netlink authorship information for notifications.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h   |  38 +++--
 include/net/ip6_route.h |   6 +-
 net/ipv6/addrconf.c     |  65 ++++----
 net/ipv6/ip6_fib.c      |  19 ++-
 net/ipv6/route.c        | 331 +++++++++++++++++++++++-----------------
 5 files changed, 259 insertions(+), 200 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 69c444209781e..9610b887ffb55 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,14 +16,35 @@
 #ifdef __KERNEL__
 
 #include <linux/ipv6_route.h>
-
-#include <net/dst.h>
-#include <net/flow.h>
 #include <linux/rtnetlink.h>
 #include <linux/spinlock.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/netlink.h>
 
 struct rt6_info;
 
+struct fib6_config
+{
+	u32		fc_table;
+	u32		fc_metric;
+	int		fc_dst_len;
+	int		fc_src_len;
+	int		fc_ifindex;
+	u32		fc_flags;
+	u32		fc_protocol;
+
+	struct in6_addr	fc_dst;
+	struct in6_addr	fc_src;
+	struct in6_addr	fc_gateway;
+
+	unsigned long	fc_expires;
+	struct nlattr	*fc_mx;
+	int		fc_mx_len;
+
+	struct nl_info	fc_nlinfo;
+};
+
 struct fib6_node
 {
 	struct fib6_node	*parent;
@@ -175,18 +196,13 @@ extern void			fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
 
 extern int			fib6_add(struct fib6_node *root,
 					 struct rt6_info *rt,
-					 struct nlmsghdr *nlh,
-					 void *rtattr,
-					 struct netlink_skb_parms *req);
+					 struct nl_info *info);
 
 extern int			fib6_del(struct rt6_info *rt,
-					 struct nlmsghdr *nlh,
-					 void *rtattr,
-					 struct netlink_skb_parms *req);
+					 struct nl_info *info);
 
 extern void			inet6_rt_notify(int event, struct rt6_info *rt,
-						struct nlmsghdr *nlh,
-						struct netlink_skb_parms *req);
+						struct nl_info *info);
 
 extern void			fib6_run_gc(unsigned long dummy);
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 172c4761e2bf3..3f170f667c7b8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -60,11 +60,7 @@ extern void			ip6_route_cleanup(void);
 
 extern int			ipv6_route_ioctl(unsigned int cmd, void __user *arg);
 
-extern int			ip6_route_add(struct in6_rtmsg *rtmsg,
-					      struct nlmsghdr *,
-					      void *rtattr,
-					      struct netlink_skb_parms *req,
-					      u32 table_id);
+extern int			ip6_route_add(struct fib6_config *cfg);
 extern int			ip6_ins_rt(struct rt6_info *);
 extern int			ip6_del_rt(struct rt6_info *);
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index aafba9ea9cb6f..fc9cff3426c4c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1509,59 +1509,56 @@ static void
 addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		      unsigned long expires, u32 flags)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_PREFIX,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_expires = expires,
+		.fc_dst_len = plen,
+		.fc_flags = RTF_UP | flags,
+	};
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx);
-	rtmsg.rtmsg_dst_len = plen;
-	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
-	rtmsg.rtmsg_ifindex = dev->ifindex;
-	rtmsg.rtmsg_info = expires;
-	rtmsg.rtmsg_flags = RTF_UP|flags;
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+	ipv6_addr_copy(&cfg.fc_dst, pfx);
 
 	/* Prevent useless cloning on PtP SIT.
 	   This thing is done here expecting that the whole
 	   class of non-broadcast devices need not cloning.
 	 */
-	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
-		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
+	if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
+		cfg.fc_flags |= RTF_NONEXTHOP;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX);
+	ip6_route_add(&cfg);
 }
 
 /* Create "default" multicast route to the interface */
 
 static void addrconf_add_mroute(struct net_device *dev)
 {
-	struct in6_rtmsg rtmsg;
-
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	ipv6_addr_set(&rtmsg.rtmsg_dst,
-		      htonl(0xFF000000), 0, 0, 0);
-	rtmsg.rtmsg_dst_len = 8;
-	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
-	rtmsg.rtmsg_ifindex = dev->ifindex;
-	rtmsg.rtmsg_flags = RTF_UP;
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL);
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_LOCAL,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_dst_len = 8,
+		.fc_flags = RTF_UP,
+	};
+
+	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
+
+	ip6_route_add(&cfg);
 }
 
 static void sit_route_add(struct net_device *dev)
 {
-	struct in6_rtmsg rtmsg;
-
-	memset(&rtmsg, 0, sizeof(rtmsg));
-
-	rtmsg.rtmsg_type	= RTMSG_NEWROUTE;
-	rtmsg.rtmsg_metric	= IP6_RT_PRIO_ADDRCONF;
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_MAIN,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_dst_len = 96,
+		.fc_flags = RTF_UP | RTF_NONEXTHOP,
+	};
 
 	/* prefix length - 96 bits "::d.d.d.d" */
-	rtmsg.rtmsg_dst_len	= 96;
-	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
-	rtmsg.rtmsg_ifindex	= dev->ifindex;
-
-	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN);
+	ip6_route_add(&cfg);
 }
 
 static void addrconf_add_lroute(struct net_device *dev)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index be36f4acda944..667b1b1ea25d0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -610,7 +610,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
  */
 
 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
-		struct nlmsghdr *nlh,  struct netlink_skb_parms *req)
+			    struct nl_info *info)
 {
 	struct rt6_info *iter = NULL;
 	struct rt6_info **ins;
@@ -665,7 +665,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	*ins = rt;
 	rt->rt6i_node = fn;
 	atomic_inc(&rt->rt6i_ref);
-	inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
+	inet6_rt_notify(RTM_NEWROUTE, rt, info);
 	rt6_stats.fib_rt_entries++;
 
 	if ((fn->fn_flags & RTN_RTINFO) == 0) {
@@ -695,8 +695,7 @@ void fib6_force_start_gc(void)
  *	with source addr info in sub-trees
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, 
-		struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 {
 	struct fib6_node *fn;
 	int err = -ENOMEM;
@@ -769,7 +768,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	}
 #endif
 
-	err = fib6_add_rt2node(fn, rt, nlh, req);
+	err = fib6_add_rt2node(fn, rt, info);
 
 	if (err == 0) {
 		fib6_start_gc(rt);
@@ -1076,7 +1075,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 }
 
 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
-    struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+			   struct nl_info *info)
 {
 	struct fib6_walker_t *w;
 	struct rt6_info *rt = *rtp;
@@ -1132,11 +1131,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 		if (atomic_read(&rt->rt6i_ref) != 1) BUG();
 	}
 
-	inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
+	inet6_rt_notify(RTM_DELROUTE, rt, info);
 	rt6_release(rt);
 }
 
-int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_del(struct rt6_info *rt, struct nl_info *info)
 {
 	struct fib6_node *fn = rt->rt6i_node;
 	struct rt6_info **rtp;
@@ -1161,7 +1160,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
 
 	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
 		if (*rtp == rt) {
-			fib6_del_route(fn, rtp, nlh, _rtattr, req);
+			fib6_del_route(fn, rtp, info);
 			return 0;
 		}
 	}
@@ -1290,7 +1289,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
-			res = fib6_del(rt, NULL, NULL, NULL);
+			res = fib6_del(rt, NULL);
 			if (res) {
 #if RT6_DEBUG >= 2
 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9ec348a72a959..7bcffa6ddba37 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -546,15 +546,14 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
    be destroyed.
  */
 
-static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
-			void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 {
 	int err;
 	struct fib6_table *table;
 
 	table = rt->rt6i_table;
 	write_lock_bh(&table->tb6_lock);
-	err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
+	err = fib6_add(&table->tb6_root, rt, info);
 	write_unlock_bh(&table->tb6_lock);
 
 	return err;
@@ -562,7 +561,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
 
 int ip6_ins_rt(struct rt6_info *rt)
 {
-	return __ip6_ins_rt(rt, NULL, NULL, NULL);
+	return __ip6_ins_rt(rt, NULL);
 }
 
 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
@@ -1014,30 +1013,24 @@ int ipv6_get_hoplimit(struct net_device *dev)
  *
  */
 
-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
-		  void *_rtattr, struct netlink_skb_parms *req,
-		  u32 table_id)
+int ip6_route_add(struct fib6_config *cfg)
 {
 	int err;
-	struct rtmsg *r;
-	struct rtattr **rta;
 	struct rt6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
 	struct fib6_table *table;
 	int addr_type;
 
-	rta = (struct rtattr **) _rtattr;
-
-	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
+	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 		return -EINVAL;
 #ifndef CONFIG_IPV6_SUBTREES
-	if (rtmsg->rtmsg_src_len)
+	if (cfg->fc_src_len)
 		return -EINVAL;
 #endif
-	if (rtmsg->rtmsg_ifindex) {
+	if (cfg->fc_ifindex) {
 		err = -ENODEV;
-		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
+		dev = dev_get_by_index(cfg->fc_ifindex);
 		if (!dev)
 			goto out;
 		idev = in6_dev_get(dev);
@@ -1045,10 +1038,10 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			goto out;
 	}
 
-	if (rtmsg->rtmsg_metric == 0)
-		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+	if (cfg->fc_metric == 0)
+		cfg->fc_metric = IP6_RT_PRIO_USER;
 
-	table = fib6_new_table(table_id);
+	table = fib6_new_table(cfg->fc_table);
 	if (table == NULL) {
 		err = -ENOBUFS;
 		goto out;
@@ -1062,14 +1055,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	}
 
 	rt->u.dst.obsolete = -1;
-	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
-	if (nlh && (r = NLMSG_DATA(nlh))) {
-		rt->rt6i_protocol = r->rtm_protocol;
-	} else {
-		rt->rt6i_protocol = RTPROT_BOOT;
-	}
+	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
+
+	if (cfg->fc_protocol == RTPROT_UNSPEC)
+		cfg->fc_protocol = RTPROT_BOOT;
+	rt->rt6i_protocol = cfg->fc_protocol;
 
-	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
+	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
 	if (addr_type & IPV6_ADDR_MULTICAST)
 		rt->u.dst.input = ip6_mc_input;
@@ -1078,24 +1070,22 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 
 	rt->u.dst.output = ip6_output;
 
-	ipv6_addr_prefix(&rt->rt6i_dst.addr, 
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
-	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
+	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
 	       rt->u.dst.flags = DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
-	ipv6_addr_prefix(&rt->rt6i_src.addr, 
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
-	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
+	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
+	rt->rt6i_src.plen = cfg->fc_src_len;
 #endif
 
-	rt->rt6i_metric = rtmsg->rtmsg_metric;
+	rt->rt6i_metric = cfg->fc_metric;
 
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
 	 */
-	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
+	if ((cfg->fc_flags & RTF_REJECT) ||
 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
 		/* hold loopback dev/idev if we haven't done so. */
 		if (dev != &loopback_dev) {
@@ -1118,12 +1108,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 		goto install_route;
 	}
 
-	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
+	if (cfg->fc_flags & RTF_GATEWAY) {
 		struct in6_addr *gw_addr;
 		int gwa_type;
 
-		gw_addr = &rtmsg->rtmsg_gateway;
-		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
+		gw_addr = &cfg->fc_gateway;
+		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
 		gwa_type = ipv6_addr_type(gw_addr);
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1140,7 +1130,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			if (!(gwa_type&IPV6_ADDR_UNICAST))
 				goto out;
 
-			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
+			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
 
 			err = -EHOSTUNREACH;
 			if (grt == NULL)
@@ -1172,7 +1162,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	if (dev == NULL)
 		goto out;
 
-	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
+	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
 		if (IS_ERR(rt->rt6i_nexthop)) {
 			err = PTR_ERR(rt->rt6i_nexthop);
@@ -1181,24 +1171,24 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 		}
 	}
 
-	rt->rt6i_flags = rtmsg->rtmsg_flags;
+	rt->rt6i_flags = cfg->fc_flags;
 
 install_route:
-	if (rta && rta[RTA_METRICS-1]) {
-		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
-		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
-
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX) {
+	if (cfg->fc_mx) {
+		struct nlattr *nla;
+		int remaining;
+
+		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+			int type = nla->nla_type;
+
+			if (type) {
+				if (type > RTAX_MAX) {
 					err = -EINVAL;
 					goto out;
 				}
-				rt->u.dst.metrics[flavor-1] =
-					*(u32 *)RTA_DATA(attr);
+
+				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
 			}
-			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
 
@@ -1211,7 +1201,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	rt->u.dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
-	return __ip6_ins_rt(rt, nlh, _rtattr, req);
+	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
 
 out:
 	if (dev)
@@ -1223,8 +1213,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	return err;
 }
 
-static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
-			void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 {
 	int err;
 	struct fib6_table *table;
@@ -1235,7 +1224,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
 	table = rt->rt6i_table;
 	write_lock_bh(&table->tb6_lock);
 
-	err = fib6_del(rt, nlh, _rtattr, req);
+	err = fib6_del(rt, info);
 	dst_release(&rt->u.dst);
 
 	write_unlock_bh(&table->tb6_lock);
@@ -1245,44 +1234,41 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
 
 int ip6_del_rt(struct rt6_info *rt)
 {
-	return __ip6_del_rt(rt, NULL, NULL, NULL);
+	return __ip6_del_rt(rt, NULL);
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
-			 void *_rtattr, struct netlink_skb_parms *req,
-			 u32 table_id)
+static int ip6_route_del(struct fib6_config *cfg)
 {
 	struct fib6_table *table;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 	int err = -ESRCH;
 
-	table = fib6_get_table(table_id);
+	table = fib6_get_table(cfg->fc_table);
 	if (table == NULL)
 		return err;
 
 	read_lock_bh(&table->tb6_lock);
 
 	fn = fib6_locate(&table->tb6_root,
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
+			 &cfg->fc_dst, cfg->fc_dst_len,
+			 &cfg->fc_src, cfg->fc_src_len);
 	
 	if (fn) {
 		for (rt = fn->leaf; rt; rt = rt->u.next) {
-			if (rtmsg->rtmsg_ifindex &&
+			if (cfg->fc_ifindex &&
 			    (rt->rt6i_dev == NULL ||
-			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
 				continue;
-			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
-			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+			if (cfg->fc_flags & RTF_GATEWAY &&
+			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
 				continue;
-			if (rtmsg->rtmsg_metric &&
-			    rtmsg->rtmsg_metric != rt->rt6i_metric)
+			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
 			dst_hold(&rt->u.dst);
 			read_unlock_bh(&table->tb6_lock);
 
-			return __ip6_del_rt(rt, nlh, _rtattr, req);
+			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
 		}
 	}
 	read_unlock_bh(&table->tb6_lock);
@@ -1565,21 +1551,23 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
 					   struct in6_addr *gwaddr, int ifindex,
 					   unsigned pref)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_INFO,
+		.fc_metric	= 1024,
+		.fc_ifindex	= ifindex,
+		.fc_dst_len	= prefixlen,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
+				  RTF_UP | RTF_PREF(pref),
+	};
+
+	ipv6_addr_copy(&cfg.fc_dst, prefix);
+	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
-	rtmsg.rtmsg_dst_len = prefixlen;
-	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
-	rtmsg.rtmsg_metric = 1024;
-	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
 	/* We should treat it as a default route if prefix length is 0. */
 	if (!prefixlen)
-		rtmsg.rtmsg_flags |= RTF_DEFAULT;
-	rtmsg.rtmsg_ifindex = ifindex;
+		cfg.fc_flags |= RTF_DEFAULT;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
+	ip6_route_add(&cfg);
 
 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
 }
@@ -1611,18 +1599,18 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
 				     struct net_device *dev,
 				     unsigned int pref)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_DFLT,
+		.fc_metric	= 1024,
+		.fc_ifindex	= dev->ifindex,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
+				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+	};
 
-	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
-	rtmsg.rtmsg_metric = 1024;
-	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
-			    RTF_PREF(pref);
+	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
-	rtmsg.rtmsg_ifindex = dev->ifindex;
+	ip6_route_add(&cfg);
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
@@ -1649,8 +1637,27 @@ void rt6_purge_dflt_routers(void)
 	read_unlock_bh(&table->tb6_lock);
 }
 
+static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
+				 struct fib6_config *cfg)
+{
+	memset(cfg, 0, sizeof(*cfg));
+
+	cfg->fc_table = RT6_TABLE_MAIN;
+	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
+	cfg->fc_metric = rtmsg->rtmsg_metric;
+	cfg->fc_expires = rtmsg->rtmsg_info;
+	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
+	cfg->fc_src_len = rtmsg->rtmsg_src_len;
+	cfg->fc_flags = rtmsg->rtmsg_flags;
+
+	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
+	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
+	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
+}
+
 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 {
+	struct fib6_config cfg;
 	struct in6_rtmsg rtmsg;
 	int err;
 
@@ -1663,16 +1670,16 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 				     sizeof(struct in6_rtmsg));
 		if (err)
 			return -EFAULT;
-			
+
+		rtmsg_to_fib6_config(&rtmsg, &cfg);
+
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
-					    RT6_TABLE_MAIN);
+			err = ip6_route_add(&cfg);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
-					    RT6_TABLE_MAIN);
+			err = ip6_route_del(&cfg);
 			break;
 		default:
 			err = -EINVAL;
@@ -1823,66 +1830,104 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
 }
 
-static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
-			      struct in6_rtmsg *rtmsg)
+static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
+	[RTA_GATEWAY]           = { .minlen = sizeof(struct in6_addr) },
+	[RTA_OIF]               = { .type = NLA_U32 },
+	[RTA_PRIORITY]          = { .type = NLA_U32 },
+	[RTA_METRICS]           = { .type = NLA_NESTED },
+};
+
+static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct fib6_config *cfg)
 {
-	memset(rtmsg, 0, sizeof(*rtmsg));
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;
 
-	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
-	rtmsg->rtmsg_src_len = r->rtm_src_len;
-	rtmsg->rtmsg_flags = RTF_UP;
-	if (r->rtm_type == RTN_UNREACHABLE)
-		rtmsg->rtmsg_flags |= RTF_REJECT;
+	err = -EINVAL;
+	rtm = nlmsg_data(nlh);
+	memset(cfg, 0, sizeof(*cfg));
 
-	if (rta[RTA_GATEWAY-1]) {
-		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
-		rtmsg->rtmsg_flags |= RTF_GATEWAY;
-	}
-	if (rta[RTA_DST-1]) {
-		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+	cfg->fc_table = rtm->rtm_table;
+	cfg->fc_dst_len = rtm->rtm_dst_len;
+	cfg->fc_src_len = rtm->rtm_src_len;
+	cfg->fc_flags = RTF_UP;
+	cfg->fc_protocol = rtm->rtm_protocol;
+
+	if (rtm->rtm_type == RTN_UNREACHABLE)
+		cfg->fc_flags |= RTF_REJECT;
+
+	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+	cfg->fc_nlinfo.nlh = nlh;
+
+	if (tb[RTA_GATEWAY]) {
+		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+		cfg->fc_flags |= RTF_GATEWAY;
 	}
-	if (rta[RTA_SRC-1]) {
-		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+
+	if (tb[RTA_DST]) {
+		int plen = (rtm->rtm_dst_len + 7) >> 3;
+
+		if (nla_len(tb[RTA_DST]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
 	}
-	if (rta[RTA_OIF-1]) {
-		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+
+	if (tb[RTA_SRC]) {
+		int plen = (rtm->rtm_src_len + 7) >> 3;
+
+		if (nla_len(tb[RTA_SRC]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
 	}
-	if (rta[RTA_PRIORITY-1]) {
-		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+
+	if (tb[RTA_OIF])
+		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
+
+	if (tb[RTA_PRIORITY])
+		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
+
+	if (tb[RTA_METRICS]) {
+		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
+		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
 	}
-	return 0;
+
+	if (tb[RTA_TABLE])
+		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+
+	err = 0;
+errout:
+	return err;
 }
 
 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb),
-			     rtm_get_table(arg, r->rtm_table));
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	return ip6_route_del(&cfg);
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb),
-			     rtm_get_table(arg, r->rtm_table));
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	return ip6_route_add(&cfg);
 }
 
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
@@ -2063,15 +2108,21 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	goto out;	
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
-			struct netlink_skb_parms *req)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : 0;
-	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	u32 pid = 0, seq = 0;
+	struct nlmsghdr *nlh = NULL;
 	int payload = sizeof(struct rtmsg) + 256;
 	int err = -ENOBUFS;
 
+	if (info) {
+		pid = info->pid;
+		nlh = info->nlh;
+		if (nlh)
+			seq = nlh->nlmsg_seq;
+	}
+
 	skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
 	if (skb == NULL)
 		goto errout;
-- 
GitLab


From 2d7202bfdd28687073f5efef8d2f51bbab0af867 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 22 Aug 2006 00:01:27 -0700
Subject: [PATCH 0495/1063] [IPv6] route: Convert FIB6 dumping to use new
 netlink api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 31 +++++++++++++++-----------
 net/ipv6/route.c     | 52 ++++++++++++++++++++++++--------------------
 2 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dfc58269240a6..eeff0b23e944c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -188,22 +188,27 @@ void rtnl_set_sk_err(u32 group, int error)
 
 int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
-	struct rtattr *mx = (struct rtattr*)skb->tail;
-	int i;
+	struct nlattr *mx;
+	int i, valid = 0;
+
+	mx = nla_nest_start(skb, RTA_METRICS);
+	if (mx == NULL)
+		return -ENOBUFS;
 
-	RTA_PUT(skb, RTA_METRICS, 0, NULL);
-	for (i=0; i<RTAX_MAX; i++) {
-		if (metrics[i])
-			RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
+	for (i = 0; i < RTAX_MAX; i++) {
+		if (metrics[i]) {
+			valid++;
+			NLA_PUT_U32(skb, i+1, metrics[i]);
+		}
 	}
-	mx->rta_len = skb->tail - (u8*)mx;
-	if (mx->rta_len == RTA_LENGTH(0))
-		skb_trim(skb, (u8*)mx - skb->data);
-	return 0;
 
-rtattr_failure:
-	skb_trim(skb, (u8*)mx - skb->data);
-	return -1;
+	if (!valid)
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, mx);
+
+nla_put_failure:
+	return nla_nest_cancel(skb, mx);
 }
 
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7bcffa6ddba37..f0a66de843312 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1936,8 +1936,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 int prefix, unsigned int flags)
 {
 	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	struct nlmsghdr *nlh;
 	struct rta_cacheinfo ci;
 	u32 table;
 
@@ -1948,8 +1947,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		}
 	}
 
-	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET6;
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
 	rtm->rtm_src_len = rt->rt6i_src.plen;
@@ -1959,7 +1961,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	else
 		table = RT6_TABLE_UNSPEC;
 	rtm->rtm_table = table;
-	RTA_PUT_U32(skb, RTA_TABLE, table);
+	NLA_PUT_U32(skb, RTA_TABLE, table);
 	if (rt->rt6i_flags&RTF_REJECT)
 		rtm->rtm_type = RTN_UNREACHABLE;
 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
@@ -1980,31 +1982,35 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		rtm->rtm_flags |= RTM_F_CLONED;
 
 	if (dst) {
-		RTA_PUT(skb, RTA_DST, 16, dst);
+		NLA_PUT(skb, RTA_DST, 16, dst);
 	        rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src) {
-		RTA_PUT(skb, RTA_SRC, 16, src);
+		NLA_PUT(skb, RTA_SRC, 16, src);
 	        rtm->rtm_src_len = 128;
 	} else if (rtm->rtm_src_len)
-		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 #endif
 	if (iif)
-		RTA_PUT(skb, RTA_IIF, 4, &iif);
+		NLA_PUT_U32(skb, RTA_IIF, iif);
 	else if (dst) {
 		struct in6_addr saddr_buf;
 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
-			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
+
 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	if (rt->u.dst.neighbour)
-		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+
 	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
-	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
+		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+
+	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	if (rt->rt6i_expires)
 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
@@ -2016,14 +2022,12 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	ci.rta_id = 0;
 	ci.rta_ts = 0;
 	ci.rta_tsage = 0;
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
@@ -2031,8 +2035,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	int prefix;
 
-	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
-		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
+	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
 	} else
 		prefix = 0;
-- 
GitLab


From ab364a6f96bad9625bdb97b5688c76c44eb1e96e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 22 Aug 2006 00:01:47 -0700
Subject: [PATCH 0496/1063] [IPv6] route: Convert GETROUTE to use new netlink
 api

Fixes various unvalidated netlink attributes causing memory
corruptions when left empty by userspace applications.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 80 ++++++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0a66de843312..5d6e9083ca2cf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1833,6 +1833,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
 	[RTA_GATEWAY]           = { .minlen = sizeof(struct in6_addr) },
 	[RTA_OIF]               = { .type = NLA_U32 },
+	[RTA_IIF]		= { .type = NLA_U32 },
 	[RTA_PRIORITY]          = { .type = NLA_U32 },
 	[RTA_METRICS]           = { .type = NLA_NESTED },
 };
@@ -2048,68 +2049,75 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	int iif = 0;
-	int err = -ENOBUFS;
+	struct nlattr *tb[RTA_MAX+1];
+	struct rt6_info *rt;
 	struct sk_buff *skb;
+	struct rtmsg *rtm;
 	struct flowi fl;
-	struct rt6_info *rt;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (skb == NULL)
-		goto out;
+	int err, iif = 0;
 
-	/* Reserve room for dummy headers, this skb can pass
-	   through good chunk of routing engine.
-	 */
-	skb->mac.raw = skb->data;
-	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;
 
+	err = -EINVAL;
 	memset(&fl, 0, sizeof(fl));
-	if (rta[RTA_SRC-1])
-		ipv6_addr_copy(&fl.fl6_src,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
-	if (rta[RTA_DST-1])
-		ipv6_addr_copy(&fl.fl6_dst,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
 
-	if (rta[RTA_IIF-1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+	if (tb[RTA_SRC]) {
+		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+			goto errout;
+
+		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
+	}
+
+	if (tb[RTA_DST]) {
+		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
+			goto errout;
+
+		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+	}
+
+	if (tb[RTA_IIF])
+		iif = nla_get_u32(tb[RTA_IIF]);
+
+	if (tb[RTA_OIF])
+		fl.oif = nla_get_u32(tb[RTA_OIF]);
 
 	if (iif) {
 		struct net_device *dev;
 		dev = __dev_get_by_index(iif);
 		if (!dev) {
 			err = -ENODEV;
-			goto out_free;
+			goto errout;
 		}
 	}
 
-	fl.oif = 0;
-	if (rta[RTA_OIF-1])
-		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
-	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
+	/* Reserve room for dummy headers, this skb can pass
+	   through good chunk of routing engine.
+	 */
+	skb->mac.raw = skb->data;
+	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
+	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
 	skb->dst = &rt->u.dst;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-	err = rt6_fill_node(skb, rt, 
-			    &fl.fl6_dst, &fl.fl6_src,
-			    iif,
+	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 			    nlh->nlmsg_seq, 0, 0);
 	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
+		kfree_skb(skb);
+		goto errout;
 	}
 
 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-out:
+errout:
 	return err;
-out_free:
-	kfree_skb(skb);
-	goto out;	
 }
 
 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
-- 
GitLab


From 72d3b2c970a2d5d2ccb1d1cab4fb76663c4f2e49 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 22 Aug 2006 00:13:07 -0700
Subject: [PATCH 0497/1063] [IPV6]: Fixup ip6_del_rt() call for new args.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/anycast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index abbc35a13e088..b80fc502ca038 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -378,7 +378,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	dst_hold(&aca->aca_rt->u.dst);
-	if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL))
+	if (ip6_del_rt(aca->aca_rt))
 		dst_free(&aca->aca_rt->u.dst);
 	else
 		dst_release(&aca->aca_rt->u.dst);
-- 
GitLab


From ac0b04627269ff16c3c7ab854a65fe6780c6e3e5 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Tue, 22 Aug 2006 00:15:33 -0700
Subject: [PATCH 0498/1063] [SCTP]: Extend /proc/net/sctp/snmp to provide more
 statistics.

This patch adds more statistics info under /proc/net/sctp/snmp
that should be useful for debugging. The additional events that
are counted now include timer expirations, retransmits, packet
and data chunk discards.

The data chunk discards include all the cases where a data chunk
is discarded, including high TSN, bad stream, duplicate TSN and the
most useful one (out of receive buffer/rwnd).

Also moved the SCTP MIB data structures from the generic include
directories to include/net/sctp/sctp.h.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h    | 33 -------------------------------
 include/net/sctp/sctp.h | 44 +++++++++++++++++++++++++++++++++++++++++
 include/net/snmp.h      |  6 ------
 net/sctp/input.c        |  8 ++++++--
 net/sctp/inqueue.c      |  4 ++--
 net/sctp/outqueue.c     |  6 +++++-
 net/sctp/proc.c         | 17 +++++++++++++++-
 net/sctp/sm_statefuns.c | 15 ++++++++++++++
 8 files changed, 88 insertions(+), 45 deletions(-)

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 30156556f78d7..854aa6b543f10 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -160,39 +160,6 @@ enum
 	__UDP_MIB_MAX
 };
 
-/* sctp mib definitions */
-/*
- * draft-ietf-sigtran-sctp-mib-07.txt
- */
-enum
-{
-	SCTP_MIB_NUM = 0,
-	SCTP_MIB_CURRESTAB,			/* CurrEstab */
-	SCTP_MIB_ACTIVEESTABS,			/* ActiveEstabs */
-	SCTP_MIB_PASSIVEESTABS,			/* PassiveEstabs */
-	SCTP_MIB_ABORTEDS,			/* Aborteds */
-	SCTP_MIB_SHUTDOWNS,			/* Shutdowns */
-	SCTP_MIB_OUTOFBLUES,			/* OutOfBlues */
-	SCTP_MIB_CHECKSUMERRORS,		/* ChecksumErrors */
-	SCTP_MIB_OUTCTRLCHUNKS,			/* OutCtrlChunks */
-	SCTP_MIB_OUTORDERCHUNKS,		/* OutOrderChunks */
-	SCTP_MIB_OUTUNORDERCHUNKS,		/* OutUnorderChunks */
-	SCTP_MIB_INCTRLCHUNKS,			/* InCtrlChunks */
-	SCTP_MIB_INORDERCHUNKS,			/* InOrderChunks */
-	SCTP_MIB_INUNORDERCHUNKS,		/* InUnorderChunks */
-	SCTP_MIB_FRAGUSRMSGS,			/* FragUsrMsgs */
-	SCTP_MIB_REASMUSRMSGS,			/* ReasmUsrMsgs */
-	SCTP_MIB_OUTSCTPPACKS,			/* OutSCTPPacks */
-	SCTP_MIB_INSCTPPACKS,			/* InSCTPPacks */
-	SCTP_MIB_RTOALGORITHM,			/* RtoAlgorithm */
-	SCTP_MIB_RTOMIN,			/* RtoMin */
-	SCTP_MIB_RTOMAX,			/* RtoMax */
-	SCTP_MIB_RTOINITIAL,			/* RtoInitial */
-	SCTP_MIB_VALCOOKIELIFE,			/* ValCookieLife */
-	SCTP_MIB_MAXINITRETR,			/* MaxInitRetr */
-	__SCTP_MIB_MAX
-};
-
 /* linux mib definitions */
 enum
 {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 1c1abce5f6b64..e274fd479990b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -216,6 +216,50 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics);
 
 #endif /* !TEST_FRAME */
 
+/* sctp mib definitions */
+enum
+{
+	SCTP_MIB_NUM = 0,
+	SCTP_MIB_CURRESTAB,			/* CurrEstab */
+	SCTP_MIB_ACTIVEESTABS,			/* ActiveEstabs */
+	SCTP_MIB_PASSIVEESTABS,			/* PassiveEstabs */
+	SCTP_MIB_ABORTEDS,			/* Aborteds */
+	SCTP_MIB_SHUTDOWNS,			/* Shutdowns */
+	SCTP_MIB_OUTOFBLUES,			/* OutOfBlues */
+	SCTP_MIB_CHECKSUMERRORS,		/* ChecksumErrors */
+	SCTP_MIB_OUTCTRLCHUNKS,			/* OutCtrlChunks */
+	SCTP_MIB_OUTORDERCHUNKS,		/* OutOrderChunks */
+	SCTP_MIB_OUTUNORDERCHUNKS,		/* OutUnorderChunks */
+	SCTP_MIB_INCTRLCHUNKS,			/* InCtrlChunks */
+	SCTP_MIB_INORDERCHUNKS,			/* InOrderChunks */
+	SCTP_MIB_INUNORDERCHUNKS,		/* InUnorderChunks */
+	SCTP_MIB_FRAGUSRMSGS,			/* FragUsrMsgs */
+	SCTP_MIB_REASMUSRMSGS,			/* ReasmUsrMsgs */
+	SCTP_MIB_OUTSCTPPACKS,			/* OutSCTPPacks */
+	SCTP_MIB_INSCTPPACKS,			/* InSCTPPacks */
+	SCTP_MIB_T1_INIT_EXPIREDS,
+	SCTP_MIB_T1_COOKIE_EXPIREDS,
+	SCTP_MIB_T2_SHUTDOWN_EXPIREDS,
+	SCTP_MIB_T3_RTX_EXPIREDS,
+	SCTP_MIB_T4_RTO_EXPIREDS,
+	SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS,
+	SCTP_MIB_DELAY_SACK_EXPIREDS,
+	SCTP_MIB_AUTOCLOSE_EXPIREDS,
+	SCTP_MIB_T3_RETRANSMITS,
+	SCTP_MIB_PMTUD_RETRANSMITS,
+	SCTP_MIB_FAST_RETRANSMITS,
+	SCTP_MIB_IN_PKT_SOFTIRQ,
+	SCTP_MIB_IN_PKT_BACKLOG,
+	SCTP_MIB_IN_PKT_DISCARDS,
+	SCTP_MIB_IN_DATA_CHUNK_DISCARDS,
+	__SCTP_MIB_MAX
+};
+
+#define SCTP_MIB_MAX    __SCTP_MIB_MAX
+struct sctp_mib {
+        unsigned long   mibs[SCTP_MIB_MAX];
+} __SNMP_MIB_ALIGN__;
+
 
 /* Print debugging messages.  */
 #if SCTP_DEBUG
diff --git a/include/net/snmp.h b/include/net/snmp.h
index a36bed8ea2100..464970e39ec08 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -100,12 +100,6 @@ struct udp_mib {
 	unsigned long	mibs[UDP_MIB_MAX];
 } __SNMP_MIB_ALIGN__;
 
-/* SCTP */
-#define SCTP_MIB_MAX	__SCTP_MIB_MAX
-struct sctp_mib {
-	unsigned long	mibs[SCTP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
-
 /* Linux */
 #define LINUX_MIB_MAX	__LINUX_MIB_MAX
 struct linux_mib {
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 42b66e74bbb50..8a34d95602cef 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -255,10 +255,13 @@ int sctp_rcv(struct sk_buff *skb)
 	 */
 	sctp_bh_lock_sock(sk);
 
-	if (sock_owned_by_user(sk))
+	if (sock_owned_by_user(sk)) {
+		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
 		sctp_add_backlog(sk, skb);
-	else
+	} else {
+		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
 		sctp_inq_push(&chunk->rcvr->inqueue, chunk);
+	}
 
 	sctp_bh_unlock_sock(sk);
 
@@ -271,6 +274,7 @@ int sctp_rcv(struct sk_buff *skb)
 	return 0;
 
 discard_it:
+	SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS);
 	kfree_skb(skb);
 	return 0;
 
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cf0c767d43ae3..cf6deed7e8497 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue)
 /* Put a new packet in an SCTP inqueue.
  * We assume that packet->sctp_hdr is set and in host byte order.
  */
-void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
+void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
 {
 	/* Directly call the packet handling routine. */
 
@@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
 	 * Eventually, we should clean up inqueue to not rely
 	 * on the BH related data structures.
 	 */
-	list_add_tail(&packet->list, &q->in_chunk_list);
+	list_add_tail(&chunk->list, &q->in_chunk_list);
 	q->immediate.func(q->immediate.data);
 }
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 30b710c54e649..37074a39ecbbf 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -467,6 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 
 	switch(reason) {
 	case SCTP_RTXR_T3_RTX:
+		SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
 		/* Update the retran path if the T3-rtx timer has expired for
 		 * the current retran path.
@@ -475,12 +476,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 			sctp_assoc_update_retran_path(transport->asoc);
 		break;
 	case SCTP_RTXR_FAST_RTX:
+		SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
 		fast_retransmit = 1;
 		break;
 	case SCTP_RTXR_PMTUD:
-	default:
+		SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
 		break;
+	default:
+		BUG();
 	}
 
 	sctp_retransmit_mark(q, transport, fast_retransmit);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 5b3b0e0ae7e50..a356d8d310a95 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = {
 	SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
 	SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
 	SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
+	SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS),
+	SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS),
+	SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS),
+	SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS),
+	SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS),
+	SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ),
+	SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG),
+	SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS),
+	SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 			   "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
 			   assoc, sk, sctp_sk(sk)->type, sk->sk_state,
 			   assoc->state, hash, assoc->assoc_id,
-			   (sk->sk_rcvbuf - assoc->rwnd),
 			   assoc->sndbuf_used,
+			   (sk->sk_rcvbuf - assoc->rwnd),
 			   sock_i_uid(sk), sock_i_ino(sk),
 			   epb->bind_addr.port,
 			   assoc->peer.port);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5b5ae79583223..32f57f42af9ee 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2663,9 +2663,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
 		break;
 	case SCTP_IERROR_HIGH_TSN:
 	case SCTP_IERROR_BAD_STREAM:
+		SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_noforce;
 	case SCTP_IERROR_DUP_TSN:
 	case SCTP_IERROR_IGNORE_TSN:
+		SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_force;
 	case SCTP_IERROR_NO_DATA:
 		goto consume;
@@ -3652,6 +3654,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
+	SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
 
 	return SCTP_DISPOSITION_CONSUME;
@@ -4548,6 +4551,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
 {
 	struct sctp_transport *transport = arg;
 
+	SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
+
 	if (asoc->overall_error_count >= asoc->max_retrans) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
 				SCTP_ERROR(ETIMEDOUT));
@@ -4616,6 +4621,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
+	SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
 	return SCTP_DISPOSITION_CONSUME;
 }
@@ -4650,6 +4656,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
 	int attempts = asoc->init_err_counter + 1;
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
+	SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS);
 
 	if (attempts <= asoc->max_init_attempts) {
 		bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
@@ -4709,6 +4716,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
 	int attempts = asoc->init_err_counter + 1;
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
+	SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS);
 
 	if (attempts <= asoc->max_init_attempts) {
 		repl = sctp_make_cookie_echo(asoc, NULL);
@@ -4753,6 +4761,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
 	struct sctp_chunk *reply = NULL;
 
 	SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
+	SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
+
 	if (asoc->overall_error_count >= asoc->max_retrans) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
 				SCTP_ERROR(ETIMEDOUT));
@@ -4814,6 +4824,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 	struct sctp_chunk *chunk = asoc->addip_last_asconf;
 	struct sctp_transport *transport = chunk->transport;
 
+	SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS);
+
 	/* ADDIP 4.1 B1) Increment the error counters and perform path failure
 	 * detection on the appropriate destination address as defined in
 	 * RFC2960 [5] section 8.1 and 8.2.
@@ -4880,6 +4892,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
 	struct sctp_chunk *reply = NULL;
 
 	SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
+	SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
 
 	reply = sctp_make_abort(asoc, NULL, 0);
 	if (!reply)
@@ -4910,6 +4923,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 {
 	int disposition;
 
+	SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS);
+
 	/* From 9.2 Shutdown of an Association
 	 * Upon receipt of the SHUTDOWN primitive from its upper
 	 * layer, the endpoint enters SHUTDOWN-PENDING state and
-- 
GitLab


From df7deeb5402087ea0387173aaf067d37a264a8f0 Mon Sep 17 00:00:00 2001
From: Vladislav Yasevich <vladislav.yasevich@hp.com>
Date: Tue, 22 Aug 2006 00:19:51 -0700
Subject: [PATCH 0499/1063] [SCTP]: Cleanup nomem handling in the state
 functions.

This patch cleans up the "nomem" conditions that may occur during the
processing by the state machine functions. In most cases we delay adding
side-effect commands until all memory allocations are done.

Signed-off-by: Vladislav Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 159 ++++++++++++++++++++++------------------
 1 file changed, 86 insertions(+), 73 deletions(-)

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 32f57f42af9ee..1c42fe983a5bc 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -187,10 +187,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 	 */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
 					     0, 0, 0, GFP_ATOMIC);
-	if (!ev)
-		goto nomem;
-
-	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+	if (ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+			        SCTP_ULPEVENT(ev));
 
 	/* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint
 	 * will verify that it is in SHUTDOWN-ACK-SENT state, if it is
@@ -215,9 +214,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 
 	return SCTP_DISPOSITION_DELETE_TCB;
-
-nomem:
-	return SCTP_DISPOSITION_NOMEM;
 }
 
 /*
@@ -347,8 +343,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 			       GFP_ATOMIC))
 		goto nomem_init;
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
-
 	/* B) "Z" shall respond immediately with an INIT ACK chunk.  */
 
 	/* If there are errors need to be reported for unknown parameters,
@@ -360,11 +354,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 			sizeof(sctp_chunkhdr_t);
 
 	if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0)
-		goto nomem_ack;
+		goto nomem_init;
 
 	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
 	if (!repl)
-		goto nomem_ack;
+		goto nomem_init;
 
 	/* If there are errors need to be reported for unknown parameters,
 	 * include them in the outgoing INIT ACK as "Unrecognized parameter"
@@ -388,6 +382,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 		sctp_chunk_free(err_chunk);
 	}
 
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 
 	/*
@@ -400,12 +396,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 
 	return SCTP_DISPOSITION_DELETE_TCB;
 
-nomem_ack:
-	if (err_chunk)
-		sctp_chunk_free(err_chunk);
 nomem_init:
 	sctp_association_free(new_asoc);
 nomem:
+	if (err_chunk)
+		sctp_chunk_free(err_chunk);
 	return SCTP_DISPOSITION_NOMEM;
 }
 
@@ -600,7 +595,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	struct sctp_association *new_asoc;
 	sctp_init_chunk_t *peer_init;
 	struct sctp_chunk *repl;
-	struct sctp_ulpevent *ev;
+	struct sctp_ulpevent *ev, *ai_ev = NULL;
 	int error = 0;
 	struct sctp_chunk *err_chk_p;
 
@@ -659,20 +654,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 		};
 	}
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
-			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
-	SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
-	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
 
-	if (new_asoc->autoclose)
-		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
-				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
-
-	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
-
-	/* Re-build the bind address for the association is done in
+	/* Delay state machine commands until later.
+	 *
+	 * Re-build the bind address for the association is done in
 	 * the sctp_unpack_cookie() already.
 	 */
 	/* This is a brand-new association, so these are not yet side
@@ -687,9 +672,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
 	if (!repl)
-		goto nomem_repl;
-
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+		goto nomem_init;
 
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
@@ -704,28 +687,53 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	if (!ev)
 		goto nomem_ev;
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
-
 	/* Sockets API Draft Section 5.3.1.6 	
 	 * When a peer sends a Adaption Layer Indication parameter , SCTP
 	 * delivers this notification to inform the application that of the
 	 * peers requested adaption layer.
 	 */
 	if (new_asoc->peer.adaption_ind) {
-		ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+		ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc,
 							    GFP_ATOMIC);
-		if (!ev)
-			goto nomem_ev;
+		if (!ai_ev)
+			goto nomem_aiev;
+	}
+
+	/* Add all the state machine commands now since we've created
+	 * everything.  This way we don't introduce memory corruptions
+	 * during side-effect processing and correclty count established
+	 * associations.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_ESTABLISHED));
+	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
+
+	if (new_asoc->autoclose)
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
 
+	/* This will send the COOKIE ACK */
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
+	/* Queue the ASSOC_CHANGE event */
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+	/* Send up the Adaptation Layer Indication event */
+	if (ai_ev)
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
-				SCTP_ULPEVENT(ev));
-	}
+				SCTP_ULPEVENT(ai_ev));
 
 	return SCTP_DISPOSITION_CONSUME;
 
+nomem_aiev:
+	sctp_ulpevent_free(ev);
 nomem_ev:
 	sctp_chunk_free(repl);
-nomem_repl:
 nomem_init:
 	sctp_association_free(new_asoc);
 nomem:
@@ -1360,10 +1368,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
 			       sctp_source(chunk),
 			       (sctp_init_chunk_t *)chunk->chunk_hdr,
-			       GFP_ATOMIC)) {
-		retval = SCTP_DISPOSITION_NOMEM;
-		goto nomem_init;
-	}
+			       GFP_ATOMIC))
+		goto nomem;
 
 	/* Make sure no new addresses are being added during the
 	 * restart.   Do not do this check for COOKIE-WAIT state,
@@ -1374,7 +1380,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk,
 						 commands)) {
 			retval = SCTP_DISPOSITION_CONSUME;
-			goto cleanup_asoc;
+			goto nomem_retval;
 		}
 	}
 
@@ -1430,17 +1436,17 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 	retval = SCTP_DISPOSITION_CONSUME;
 
+	return retval;
+
+nomem:
+	retval = SCTP_DISPOSITION_NOMEM;
+nomem_retval:
+	if (new_asoc)
+		sctp_association_free(new_asoc);
 cleanup:
 	if (err_chunk)
 		sctp_chunk_free(err_chunk);
 	return retval;
-nomem:
-	retval = SCTP_DISPOSITION_NOMEM;
-	goto cleanup;
-nomem_init:
-cleanup_asoc:
-	sctp_association_free(new_asoc);
-	goto cleanup;
 }
 
 /*
@@ -1611,15 +1617,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
 	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
 
-	/* Update the content of current association. */
-	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
-
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
 	if (!repl)
 		goto nomem;
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
-
 	/* Report association restart to upper layer. */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
 					     new_asoc->c.sinit_num_ostreams,
@@ -1628,6 +1629,9 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
 	if (!ev)
 		goto nomem_ev;
 
+	/* Update the content of current association. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
 	return SCTP_DISPOSITION_CONSUME;
 
@@ -1751,7 +1755,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 					sctp_cmd_seq_t *commands,
 					struct sctp_association *new_asoc)
 {
-	struct sctp_ulpevent *ev = NULL;
+	struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
 	struct sctp_chunk *repl;
 
 	/* Clarification from Implementor's Guide:
@@ -1778,29 +1782,25 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 		 * SCTP user upon reception of a valid COOKIE
 		 * ECHO chunk.
 		 */
-		ev = sctp_ulpevent_make_assoc_change(new_asoc, 0,
+		ev = sctp_ulpevent_make_assoc_change(asoc, 0,
 					     SCTP_COMM_UP, 0,
-					     new_asoc->c.sinit_num_ostreams,
-					     new_asoc->c.sinit_max_instreams,
+					     asoc->c.sinit_num_ostreams,
+					     asoc->c.sinit_max_instreams,
                                              GFP_ATOMIC);
 		if (!ev)
 			goto nomem;
-		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
-				SCTP_ULPEVENT(ev));
 
 		/* Sockets API Draft Section 5.3.1.6
 		 * When a peer sends a Adaption Layer Indication parameter,
 		 * SCTP delivers this notification to inform the application
 		 * that of the peers requested adaption layer.
 		 */
-		if (new_asoc->peer.adaption_ind) {
-			ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+		if (asoc->peer.adaption_ind) {
+			ai_ev = sctp_ulpevent_make_adaption_indication(asoc,
 								 GFP_ATOMIC);
-			if (!ev)
+			if (!ai_ev)
 				goto nomem;
 
-			sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
-					SCTP_ULPEVENT(ev));
 		}
 	}
 	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
@@ -1809,12 +1809,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 	if (!repl)
 		goto nomem;
 
+	if (ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ev));
+	if (ai_ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+					SCTP_ULPEVENT(ai_ev));
+
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
 
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem:
+	if (ai_ev)
+		sctp_ulpevent_free(ai_ev);
 	if (ev)
 		sctp_ulpevent_free(ev);
 	return SCTP_DISPOSITION_NOMEM;
@@ -3019,7 +3028,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
 		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
 						  commands);
-
 	/* 10.2 H) SHUTDOWN COMPLETE notification
 	 *
 	 * When SCTP completes the shutdown procedures (section 9.2) this
@@ -3030,6 +3038,14 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	if (!ev)
 		goto nomem;
 
+	/* ...send a SHUTDOWN COMPLETE chunk to its peer, */
+	reply = sctp_make_shutdown_complete(asoc, chunk);
+	if (!reply)
+		goto nomem_chunk;
+
+	/* Do all the commands now (after allocation), so that we
+	 * have consistent state if memory allocation failes
+	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
 
 	/* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall
@@ -3041,11 +3057,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	/* ...send a SHUTDOWN COMPLETE chunk to its peer, */
-	reply = sctp_make_shutdown_complete(asoc, chunk);
-	if (!reply)
-		goto nomem;
-
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
 	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
@@ -3056,6 +3067,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 	return SCTP_DISPOSITION_DELETE_TCB;
 
+nomem_chunk:
+	sctp_ulpevent_free(ev);
 nomem:
 	return SCTP_DISPOSITION_NOMEM;
 }
-- 
GitLab


From eb5fa39f5ef490c72901b547ac5e7211efd47d56 Mon Sep 17 00:00:00 2001
From: Vladislav Yasevich <vladislav.yasevich@hp.com>
Date: Tue, 22 Aug 2006 00:23:13 -0700
Subject: [PATCH 0500/1063] [SCTP]: Fix IPv6 address flag setting when doing
 peel-off/accept.

During accept/peeloff we try to copy the list of bound addresses from
the original endpoint to the new one. However, we forgot to set the flag
to say that IPv6 is allowed on the new endpoint.

Signed-off-by: Vladislav Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 85caf79638867..30d2dbeebb433 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5619,6 +5619,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	/* Copy the bind_addr list from the original endpoint to the new
 	 * endpoint so that we can handle restarts properly
 	 */
+	if (PF_INET6 == assoc->base.sk->sk_family)
+		flags = SCTP_ADDR6_ALLOWED;
 	if (assoc->peer.ipv4_address)
 		flags |= SCTP_ADDR4_PEERSUPP;
 	if (assoc->peer.ipv6_address)
-- 
GitLab


From 8abfedd889e46ad4977dfcdab737edf5c5803c62 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Tue, 22 Aug 2006 00:24:09 -0700
Subject: [PATCH 0501/1063] [SCTP]: Use the flags value that is passed as an
 arg to sctp_accept.

No need to do multiple dereferences - sk->sk_socket->file->f_flags

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 30d2dbeebb433..3b6e82cb372f4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2970,7 +2970,7 @@ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err)
 		goto out;
 	}
 
-	timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 	error = sctp_wait_for_accept(sk, timeo);
 	if (error)
-- 
GitLab


From 131852176c1f5b4350b4af811d1836db387d0c61 Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwindheim.de>
Date: Tue, 22 Aug 2006 00:28:33 -0700
Subject: [PATCH 0502/1063] [TG3]: Convert the pci_device_id table to
 PCI_DEVICE()

Convert the pci_device_id entries to the PCI_DEVICE() macro.  Saves 1.5k
in the source file.

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
Acked-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c | 167 ++++++++++++++++------------------------------
 1 file changed, 56 insertions(+), 111 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 34078a7c1a843..fb70261538618 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -149,117 +149,62 @@ module_param(tg3_debug, int, 0);
 MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value");
 
 static struct pci_device_id tg3_pci_tbl[] = {
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ 0, }
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781)},
+	{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)},
+	{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100)},
+	{PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3)},
+	{}
 };
 
 MODULE_DEVICE_TABLE(pci, tg3_pci_tbl);
-- 
GitLab


From 9ba1627617d396135a4d679542a3623d5819e628 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Tue, 22 Aug 2006 00:29:37 -0700
Subject: [PATCH 0503/1063] [NETFILTER]: x_tables: replace IPv4 dscp match by
 address family independent version

This replaces the IPv4 dscp match with an address family independent version.
This also
	- utilizes dsfield.h to get the DS field in IPv4/IPv6 header, and
	- checks for the DSCP value from user space.
	- fixes Kconfig help text.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/xt_dscp.h       |  23 +++++
 include/linux/netfilter_ipv4/ipt_dscp.h |  14 ++-
 net/ipv4/netfilter/Kconfig              |  11 ---
 net/ipv4/netfilter/Makefile             |   1 -
 net/ipv4/netfilter/ipt_dscp.c           |  54 -----------
 net/netfilter/Kconfig                   |  11 +++
 net/netfilter/Makefile                  |   1 +
 net/netfilter/xt_dscp.c                 | 113 ++++++++++++++++++++++++
 8 files changed, 154 insertions(+), 74 deletions(-)
 create mode 100644 include/linux/netfilter/xt_dscp.h
 delete mode 100644 net/ipv4/netfilter/ipt_dscp.c
 create mode 100644 net/netfilter/xt_dscp.c

diff --git a/include/linux/netfilter/xt_dscp.h b/include/linux/netfilter/xt_dscp.h
new file mode 100644
index 0000000000000..1da61e6acaf77
--- /dev/null
+++ b/include/linux/netfilter/xt_dscp.h
@@ -0,0 +1,23 @@
+/* x_tables module for matching the IPv4/IPv6 DSCP field
+ *
+ * (C) 2002 Harald Welte <laforge@gnumonks.org>
+ * This software is distributed under GNU GPL v2, 1991
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp
+*/
+#ifndef _XT_DSCP_H
+#define _XT_DSCP_H
+
+#define XT_DSCP_MASK	0xfc	/* 11111100 */
+#define XT_DSCP_SHIFT	2
+#define XT_DSCP_MAX	0x3f	/* 00111111 */
+
+/* match info */
+struct xt_dscp_info {
+	u_int8_t dscp;
+	u_int8_t invert;
+};
+
+#endif /* _XT_DSCP_H */
diff --git a/include/linux/netfilter_ipv4/ipt_dscp.h b/include/linux/netfilter_ipv4/ipt_dscp.h
index 2fa6dfe92894d..4b82ca912b0e0 100644
--- a/include/linux/netfilter_ipv4/ipt_dscp.h
+++ b/include/linux/netfilter_ipv4/ipt_dscp.h
@@ -10,14 +10,12 @@
 #ifndef _IPT_DSCP_H
 #define _IPT_DSCP_H
 
-#define IPT_DSCP_MASK	0xfc	/* 11111100 */
-#define IPT_DSCP_SHIFT	2
-#define IPT_DSCP_MAX	0x3f	/* 00111111 */
+#include <linux/netfilter/xt_dscp.h>
 
-/* match info */
-struct ipt_dscp_info {
-	u_int8_t dscp;
-	u_int8_t invert;
-};
+#define IPT_DSCP_MASK	XT_DSCP_MASK
+#define IPT_DSCP_SHIFT	XT_DSCP_SHIFT
+#define IPT_DSCP_MAX	XT_DSCP_MAX
+
+#define ipt_dscp_info	xt_dscp_info
 
 #endif /* _IPT_DSCP_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ef0b5aac58382..d88d71d1ce0de 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_DSCP
-	tristate "DSCP match support"
-	depends on IP_NF_IPTABLES
-	help
-	  This option adds a `DSCP' match, which allows you to match against
-	  the IPv4 header DSCP field (DSCP codepoint).
-
-	  The DSCP codepoint can have any value between 0x0 and 0x4f.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_AH
 	tristate "AH match support"
 	depends on IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3ded4a3af59c3..b946b0f3ea9da 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
 obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
 obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
deleted file mode 100644
index 47177591aeb65..0000000000000
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* IP tables module for matching the value of the IPv4 DSCP field
- *
- * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_dscp.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP matching module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, int *hotdrop)
-{
-	const struct ipt_dscp_info *info = matchinfo;
-	const struct iphdr *iph = skb->nh.iph;
-
-	u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
-	return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
-}
-
-static struct ipt_match dscp_match = {
-	.name		= "dscp",
-	.match		= match,
-	.matchsize	= sizeof(struct ipt_dscp_info),
-	.me		= THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
-	return ipt_register_match(&dscp_match);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
-	ipt_unregister_match(&dscp_match);
-
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a9894ddfd72af..f781405f5d659 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -263,6 +263,17 @@ config NETFILTER_XT_MATCH_DCCP
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_DSCP
+	tristate '"DSCP" match support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `DSCP' match, which allows you to match against
+	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_ESP
 	tristate '"ESP" match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6fa4b75804586..0b8a70c1df46b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
new file mode 100644
index 0000000000000..82e250d1f007b
--- /dev/null
+++ b/net/netfilter/xt_dscp.c
@@ -0,0 +1,113 @@
+/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
+ *
+ * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/xt_dscp.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("x_tables DSCP matching module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_dscp");
+MODULE_ALIAS("ip6t_dscp");
+
+static int match(const struct sk_buff *skb,
+		 const struct net_device *in,
+		 const struct net_device *out,
+		 const struct xt_match *match,
+		 const void *matchinfo,
+		 int offset,
+		 unsigned int protoff,
+		 int *hotdrop)
+{
+	const struct xt_dscp_info *info = matchinfo;
+	u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
+
+	return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static int match6(const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct xt_match *match,
+		  const void *matchinfo,
+		  int offset,
+		  unsigned int protoff,
+		  int *hotdrop)
+{
+	const struct xt_dscp_info *info = matchinfo;
+	u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
+
+	return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static int checkentry(const char *tablename,
+		      const void *info,
+		      const struct xt_match *match,
+		      void *matchinfo,
+		      unsigned int matchsize,
+		      unsigned int hook_mask)
+{
+	const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
+
+	if (dscp > XT_DSCP_MAX) {
+		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct xt_match dscp_match = {
+	.name		= "dscp",
+	.match		= match,
+	.checkentry	= checkentry,
+	.matchsize	= sizeof(struct xt_dscp_info),
+	.family		= AF_INET,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_match dscp6_match = {
+	.name		= "dscp",
+	.match		= match6,
+	.checkentry	= checkentry,
+	.matchsize	= sizeof(struct xt_dscp_info),
+	.family		= AF_INET6,
+	.me		= THIS_MODULE,
+};
+
+static int __init xt_dscp_match_init(void)
+{
+	int ret;
+	ret = xt_register_match(&dscp_match);
+	if (ret)
+		return ret;
+
+	ret = xt_register_match(&dscp6_match);
+	if (ret)
+		xt_unregister_match(&dscp_match);
+
+	return ret;
+}
+
+static void __exit xt_dscp_match_fini(void)
+{
+	xt_unregister_match(&dscp_match);
+	xt_unregister_match(&dscp6_match);
+}
+
+module_init(xt_dscp_match_init);
+module_exit(xt_dscp_match_fini);
-- 
GitLab


From a468701db58a8b3e08e3f55fa6ac66db42014922 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Tue, 22 Aug 2006 00:30:26 -0700
Subject: [PATCH 0504/1063] [NETFILTER]: x_tables: replace IPv4 DSCP target by
 address family independent version

This replaces the IPv4 DSCP target with an address-family-independent
version. This also
	- utilizes dsfield.h to get/mangle the DS field in the IPv4/IPv6
	  header, and
	- fixes the Kconfig help text.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/xt_DSCP.h       |  20 ++++
 include/linux/netfilter_ipv4/ipt_DSCP.h |   6 +-
 net/ipv4/netfilter/Kconfig              |  11 --
 net/ipv4/netfilter/Makefile             |   1 -
 net/ipv4/netfilter/ipt_DSCP.c           |  96 -----------------
 net/netfilter/Kconfig                   |  12 +++
 net/netfilter/Makefile                  |   1 +
 net/netfilter/xt_DSCP.c                 | 130 ++++++++++++++++++++++++
 8 files changed, 165 insertions(+), 112 deletions(-)
 create mode 100644 include/linux/netfilter/xt_DSCP.h
 delete mode 100644 net/ipv4/netfilter/ipt_DSCP.c
 create mode 100644 net/netfilter/xt_DSCP.c

diff --git a/include/linux/netfilter/xt_DSCP.h b/include/linux/netfilter/xt_DSCP.h
new file mode 100644
index 0000000000000..3c7c963997bd3
--- /dev/null
+++ b/include/linux/netfilter/xt_DSCP.h
@@ -0,0 +1,20 @@
+/* x_tables module for setting the IPv4/IPv6 DSCP field
+ *
+ * (C) 2002 Harald Welte <laforge@gnumonks.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ * This software is distributed under GNU GPL v2, 1991
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp
+*/
+#ifndef _XT_DSCP_TARGET_H
+#define _XT_DSCP_TARGET_H
+#include <linux/netfilter/xt_dscp.h>
+
+/* target info */
+struct xt_DSCP_info {
+	u_int8_t dscp;
+};
+
+#endif /* _XT_DSCP_TARGET_H */
diff --git a/include/linux/netfilter_ipv4/ipt_DSCP.h b/include/linux/netfilter_ipv4/ipt_DSCP.h
index b30f510b5befd..3491e524d5ea6 100644
--- a/include/linux/netfilter_ipv4/ipt_DSCP.h
+++ b/include/linux/netfilter_ipv4/ipt_DSCP.h
@@ -11,10 +11,8 @@
 #ifndef _IPT_DSCP_TARGET_H
 #define _IPT_DSCP_TARGET_H
 #include <linux/netfilter_ipv4/ipt_dscp.h>
+#include <linux/netfilter/xt_DSCP.h>
 
-/* target info */
-struct ipt_DSCP_info {
-	u_int8_t dscp;
-};
+#define ipt_DSCP_info xt_DSCP_info
 
 #endif /* _IPT_DSCP_TARGET_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d88d71d1ce0de..a55b8ff70ded9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -557,17 +557,6 @@ config IP_NF_TARGET_ECN
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_DSCP
-	tristate "DSCP target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `DSCP' match, which allows you to match against
-	  the IPv4 header DSCP field (DSCP codepoint).
-
-	  The DSCP codepoint can have any value between 0x0 and 0x4f.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_TARGET_TTL
 	tristate  'TTL target support'
 	depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index b946b0f3ea9da..09aaed1a80639 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -67,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
deleted file mode 100644
index c8e971288dfe6..0000000000000
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* iptables module for setting the IPv4 DSCP field, Version 1.8
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as 
- * published by the Free Software Foundation.
- * 
- * See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
-*/
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_DSCP.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP modification module");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_DSCP_info *dinfo = targinfo;
-	u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
-
-	if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
-		u_int16_t diffs[2];
-
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
-			return NF_DROP;
-
-		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
-		(*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
-			| sh_dscp;
-		diffs[1] = htons((*pskb)->nh.iph->tos);
-		(*pskb)->nh.iph->check
-			= csum_fold(csum_partial((char *)diffs,
-						 sizeof(diffs),
-						 (*pskb)->nh.iph->check
-						 ^ 0xFFFF));
-	}
-	return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
-	   const void *e_void,
-	   const struct xt_target *target,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
-
-	if ((dscp > IPT_DSCP_MAX)) {
-		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
-		return 0;
-	}
-	return 1;
-}
-
-static struct ipt_target ipt_dscp_reg = {
-	.name		= "DSCP",
-	.target		= target,
-	.targetsize	= sizeof(struct ipt_DSCP_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
-	return ipt_register_target(&ipt_dscp_reg);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
-	ipt_unregister_target(&ipt_dscp_reg);
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f781405f5d659..0a28d2c5c44fc 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -148,6 +148,18 @@ config NETFILTER_XT_TARGET_CONNMARK
 	  <file:Documentation/modules.txt>.  The module will be called
 	  ipt_CONNMARK.o.  If unsure, say `N'.
 
+config NETFILTER_XT_TARGET_DSCP
+	tristate '"DSCP" target support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	help
+	  This option adds a `DSCP' target, which allows you to manipulate
+	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 0b8a70c1df46b..a74be492fd0a4 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
new file mode 100644
index 0000000000000..79df8165cd791
--- /dev/null
+++ b/net/netfilter/xt_DSCP.c
@@ -0,0 +1,130 @@
+/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_DSCP.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("x_tables DSCP modification module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_DSCP");
+MODULE_ALIAS("ip6t_DSCP");
+
+static unsigned int target(struct sk_buff **pskb,
+			   const struct net_device *in,
+			   const struct net_device *out,
+			   unsigned int hooknum,
+			   const struct xt_target *target,
+			   const void *targinfo,
+			   void *userinfo)
+{
+	const struct xt_DSCP_info *dinfo = targinfo;
+	u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
+
+	if (dscp != dinfo->dscp) {
+		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+			return NF_DROP;
+
+		ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
+				    dinfo->dscp << XT_DSCP_SHIFT);
+
+	}
+	return XT_CONTINUE;
+}
+
+static unsigned int target6(struct sk_buff **pskb,
+			    const struct net_device *in,
+			    const struct net_device *out,
+			    unsigned int hooknum,
+			    const struct xt_target *target,
+			    const void *targinfo,
+			    void *userinfo)
+{
+	const struct xt_DSCP_info *dinfo = targinfo;
+	u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
+
+	if (dscp != dinfo->dscp) {
+		if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
+			return NF_DROP;
+
+		ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
+				    dinfo->dscp << XT_DSCP_SHIFT);
+	}
+	return XT_CONTINUE;
+}
+
+static int checkentry(const char *tablename,
+		      const void *e_void,
+		      const struct xt_target *target,
+		      void *targinfo,
+		      unsigned int targinfosize,
+		      unsigned int hook_mask)
+{
+	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
+
+	if ((dscp > XT_DSCP_MAX)) {
+		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
+		return 0;
+	}
+	return 1;
+}
+
+static struct xt_target xt_dscp_reg = {
+	.name		= "DSCP",
+	.target		= target,
+	.targetsize	= sizeof(struct xt_DSCP_info),
+	.table		= "mangle",
+	.checkentry	= checkentry,
+	.family		= AF_INET,
+	.me		= THIS_MODULE,
+};
+
+static struct xt_target xt_dscp6_reg = {
+	.name		= "DSCP",
+	.target		= target6,
+	.targetsize	= sizeof(struct xt_DSCP_info),
+	.table		= "mangle",
+	.checkentry	= checkentry,
+	.family		= AF_INET6,
+	.me		= THIS_MODULE,
+};
+
+static int __init xt_dscp_target_init(void)
+{
+	int ret;
+	ret = xt_register_target(&xt_dscp_reg);
+	if (ret)
+		return ret;
+
+	ret = xt_register_target(&xt_dscp6_reg);
+	if (ret)
+		xt_unregister_target(&xt_dscp_reg);
+
+	return ret;
+}
+
+static void __exit xt_dscp_target_fini(void)
+{
+	xt_unregister_target(&xt_dscp_reg);
+	xt_unregister_target(&xt_dscp6_reg);
+}
+
+module_init(xt_dscp_target_init);
+module_exit(xt_dscp_target_fini);
-- 
GitLab


From b93ff78317c0b8f42830e2bb13dd8df596232528 Mon Sep 17 00:00:00 2001
From: Daniel De Graaf <danield@iastate.edu>
Date: Tue, 22 Aug 2006 00:30:55 -0700
Subject: [PATCH 0505/1063] [NETFILTER]: ipt_recent: add module parameter for
 changing ownership of /proc/net/ipt_recent/*

Signed-off-by: Daniel De Graaf <danield@iastate.edu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_recent.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 61a2139f9cfd2..682c0946201eb 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -35,14 +35,20 @@ static unsigned int ip_list_tot = 100;
 static unsigned int ip_pkt_list_tot = 20;
 static unsigned int ip_list_hash_size = 0;
 static unsigned int ip_list_perms = 0644;
+static unsigned int ip_list_uid = 0;
+static unsigned int ip_list_gid = 0;
 module_param(ip_list_tot, uint, 0400);
 module_param(ip_pkt_list_tot, uint, 0400);
 module_param(ip_list_hash_size, uint, 0400);
 module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0400);
+module_param(ip_list_gid, uint, 0400);
 MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
 MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
 MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
 
 
 struct recent_entry {
@@ -274,6 +280,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
 		goto out;
 	}
 	t->proc->proc_fops = &recent_fops;
+	t->proc->uid       = ip_list_uid;
+	t->proc->gid       = ip_list_gid;
 	t->proc->data      = t;
 #endif
 	spin_lock_bh(&recent_lock);
-- 
GitLab


From 2521c12cf1a29f6c380b13ca32a38175f6beed08 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Aug 2006 00:31:24 -0700
Subject: [PATCH 0506/1063] [NETFILTER]: conntrack: introduce connection mark
 event

This patch introduces the mark event. ctnetlink can use this to know if
the mark needs to be dumped.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_common.h |  4 ++++
 net/netfilter/xt_CONNMARK.c                   | 16 ++++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index d2e4bd7a7a142..9e0dae07861ef 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -125,6 +125,10 @@ enum ip_conntrack_events
 	/* Counter highest bit has been set */
 	IPCT_COUNTER_FILLING_BIT = 11,
 	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
+
+	/* Mark is set */
+	IPCT_MARK_BIT = 12,
+	IPCT_MARK = (1 << IPCT_MARK_BIT),
 };
 
 enum ip_conntrack_expect_events {
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 60c375d36f017..784482b74e58a 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -52,13 +52,25 @@ target(struct sk_buff **pskb,
 	    switch(markinfo->mode) {
 	    case XT_CONNMARK_SET:
 		newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
-		if (newmark != *ctmark)
+		if (newmark != *ctmark) {
 		    *ctmark = newmark;
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+		    ip_conntrack_event_cache(IPCT_MARK, *pskb);
+#else
+		    nf_conntrack_event_cache(IPCT_MARK, *pskb);
+#endif
+		}
 		break;
 	    case XT_CONNMARK_SAVE:
 		newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
-		if (*ctmark != newmark)
+		if (*ctmark != newmark) {
 		    *ctmark = newmark;
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+		    ip_conntrack_event_cache(IPCT_MARK, *pskb);
+#else
+		    nf_conntrack_event_cache(IPCT_MARK, *pskb);
+#endif
+		}
 		break;
 	    case XT_CONNMARK_RESTORE:
 		nfmark = (*pskb)->nfmark;
-- 
GitLab


From b9a37e0c81c498be2db9f52063c53e55d76c815e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Aug 2006 00:31:49 -0700
Subject: [PATCH 0507/1063] [NETFILTER]: ctnetlink: dump connection mark

ctnetlink dumps the mark iff the mark event happened.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 4 ++++
 net/netfilter/nf_conntrack_netlink.c      | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 0d4cc92391fa5..38708e6cfae7c 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -385,6 +385,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 		goto nfattr_failure;
 
+	if (events & IPCT_MARK
+	    && ctnetlink_dump_mark(skb, ct) < 0)
+		goto nfattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	nfnetlink_send(skb, 0, group, 0);
 	return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6527d4e048d81..aa0148f418a90 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -395,6 +395,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 		goto nfattr_failure;
 
+	if (events & IPCT_MARK
+	    && ctnetlink_dump_mark(skb, ct) < 0)
+		goto nfattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	nfnetlink_send(skb, 0, group, 0);
 	return NOTIFY_DONE;
-- 
GitLab


From b3a27bfba51d445784eb0cd6451b73a73fb69cf9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Aug 2006 00:32:05 -0700
Subject: [PATCH 0508/1063] [NETFILTER]: ctnetlink: check for listeners before
 sending expectation events

This patch uses nfnetlink_has_listeners to check for listeners in
userspace.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 3 +++
 net/netfilter/nf_conntrack_netlink.c      | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 38708e6cfae7c..ef84f43f07347 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1257,6 +1257,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	} else
 		return NOTIFY_DONE;
 
+	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+		return NOTIFY_DONE;
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 	if (!skb)
 		return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index aa0148f418a90..dc4f081dca914 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1278,6 +1278,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	} else
 		return NOTIFY_DONE;
 
+	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+		return NOTIFY_DONE;
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 	if (!skb)
 		return NOTIFY_DONE;
-- 
GitLab


From 1a31526baeed30aaa70503cee0ab281f78cae0d6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Aug 2006 00:32:23 -0700
Subject: [PATCH 0509/1063] [NETFILTER]: ctnetlink: remove impossible events
 tests for updates

IPCT_HELPER and IPCT_NATINFO bits are never set on updates.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 6 +-----
 net/netfilter/nf_conntrack_netlink.c      | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index ef84f43f07347..a20b0e385f1b9 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		/* dump everything */
 		events = ~0UL;
 		group = NFNLGRP_CONNTRACK_NEW;
-	} else if (events & (IPCT_STATUS |
-		      IPCT_PROTOINFO |
-		      IPCT_HELPER |
-		      IPCT_HELPINFO |
-		      IPCT_NATINFO)) {
+	} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dc4f081dca914..8cd85cfd9a02c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -339,11 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		/* dump everything */
 		events = ~0UL;
 		group = NFNLGRP_CONNTRACK_NEW;
-	} else  if (events & (IPCT_STATUS |
-		      IPCT_PROTOINFO |
-		      IPCT_HELPER |
-		      IPCT_HELPINFO |
-		      IPCT_NATINFO)) {
+	} else  if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else
-- 
GitLab


From 1158ba27bec6d1a20999099a938908cf85f47640 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:32:47 -0700
Subject: [PATCH 0510/1063] [NETFILTER]: nfnetlink_queue: fix typo in error
 message

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index eddfbe4441a2a..8eb2473d83e1d 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -584,7 +584,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
                 queue->queue_dropped++;
 		status = -ENOSPC;
 		if (net_ratelimit())
-		          printk(KERN_WARNING "ip_queue: full at %d entries, "
+		          printk(KERN_WARNING "nf_queue: full at %d entries, "
 				 "dropping packets(s). Dropped: %d\n", 
 				 queue->queue_total, queue->queue_dropped);
 		goto err_out_free_nskb;
@@ -635,7 +635,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 			                         diff,
 			                         GFP_ATOMIC);
 			if (newskb == NULL) {
-				printk(KERN_WARNING "ip_queue: OOM "
+				printk(KERN_WARNING "nf_queue: OOM "
 				      "in mangle, dropping packet\n");
 				return -ENOMEM;
 			}
-- 
GitLab


From da878c8e5aae3eeceeee7af8d52633d7bc125edf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:33:09 -0700
Subject: [PATCH 0511/1063] [NETFILTER]: replace open coded checksum updates

Replace open coded checksum update by nf_csum_update calls and clean up
the surrounding code a bit.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_ECN.c | 22 +++++++++-------------
 net/ipv4/netfilter/ipt_TOS.c | 22 ++++++++--------------
 net/ipv4/netfilter/ipt_TTL.c |  9 +++------
 3 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 35916c74fe4eb..7e30e6d2b5da3 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -27,22 +27,18 @@ MODULE_DESCRIPTION("iptables ECN modification module");
 static inline int
 set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
-	if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK)
-	    != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
-		u_int16_t diffs[2];
+	struct iphdr *iph = (*pskb)->nh.iph;
+	u_int16_t oldtos;
 
+	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return 0;
-
-		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
-		(*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK;
-		(*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
-		diffs[1] = htons((*pskb)->nh.iph->tos);
-		(*pskb)->nh.iph->check
-			= csum_fold(csum_partial((char *)diffs,
-						 sizeof(diffs),
-						 (*pskb)->nh.iph->check
-						 ^0xFFFF));
+		iph = (*pskb)->nh.iph;
+		oldtos = iph->tos;
+		iph->tos &= ~IPT_ECN_IP_MASK;
+		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+		iph->check = nf_csum_update(htons(oldtos) ^ 0xFFFF,
+					    htons(iph->tos), iph->check);
 	} 
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 1c7a5ca399b32..52e9d705d48ec 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -30,23 +30,17 @@ target(struct sk_buff **pskb,
        void *userinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
+	struct iphdr *iph = (*pskb)->nh.iph;
+	u_int16_t oldtos;
 
-	if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
-		u_int16_t diffs[2];
-
+	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return NF_DROP;
-
-		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
-		(*pskb)->nh.iph->tos
-			= ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK)
-			| tosinfo->tos;
-		diffs[1] = htons((*pskb)->nh.iph->tos);
-		(*pskb)->nh.iph->check
-			= csum_fold(csum_partial((char *)diffs,
-						 sizeof(diffs),
-						 (*pskb)->nh.iph->check
-						 ^0xFFFF));
+		iph = (*pskb)->nh.iph;
+		oldtos = iph->tos;
+		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
+		iph->check = nf_csum_update(htons(oldtos) ^ 0xFFFF,
+					    htons(iph->tos), iph->check);
 	}
 	return IPT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index f48892ae0be5c..2afb2a8aa8c5c 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -27,7 +27,6 @@ ipt_ttl_target(struct sk_buff **pskb,
 {
 	struct iphdr *iph;
 	const struct ipt_TTL_info *info = targinfo;
-	u_int16_t diffs[2];
 	int new_ttl;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
@@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb,
 	}
 
 	if (new_ttl != iph->ttl) {
-		diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
+		iph->check = nf_csum_update(htons(iph->ttl << 8) ^ 0xFFFF,
+					    htons(new_ttl << 8),
+					    iph->check);
 		iph->ttl = new_ttl;
-		diffs[1] = htons(((unsigned)iph->ttl) << 8);
-		iph->check = csum_fold(csum_partial((char *)diffs,
-						    sizeof(diffs),
-						    iph->check^0xFFFF));
 	}
 
 	return IPT_CONTINUE;
-- 
GitLab


From 90528e6fe92ee1a353d6a639930e7d70d85b5c85 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:33:26 -0700
Subject: [PATCH 0512/1063] [NETFILTER]: xt_CONNMARK: use tabs for indentation

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_CONNMARK.c | 57 +++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 784482b74e58a..19989a9154333 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -49,36 +49,37 @@ target(struct sk_buff **pskb,
 	u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
 
 	if (ctmark) {
-	    switch(markinfo->mode) {
-	    case XT_CONNMARK_SET:
-		newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
-		if (newmark != *ctmark) {
-		    *ctmark = newmark;
+		switch(markinfo->mode) {
+		case XT_CONNMARK_SET:
+			newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
+			if (newmark != *ctmark) {
+				*ctmark = newmark;
 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-		    ip_conntrack_event_cache(IPCT_MARK, *pskb);
+				ip_conntrack_event_cache(IPCT_MARK, *pskb);
 #else
-		    nf_conntrack_event_cache(IPCT_MARK, *pskb);
+				nf_conntrack_event_cache(IPCT_MARK, *pskb);
 #endif
 		}
-		break;
-	    case XT_CONNMARK_SAVE:
-		newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
-		if (*ctmark != newmark) {
-		    *ctmark = newmark;
+			break;
+		case XT_CONNMARK_SAVE:
+			newmark = (*ctmark & ~markinfo->mask) |
+				  ((*pskb)->nfmark & markinfo->mask);
+			if (*ctmark != newmark) {
+				*ctmark = newmark;
 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-		    ip_conntrack_event_cache(IPCT_MARK, *pskb);
+				ip_conntrack_event_cache(IPCT_MARK, *pskb);
 #else
-		    nf_conntrack_event_cache(IPCT_MARK, *pskb);
+				nf_conntrack_event_cache(IPCT_MARK, *pskb);
 #endif
+			}
+			break;
+		case XT_CONNMARK_RESTORE:
+			nfmark = (*pskb)->nfmark;
+			diff = (*ctmark ^ nfmark) & markinfo->mask;
+			if (diff != 0)
+				(*pskb)->nfmark = nfmark ^ diff;
+			break;
 		}
-		break;
-	    case XT_CONNMARK_RESTORE:
-		nfmark = (*pskb)->nfmark;
-		diff = (*ctmark ^ nfmark) & markinfo->mask;
-		if (diff != 0)
-		    (*pskb)->nfmark = nfmark ^ diff;
-		break;
-	    }
 	}
 
 	return XT_CONTINUE;
@@ -95,17 +96,17 @@ checkentry(const char *tablename,
 	struct xt_connmark_target_info *matchinfo = targinfo;
 
 	if (matchinfo->mode == XT_CONNMARK_RESTORE) {
-	    if (strcmp(tablename, "mangle") != 0) {
-		    printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
-		    return 0;
-	    }
+		if (strcmp(tablename, "mangle") != 0) {
+			printk(KERN_WARNING "CONNMARK: restore can only be "
+			       "called from \"mangle\" table, not \"%s\"\n",
+			       tablename);
+			return 0;
+		}
 	}
-
 	if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
 		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
 		return 0;
 	}
-
 	return 1;
 }
 
-- 
GitLab


From 52d9c42ef2563d2c420eb23b96bf5a4cae9e167b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:33:45 -0700
Subject: [PATCH 0513/1063] [NETFILTER]: x_tables: add helpers for mass
 match/target registration

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h |  5 +++
 net/netfilter/x_tables.c           | 60 ++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 48cc32d83f77a..9a9912430e3ae 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -290,8 +290,13 @@ struct xt_table_info
 
 extern int xt_register_target(struct xt_target *target);
 extern void xt_unregister_target(struct xt_target *target);
+extern int xt_register_targets(struct xt_target *target, unsigned int n);
+extern void xt_unregister_targets(struct xt_target *target, unsigned int n);
+
 extern int xt_register_match(struct xt_match *target);
 extern void xt_unregister_match(struct xt_match *target);
+extern int xt_register_matches(struct xt_match *match, unsigned int n);
+extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
 
 extern int xt_check_match(const struct xt_match *match, unsigned short family,
 			  unsigned int size, const char *table, unsigned int hook,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 174e8f9700951..8037ba63d5871 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -86,6 +86,36 @@ xt_unregister_target(struct xt_target *target)
 }
 EXPORT_SYMBOL(xt_unregister_target);
 
+int
+xt_register_targets(struct xt_target *target, unsigned int n)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < n; i++) {
+		err = xt_register_target(&target[i]);
+		if (err)
+			goto err;
+	}
+	return err;
+
+err:
+	if (i > 0)
+		xt_unregister_targets(target, i);
+	return err;
+}
+EXPORT_SYMBOL(xt_register_targets);
+
+void
+xt_unregister_targets(struct xt_target *target, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		xt_unregister_target(&target[i]);
+}
+EXPORT_SYMBOL(xt_unregister_targets);
+
 int
 xt_register_match(struct xt_match *match)
 {
@@ -113,6 +143,36 @@ xt_unregister_match(struct xt_match *match)
 }
 EXPORT_SYMBOL(xt_unregister_match);
 
+int
+xt_register_matches(struct xt_match *match, unsigned int n)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < n; i++) {
+		err = xt_register_match(&match[i]);
+		if (err)
+			goto err;
+	}
+	return err;
+
+err:
+	if (i > 0)
+		xt_unregister_matches(match, i);
+	return err;
+}
+EXPORT_SYMBOL(xt_register_matches);
+
+void
+xt_unregister_matches(struct xt_match *match, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		xt_unregister_match(&match[i]);
+}
+EXPORT_SYMBOL(xt_unregister_matches);
+
 
 /*
  * These are weird, but module loading must not be done with mutex
-- 
GitLab


From 4470bbc749e5551cce914529309456f631e25120 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:34:04 -0700
Subject: [PATCH 0514/1063] [NETFILTER]: x_tables: make use of mass registration
 helpers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6t_REJECT.c |   4 +-
 net/netfilter/xt_CLASSIFY.c      |  60 ++++++++---------
 net/netfilter/xt_CONNMARK.c      |  51 ++++++--------
 net/netfilter/xt_CONNSECMARK.c   |  57 +++++++---------
 net/netfilter/xt_DSCP.c          |  51 ++++++--------
 net/netfilter/xt_MARK.c          |  84 +++++++++--------------
 net/netfilter/xt_NFQUEUE.c       |  68 +++++++------------
 net/netfilter/xt_NOTRACK.c       |  47 +++++--------
 net/netfilter/xt_SECMARK.c       |  55 ++++++---------
 net/netfilter/xt_comment.c       |  45 +++++--------
 net/netfilter/xt_connbytes.c     |  47 ++++++-------
 net/netfilter/xt_connmark.c      |  53 ++++++---------
 net/netfilter/xt_conntrack.c     |   5 +-
 net/netfilter/xt_dccp.c          |  51 ++++++--------
 net/netfilter/xt_dscp.c          |  47 ++++++-------
 net/netfilter/xt_esp.c           |  51 ++++++--------
 net/netfilter/xt_helper.c        |  52 ++++++---------
 net/netfilter/xt_length.c        |  43 +++++-------
 net/netfilter/xt_limit.c         |  47 ++++++-------
 net/netfilter/xt_mac.c           |  52 +++++++--------
 net/netfilter/xt_mark.c          |  47 ++++++-------
 net/netfilter/xt_multiport.c     | 111 ++++++++++++-------------------
 net/netfilter/xt_physdev.c       |  49 ++++++--------
 net/netfilter/xt_pkttype.c       |  44 +++++-------
 net/netfilter/xt_policy.c        |  51 ++++++--------
 net/netfilter/xt_quota.c         |  51 ++++++--------
 net/netfilter/xt_sctp.c          |  51 ++++++--------
 net/netfilter/xt_state.c         |  53 ++++++---------
 net/netfilter/xt_statistic.c     |  53 ++++++---------
 net/netfilter/xt_string.c        |  50 ++++++--------
 net/netfilter/xt_tcpmss.c        |  49 ++++++--------
 net/netfilter/xt_tcpudp.c        | 107 +++++++++++------------------
 32 files changed, 679 insertions(+), 1007 deletions(-)

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index c4eba1aeb3233..7929ff4021660 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -257,9 +257,7 @@ static struct ip6t_target ip6t_reject_reg = {
 
 static int __init ip6t_reject_init(void)
 {
-	if (ip6t_register_target(&ip6t_reject_reg))
-		return -EINVAL;
-	return 0;
+	return ip6t_register_target(&ip6t_reject_reg);
 }
 
 static void __exit ip6t_reject_fini(void)
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index e54e57730012d..1f92edd059336 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -40,47 +40,41 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target classify_reg = { 
-	.name 		= "CLASSIFY", 
-	.target 	= target,
-	.targetsize	= sizeof(struct xt_classify_target_info),
-	.table		= "mangle",
-	.hooks		= (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-		          (1 << NF_IP_POST_ROUTING),
-	.family		= AF_INET,
-	.me 		= THIS_MODULE,
+static struct xt_target xt_classify_target[] = {
+	{
+		.family		= AF_INET,
+		.name 		= "CLASSIFY",
+		.target 	= target,
+		.targetsize	= sizeof(struct xt_classify_target_info),
+		.table		= "mangle",
+		.hooks		= (1 << NF_IP_LOCAL_OUT) |
+				  (1 << NF_IP_FORWARD) |
+			          (1 << NF_IP_POST_ROUTING),
+		.me 		= THIS_MODULE,
+	},
+	{
+		.name 		= "CLASSIFY",
+		.family		= AF_INET6,
+		.target 	= target,
+		.targetsize	= sizeof(struct xt_classify_target_info),
+		.table		= "mangle",
+		.hooks		= (1 << NF_IP_LOCAL_OUT) |
+				  (1 << NF_IP_FORWARD) |
+			          (1 << NF_IP_POST_ROUTING),
+		.me 		= THIS_MODULE,
+	},
 };
-static struct xt_target classify6_reg = { 
-	.name 		= "CLASSIFY", 
-	.target 	= target,
-	.targetsize	= sizeof(struct xt_classify_target_info),
-	.table		= "mangle",
-	.hooks		= (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-		          (1 << NF_IP_POST_ROUTING),
-	.family		= AF_INET6,
-	.me 		= THIS_MODULE,
-};
-
 
 static int __init xt_classify_init(void)
 {
-	int ret;
-
-	ret = xt_register_target(&classify_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&classify6_reg);
-	if (ret)
-		xt_unregister_target(&classify_reg);
-
-	return ret;
+	return xt_register_targets(xt_classify_target,
+				   ARRAY_SIZE(xt_classify_target));
 }
 
 static void __exit xt_classify_fini(void)
 {
-	xt_unregister_target(&classify_reg);
-	xt_unregister_target(&classify6_reg);
+	xt_unregister_targets(xt_classify_target,
+			      ARRAY_SIZE(xt_classify_target));
 }
 
 module_init(xt_classify_init);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 19989a9154333..e577356b5c711 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -110,45 +110,36 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct xt_target connmark_reg = {
-	.name		= "CONNMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connmark_target_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-
-static struct xt_target connmark6_reg = {
-	.name		= "CONNMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connmark_target_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_target xt_connmark_target[] = {
+	{
+		.name		= "CONNMARK",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connmark_target_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "CONNMARK",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connmark_target_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_connmark_init(void)
 {
-	int ret;
-
 	need_conntrack();
-
-	ret = xt_register_target(&connmark_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&connmark6_reg);
-	if (ret)
-		xt_unregister_target(&connmark_reg);
-
-	return ret;
+	return xt_register_targets(xt_connmark_target,
+				   ARRAY_SIZE(xt_connmark_target));
 }
 
 static void __exit xt_connmark_fini(void)
 {
-	xt_unregister_target(&connmark_reg);
-	xt_unregister_target(&connmark6_reg);
+	xt_unregister_targets(xt_connmark_target,
+			      ARRAY_SIZE(xt_connmark_target));
 }
 
 module_init(xt_connmark_init);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 8c011e0207695..48f7fc3c85cdd 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -106,49 +106,38 @@ static int checkentry(const char *tablename, const void *entry,
 	return 1;
 }
 
-static struct xt_target ipt_connsecmark_reg = {
-	.name		= "CONNSECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connsecmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 0,
-};
-
-static struct xt_target ip6t_connsecmark_reg = {
-	.name		= "CONNSECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connsecmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET6,
-	.revision	= 0,
+static struct xt_target xt_connsecmark_target[] = {
+	{
+		.name		= "CONNSECMARK",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connsecmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "CONNSECMARK",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connsecmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_connsecmark_init(void)
 {
-	int err;
-
 	need_conntrack();
-
-	err = xt_register_target(&ipt_connsecmark_reg);
-	if (err)
-		return err;
-
-	err = xt_register_target(&ip6t_connsecmark_reg);
-	if (err)
-		xt_unregister_target(&ipt_connsecmark_reg);
-
-	return err;
+	return xt_register_targets(xt_connsecmark_target,
+				   ARRAY_SIZE(xt_connsecmark_target));
 }
 
 static void __exit xt_connsecmark_fini(void)
 {
-	xt_unregister_target(&ip6t_connsecmark_reg);
-	xt_unregister_target(&ipt_connsecmark_reg);
+	xt_unregister_targets(xt_connsecmark_target,
+			      ARRAY_SIZE(xt_connsecmark_target));
 }
 
 module_init(xt_connsecmark_init);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 79df8165cd791..a1cd9723644fa 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -86,44 +86,35 @@ static int checkentry(const char *tablename,
 	return 1;
 }
 
-static struct xt_target xt_dscp_reg = {
-	.name		= "DSCP",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_DSCP_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_target xt_dscp6_reg = {
-	.name		= "DSCP",
-	.target		= target6,
-	.targetsize	= sizeof(struct xt_DSCP_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_target xt_dscp_target[] = {
+	{
+		.name		= "DSCP",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_DSCP_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "DSCP",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target6,
+		.targetsize	= sizeof(struct xt_DSCP_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_dscp_target_init(void)
 {
-	int ret;
-	ret = xt_register_target(&xt_dscp_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&xt_dscp6_reg);
-	if (ret)
-		xt_unregister_target(&xt_dscp_reg);
-
-	return ret;
+	return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
 }
 
 static void __exit xt_dscp_target_fini(void)
 {
-	xt_unregister_target(&xt_dscp_reg);
-	xt_unregister_target(&xt_dscp6_reg);
+	xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
 }
 
 module_init(xt_dscp_target_init);
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index ee9c34edc76c4..0a61272194677 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -112,65 +112,47 @@ checkentry_v1(const char *tablename,
 	return 1;
 }
 
-static struct xt_target ipt_mark_reg_v0 = {
-	.name		= "MARK",
-	.target		= target_v0,
-	.targetsize	= sizeof(struct xt_mark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry_v0,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 0,
-};
-
-static struct xt_target ipt_mark_reg_v1 = {
-	.name		= "MARK",
-	.target		= target_v1,
-	.targetsize	= sizeof(struct xt_mark_target_info_v1),
-	.table		= "mangle",
-	.checkentry	= checkentry_v1,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 1,
-};
-
-static struct xt_target ip6t_mark_reg_v0 = {
-	.name		= "MARK",
-	.target		= target_v0,
-	.targetsize	= sizeof(struct xt_mark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry_v0,
-	.me		= THIS_MODULE,
-	.family		= AF_INET6,
-	.revision	= 0,
+static struct xt_target xt_mark_target[] = {
+	{
+		.name		= "MARK",
+		.family		= AF_INET,
+		.revision	= 0,
+		.checkentry	= checkentry_v0,
+		.target		= target_v0,
+		.targetsize	= sizeof(struct xt_mark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "MARK",
+		.family		= AF_INET,
+		.revision	= 1,
+		.checkentry	= checkentry_v1,
+		.target		= target_v1,
+		.targetsize	= sizeof(struct xt_mark_target_info_v1),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "MARK",
+		.family		= AF_INET6,
+		.revision	= 0,
+		.checkentry	= checkentry_v0,
+		.target		= target_v0,
+		.targetsize	= sizeof(struct xt_mark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_mark_init(void)
 {
-	int err;
-
-	err = xt_register_target(&ipt_mark_reg_v0);
-	if (err)
-		return err;
-
-	err = xt_register_target(&ipt_mark_reg_v1);
-	if (err)
-		xt_unregister_target(&ipt_mark_reg_v0);
-
-	err = xt_register_target(&ip6t_mark_reg_v0);
-	if (err) {
-		xt_unregister_target(&ipt_mark_reg_v0);
-		xt_unregister_target(&ipt_mark_reg_v1);
-	}
-
-	return err;
+	return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
 }
 
 static void __exit xt_mark_fini(void)
 {
-	xt_unregister_target(&ipt_mark_reg_v0);
-	xt_unregister_target(&ipt_mark_reg_v1);
-	xt_unregister_target(&ip6t_mark_reg_v0);
+	xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
 }
 
 module_init(xt_mark_init);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 86ccceb61fdde..7b982283abdbe 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -37,57 +37,39 @@ target(struct sk_buff **pskb,
 	return NF_QUEUE_NR(tinfo->queuenum);
 }
 
-static struct xt_target ipt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_NFQ_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_target ip6t_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_NFQ_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_target arpt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_NFQ_info),
-	.family		= NF_ARP,
-	.me		= THIS_MODULE,
+static struct xt_target xt_nfqueue_target[] = {
+	{
+		.name		= "NFQUEUE",
+		.family		= AF_INET,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_NFQ_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "NFQUEUE",
+		.family		= AF_INET6,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_NFQ_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "NFQUEUE",
+		.family		= NF_ARP,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_NFQ_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_nfqueue_init(void)
 {
-	int ret;
-	ret = xt_register_target(&ipt_NFQ_reg);
-	if (ret)
-		return ret;
-	ret = xt_register_target(&ip6t_NFQ_reg);
-	if (ret)
-		goto out_ip;
-	ret = xt_register_target(&arpt_NFQ_reg);
-	if (ret)
-		goto out_ip6;
-
-	return ret;
-out_ip6:
-	xt_unregister_target(&ip6t_NFQ_reg);
-out_ip:
-	xt_unregister_target(&ipt_NFQ_reg);
-
-	return ret;
+	return xt_register_targets(xt_nfqueue_target,
+				   ARRAY_SIZE(xt_nfqueue_target));
 }
 
 static void __exit xt_nfqueue_fini(void)
 {
-	xt_unregister_target(&arpt_NFQ_reg);
-	xt_unregister_target(&ip6t_NFQ_reg);
-	xt_unregister_target(&ipt_NFQ_reg);
+	xt_unregister_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target));
 }
 
 module_init(xt_nfqueue_init);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 98f4b5363ce8e..cab881d4424ca 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -34,43 +34,32 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target notrack_reg = {
-	.name		= "NOTRACK",
-	.target		= target,
-	.targetsize	= 0,
-	.table		= "raw",
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_target notrack6_reg = {
-	.name		= "NOTRACK",
-	.target		= target,
-	.targetsize	= 0,
-	.table		= "raw",
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_target xt_notrack_target[] = {
+	{
+		.name		= "NOTRACK",
+		.family		= AF_INET,
+		.target		= target,
+		.table		= "raw",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "NOTRACK",
+		.family		= AF_INET6,
+		.target		= target,
+		.table		= "raw",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_notrack_init(void)
 {
-	int ret;
-
-	ret = xt_register_target(&notrack_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&notrack6_reg);
-	if (ret)
-		xt_unregister_target(&notrack_reg);
-
-	return ret;
+	return xt_register_targets(xt_notrack_target,
+				   ARRAY_SIZE(xt_notrack_target));
 }
 
 static void __exit xt_notrack_fini(void)
 {
-	xt_unregister_target(&notrack6_reg);
-	xt_unregister_target(&notrack_reg);
+	xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target));
 }
 
 module_init(xt_notrack_init);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index de9537ad9a7c5..4300988786c9c 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -111,47 +111,36 @@ static int checkentry(const char *tablename, const void *entry,
 	return 1;
 }
 
-static struct xt_target ipt_secmark_reg = {
-	.name		= "SECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_secmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 0,
-};
-
-static struct xt_target ip6t_secmark_reg = {
-	.name		= "SECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_secmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET6,
-	.revision	= 0,
+static struct xt_target xt_secmark_target[] = {
+	{
+		.name		= "SECMARK",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_secmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "SECMARK",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_secmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_secmark_init(void)
 {
-	int err;
-
-	err = xt_register_target(&ipt_secmark_reg);
-	if (err)
-		return err;
-
-	err = xt_register_target(&ip6t_secmark_reg);
-	if (err)
-		xt_unregister_target(&ipt_secmark_reg);
-
-	return err;
+	return xt_register_targets(xt_secmark_target,
+				   ARRAY_SIZE(xt_secmark_target));
 }
 
 static void __exit xt_secmark_fini(void)
 {
-	xt_unregister_target(&ip6t_secmark_reg);
-	xt_unregister_target(&ipt_secmark_reg);
+	xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target));
 }
 
 module_init(xt_secmark_init);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 197609cb06d7e..7db492d652203 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -29,41 +29,32 @@ match(const struct sk_buff *skb,
 	return 1;
 }
 
-static struct xt_match comment_match = {
-	.name		= "comment",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_comment_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-
-static struct xt_match comment6_match = {
-	.name		= "comment",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_comment_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_match xt_comment_match[] = {
+	{
+		.name		= "comment",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_comment_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "comment",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_comment_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_comment_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&comment_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&comment6_match);
-	if (ret)
-		xt_unregister_match(&comment_match);
-
-	return ret;
+	return xt_register_matches(xt_comment_match,
+				   ARRAY_SIZE(xt_comment_match));
 }
 
 static void __exit xt_comment_fini(void)
 {
-	xt_unregister_match(&comment_match);
-	xt_unregister_match(&comment6_match);
+	xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match));
 }
 
 module_init(xt_comment_init);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 1396fe2d07c14..2d49948d3c387 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -143,40 +143,35 @@ static int check(const char *tablename,
 	return 1;
 }
 
-static struct xt_match connbytes_match = {
-	.name		= "connbytes",
-	.match		= match,
-	.checkentry	= check,
-	.matchsize	= sizeof(struct xt_connbytes_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-static struct xt_match connbytes6_match = {
-	.name		= "connbytes",
-	.match		= match,
-	.checkentry	= check,
-	.matchsize	= sizeof(struct xt_connbytes_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_match xt_connbytes_match[] = {
+	{
+		.name		= "connbytes",
+		.family		= AF_INET,
+		.checkentry	= check,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_connbytes_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "connbytes",
+		.family		= AF_INET6,
+		.checkentry	= check,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_connbytes_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_connbytes_init(void)
 {
-	int ret;
-	ret = xt_register_match(&connbytes_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&connbytes6_match);
-	if (ret)
-		xt_unregister_match(&connbytes_match);
-	return ret;
+	return xt_register_matches(xt_connbytes_match,
+				   ARRAY_SIZE(xt_connbytes_match));
 }
 
 static void __exit xt_connbytes_fini(void)
 {
-	xt_unregister_match(&connbytes_match);
-	xt_unregister_match(&connbytes6_match);
+	xt_unregister_matches(xt_connbytes_match,
+			      ARRAY_SIZE(xt_connbytes_match));
 }
 
 module_init(xt_connbytes_init);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 56324c8aff0a7..a97b2d455b797 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -82,46 +82,37 @@ destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
 #endif
 }
 
-static struct xt_match connmark_match = {
-	.name		= "connmark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_connmark_info),
-	.checkentry	= checkentry,
-	.destroy	= destroy,
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-
-static struct xt_match connmark6_match = {
-	.name		= "connmark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_connmark_info),
-	.checkentry	= checkentry,
-	.destroy	= destroy,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_match xt_connmark_match[] = {
+	{
+		.name		= "connmark",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_connmark_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "connmark",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_connmark_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_connmark_init(void)
 {
-	int ret;
-
 	need_conntrack();
-
-	ret = xt_register_match(&connmark_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&connmark6_match);
-	if (ret)
-		xt_unregister_match(&connmark_match);
-	return ret;
+	return xt_register_matches(xt_connmark_match,
+				   ARRAY_SIZE(xt_connmark_match));
 }
 
 static void __exit xt_connmark_fini(void)
 {
-	xt_unregister_match(&connmark6_match);
-	xt_unregister_match(&connmark_match);
+	xt_unregister_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match));
 }
 
 module_init(xt_connmark_init);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 145489a4c3f29..1540885174ee4 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -241,11 +241,8 @@ static struct xt_match conntrack_match = {
 
 static int __init xt_conntrack_init(void)
 {
-	int ret;
 	need_conntrack();
-	ret = xt_register_match(&conntrack_match);
-
-	return ret;
+	return xt_register_match(&conntrack_match);
 }
 
 static void __exit xt_conntrack_fini(void)
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 2e2f825dad4c3..5ca6f5288f469 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -141,27 +141,26 @@ checkentry(const char *tablename,
 		&& !(info->invflags & ~info->flags);
 }
 
-static struct xt_match dccp_match = 
-{ 
-	.name 		= "dccp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_dccp_info),
-	.proto		= IPPROTO_DCCP,
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me 		= THIS_MODULE,
+static struct xt_match xt_dccp_match[] = {
+	{
+		.name 		= "dccp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_dccp_info),
+		.proto		= IPPROTO_DCCP,
+		.me 		= THIS_MODULE,
+	},
+	{
+		.name 		= "dccp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_dccp_info),
+		.proto		= IPPROTO_DCCP,
+		.me 		= THIS_MODULE,
+	},
 };
-static struct xt_match dccp6_match = 
-{ 
-	.name 		= "dccp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_dccp_info),
-	.proto		= IPPROTO_DCCP,
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me 		= THIS_MODULE,
-};
-
 
 static int __init xt_dccp_init(void)
 {
@@ -173,27 +172,19 @@ static int __init xt_dccp_init(void)
 	dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
 	if (!dccp_optbuf)
 		return -ENOMEM;
-	ret = xt_register_match(&dccp_match);
+	ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
 	if (ret)
 		goto out_kfree;
-	ret = xt_register_match(&dccp6_match);
-	if (ret)
-		goto out_unreg;
-
 	return ret;
 
-out_unreg:
-	xt_unregister_match(&dccp_match);
 out_kfree:
 	kfree(dccp_optbuf);
-
 	return ret;
 }
 
 static void __exit xt_dccp_fini(void)
 {
-	xt_unregister_match(&dccp6_match);
-	xt_unregister_match(&dccp_match);
+	xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
 	kfree(dccp_optbuf);
 }
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 82e250d1f007b..d84075c30159b 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -71,42 +71,33 @@ static int checkentry(const char *tablename,
 	return 1;
 }
 
-static struct xt_match dscp_match = {
-	.name		= "dscp",
-	.match		= match,
-	.checkentry	= checkentry,
-	.matchsize	= sizeof(struct xt_dscp_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match dscp6_match = {
-	.name		= "dscp",
-	.match		= match6,
-	.checkentry	= checkentry,
-	.matchsize	= sizeof(struct xt_dscp_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_dscp_match[] = {
+	{
+		.name		= "dscp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_dscp_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "dscp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match6,
+		.matchsize	= sizeof(struct xt_dscp_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_dscp_match_init(void)
 {
-	int ret;
-	ret = xt_register_match(&dscp_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&dscp6_match);
-	if (ret)
-		xt_unregister_match(&dscp_match);
-
-	return ret;
+	return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
 }
 
 static void __exit xt_dscp_match_fini(void)
 {
-	xt_unregister_match(&dscp_match);
-	xt_unregister_match(&dscp6_match);
+	xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
 }
 
 module_init(xt_dscp_match_init);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 9dad6281e0c10..7b19bc9ea205c 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -92,44 +92,35 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct xt_match esp_match = {
-	.name		= "esp",
-	.family		= AF_INET,
-	.proto		= IPPROTO_ESP,
-	.match		= &match,
-	.matchsize	= sizeof(struct xt_esp),
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match esp6_match = {
-	.name		= "esp",
-	.family		= AF_INET6,
-	.proto		= IPPROTO_ESP,
-	.match		= &match,
-	.matchsize	= sizeof(struct xt_esp),
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
+static struct xt_match xt_esp_match[] = {
+	{
+		.name		= "esp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_esp),
+		.proto		= IPPROTO_ESP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "esp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_esp),
+		.proto		= IPPROTO_ESP,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_esp_init(void)
 {
-	int ret;
-	ret = xt_register_match(&esp_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&esp6_match);
-	if (ret)
-		xt_unregister_match(&esp_match);
-
-	return ret;
+	return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
 }
 
 static void __exit xt_esp_cleanup(void)
 {
-	xt_unregister_match(&esp_match);
-	xt_unregister_match(&esp6_match);
+	xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
 }
 
 module_init(xt_esp_init);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 799c2a43e3b90..db453a7a154eb 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -163,45 +163,37 @@ destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
 #endif
 }
 
-static struct xt_match helper_match = {
-	.name		= "helper",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_helper_info),
-	.checkentry	= check,
-	.destroy	= destroy,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-static struct xt_match helper6_match = {
-	.name		= "helper",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_helper_info),
-	.checkentry	= check,
-	.destroy	= destroy,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_helper_match[] = {
+	{
+		.name		= "helper",
+		.family		= AF_INET,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_helper_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "helper",
+		.family		= AF_INET6,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_helper_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_helper_init(void)
 {
-	int ret;
 	need_conntrack();
-
-	ret = xt_register_match(&helper_match);
-	if (ret < 0)
-		return ret;
-
-	ret = xt_register_match(&helper6_match);
-	if (ret < 0)
-		xt_unregister_match(&helper_match);
-
-	return ret;
+	return xt_register_matches(xt_helper_match,
+				   ARRAY_SIZE(xt_helper_match));
 }
 
 static void __exit xt_helper_fini(void)
 {
-	xt_unregister_match(&helper_match);
-	xt_unregister_match(&helper6_match);
+	xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match));
 }
 
 module_init(xt_helper_init);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 109132c9a1462..67fd30d9f3032 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -52,39 +52,32 @@ match6(const struct sk_buff *skb,
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
 
-static struct xt_match length_match = {
-	.name		= "length",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_length_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match length6_match = {
-	.name		= "length",
-	.match		= match6,
-	.matchsize	= sizeof(struct xt_length_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_length_match[] = {
+	{
+		.name		= "length",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_length_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "length",
+		.family		= AF_INET6,
+		.match		= match6,
+		.matchsize	= sizeof(struct xt_length_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_length_init(void)
 {
-	int ret;
-	ret = xt_register_match(&length_match);
-	if (ret)
-		return ret;
-	ret = xt_register_match(&length6_match);
-	if (ret)
-		xt_unregister_match(&length_match);
-
-	return ret;
+	return xt_register_matches(xt_length_match,
+				   ARRAY_SIZE(xt_length_match));
 }
 
 static void __exit xt_length_fini(void)
 {
-	xt_unregister_match(&length_match);
-	xt_unregister_match(&length6_match);
+	xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match));
 }
 
 module_init(xt_length_init);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index ce7fdb7e4e07c..e8d5e7ac695ab 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -136,42 +136,33 @@ ipt_limit_checkentry(const char *tablename,
 	return 1;
 }
 
-static struct xt_match ipt_limit_reg = {
-	.name		= "limit",
-	.match		= ipt_limit_match,
-	.matchsize	= sizeof(struct xt_rateinfo),
-	.checkentry	= ipt_limit_checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-static struct xt_match limit6_reg = {
-	.name		= "limit",
-	.match		= ipt_limit_match,
-	.matchsize	= sizeof(struct xt_rateinfo),
-	.checkentry	= ipt_limit_checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_limit_match[] = {
+	{
+		.name		= "limit",
+		.family		= AF_INET,
+		.checkentry	= ipt_limit_checkentry,
+		.match		= ipt_limit_match,
+		.matchsize	= sizeof(struct xt_rateinfo),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "limit",
+		.family		= AF_INET6,
+		.checkentry	= ipt_limit_checkentry,
+		.match		= ipt_limit_match,
+		.matchsize	= sizeof(struct xt_rateinfo),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_limit_init(void)
 {
-	int ret;
-	
-	ret = xt_register_match(&ipt_limit_reg);
-	if (ret)
-		return ret;
-	
-	ret = xt_register_match(&limit6_reg);
-	if (ret)
-		xt_unregister_match(&ipt_limit_reg);
-
-	return ret;
+	return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
 }
 
 static void __exit xt_limit_fini(void)
 {
-	xt_unregister_match(&ipt_limit_reg);
-	xt_unregister_match(&limit6_reg);
+	xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
 }
 
 module_init(xt_limit_init);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 356290ffe386e..425fc21e31f54 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -43,43 +43,37 @@ match(const struct sk_buff *skb,
 		^ info->invert));
 }
 
-static struct xt_match mac_match = {
-	.name		= "mac",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mac_info),
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) |
-			  (1 << NF_IP_FORWARD),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-static struct xt_match mac6_match = {
-	.name		= "mac",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mac_info),
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) |
-			  (1 << NF_IP_FORWARD),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_mac_match[] = {
+	{
+		.name		= "mac",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mac_info),
+		.hooks		= (1 << NF_IP_PRE_ROUTING) |
+				  (1 << NF_IP_LOCAL_IN) |
+				  (1 << NF_IP_FORWARD),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "mac",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mac_info),
+		.hooks		= (1 << NF_IP_PRE_ROUTING) |
+				  (1 << NF_IP_LOCAL_IN) |
+				  (1 << NF_IP_FORWARD),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_mac_init(void)
 {
-	int ret;
-	ret = xt_register_match(&mac_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&mac6_match);
-	if (ret)
-		xt_unregister_match(&mac_match);
-
-	return ret;
+	return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
 }
 
 static void __exit xt_mac_fini(void)
 {
-	xt_unregister_match(&mac_match);
-	xt_unregister_match(&mac6_match);
+	xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
 }
 
 module_init(xt_mac_init);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 876bc57977381..39f9b079f5d4a 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -51,42 +51,33 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct xt_match mark_match = {
-	.name		= "mark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mark_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match mark6_match = {
-	.name		= "mark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mark_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_mark_match[] = {
+	{
+		.name		= "mark",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mark_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "mark",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mark_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_mark_init(void)
 {
-	int ret;
-	ret = xt_register_match(&mark_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&mark6_match);
-	if (ret)
-		xt_unregister_match(&mark_match);
-
-	return ret;
+	return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
 }
 
 static void __exit xt_mark_fini(void)
 {
-	xt_unregister_match(&mark_match);
-	xt_unregister_match(&mark6_match);
+	xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
 }
 
 module_init(xt_mark_init);
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 1ff0a25396e75..e74f9bb98b3c6 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -231,84 +231,55 @@ checkentry6_v1(const char *tablename,
 		     multiinfo->count);
 }
 
-static struct xt_match multiport_match = {
-	.name		= "multiport",
-	.revision	= 0,
-	.matchsize	= sizeof(struct xt_multiport),
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match multiport_match_v1 = {
-	.name		= "multiport",
-	.revision	= 1,
-	.matchsize	= sizeof(struct xt_multiport_v1),
-	.match		= &match_v1,
-	.checkentry	= &checkentry_v1,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match multiport6_match = {
-	.name		= "multiport",
-	.revision	= 0,
-	.matchsize	= sizeof(struct xt_multiport),
-	.match		= &match,
-	.checkentry	= &checkentry6,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match multiport6_match_v1 = {
-	.name		= "multiport",
-	.revision	= 1,
-	.matchsize	= sizeof(struct xt_multiport_v1),
-	.match		= &match_v1,
-	.checkentry	= &checkentry6_v1,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_multiport_match[] = {
+	{
+		.name		= "multiport",
+		.family		= AF_INET,
+		.revision	= 0,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_multiport),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "multiport",
+		.family		= AF_INET,
+		.revision	= 1,
+		.checkentry	= checkentry_v1,
+		.match		= match_v1,
+		.matchsize	= sizeof(struct xt_multiport_v1),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "multiport",
+		.family		= AF_INET6,
+		.revision	= 0,
+		.checkentry	= checkentry6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_multiport),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "multiport",
+		.family		= AF_INET6,
+		.revision	= 1,
+		.checkentry	= checkentry6_v1,
+		.match		= match_v1,
+		.matchsize	= sizeof(struct xt_multiport_v1),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_multiport_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&multiport_match);
-	if (ret)
-		goto out;
-
-	ret = xt_register_match(&multiport_match_v1);
-	if (ret)
-		goto out_unreg_multi_v0;
-
-	ret = xt_register_match(&multiport6_match);
-	if (ret)
-		goto out_unreg_multi_v1;
-
-	ret = xt_register_match(&multiport6_match_v1);
-	if (ret)
-		goto out_unreg_multi6_v0;
-
-	return ret;
-
-out_unreg_multi6_v0:
-	xt_unregister_match(&multiport6_match);
-out_unreg_multi_v1:
-	xt_unregister_match(&multiport_match_v1);
-out_unreg_multi_v0:
-	xt_unregister_match(&multiport_match);
-out:
-	return ret;
+	return xt_register_matches(xt_multiport_match,
+				   ARRAY_SIZE(xt_multiport_match));
 }
 
 static void __exit xt_multiport_fini(void)
 {
-	xt_unregister_match(&multiport_match);
-	xt_unregister_match(&multiport_match_v1);
-	xt_unregister_match(&multiport6_match);
-	xt_unregister_match(&multiport6_match_v1);
+	xt_unregister_matches(xt_multiport_match,
+			      ARRAY_SIZE(xt_multiport_match));
 }
 
 module_init(xt_multiport_init);
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 63a9654674657..af3d70f96ecdf 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -132,43 +132,34 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct xt_match physdev_match = {
-	.name		= "physdev",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_physdev_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match physdev6_match = {
-	.name		= "physdev",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_physdev_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_physdev_match[] = {
+	{
+		.name		= "physdev",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_physdev_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "physdev",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_physdev_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_physdev_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&physdev_match);
-	if (ret < 0)
-		return ret;
-
-	ret = xt_register_match(&physdev6_match);
-	if (ret < 0)
-		xt_unregister_match(&physdev_match);
-
-	return ret;
+	return xt_register_matches(xt_physdev_match,
+				   ARRAY_SIZE(xt_physdev_match));
 }
 
 static void __exit xt_physdev_fini(void)
 {
-	xt_unregister_match(&physdev_match);
-	xt_unregister_match(&physdev6_match);
+	xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match));
 }
 
 module_init(xt_physdev_init);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index d2f5320a80bf6..16e7b08042876 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -43,40 +43,32 @@ static int match(const struct sk_buff *skb,
 	return (type == info->pkttype) ^ info->invert;
 }
 
-static struct xt_match pkttype_match = {
-	.name		= "pkttype",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_pkttype_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match pkttype6_match = {
-	.name		= "pkttype",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_pkttype_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_pkttype_match[] = {
+	{
+		.name		= "pkttype",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_pkttype_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "pkttype",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_pkttype_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_pkttype_init(void)
 {
-	int ret;
-	ret = xt_register_match(&pkttype_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&pkttype6_match);
-	if (ret)
-		xt_unregister_match(&pkttype_match);
-
-	return ret;
+	return xt_register_matches(xt_pkttype_match,
+				   ARRAY_SIZE(xt_pkttype_match));
 }
 
 static void __exit xt_pkttype_fini(void)
 {
-	xt_unregister_match(&pkttype_match);
-	xt_unregister_match(&pkttype6_match);
+	xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match));
 }
 
 module_init(xt_pkttype_init);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index ba1ca03abad32..f5639c451112c 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -165,43 +165,36 @@ static int checkentry(const char *tablename, const void *ip_void,
 	return 1;
 }
 
-static struct xt_match policy_match = {
-	.name		= "policy",
-	.family		= AF_INET,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_policy_info),
-	.checkentry 	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match policy6_match = {
-	.name		= "policy",
-	.family		= AF_INET6,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_policy_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_policy_match[] = {
+	{
+		.name		= "policy",
+		.family		= AF_INET,
+		.checkentry 	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_policy_info),
+		.family		= AF_INET,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "policy",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_policy_info),
+		.family		= AF_INET6,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&policy_match);
-	if (ret)
-		return ret;
-	ret = xt_register_match(&policy6_match);
-	if (ret)
-		xt_unregister_match(&policy_match);
-	return ret;
+	return xt_register_matches(xt_policy_match,
+				   ARRAY_SIZE(xt_policy_match));
 }
 
 static void __exit fini(void)
 {
-	xt_unregister_match(&policy6_match);
-	xt_unregister_match(&policy_match);
+	xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match));
 }
 
 module_init(init);
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index be8d3c26b5682..cc44f87cb8e68 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -52,46 +52,33 @@ checkentry(const char *tablename, const void *entry,
 	return 1;
 }
 
-static struct xt_match quota_match = {
-	.name		= "quota",
-	.family		= AF_INET,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_quota_info),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE
-};
-
-static struct xt_match quota_match6 = {
-	.name		= "quota",
-	.family		= AF_INET6,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_quota_info),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE
+static struct xt_match xt_quota_match[] = {
+	{
+		.name		= "quota",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_quota_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "quota",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_quota_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_quota_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&quota_match);
-	if (ret)
-		goto err1;
-	ret = xt_register_match(&quota_match6);
-	if (ret)
-		goto err2;
-	return ret;
-
-err2:
-	xt_unregister_match(&quota_match);
-err1:
-	return ret;
+	return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
 }
 
 static void __exit xt_quota_fini(void)
 {
-	xt_unregister_match(&quota_match6);
-	xt_unregister_match(&quota_match);
+	xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
 }
 
 module_init(xt_quota_init);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 843383e01d415..5628621170e6b 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -178,44 +178,35 @@ checkentry(const char *tablename,
 				| SCTP_CHUNK_MATCH_ONLY)));
 }
 
-static struct xt_match sctp_match = {
-	.name		= "sctp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_sctp_info),
-	.proto		= IPPROTO_SCTP,
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-
-static struct xt_match sctp6_match = {
-	.name		= "sctp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_sctp_info),
-	.proto		= IPPROTO_SCTP,
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_match xt_sctp_match[] = {
+	{
+		.name		= "sctp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_sctp_info),
+		.proto		= IPPROTO_SCTP,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "sctp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_sctp_info),
+		.proto		= IPPROTO_SCTP,
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_sctp_init(void)
 {
-	int ret;
-	ret = xt_register_match(&sctp_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&sctp6_match);
-	if (ret)
-		xt_unregister_match(&sctp_match);
-
-	return ret;
+	return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
 }
 
 static void __exit xt_sctp_fini(void)
 {
-	xt_unregister_match(&sctp6_match);
-	xt_unregister_match(&sctp_match);
+	xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
 }
 
 module_init(xt_sctp_init);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index f9e304dc4504e..5f9492e3b2b1c 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -69,47 +69,36 @@ destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
 #endif
 }
 
-static struct xt_match state_match = {
-	.name		= "state",
-	.match		= match,
-	.checkentry	= check,
-	.destroy	= destroy,
-	.matchsize	= sizeof(struct xt_state_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match state6_match = {
-	.name		= "state",
-	.match		= match,
-	.checkentry	= check,
-	.destroy	= destroy,
-	.matchsize	= sizeof(struct xt_state_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_state_match[] = {
+	{
+		.name		= "state",
+		.family		= AF_INET,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_state_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "state",
+		.family		= AF_INET6,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_state_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_state_init(void)
 {
-	int ret;
-
 	need_conntrack();
-
-	ret = xt_register_match(&state_match);
-	if (ret < 0)
-		return ret;
-
-	ret = xt_register_match(&state6_match);
-	if (ret < 0)
-		xt_unregister_match(&state_match);
-
-	return ret;
+	return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
 }
 
 static void __exit xt_state_fini(void)
 {
-	xt_unregister_match(&state_match);
-	xt_unregister_match(&state6_match);
+	xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
 }
 
 module_init(xt_state_init);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index de1037f58596d..5181630a87fc9 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -66,46 +66,35 @@ checkentry(const char *tablename, const void *entry,
 	return 1;
 }
 
-static struct xt_match statistic_match = {
-	.name		= "statistic",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_statistic_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match statistic_match6 = {
-	.name		= "statistic",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_statistic_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_statistic_match[] = {
+	{
+		.name		= "statistic",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_statistic_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "statistic",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_statistic_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_statistic_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&statistic_match);
-	if (ret)
-		goto err1;
-
-	ret = xt_register_match(&statistic_match6);
-	if (ret)
-		goto err2;
-	return ret;
-err2:
-	xt_unregister_match(&statistic_match);
-err1:
-	return ret;
+	return xt_register_matches(xt_statistic_match,
+				   ARRAY_SIZE(xt_statistic_match));
 }
 
 static void __exit xt_statistic_fini(void)
 {
-	xt_unregister_match(&statistic_match6);
-	xt_unregister_match(&statistic_match);
+	xt_unregister_matches(xt_statistic_match,
+			      ARRAY_SIZE(xt_statistic_match));
 }
 
 module_init(xt_statistic_init);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 275330fcdaaab..1a1c1d17d85e0 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -75,43 +75,35 @@ static void destroy(const struct xt_match *match, void *matchinfo,
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
 
-static struct xt_match string_match = {
-	.name 		= "string",
-	.match 		= match,
-	.matchsize	= sizeof(struct xt_string_info),
-	.checkentry	= checkentry,
-	.destroy 	= destroy,
-	.family		= AF_INET,
-	.me 		= THIS_MODULE
-};
-static struct xt_match string6_match = {
-	.name 		= "string",
-	.match 		= match,
-	.matchsize	= sizeof(struct xt_string_info),
-	.checkentry	= checkentry,
-	.destroy 	= destroy,
-	.family		= AF_INET6,
-	.me 		= THIS_MODULE
+static struct xt_match xt_string_match[] = {
+	{
+		.name 		= "string",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match 		= match,
+		.destroy 	= destroy,
+		.matchsize	= sizeof(struct xt_string_info),
+		.me 		= THIS_MODULE
+	},
+	{
+		.name 		= "string",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match 		= match,
+		.destroy 	= destroy,
+		.matchsize	= sizeof(struct xt_string_info),
+		.me 		= THIS_MODULE
+	},
 };
 
 static int __init xt_string_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&string_match);
-	if (ret)
-		return ret;
-	ret = xt_register_match(&string6_match);
-	if (ret)
-		xt_unregister_match(&string_match);
-
-	return ret;
+	return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
 }
 
 static void __exit xt_string_fini(void)
 {
-	xt_unregister_match(&string_match);
-	xt_unregister_match(&string6_match);
+	xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
 }
 
 module_init(xt_string_init);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index cf7d335cadcd1..7baa9ebc46c14 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -93,43 +93,34 @@ match(const struct sk_buff *skb,
 			       info->invert, hotdrop);
 }
 
-static struct xt_match tcpmss_match = {
-	.name		= "tcpmss",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_tcpmss_match_info),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
+static struct xt_match xt_tcpmss_match[] = {
+	{
+		.name		= "tcpmss",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_tcpmss_match_info),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "tcpmss",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_tcpmss_match_info),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
 };
 
-static struct xt_match tcpmss6_match = {
-	.name		= "tcpmss",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_tcpmss_match_info),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
-};
-
-
 static int __init xt_tcpmss_init(void)
 {
-	int ret;
-	ret = xt_register_match(&tcpmss_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&tcpmss6_match);
-	if (ret)
-		xt_unregister_match(&tcpmss_match);
-
-	return ret;
+	return xt_register_matches(xt_tcpmss_match,
+				   ARRAY_SIZE(xt_tcpmss_match));
 }
 
 static void __exit xt_tcpmss_fini(void)
 {
-	xt_unregister_match(&tcpmss6_match);
-	xt_unregister_match(&tcpmss_match);
+	xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match));
 }
 
 module_init(xt_tcpmss_init);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index a9a63aa689369..54aab051af869 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -199,81 +199,54 @@ udp_checkentry(const char *tablename,
 	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
 }
 
-static struct xt_match tcp_matchstruct = {
-	.name		= "tcp",
-	.match		= tcp_match,
-	.matchsize	= sizeof(struct xt_tcp),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET,
-	.checkentry	= tcp_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match tcp6_matchstruct = {
-	.name		= "tcp",
-	.match		= tcp_match,
-	.matchsize	= sizeof(struct xt_tcp),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET6,
-	.checkentry	= tcp_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match udp_matchstruct = {
-	.name		= "udp",
-	.match		= udp_match,
-	.matchsize	= sizeof(struct xt_udp),
-	.proto		= IPPROTO_UDP,
-	.family		= AF_INET,
-	.checkentry	= udp_checkentry,
-	.me		= THIS_MODULE,
-};
-static struct xt_match udp6_matchstruct = {
-	.name		= "udp",
-	.match		= udp_match,
-	.matchsize	= sizeof(struct xt_udp),
-	.proto		= IPPROTO_UDP,
-	.family		= AF_INET6,
-	.checkentry	= udp_checkentry,
-	.me		= THIS_MODULE,
+static struct xt_match xt_tcpudp_match[] = {
+	{
+		.name		= "tcp",
+		.family		= AF_INET,
+		.checkentry	= tcp_checkentry,
+		.match		= tcp_match,
+		.matchsize	= sizeof(struct xt_tcp),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "tcp",
+		.family		= AF_INET6,
+		.checkentry	= tcp_checkentry,
+		.match		= tcp_match,
+		.matchsize	= sizeof(struct xt_tcp),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "udp",
+		.family		= AF_INET,
+		.checkentry	= udp_checkentry,
+		.match		= udp_match,
+		.matchsize	= sizeof(struct xt_udp),
+		.proto		= IPPROTO_UDP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "udp",
+		.family		= AF_INET6,
+		.checkentry	= udp_checkentry,
+		.match		= udp_match,
+		.matchsize	= sizeof(struct xt_udp),
+		.proto		= IPPROTO_UDP,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_tcpudp_init(void)
 {
-	int ret;
-	ret = xt_register_match(&tcp_matchstruct);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&tcp6_matchstruct);
-	if (ret)
-		goto out_unreg_tcp;
-
-	ret = xt_register_match(&udp_matchstruct);
-	if (ret)
-		goto out_unreg_tcp6;
-	
-	ret = xt_register_match(&udp6_matchstruct);
-	if (ret)
-		goto out_unreg_udp;
-
-	return ret;
-
-out_unreg_udp:
-	xt_unregister_match(&udp_matchstruct);
-out_unreg_tcp6:
-	xt_unregister_match(&tcp6_matchstruct);
-out_unreg_tcp:
-	xt_unregister_match(&tcp_matchstruct);
-	return ret;
+	return xt_register_matches(xt_tcpudp_match,
+				   ARRAY_SIZE(xt_tcpudp_match));
 }
 
 static void __exit xt_tcpudp_fini(void)
 {
-	xt_unregister_match(&udp6_matchstruct);
-	xt_unregister_match(&udp_matchstruct);
-	xt_unregister_match(&tcp6_matchstruct);
-	xt_unregister_match(&tcp_matchstruct);
+	xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match));
 }
 
 module_init(xt_tcpudp_init);
-- 
GitLab


From fe1cb10873b44cf89082465823ee6d4d4ac63ad7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:35:47 -0700
Subject: [PATCH 0515/1063] [NETFILTER]: x_tables: remove unused argument to
 target functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h        | 3 +--
 include/linux/netfilter_arp/arp_tables.h  | 3 +--
 include/linux/netfilter_ipv4/ip_tables.h  | 3 +--
 include/linux/netfilter_ipv6/ip6_tables.h | 3 +--
 net/ipv4/netfilter/arp_tables.c           | 9 +++------
 net/ipv4/netfilter/arpt_mangle.c          | 2 +-
 net/ipv4/netfilter/arptable_filter.c      | 2 +-
 net/ipv4/netfilter/ip_nat_rule.c          | 8 +++-----
 net/ipv4/netfilter/ip_tables.c            | 9 +++------
 net/ipv4/netfilter/ipt_CLUSTERIP.c        | 3 +--
 net/ipv4/netfilter/ipt_ECN.c              | 3 +--
 net/ipv4/netfilter/ipt_LOG.c              | 3 +--
 net/ipv4/netfilter/ipt_MASQUERADE.c       | 3 +--
 net/ipv4/netfilter/ipt_NETMAP.c           | 3 +--
 net/ipv4/netfilter/ipt_REDIRECT.c         | 3 +--
 net/ipv4/netfilter/ipt_REJECT.c           | 3 +--
 net/ipv4/netfilter/ipt_SAME.c             | 3 +--
 net/ipv4/netfilter/ipt_TCPMSS.c           | 3 +--
 net/ipv4/netfilter/ipt_TOS.c              | 3 +--
 net/ipv4/netfilter/ipt_TTL.c              | 2 +-
 net/ipv4/netfilter/ipt_ULOG.c             | 2 +-
 net/ipv4/netfilter/iptable_filter.c       | 4 ++--
 net/ipv4/netfilter/iptable_mangle.c       | 4 ++--
 net/ipv4/netfilter/iptable_raw.c          | 2 +-
 net/ipv6/netfilter/ip6_tables.c           | 9 +++------
 net/ipv6/netfilter/ip6t_HL.c              | 2 +-
 net/ipv6/netfilter/ip6t_LOG.c             | 3 +--
 net/ipv6/netfilter/ip6t_REJECT.c          | 3 +--
 net/ipv6/netfilter/ip6table_filter.c      | 4 ++--
 net/ipv6/netfilter/ip6table_mangle.c      | 4 ++--
 net/ipv6/netfilter/ip6table_raw.c         | 2 +-
 net/netfilter/xt_CLASSIFY.c               | 3 +--
 net/netfilter/xt_CONNMARK.c               | 3 +--
 net/netfilter/xt_CONNSECMARK.c            | 2 +-
 net/netfilter/xt_DSCP.c                   | 6 ++----
 net/netfilter/xt_MARK.c                   | 6 ++----
 net/netfilter/xt_NFQUEUE.c                | 3 +--
 net/netfilter/xt_NOTRACK.c                | 3 +--
 net/netfilter/xt_SECMARK.c                | 2 +-
 net/netfilter/xt_connbytes.c              | 2 +-
 net/sched/act_ipt.c                       | 2 +-
 41 files changed, 55 insertions(+), 90 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9a9912430e3ae..9cef0e91542b8 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -211,8 +211,7 @@ struct xt_target
 			       const struct net_device *out,
 			       unsigned int hooknum,
 			       const struct xt_target *target,
-			       const void *targinfo,
-			       void *userdata);
+			       const void *targinfo);
 
 	/* Called when user tries to insert an entry of this type:
            hook_mask is a bitmask of hooks from which it can be
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 62cc27daca4ef..149e87c9ab136 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -248,8 +248,7 @@ extern unsigned int arpt_do_table(struct sk_buff **pskb,
 				  unsigned int hook,
 				  const struct net_device *in,
 				  const struct net_device *out,
-				  struct arpt_table *table,
-				  void *userdata);
+				  struct arpt_table *table);
 
 #define ARPT_ALIGN(s) (((s) + (__alignof__(struct arpt_entry)-1)) & ~(__alignof__(struct arpt_entry)-1))
 #endif /*__KERNEL__*/
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index c0dac16e1902d..a536bbdef1455 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -312,8 +312,7 @@ extern unsigned int ipt_do_table(struct sk_buff **pskb,
 				 unsigned int hook,
 				 const struct net_device *in,
 				 const struct net_device *out,
-				 struct ipt_table *table,
-				 void *userdata);
+				 struct ipt_table *table);
 
 #define IPT_ALIGN(s) XT_ALIGN(s)
 
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index d0d5d1ee4be31..d7a8e9c0dad06 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -300,8 +300,7 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb,
 				  unsigned int hook,
 				  const struct net_device *in,
 				  const struct net_device *out,
-				  struct ip6t_table *table,
-				  void *userdata);
+				  struct ip6t_table *table);
 
 /* Check for an extension */
 extern int ip6t_ext_hdr(u8 nexthdr);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8d1d7a6e72a56..c6bd270bf46a2 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -208,8 +208,7 @@ static unsigned int arpt_error(struct sk_buff **pskb,
 			       const struct net_device *out,
 			       unsigned int hooknum,
 			       const struct xt_target *target,
-			       const void *targinfo,
-			       void *userinfo)
+			       const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("arp_tables: error: '%s'\n", (char *)targinfo);
@@ -226,8 +225,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 			   unsigned int hook,
 			   const struct net_device *in,
 			   const struct net_device *out,
-			   struct arpt_table *table,
-			   void *userdata)
+			   struct arpt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ];
 	unsigned int verdict = NF_DROP;
@@ -302,8 +300,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
-								     t->data,
-								     userdata);
+								     t->data);
 
 				/* Target might have changed stuff. */
 				arp = (*pskb)->nh.arph;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a58325c1ceb94..05fb2421bb26a 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -11,7 +11,7 @@ static unsigned int
 target(struct sk_buff **pskb,
        const struct net_device *in, const struct net_device *out,
        unsigned int hooknum, const struct xt_target *target,
-       const void *targinfo, void *userinfo)
+       const void *targinfo)
 {
 	const struct arpt_mangle *mangle = targinfo;
 	struct arphdr *arp;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index d7c472faa53b9..7edea2a1696c0 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook,
 			      const struct net_device *out,
 			      int (*okfn)(struct sk_buff *))
 {
-	return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return arpt_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 1aba926c1cb05..1aa0e4f462a51 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -104,8 +104,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct ipt_target *target,
-				    const void *targinfo,
-				    void *userinfo)
+				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
@@ -147,8 +146,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct ipt_target *target,
-				    const void *targinfo,
-				    void *userinfo)
+				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
@@ -255,7 +253,7 @@ int ip_nat_rule_find(struct sk_buff **pskb,
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
 
 	if (ret == NF_ACCEPT) {
 		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 048514f15f2ff..8ce5b6f76447f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb,
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("ip_tables: error: `%s'\n", (char *)targinfo);
@@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb,
 	     unsigned int hook,
 	     const struct net_device *in,
 	     const struct net_device *out,
-	     struct ipt_table *table,
-	     void *userdata)
+	     struct ipt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	u_int16_t offset;
@@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
-								     t->data,
-								     userdata);
+								     t->data);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 				if (((struct ipt_entry *)table_base)->comefrom
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d994c5f5744ce..a08383cf9e7a3 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -302,8 +302,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 7e30e6d2b5da3..1c3da4a48e5fa 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -85,8 +85,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct ipt_ECN_info *einfo = targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b98f7b08b0845..a8d356c6191ff 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb,
 	       const struct net_device *out,
 	       unsigned int hooknum,
 	       const struct xt_target *target,
-	       const void *targinfo,
-	       void *userinfo)
+	       const void *targinfo)
 {
 	const struct ipt_log_info *loginfo = targinfo;
 	struct nf_loginfo li;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ebd94f2abf0d4..9659793c66c0a 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -64,8 +64,7 @@ masquerade_target(struct sk_buff **pskb,
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
-		  const void *targinfo,
-		  void *userinfo)
+		  const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 736c4b5a86a79..fd5e74a19fb55 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -55,8 +55,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f290463232de4..839fe99f71d42 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -58,8 +58,7 @@ redirect_target(struct sk_buff **pskb,
 		const struct net_device *out,
 		unsigned int hooknum,
 		const struct xt_target *target,
-		const void *targinfo,
-		void *userinfo)
+		const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 95c6662b663c5..1dfd8e56be8b7 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -228,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb,
 			   const struct net_device *out,
 			   unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo,
-			   void *userinfo)
+			   const void *targinfo)
 {
 	const struct ipt_reject_info *reject = targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7169b09b5a675..cf801749490f7 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -133,8 +133,7 @@ same_target(struct sk_buff **pskb,
 		const struct net_device *out,
 		unsigned int hooknum,
 		const struct xt_target *target,
-		const void *targinfo,
-		void *userinfo)
+		const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 0fce85e055071..6d668dcfc22a1 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -41,8 +41,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
-		  const void *targinfo,
-		  void *userinfo)
+		  const void *targinfo)
 {
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
 	struct tcphdr *tcph;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 52e9d705d48ec..043df0137084f 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -26,8 +26,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
 	struct iphdr *iph = (*pskb)->nh.iph;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 2afb2a8aa8c5c..164007107b5e0 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -23,7 +23,7 @@ static unsigned int
 ipt_ttl_target(struct sk_buff **pskb,
 	       const struct net_device *in, const struct net_device *out,
 	       unsigned int hooknum, const struct xt_target *target,
-	       const void *targinfo, void *userinfo)
+	       const void *targinfo)
 {
 	struct iphdr *iph;
 	const struct ipt_TTL_info *info = targinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d46fd677fa11f..4c5f0a117862c 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct xt_target *target,
-				    const void *targinfo, void *userinfo)
+				    const void *targinfo)
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
 
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7f417484bfbf3..e2e7dd8d7903e 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -90,7 +90,7 @@ ipt_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
@@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook,
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4e7998beda635..79336cb425273 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
@@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook,
 	daddr = (*pskb)->nh.iph->daddr;
 	tos = (*pskb)->nh.iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
 	    && ((*pskb)->nh.iph->saddr != saddr
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7912cce1e1b8d..bcbeb4aeacd91 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -95,7 +95,7 @@ ipt_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_raw);
 }
 
 /* 'raw' is the very first table. */
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c9d6b23cd3f71..38cd7ffda9a08 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -220,8 +220,7 @@ ip6t_error(struct sk_buff **pskb,
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("ip6_tables: error: `%s'\n", (char *)targinfo);
@@ -258,8 +257,7 @@ ip6t_do_table(struct sk_buff **pskb,
 	      unsigned int hook,
 	      const struct net_device *in,
 	      const struct net_device *out,
-	      struct xt_table *table,
-	      void *userdata)
+	      struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	int offset = 0;
@@ -349,8 +347,7 @@ ip6t_do_table(struct sk_buff **pskb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
-								     t->data,
-								     userdata);
+								     t->data);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 				if (((struct ip6t_entry *)table_base)->comefrom
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index b8eff8ee69b1f..c85d124f9a3d0 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -22,7 +22,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 				   const struct net_device *out,
 				   unsigned int hooknum,
 				   const struct xt_target *target,
-				   const void *targinfo, void *userinfo)
+				   const void *targinfo)
 {
 	struct ipv6hdr *ip6h;
 	const struct ip6t_HL_info *info = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 73c6300109d61..acb91733e1fdc 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -427,8 +427,7 @@ ip6t_log_target(struct sk_buff **pskb,
 		const struct net_device *out,
 		unsigned int hooknum,
 		const struct xt_target *target,
-		const void *targinfo,
-		void *userinfo)
+		const void *targinfo)
 {
 	const struct ip6t_log_info *loginfo = targinfo;
 	struct nf_loginfo li;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 7929ff4021660..343acd3cbf5e1 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -180,8 +180,7 @@ static unsigned int reject6_target(struct sk_buff **pskb,
 			   const struct net_device *out,
 			   unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo,
-			   void *userinfo)
+			   const void *targinfo)
 {
 	const struct ip6t_reject_info *reject = targinfo;
 
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 60976c0c58e81..2fc07c74decf6 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
@@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook,
 	}
 #endif
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 03a13eab1daef..32db04fd83101 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
@@ -174,7 +174,7 @@ ip6t_local_hook(unsigned int hook,
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
 
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 
 	if (ret != NF_DROP && ret != NF_STOLEN 
 		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 61a7c58e99f88..b4154da575c06 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_raw);
 }
 
 static struct nf_hook_ops ip6t_ops[] = { 
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 1f92edd059336..50de965bb1043 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -29,8 +29,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_classify_target_info *clinfo = targinfo;
 
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index e577356b5c711..c2125f6ee1284 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -38,8 +38,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_connmark_target_info *markinfo = targinfo;
 	u_int32_t diff;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 48f7fc3c85cdd..4b9cc65bb82b2 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb)
 static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo, void *userinfo)
+			   const void *targinfo)
 {
 	struct sk_buff *skb = *pskb;
 	const struct xt_connsecmark_target_info *info = targinfo;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index a1cd9723644fa..9d23c9580d802 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -32,8 +32,7 @@ static unsigned int target(struct sk_buff **pskb,
 			   const struct net_device *out,
 			   unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo,
-			   void *userinfo)
+			   const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
 	u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
@@ -54,8 +53,7 @@ static unsigned int target6(struct sk_buff **pskb,
 			    const struct net_device *out,
 			    unsigned int hooknum,
 			    const struct xt_target *target,
-			    const void *targinfo,
-			    void *userinfo)
+			    const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
 	u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 0a61272194677..95a171c879941 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -27,8 +27,7 @@ target_v0(struct sk_buff **pskb,
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	const struct xt_mark_target_info *markinfo = targinfo;
 
@@ -44,8 +43,7 @@ target_v1(struct sk_buff **pskb,
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 	int mark = 0;
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 7b982283abdbe..db9b896e57c85 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -29,8 +29,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_NFQ_info *tinfo = targinfo;
 
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index cab881d4424ca..6d00dcaed2385 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -16,8 +16,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	/* Previously seen (loopback)? Ignore. */
 	if ((*pskb)->nfct != NULL)
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 4300988786c9c..8a04dcf2611e0 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -31,7 +31,7 @@ static u8 mode;
 static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo, void *userinfo)
+			   const void *targinfo)
 {
 	u32 secmark = 0;
 	const struct xt_secmark_target_info *info = targinfo;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 2d49948d3c387..d725e8b845031 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -143,7 +143,7 @@ static int check(const char *tablename,
 	return 1;
 }
 
-static struct xt_match xt_connbytes_match = {
+static struct xt_match xt_connbytes_match[] = {
 	{
 		.name		= "connbytes",
 		.family		= AF_INET,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 224c078a398e7..45a3143b86299 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -222,7 +222,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
 						   ipt->tcfi_hook,
 						   ipt->tcfi_t->u.kernel.target,
-						   ipt->tcfi_t->data, NULL);
+						   ipt->tcfi_t->data);
 	switch (ret) {
 	case NF_ACCEPT:
 		result = TC_ACT_OK;
-- 
GitLab


From efa741656e9ebf5fd6e0432b0d1b3c7f156392d3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:36:37 -0700
Subject: [PATCH 0516/1063] [NETFILTER]: x_tables: remove unused size argument
 to check/destroy functions

The match/target data size is already verified by x_tables, so the
modules no longer need it passed to their checkentry/destroy callbacks.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h   |  8 ++------
 net/ipv4/netfilter/arp_tables.c      |  5 +----
 net/ipv4/netfilter/arpt_mangle.c     |  2 +-
 net/ipv4/netfilter/ip_nat_rule.c     |  2 --
 net/ipv4/netfilter/ip_tables.c       | 14 +++-----------
 net/ipv4/netfilter/ipt_CLUSTERIP.c   |  4 +---
 net/ipv4/netfilter/ipt_ECN.c         |  1 -
 net/ipv4/netfilter/ipt_LOG.c         |  1 -
 net/ipv4/netfilter/ipt_MASQUERADE.c  |  1 -
 net/ipv4/netfilter/ipt_NETMAP.c      |  1 -
 net/ipv4/netfilter/ipt_REDIRECT.c    |  1 -
 net/ipv4/netfilter/ipt_REJECT.c      |  1 -
 net/ipv4/netfilter/ipt_SAME.c        |  4 +---
 net/ipv4/netfilter/ipt_TCPMSS.c      |  1 -
 net/ipv4/netfilter/ipt_TOS.c         |  1 -
 net/ipv4/netfilter/ipt_TTL.c         |  1 -
 net/ipv4/netfilter/ipt_ULOG.c        |  1 -
 net/ipv4/netfilter/ipt_ah.c          |  1 -
 net/ipv4/netfilter/ipt_ecn.c         |  3 +--
 net/ipv4/netfilter/ipt_hashlimit.c   |  4 +---
 net/ipv4/netfilter/ipt_owner.c       |  1 -
 net/ipv4/netfilter/ipt_recent.c      |  5 ++---
 net/ipv6/netfilter/ip6_tables.c      | 10 ++--------
 net/ipv6/netfilter/ip6t_HL.c         |  1 -
 net/ipv6/netfilter/ip6t_LOG.c        |  1 -
 net/ipv6/netfilter/ip6t_REJECT.c     |  1 -
 net/ipv6/netfilter/ip6t_ah.c         |  1 -
 net/ipv6/netfilter/ip6t_dst.c        |  1 -
 net/ipv6/netfilter/ip6t_frag.c       |  1 -
 net/ipv6/netfilter/ip6t_hbh.c        |  1 -
 net/ipv6/netfilter/ip6t_ipv6header.c |  1 -
 net/ipv6/netfilter/ip6t_owner.c      |  1 -
 net/ipv6/netfilter/ip6t_rt.c         |  1 -
 net/netfilter/xt_CONNMARK.c          |  1 -
 net/netfilter/xt_CONNSECMARK.c       |  2 +-
 net/netfilter/xt_DSCP.c              |  1 -
 net/netfilter/xt_MARK.c              |  2 --
 net/netfilter/xt_SECMARK.c           |  2 +-
 net/netfilter/xt_connbytes.c         |  1 -
 net/netfilter/xt_connmark.c          |  3 +--
 net/netfilter/xt_conntrack.c         |  3 +--
 net/netfilter/xt_dccp.c              |  1 -
 net/netfilter/xt_dscp.c              |  1 -
 net/netfilter/xt_esp.c               |  1 -
 net/netfilter/xt_helper.c            |  3 +--
 net/netfilter/xt_limit.c             |  1 -
 net/netfilter/xt_mark.c              |  1 -
 net/netfilter/xt_multiport.c         |  4 ----
 net/netfilter/xt_physdev.c           |  1 -
 net/netfilter/xt_policy.c            |  3 +--
 net/netfilter/xt_quota.c             |  2 +-
 net/netfilter/xt_sctp.c              |  1 -
 net/netfilter/xt_state.c             |  3 +--
 net/netfilter/xt_statistic.c         |  2 +-
 net/netfilter/xt_string.c            |  4 +---
 net/netfilter/xt_tcpudp.c            |  2 --
 net/sched/act_ipt.c                  |  4 +---
 57 files changed, 26 insertions(+), 106 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9cef0e91542b8..9d97102a93477 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -174,12 +174,10 @@ struct xt_match
 			  const void *ip,
 			  const struct xt_match *match,
 			  void *matchinfo,
-			  unsigned int matchinfosize,
 			  unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
-	void (*destroy)(const struct xt_match *match, void *matchinfo,
-			unsigned int matchinfosize);
+	void (*destroy)(const struct xt_match *match, void *matchinfo);
 
 	/* Called when userspace align differs from kernel space one */
 	int (*compat)(void *match, void **dstptr, int *size, int convert);
@@ -221,12 +219,10 @@ struct xt_target
 			  const void *entry,
 			  const struct xt_target *target,
 			  void *targinfo,
-			  unsigned int targinfosize,
 			  unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
-	void (*destroy)(const struct xt_target *target, void *targinfo,
-			unsigned int targinfosize);
+	void (*destroy)(const struct xt_target *target, void *targinfo);
 
 	/* Called when userspace align differs from kernel space one */
 	int (*compat)(void *target, void **dstptr, int *size, int convert);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c6bd270bf46a2..4f10b06413a1c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -491,8 +491,6 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("arp_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -559,8 +557,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
 
 	t = arpt_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 05fb2421bb26a..d12b1df252a1d 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -67,7 +67,7 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename, const void *e, const struct xt_target *target,
-           void *targinfo, unsigned int targinfosize, unsigned int hook_mask)
+           void *targinfo, unsigned int hook_mask)
 {
 	const struct arpt_mangle *mangle = targinfo;
 
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 1aa0e4f462a51..e59f5a8ecb6bd 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -172,7 +172,6 @@ static int ipt_snat_checkentry(const char *tablename,
 			       const void *entry,
 			       const struct ipt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	struct ip_nat_multi_range_compat *mr = targinfo;
@@ -189,7 +188,6 @@ static int ipt_dnat_checkentry(const char *tablename,
 			       const void *entry,
 			       const struct ipt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	struct ip_nat_multi_range_compat *mr = targinfo;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 8ce5b6f76447f..a0f36806998c5 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -464,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
 		return 1;
 
 	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->u.kernel.match, m->data,
-					   m->u.match_size - sizeof(*m));
+		m->u.kernel.match->destroy(m->u.kernel.match, m->data);
 	module_put(m->u.kernel.match->me);
 	return 0;
 }
@@ -518,7 +517,6 @@ check_match(struct ipt_entry_match *m,
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ip, match, m->data,
-					      m->u.match_size - sizeof(*m),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
@@ -579,8 +577,6 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -652,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
 	t = ipt_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
@@ -1599,7 +1594,6 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ip, match, dm->data,
-					      dm->u.match_size - sizeof(*dm),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
@@ -1658,8 +1652,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 			goto out;
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, de, target,
-				t->data, t->u.target_size - sizeof(*t),
-				de->comefrom)) {
+						      t->data, de->comefrom)) {
 		duprintf("ip_tables: compat: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
 		goto out;
@@ -2182,7 +2175,6 @@ icmp_checkentry(const char *tablename,
 	   const void *info,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_icmp *icmpinfo = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a08383cf9e7a3..41589665fc5dd 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -372,7 +372,6 @@ checkentry(const char *tablename,
 	   const void *e_void,
 	   const struct xt_target *target,
            void *targinfo,
-           unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
@@ -449,8 +448,7 @@ checkentry(const char *tablename,
 }
 
 /* drop reference count of cluster config when rule is deleted */
-static void destroy(const struct xt_target *target, void *targinfo,
-		    unsigned int targinfosize)
+static void destroy(const struct xt_target *target, void *targinfo)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 1c3da4a48e5fa..23f9c7ebe7ebb 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -106,7 +106,6 @@ checkentry(const char *tablename,
 	   const void *e_void,
 	   const struct xt_target *target,
            void *targinfo,
-           unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index a8d356c6191ff..7dc820df8bc5a 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -439,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename,
 			      const void *e,
 			      const struct xt_target *target,
 			      void *targinfo,
-			      unsigned int targinfosize,
 			      unsigned int hook_mask)
 {
 	const struct ipt_log_info *loginfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9659793c66c0a..bc65168a3437d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -42,7 +42,6 @@ masquerade_check(const char *tablename,
 		 const void *e,
 		 const struct xt_target *target,
 		 void *targinfo,
-		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
 	const struct ip_nat_multi_range_compat *mr = targinfo;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index fd5e74a19fb55..beb2914225ff6 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -33,7 +33,6 @@ check(const char *tablename,
       const void *e,
       const struct xt_target *target,
       void *targinfo,
-      unsigned int targinfosize,
       unsigned int hook_mask)
 {
 	const struct ip_nat_multi_range_compat *mr = targinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 839fe99f71d42..f03d43671c6d1 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -36,7 +36,6 @@ redirect_check(const char *tablename,
 	       const void *e,
 	       const struct xt_target *target,
 	       void *targinfo,
-	       unsigned int targinfosize,
 	       unsigned int hook_mask)
 {
 	const struct ip_nat_multi_range_compat *mr = targinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 1dfd8e56be8b7..b81821edd8933 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -276,7 +276,6 @@ static int check(const char *tablename,
 		 const void *e_void,
 		 const struct xt_target *target,
 		 void *targinfo,
-		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
  	const struct ipt_reject_info *rejinfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index cf801749490f7..efbcb11988324 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -52,7 +52,6 @@ same_check(const char *tablename,
 	      const void *e,
 	      const struct xt_target *target,
 	      void *targinfo,
-	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
 	unsigned int count, countess, rangeip, index = 0;
@@ -116,8 +115,7 @@ same_check(const char *tablename,
 }
 
 static void 
-same_destroy(const struct xt_target *target, void *targinfo,
-		unsigned int targinfosize)
+same_destroy(const struct xt_target *target, void *targinfo)
 {
 	struct ipt_same_info *mr = targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 6d668dcfc22a1..ac8a35eeea3f7 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -207,7 +207,6 @@ ipt_tcpmss_checkentry(const char *tablename,
 		      const void *e_void,
 		      const struct xt_target *target,
 		      void *targinfo,
-		      unsigned int targinfosize,
 		      unsigned int hook_mask)
 {
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 043df0137084f..471a4c438b0af 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -49,7 +49,6 @@ checkentry(const char *tablename,
 	   const void *e_void,
 	   const struct xt_target *target,
            void *targinfo,
-           unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 164007107b5e0..214d9d9c428f9 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -67,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename,
 		const void *e,
 		const struct xt_target *target,
 		void *targinfo,
-		unsigned int targinfosize,
 		unsigned int hook_mask)
 {
 	struct ipt_TTL_info *info = targinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 4c5f0a117862c..2b104ea54f480 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename,
 			       const void *e,
 			       const struct xt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hookmask)
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2927135873d7a..1798f86bc534f 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -74,7 +74,6 @@ checkentry(const char *tablename,
 	   const void *ip_void,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_ah *ahinfo = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b282504149332..dafbdec0efc07 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb,
 
 static int checkentry(const char *tablename, const void *ip_void,
 		      const struct xt_match *match,
-		      void *matchinfo, unsigned int matchsize,
-		      unsigned int hook_mask)
+		      void *matchinfo, unsigned int hook_mask)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 	const struct ipt_ip *ip = ip_void;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 3bd2368e1fc97..b5b74b07370cc 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -478,7 +478,6 @@ hashlimit_checkentry(const char *tablename,
 		     const void *inf,
 		     const struct xt_match *match,
 		     void *matchinfo,
-		     unsigned int matchsize,
 		     unsigned int hook_mask)
 {
 	struct ipt_hashlimit_info *r = matchinfo;
@@ -529,8 +528,7 @@ hashlimit_checkentry(const char *tablename,
 }
 
 static void
-hashlimit_destroy(const struct xt_match *match, void *matchinfo,
-		  unsigned int matchsize)
+hashlimit_destroy(const struct xt_match *match, void *matchinfo)
 {
 	struct ipt_hashlimit_info *r = matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5ac6ac023b5e4..78c336f12a9ee 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -56,7 +56,6 @@ checkentry(const char *tablename,
            const void *ip,
 	   const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchsize,
            unsigned int hook_mask)
 {
 	const struct ipt_owner_info *info = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 682c0946201eb..32ae8d7ac5065 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -238,7 +238,7 @@ ipt_recent_match(const struct sk_buff *skb,
 static int
 ipt_recent_checkentry(const char *tablename, const void *ip,
 		      const struct xt_match *match, void *matchinfo,
-		      unsigned int matchsize, unsigned int hook_mask)
+		      unsigned int hook_mask)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -294,8 +294,7 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
 }
 
 static void
-ipt_recent_destroy(const struct xt_match *match, void *matchinfo,
-		   unsigned int matchsize)
+ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 38cd7ffda9a08..d1c315364ee77 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -504,8 +504,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 		return 1;
 
 	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->u.kernel.match, m->data,
-					   m->u.match_size - sizeof(*m));
+		m->u.kernel.match->destroy(m->u.kernel.match, m->data);
 	module_put(m->u.kernel.match->me);
 	return 0;
 }
@@ -558,7 +557,6 @@ check_match(struct ip6t_entry_match *m,
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ipv6, match,  m->data,
-					      m->u.match_size - sizeof(*m),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
@@ -619,8 +617,6 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -692,8 +688,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 	IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
 	t = ip6t_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
@@ -1349,7 +1344,6 @@ icmp6_checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_icmp *icmpinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index c85d124f9a3d0..e54ea92d107b7 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -66,7 +66,6 @@ static int ip6t_hl_checkentry(const char *tablename,
 		const void *entry,
 		const struct xt_target *target,
 		void *targinfo,
-		unsigned int targinfosize,
 		unsigned int hook_mask)
 {
 	struct ip6t_HL_info *info = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index acb91733e1fdc..0cf537d301854 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -451,7 +451,6 @@ static int ip6t_log_checkentry(const char *tablename,
 			       const void *entry,
 			       const struct xt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	const struct ip6t_log_info *loginfo = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 343acd3cbf5e1..311eae82feb3a 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -223,7 +223,6 @@ static int check(const char *tablename,
 		 const void *entry,
 		 const struct xt_target *target,
 		 void *targinfo,
-		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
  	const struct ip6t_reject_info *rejinfo = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 2f7bb20c758b6..ec1b1608156c5 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -102,7 +102,6 @@ checkentry(const char *tablename,
           const void *entry,
 	  const struct xt_match *match,
           void *matchinfo,
-          unsigned int matchinfosize,
           unsigned int hook_mask)
 {
 	const struct ip6t_ah *ahinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index 9422413d0571c..223c335467ccb 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -182,7 +182,6 @@ checkentry(const char *tablename,
 	   const void *info,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_opts *optsinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 06768c84bd311..78d9c8b9e28a5 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -119,7 +119,6 @@ checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_frag *fraginfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 374f1be85c0d4..72defc8165635 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -182,7 +182,6 @@ checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_opts *optsinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 9375eeb1369f5..3093c398002f0 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -128,7 +128,6 @@ ipv6header_checkentry(const char *tablename,
 		      const void *ip,
 		      const struct xt_match *match,
 		      void *matchinfo,
-		      unsigned int matchsize,
 		      unsigned int hook_mask)
 {
 	const struct ip6t_ipv6header_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 5d047990cd446..4eb9bbc4ebc3c 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -57,7 +57,6 @@ checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_owner_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index fbb0184a41d8e..bcb2e168a5bc9 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -197,7 +197,6 @@ checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_rt *rtinfo = matchinfo;
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index c2125f6ee1284..0e4249ddc17bb 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -89,7 +89,6 @@ checkentry(const char *tablename,
 	   const void *entry,
 	   const struct xt_target *target,
 	   void *targinfo,
-	   unsigned int targinfosize,
 	   unsigned int hook_mask)
 {
 	struct xt_connmark_target_info *matchinfo = targinfo;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 4b9cc65bb82b2..4b0e14bb17263 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 
 static int checkentry(const char *tablename, const void *entry,
 		      const struct xt_target *target, void *targinfo,
-		      unsigned int targinfosize, unsigned int hook_mask)
+		      unsigned int hook_mask)
 {
 	struct xt_connsecmark_target_info *info = targinfo;
 
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 9d23c9580d802..a7cc75aeb38dc 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -72,7 +72,6 @@ static int checkentry(const char *tablename,
 		      const void *e_void,
 		      const struct xt_target *target,
 		      void *targinfo,
-		      unsigned int targinfosize,
 		      unsigned int hook_mask)
 {
 	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 95a171c879941..782f8d8c3edf3 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -74,7 +74,6 @@ checkentry_v0(const char *tablename,
 	      const void *entry,
 	      const struct xt_target *target,
 	      void *targinfo,
-	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
 	struct xt_mark_target_info *markinfo = targinfo;
@@ -91,7 +90,6 @@ checkentry_v1(const char *tablename,
 	      const void *entry,
 	      const struct xt_target *target,
 	      void *targinfo,
-	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
 	struct xt_mark_target_info_v1 *markinfo = targinfo;
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 8a04dcf2611e0..451b67c4bb531 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -85,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info)
 
 static int checkentry(const char *tablename, const void *entry,
 		      const struct xt_target *target, void *targinfo,
-		      unsigned int targinfosize, unsigned int hook_mask)
+		      unsigned int hook_mask)
 {
 	struct xt_secmark_target_info *info = targinfo;
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index d725e8b845031..dcc497ea8183c 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -125,7 +125,6 @@ static int check(const char *tablename,
 		 const void *ip,
 		 const struct xt_match *match,
 		 void *matchinfo,
-		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index a97b2d455b797..c9104d05a19cf 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -55,7 +55,6 @@ checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	struct xt_connmark_info *cm = matchinfo;
@@ -75,7 +74,7 @@ checkentry(const char *tablename,
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 1540885174ee4..39c57e9f75635 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -208,7 +208,6 @@ checkentry(const char *tablename,
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -222,7 +221,7 @@ checkentry(const char *tablename,
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 5ca6f5288f469..3e6cf430e518e 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -131,7 +131,6 @@ checkentry(const char *tablename,
 	   const void *inf,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct xt_dccp_info *info = matchinfo;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index d84075c30159b..26c7f4ad102a0 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -58,7 +58,6 @@ static int checkentry(const char *tablename,
 		      const void *info,
 		      const struct xt_match *match,
 		      void *matchinfo,
-		      unsigned int matchsize,
 		      unsigned int hook_mask)
 {
 	const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 7b19bc9ea205c..7c95f149d9426 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -79,7 +79,6 @@ checkentry(const char *tablename,
 	   const void *ip_void,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct xt_esp *espinfo = matchinfo;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index db453a7a154eb..5d7818b73e3ac 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -139,7 +139,6 @@ static int check(const char *tablename,
 		 const void *inf,
 		 const struct xt_match *match,
 		 void *matchinfo,
-		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
 	struct xt_helper_info *info = matchinfo;
@@ -156,7 +155,7 @@ static int check(const char *tablename,
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index e8d5e7ac695ab..b9c9ff3a06ea1 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -110,7 +110,6 @@ ipt_limit_checkentry(const char *tablename,
 		     const void *inf,
 		     const struct xt_match *match,
 		     void *matchinfo,
-		     unsigned int matchsize,
 		     unsigned int hook_mask)
 {
 	struct xt_rateinfo *r = matchinfo;
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 39f9b079f5d4a..e8059cd172754 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -39,7 +39,6 @@ checkentry(const char *tablename,
            const void *entry,
 	   const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchsize,
            unsigned int hook_mask)
 {
 	const struct xt_mark_info *minfo = matchinfo;
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index e74f9bb98b3c6..d3aefd3809304 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -176,7 +176,6 @@ checkentry(const char *tablename,
 	   const void *info,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_ip *ip = info;
@@ -191,7 +190,6 @@ checkentry_v1(const char *tablename,
 	      const void *info,
 	      const struct xt_match *match,
 	      void *matchinfo,
-	      unsigned int matchsize,
 	      unsigned int hook_mask)
 {
 	const struct ipt_ip *ip = info;
@@ -206,7 +204,6 @@ checkentry6(const char *tablename,
 	    const void *info,
 	    const struct xt_match *match,
 	    void *matchinfo,
-	    unsigned int matchsize,
 	    unsigned int hook_mask)
 {
 	const struct ip6t_ip6 *ip = info;
@@ -221,7 +218,6 @@ checkentry6_v1(const char *tablename,
 	       const void *info,
 	       const struct xt_match *match,
 	       void *matchinfo,
-	       unsigned int matchsize,
 	       unsigned int hook_mask)
 {
 	const struct ip6t_ip6 *ip = info;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index af3d70f96ecdf..fd8f954cded5b 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -106,7 +106,6 @@ checkentry(const char *tablename,
 		       const void *ip,
 		       const struct xt_match *match,
 		       void *matchinfo,
-		       unsigned int matchsize,
 		       unsigned int hook_mask)
 {
 	const struct xt_physdev_info *info = matchinfo;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index f5639c451112c..e9d81378d6532 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -135,8 +135,7 @@ static int match(const struct sk_buff *skb,
 
 static int checkentry(const char *tablename, const void *ip_void,
                       const struct xt_match *match,
-                      void *matchinfo, unsigned int matchsize,
-                      unsigned int hook_mask)
+                      void *matchinfo, unsigned int hook_mask)
 {
 	struct xt_policy_info *info = matchinfo;
 
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index cc44f87cb8e68..b75fa2c70e665 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -41,7 +41,7 @@ match(const struct sk_buff *skb,
 static int
 checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
-	   unsigned int matchsize, unsigned int hook_mask)
+	   unsigned int hook_mask)
 {
 	struct xt_quota_info *q = (struct xt_quota_info *)matchinfo;
 
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 5628621170e6b..7956acaaa24bf 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -163,7 +163,6 @@ checkentry(const char *tablename,
 	   const void *inf,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct xt_sctp_info *info = matchinfo;
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 5f9492e3b2b1c..d9010b16a1f97 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -48,7 +48,6 @@ static int check(const char *tablename,
 		 const void *inf,
 		 const struct xt_match *match,
 		 void *matchinfo,
-		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -62,7 +61,7 @@ static int check(const char *tablename,
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 5181630a87fc9..091a9f89f5d56 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -55,7 +55,7 @@ match(const struct sk_buff *skb,
 static int
 checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
-	   unsigned int matchsize, unsigned int hook_mask)
+	   unsigned int hook_mask)
 {
 	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
 
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 1a1c1d17d85e0..4453252400aa2 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -46,7 +46,6 @@ static int checkentry(const char *tablename,
 		      const void *ip,
 		      const struct xt_match *match,
 		      void *matchinfo,
-		      unsigned int matchsize,
 		      unsigned int hook_mask)
 {
 	struct xt_string_info *conf = matchinfo;
@@ -69,8 +68,7 @@ static int checkentry(const char *tablename,
 	return 1;
 }
 
-static void destroy(const struct xt_match *match, void *matchinfo,
-		    unsigned int matchsize)
+static void destroy(const struct xt_match *match, void *matchinfo)
 {
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 54aab051af869..e76a68e0bc667 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -141,7 +141,6 @@ tcp_checkentry(const char *tablename,
 	       const void *info,
 	       const struct xt_match *match,
 	       void *matchinfo,
-	       unsigned int matchsize,
 	       unsigned int hook_mask)
 {
 	const struct xt_tcp *tcpinfo = matchinfo;
@@ -190,7 +189,6 @@ udp_checkentry(const char *tablename,
 	       const void *info,
 	       const struct xt_match *match,
 	       void *matchinfo,
-	       unsigned int matchsize,
 	       unsigned int hook_mask)
 {
 	const struct xt_tcp *udpinfo = matchinfo;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 45a3143b86299..d8c9310da6e56 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -69,7 +69,6 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 	if (t->u.kernel.target->checkentry
 	    && !t->u.kernel.target->checkentry(table, NULL,
 		    			       t->u.kernel.target, t->data,
-					       t->u.target_size - sizeof(*t),
 					       hook)) {
 		module_put(t->u.kernel.target->me);
 		ret = -EINVAL;
@@ -81,8 +80,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 static void ipt_destroy_target(struct ipt_entry_target *t)
 {
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-		                            t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
         module_put(t->u.kernel.target->me);
 }
 
-- 
GitLab


From 53e26658282373b84ba85a0c9807cb762f7738a6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:43:20 -0700
Subject: [PATCH 0517/1063] [NETFILTER]: nfnetlink: remove unnecessary packed
 attributes

Remove unnecessary packed attributes in nfnetlink structures. Unfortunately
in a few cases they have to stay to avoid changing structure sizes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink.h       | 4 ++--
 include/linux/netfilter/nfnetlink_log.h   | 6 +++---
 include/linux/netfilter/nfnetlink_queue.h | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 9f5b12cf489b9..6d8e3e5a80e90 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -43,7 +43,7 @@ struct nfattr
 	u_int16_t nfa_len;
 	u_int16_t nfa_type;	/* we use 15 bits for the type, and the highest
 				 * bit to indicate whether the payload is nested */
-} __attribute__ ((packed));
+};
 
 /* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from
  * rtnetlink.h, it's time to put this in a generic file */
@@ -79,7 +79,7 @@ struct nfgenmsg {
 	u_int8_t  nfgen_family;		/* AF_xxx */
 	u_int8_t  version;		/* nfnetlink version */
 	u_int16_t res_id;		/* resource id */
-} __attribute__ ((packed));
+};
 
 #define NFNETLINK_V0	0
 
diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index a7497c7436df5..87b92f8b988fb 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -19,18 +19,18 @@ struct nfulnl_msg_packet_hdr {
 	u_int16_t	hw_protocol;	/* hw protocol (network order) */
 	u_int8_t	hook;		/* netfilter hook */
 	u_int8_t	_pad;
-} __attribute__ ((packed));
+};
 
 struct nfulnl_msg_packet_hw {
 	u_int16_t	hw_addrlen;
 	u_int16_t	_pad;
 	u_int8_t	hw_addr[8];
-} __attribute__ ((packed));
+};
 
 struct nfulnl_msg_packet_timestamp {
 	aligned_u64	sec;
 	aligned_u64	usec;
-} __attribute__ ((packed));
+};
 
 #define NFULNL_PREFIXLEN	30	/* just like old log target */
 
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
index 9e774373244c7..36af0360b56d7 100644
--- a/include/linux/netfilter/nfnetlink_queue.h
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -22,12 +22,12 @@ struct nfqnl_msg_packet_hw {
 	u_int16_t	hw_addrlen;
 	u_int16_t	_pad;
 	u_int8_t	hw_addr[8];
-} __attribute__ ((packed));
+};
 
 struct nfqnl_msg_packet_timestamp {
 	aligned_u64	sec;
 	aligned_u64	usec;
-} __attribute__ ((packed));
+};
 
 enum nfqnl_attr_type {
 	NFQA_UNSPEC,
@@ -49,7 +49,7 @@ enum nfqnl_attr_type {
 struct nfqnl_msg_verdict_hdr {
 	u_int32_t verdict;
 	u_int32_t id;
-} __attribute__ ((packed));
+};
 
 
 enum nfqnl_msg_config_cmds {
@@ -64,7 +64,7 @@ struct nfqnl_msg_config_cmd {
 	u_int8_t	command;	/* nfqnl_msg_config_cmds */
 	u_int8_t	_pad;
 	u_int16_t	pf;		/* AF_xxx for PF_[UN]BIND */
-} __attribute__ ((packed));
+};
 
 enum nfqnl_config_mode {
 	NFQNL_COPY_NONE,
-- 
GitLab


From 91270cf81765152f6e77953440beb4d3b34a71b5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:43:38 -0700
Subject: [PATCH 0518/1063] [NETFILTER]: x_tables: add data member to struct
 xt_match

Shared match functions can use this to make runtime decisions based on the
used match.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9d97102a93477..03d1027fb0e88 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -185,6 +185,9 @@ struct xt_match
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
+	/* Free to use by each match */
+	unsigned long data;
+
 	char *table;
 	unsigned int matchsize;
 	unsigned int hooks;
-- 
GitLab


From 5fa2a7601f994bdd034e871b7ea1abd6969fbb6c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:43:55 -0700
Subject: [PATCH 0519/1063] [NETFILTER]: ip6_tables: consolidate dst and hbh
 matches

The matches are identical besides one looking for NEXTHDR_HOP, the other
for NEXTHDR_DEST. Remove ip6t_dst.c and handle both in ip6t_hbh.c.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/Makefile   |   2 +-
 net/ipv6/netfilter/ip6t_dst.c | 219 ----------------------------------
 net/ipv6/netfilter/ip6t_hbh.c |  48 ++++----
 3 files changed, 25 insertions(+), 244 deletions(-)
 delete mode 100644 net/ipv6/netfilter/ip6t_dst.c

diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index eeeb57d4c9c55..ac1dfebde1755 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -5,7 +5,7 @@
 # Link order matters here.
 obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
 obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
-obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
 obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
 obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
deleted file mode 100644
index 223c335467ccb..0000000000000
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/* Kernel module to match Hop-by-Hop and Destination parameters. */
-
-/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <net/checksum.h>
-#include <net/ipv6.h>
-
-#include <asm/byteorder.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_opts.h>
-
-#define HOPBYHOP	0
-
-MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
-MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/*
- *  (Type & 0xC0) >> 6
- *	0	-> ignorable
- *	1	-> must drop the packet
- *	2	-> send ICMP PARM PROB regardless and drop packet
- *	3	-> Send ICMP if not a multicast address and drop packet
- *  (Type & 0x20) >> 5
- *	0	-> invariant
- *	1	-> can change the routing
- *  (Type & 0x1F) Type
- *	0	-> Pad1 (only 1 byte!)
- *	1	-> PadN LENGTH info (total length = length + 2)
- *	C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
- *	5	-> RTALERT 2 x x
- */
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	struct ipv6_opt_hdr _optsh, *oh;
-	const struct ip6t_opts *optinfo = matchinfo;
-	unsigned int temp;
-	unsigned int ptr;
-	unsigned int hdrlen = 0;
-	unsigned int ret = 0;
-	u8 _opttype, *tp = NULL;
-	u8 _optlen, *lp = NULL;
-	unsigned int optlen;
-
-#if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
-		return 0;
-
-	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-	if (oh == NULL) {
-		*hotdrop = 1;
-		return 0;
-	}
-
-	hdrlen = ipv6_optlen(oh);
-	if (skb->len - ptr < hdrlen) {
-		/* Packet smaller than it's length field */
-		return 0;
-	}
-
-	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
-	DEBUGP("len %02X %04X %02X ",
-	       optinfo->hdrlen, hdrlen,
-	       (!(optinfo->flags & IP6T_OPTS_LEN) ||
-		((optinfo->hdrlen == hdrlen) ^
-		 !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
-	ret = (oh != NULL) &&
-	      (!(optinfo->flags & IP6T_OPTS_LEN) ||
-	       ((optinfo->hdrlen == hdrlen) ^
-		!!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
-	ptr += 2;
-	hdrlen -= 2;
-	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
-		return ret;
-	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-		DEBUGP("Not strict - not implemented");
-	} else {
-		DEBUGP("Strict ");
-		DEBUGP("#%d ", optinfo->optsnr);
-		for (temp = 0; temp < optinfo->optsnr; temp++) {
-			/* type field exists ? */
-			if (hdrlen < 1)
-				break;
-			tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
-						&_opttype);
-			if (tp == NULL)
-				break;
-
-			/* Type check */
-			if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
-				DEBUGP("Tbad %02X %02X\n",
-				       *tp,
-				       (optinfo->opts[temp] & 0xFF00) >> 8);
-				return 0;
-			} else {
-				DEBUGP("Tok ");
-			}
-			/* Length check */
-			if (*tp) {
-				u16 spec_len;
-
-				/* length field exists ? */
-				if (hdrlen < 2)
-					break;
-				lp = skb_header_pointer(skb, ptr + 1,
-							sizeof(_optlen),
-							&_optlen);
-				if (lp == NULL)
-					break;
-				spec_len = optinfo->opts[temp] & 0x00FF;
-
-				if (spec_len != 0x00FF && spec_len != *lp) {
-					DEBUGP("Lbad %02X %04X\n", *lp,
-					       spec_len);
-					return 0;
-				}
-				DEBUGP("Lok ");
-				optlen = *lp + 2;
-			} else {
-				DEBUGP("Pad1\n");
-				optlen = 1;
-			}
-
-			/* Step to the next */
-			DEBUGP("len%04X \n", optlen);
-
-			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
-			    (temp < optinfo->optsnr - 1)) {
-				DEBUGP("new pointer is too large! \n");
-				break;
-			}
-			ptr += optlen;
-			hdrlen -= optlen;
-		}
-		if (temp == optinfo->optsnr)
-			return ret;
-		else
-			return 0;
-	}
-
-	return 0;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-	   const void *info,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
-{
-	const struct ip6t_opts *optsinfo = matchinfo;
-
-	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-		DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
-		return 0;
-	}
-	return 1;
-}
-
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-	.name		= "hbh",
-#else
-	.name		= "dst",
-#endif
-	.match		= match,
-	.matchsize	= sizeof(struct ip6t_opts),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init ip6t_dst_init(void)
-{
-	return ip6t_register_match(&opts_match);
-}
-
-static void __exit ip6t_dst_fini(void)
-{
-	ip6t_unregister_match(&opts_match);
-}
-
-module_init(ip6t_dst_init);
-module_exit(ip6t_dst_fini);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 72defc8165635..d32a205e3af29 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -19,15 +19,10 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_opts.h>
 
-#define HOPBYHOP	1
-
 MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
+MODULE_DESCRIPTION("IPv6 opts match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+MODULE_ALIAS("ip6t_dst");
 
 #if 0
 #define DEBUGP printk
@@ -71,11 +66,7 @@ match(const struct sk_buff *skb,
 	u8 _optlen, *lp = NULL;
 	unsigned int optlen;
 
-#if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
+	if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0)
 		return 0;
 
 	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
@@ -193,26 +184,35 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-	.name		= "hbh",
-#else
-	.name		= "dst",
-#endif
-	.match		= match,
-	.matchsize	= sizeof(struct ip6t_opts),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
+static struct xt_match opts_match[] = {
+	{
+		.name		= "hbh",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct ip6t_opts),
+		.checkentry	= checkentry,
+		.me		= THIS_MODULE,
+		.data		= NEXTHDR_HOP,
+	},
+	{
+		.name		= "dst",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct ip6t_opts),
+		.checkentry	= checkentry,
+		.me		= THIS_MODULE,
+		.data		= NEXTHDR_DEST,
+	},
 };
 
 static int __init ip6t_hbh_init(void)
 {
-	return ip6t_register_match(&opts_match);
+	return xt_register_matches(opts_match, ARRAY_SIZE(opts_match));
 }
 
 static void __exit ip6t_hbh_fini(void)
 {
-	ip6t_unregister_match(&opts_match);
+	xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match));
 }
 
 module_init(ip6t_hbh_init);
-- 
GitLab


From ce556b3a591fff3bebf8c5590a86aa98e1b2f153 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Aug 2006 00:44:14 -0700
Subject: [PATCH 0520/1063] [NETFILTER]: xt_tcpmss: minor cleanups

- remove unused define
- remove useless wrapper function
- use new line for expression after condition

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_tcpmss.c | 48 +++++++++++++++------------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 7baa9ebc46c14..a3682fe2f1926 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -18,21 +18,22 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-#define TH_SYN 0x02
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables TCP MSS match module");
 MODULE_ALIAS("ipt_tcpmss");
 
-/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
-static inline int
-mssoption_match(u_int16_t min, u_int16_t max,
-		const struct sk_buff *skb,
-		unsigned int protoff,
-		int invert,
-		int *hotdrop)
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const struct xt_match *match,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
 {
+	const struct xt_tcpmss_match_info *info = matchinfo;
 	struct tcphdr _tcph, *th;
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
 	u8 _opt[15 * 4 - sizeof(_tcph)], *op;
@@ -64,35 +65,22 @@ mssoption_match(u_int16_t min, u_int16_t max,
 
 			mssval = (op[i+2] << 8) | op[i+3];
 			
-			return (mssval >= min && mssval <= max) ^ invert;
+			return (mssval >= info->mss_min &&
+			        mssval <= info->mss_max) ^ info->invert;
 		}
-		if (op[i] < 2) i++;
-		else i += op[i+1]?:1;
+		if (op[i] < 2)
+			i++;
+		else
+			i += op[i+1] ? : 1;
 	}
 out:
-	return invert;
+	return info->invert;
 
- dropit:
+dropit:
 	*hotdrop = 1;
 	return 0;
 }
 
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct xt_tcpmss_match_info *info = matchinfo;
-
-	return mssoption_match(info->mss_min, info->mss_max, skb, protoff,
-			       info->invert, hotdrop);
-}
-
 static struct xt_match xt_tcpmss_match[] = {
 	{
 		.name		= "tcpmss",
-- 
GitLab


From 3fd091e73b81f131e1567c4d4a1ec042940bf2f7 Mon Sep 17 00:00:00 2001
From: Vladislav Yasevich <vladislav.yasevich@hp.com>
Date: Tue, 22 Aug 2006 13:29:17 -0700
Subject: [PATCH 0521/1063] [SCTP]: Remove multiple levels of msecs to jiffies
 conversions.

The SCTP sysctl entries are displayed in milliseconds, but stored
internally in jiffies. This results in multiple levels of msecs to
jiffies conversion and as a result produces a truncation error. This
patch makes things consistent in that we store and display defaults
in milliseconds and only convert once for use by association.
This patch also adds some sane min/max values so that we don't go off
the deep end.

Signed-off-by: Vladislav Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  13 ++--
 include/net/sctp/structs.h   |  12 +--
 net/sctp/protocol.c          |   2 +-
 net/sctp/socket.c            |  15 ++--
 net/sctp/sysctl.c            | 140 ++++++++++++++++-------------------
 net/sctp/transport.c         |   2 +-
 6 files changed, 84 insertions(+), 100 deletions(-)

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 57166bfdf8eb0..6c632e26f72df 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -264,10 +264,10 @@ enum { SCTP_MAX_DUP_TSNS = 16 };
 enum { SCTP_MAX_GABS = 16 };
 
 /* Heartbeat interval - 30 secs */
-#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT	(30 * HZ)
+#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT	(30*1000)
 
 /* Delayed sack timer - 200ms */
-#define SCTP_DEFAULT_TIMEOUT_SACK	((200 * HZ) / 1000)
+#define SCTP_DEFAULT_TIMEOUT_SACK	(200)
 
 /* RTO.Initial              - 3  seconds
  * RTO.Min                  - 1  second
@@ -275,9 +275,9 @@ enum { SCTP_MAX_GABS = 16 };
  * RTO.Alpha                - 1/8
  * RTO.Beta                 - 1/4
  */
-#define SCTP_RTO_INITIAL	(3 * HZ)
-#define SCTP_RTO_MIN		(1 * HZ)
-#define SCTP_RTO_MAX		(60 * HZ)
+#define SCTP_RTO_INITIAL	(3 * 1000)
+#define SCTP_RTO_MIN		(1 * 1000)
+#define SCTP_RTO_MAX		(60 * 1000)
 
 #define SCTP_RTO_ALPHA          3   /* 1/8 when converted to right shifts. */
 #define SCTP_RTO_BETA           2   /* 1/4 when converted to right shifts. */
@@ -290,8 +290,7 @@ enum { SCTP_MAX_GABS = 16 };
 #define SCTP_DEF_MAX_INIT 6
 #define SCTP_DEF_MAX_SEND 10
 
-#define SCTP_DEFAULT_COOKIE_LIFE_SEC	60 /* seconds */
-#define SCTP_DEFAULT_COOKIE_LIFE_USEC	0  /* microseconds */
+#define SCTP_DEFAULT_COOKIE_LIFE	(60 * 1000) /* 60 seconds */
 
 #define SCTP_DEFAULT_MINWINDOW	1500	/* default minimum rwnd size */
 #define SCTP_DEFAULT_MAXWINDOW	65535	/* default rwnd size */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0412e730c7653..c6d93bb0dcd2a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -128,9 +128,9 @@ extern struct sctp_globals {
 	 * RTO.Alpha		    - 1/8  (3 when converted to right shifts.)
 	 * RTO.Beta		    - 1/4  (2 when converted to right shifts.)
 	 */
-	unsigned long rto_initial;
-	unsigned long rto_min;
-	unsigned long rto_max;
+	unsigned int rto_initial;
+	unsigned int rto_min;
+	unsigned int rto_max;
 
 	/* Note: rto_alpha and rto_beta are really defined as inverse
 	 * powers of two to facilitate integer operations.
@@ -145,13 +145,13 @@ extern struct sctp_globals {
 	int cookie_preserve_enable;
 
 	/* Valid.Cookie.Life	    - 60  seconds  */
-	unsigned long valid_cookie_life;
+	unsigned int valid_cookie_life;
 
 	/* Delayed SACK timeout  200ms default*/
-	unsigned long sack_timeout;
+	unsigned int sack_timeout;
 
 	/* HB.interval		    - 30 seconds  */
-	unsigned long hb_interval;
+	unsigned int hb_interval;
 
 	/* Association.Max.Retrans  - 10 attempts
 	 * Path.Max.Retrans	    - 5	 attempts (per destination address)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ab03a27a76e7..5692ef5485d35 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1049,7 +1049,7 @@ SCTP_STATIC __init int sctp_init(void)
 	sctp_rto_beta			= SCTP_RTO_BETA;
 
 	/* Valid.Cookie.Life        - 60  seconds */
-	sctp_valid_cookie_life		= 60 * HZ;
+	sctp_valid_cookie_life		= SCTP_DEFAULT_COOKIE_LIFE;
 
 	/* Whether Cookie Preservative is enabled(1) or not(0) */
 	sctp_cookie_preserve_enable 	= 1;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3b6e82cb372f4..7c1dbb1d10df9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3045,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->initmsg.sinit_num_ostreams   = sctp_max_outstreams;
 	sp->initmsg.sinit_max_instreams  = sctp_max_instreams;
 	sp->initmsg.sinit_max_attempts   = sctp_max_retrans_init;
-	sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max);
+	sp->initmsg.sinit_max_init_timeo = sctp_rto_max;
 
 	/* Initialize default RTO related parameters.  These parameters can
 	 * be modified for with the SCTP_RTOINFO socket option.
 	 */
-	sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial);
-	sp->rtoinfo.srto_max     = jiffies_to_msecs(sctp_rto_max);
-	sp->rtoinfo.srto_min     = jiffies_to_msecs(sctp_rto_min);
+	sp->rtoinfo.srto_initial = sctp_rto_initial;
+	sp->rtoinfo.srto_max     = sctp_rto_max;
+	sp->rtoinfo.srto_min     = sctp_rto_min;
 
 	/* Initialize default association related parameters. These parameters
 	 * can be modified with the SCTP_ASSOCINFO socket option.
@@ -3061,8 +3061,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->assocparams.sasoc_number_peer_destinations = 0;
 	sp->assocparams.sasoc_peer_rwnd = 0;
 	sp->assocparams.sasoc_local_rwnd = 0;
-	sp->assocparams.sasoc_cookie_life = 
-		jiffies_to_msecs(sctp_valid_cookie_life);
+	sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life;
 
 	/* Initialize default event subscriptions. By default, all the
 	 * options are off. 
@@ -3072,10 +3071,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	/* Default Peer Address Parameters.  These defaults can
 	 * be modified via SCTP_PEER_ADDR_PARAMS
 	 */
-	sp->hbinterval  = jiffies_to_msecs(sctp_hb_interval);
+	sp->hbinterval  = sctp_hb_interval;
 	sp->pathmaxrxt  = sctp_max_retrans_path;
 	sp->pathmtu     = 0; // allow default discovery
-	sp->sackdelay   = jiffies_to_msecs(sctp_sack_timeout);
+	sp->sackdelay   = sctp_sack_timeout;
 	sp->param_flags = SPP_HB_ENABLE |
 	                  SPP_PMTUD_ENABLE |
 	                  SPP_SACKDELAY_ENABLE;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index dc6f3ff32358c..633cd178654b4 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -45,9 +45,10 @@
 #include <net/sctp/sctp.h>
 #include <linux/sysctl.h>
 
-static ctl_handler sctp_sysctl_jiffies_ms;
-static long rto_timer_min = 1;
-static long rto_timer_max = 86400000; /* One day */
+static int zero = 0;
+static int one = 1;
+static int timer_max = 86400000; /* ms in one day */
+static int int_max = INT_MAX;
 static long sack_timer_min = 1;
 static long sack_timer_max = 500;
 
@@ -56,45 +57,45 @@ static ctl_table sctp_table[] = {
 		.ctl_name	= NET_SCTP_RTO_INITIAL,
 		.procname	= "rto_initial",
 		.data		= &sctp_rto_initial,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_MIN,
 		.procname	= "rto_min",
 		.data		= &sctp_rto_min,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_MAX,
 		.procname	= "rto_max",
 		.data		= &sctp_rto_max,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_VALID_COOKIE_LIFE,
 		.procname	= "valid_cookie_life",
 		.data		= &sctp_valid_cookie_life,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_MAX_BURST,
@@ -102,7 +103,10 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_max_burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_ASSOCIATION_MAX_RETRANS,
@@ -110,7 +114,10 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_max_retrans_association,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_SNDBUF_POLICY,
@@ -118,7 +125,8 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_sndbuf_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_RCVBUF_POLICY,
@@ -126,7 +134,8 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_rcvbuf_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_PATH_MAX_RETRANS,
@@ -134,7 +143,10 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_max_retrans_path,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_MAX_INIT_RETRANSMITS,
@@ -142,18 +154,21 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_max_retrans_init,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_HB_INTERVAL,
 		.procname	= "hb_interval",
 		.data		= &sctp_hb_interval,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_PRESERVE_ENABLE,
@@ -161,23 +176,26 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_cookie_preserve_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_ALPHA,
 		.procname	= "rto_alpha_exp_divisor",
 		.data		= &sctp_rto_alpha,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_BETA,
 		.procname	= "rto_beta_exp_divisor",
 		.data		= &sctp_rto_beta,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_ADDIP_ENABLE,
@@ -185,7 +203,8 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_addip_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_PRSCTP_ENABLE,
@@ -193,7 +212,8 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_prsctp_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_SACK_TIMEOUT,
@@ -201,8 +221,8 @@ static ctl_table sctp_table[] = {
 		.data		= &sctp_sack_timeout,
 		.maxlen		= sizeof(long),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
 		.extra1         = &sack_timer_min,
 		.extra2         = &sack_timer_max,
 	},
@@ -242,37 +262,3 @@ void sctp_sysctl_unregister(void)
 {
 	unregister_sysctl_table(sctp_sysctl_header);
 }
-
-/* Strategy function to convert jiffies to milliseconds.  */
-static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen,
-		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context) {
-
-	if (oldval) {
-		size_t olen;
-
-		if (oldlenp) {
-			if (get_user(olen, oldlenp))
-				return -EFAULT;
-
-			if (olen != sizeof (int))
-				return -EINVAL;
-		}
-		if (put_user((*(int *)(table->data) * 1000) / HZ,
-			(int __user *)oldval) ||
-		    (oldlenp && put_user(sizeof (int), oldlenp)))
-			return -EFAULT;
-	}
-	if (newval && newlen) {
-		int new;
-
-		if (newlen != sizeof (int))
-			return -EINVAL;
-
-		if (get_user(new, (int __user *)newval))
-			return -EFAULT;
-
-		*(int *)(table->data) = (new * HZ) / 1000;
-	}
-	return 1;
-}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2763aa93de1af..3e5936a5f671d 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -75,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	 * parameter 'RTO.Initial'.
 	 */
 	peer->rtt = 0;
-	peer->rto = sctp_rto_initial;
+	peer->rto = msecs_to_jiffies(sctp_rto_initial);
 	peer->rttvar = 0;
 	peer->srtt = 0;
 	peer->rto_pending = 0;
-- 
GitLab


From 2809486424df58043b380aeb9d7f402c031c46f6 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 22 Aug 2006 13:52:17 -0700
Subject: [PATCH 0522/1063] [NETFILTER]: x_tables: Fix typos after conversion
 to use mass registration helper

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_CONNSECMARK.c | 8 ++++----
 net/netfilter/xt_SECMARK.c     | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 4b0e14bb17263..467386266674f 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -130,14 +130,14 @@ static struct xt_target xt_connsecmark_target[] = {
 static int __init xt_connsecmark_init(void)
 {
 	need_conntrack();
-	return xt_register_targets(xt_connsecmark_targets,
-				   ARRAY_SIZE(xt_connsecmark_targets));
+	return xt_register_targets(xt_connsecmark_target,
+				   ARRAY_SIZE(xt_connsecmark_target));
 }
 
 static void __exit xt_connsecmark_fini(void)
 {
-	xt_unregister_targets(xt_connsecmark_targets,
-			      ARRAY_SIZE(xt_connsecmark_targets));
+	xt_unregister_targets(xt_connsecmark_target,
+			      ARRAY_SIZE(xt_connsecmark_target));
 }
 
 module_init(xt_connsecmark_init);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 451b67c4bb531..add752196290d 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -111,7 +111,7 @@ static int checkentry(const char *tablename, const void *entry,
 	return 1;
 }
 
-static struct xt_target xt_secmark_target = {
+static struct xt_target xt_secmark_target[] = {
 	{
 		.name		= "SECMARK",
 		.family		= AF_INET,
-- 
GitLab


From a57d27fc7107ddcc655ba2812cfebfce3163fd62 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 22 Aug 2006 22:20:14 -0700
Subject: [PATCH 0523/1063] [RTNETLINK]: Don't return error on no-metrics.

Instead just cancel the nested attribute and return 0.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eeff0b23e944c..8f225499e32e1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -202,8 +202,10 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 		}
 	}
 
-	if (!valid)
-		goto nla_put_failure;
+	if (!valid) {
+		nla_nest_cancel(skb, mx);
+		return 0;
+	}
 
 	return nla_nest_end(skb, mx);
 
-- 
GitLab


From 5e032e32ecc2e6cb0385dc115ca9bfe5e19a9539 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:12:24 -0700
Subject: [PATCH 0524/1063] [IPV6] NDISC: Take source address into account for
 redirects.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 1 +
 net/ipv6/ndisc.c        | 3 ++-
 net/ipv6/route.c        | 5 +++--
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 3f170f667c7b8..249ce4545ef0b 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -110,6 +110,7 @@ extern int			rt6_route_rcv(struct net_device *dev,
 					      struct in6_addr *gwaddr);
 
 extern void			rt6_redirect(struct in6_addr *dest,
+					     struct in6_addr *src,
 					     struct in6_addr *saddr,
 					     struct neighbour *neigh,
 					     u8 *lladdr,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 419d65163819e..32f28dec399e1 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1346,7 +1346,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 
 	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
 	if (neigh) {
-		rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, 
+		rt6_redirect(dest, &skb->nh.ipv6h->daddr,
+			     &skb->nh.ipv6h->saddr, neigh, lladdr,
 			     on_link);
 		neigh_release(neigh);
 	}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5d6e9083ca2cf..a9b08a2422e0f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1279,7 +1279,8 @@ static int ip6_route_del(struct fib6_config *cfg)
 /*
  *	Handle redirects
  */
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
+		  struct in6_addr *saddr,
 		  struct neighbour *neigh, u8 *lladdr, int on_link)
 {
 	struct rt6_info *rt, *nrt = NULL;
@@ -1304,7 +1305,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 	 */
 
 	read_lock_bh(&table->tb6_lock);
-	fn = fib6_lookup(&table->tb6_root, dest, NULL);
+	fn = fib6_lookup(&table->tb6_root, dest, src);
 restart:
 	for (rt = fn->leaf; rt; rt = rt->u.next) {
 		/*
-- 
GitLab


From a6279458c534d01ccc39498aba61c93083ee0372 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:18:26 -0700
Subject: [PATCH 0525/1063] [IPV6] NDISC: Search over all possible rules on
 receipt of redirect.

Split up function for finding routes for redirects.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 85 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 61 insertions(+), 24 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a9b08a2422e0f..8d00a9d77f019 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1279,19 +1279,18 @@ static int ip6_route_del(struct fib6_config *cfg)
 /*
  *	Handle redirects
  */
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
-		  struct in6_addr *saddr,
-		  struct neighbour *neigh, u8 *lladdr, int on_link)
+struct ip6rd_flowi {
+	struct flowi fl;
+	struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
+					     struct flowi *fl,
+					     int flags)
 {
-	struct rt6_info *rt, *nrt = NULL;
+	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+	struct rt6_info *rt;
 	struct fib6_node *fn;
-	struct fib6_table *table;
-	struct netevent_redirect netevent;
-
-	/* TODO: Very lazy, might need to check all tables */
-	table = fib6_get_table(RT6_TABLE_MAIN);
-	if (table == NULL)
-		return;
 
 	/*
 	 * Get the "current" route for this destination and
@@ -1305,7 +1304,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	 */
 
 	read_lock_bh(&table->tb6_lock);
-	fn = fib6_lookup(&table->tb6_root, dest, src);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
 	for (rt = fn->leaf; rt; rt = rt->u.next) {
 		/*
@@ -1320,29 +1319,67 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 			continue;
 		if (!(rt->rt6i_flags & RTF_GATEWAY))
 			continue;
-		if (neigh->dev != rt->rt6i_dev)
+		if (fl->oif != rt->rt6i_dev->ifindex)
 			continue;
-		if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
 			continue;
 		break;
 	}
-	if (rt)
-		dst_hold(&rt->u.dst);
-	else if (rt6_need_strict(dest)) {
-		while ((fn = fn->parent) != NULL) {
-			if (fn->fn_flags & RTN_ROOT)
-				break;
-			if (fn->fn_flags & RTN_RTINFO)
-				goto restart;
+
+	if (!rt) {
+		if (rt6_need_strict(&fl->fl6_dst)) {
+			while ((fn = fn->parent) != NULL) {
+				if (fn->fn_flags & RTN_ROOT)
+					break;
+				if (fn->fn_flags & RTN_RTINFO)
+					goto restart;
+			}
 		}
+		rt = &ip6_null_entry;
 	}
+	dst_hold(&rt->u.dst);
+
 	read_unlock_bh(&table->tb6_lock);
 
-	if (!rt) {
+	return rt;
+};
+
+static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
+					   struct in6_addr *src,
+					   struct in6_addr *gateway,
+					   struct net_device *dev)
+{
+	struct ip6rd_flowi rdfl = {
+		.fl = {
+			.oif = dev->ifindex,
+			.nl_u = {
+				.ip6_u = {
+					.daddr = *dest,
+					.saddr = *src,
+				},
+			},
+		},
+		.gateway = *gateway,
+	};
+	int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
+
+	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
+}
+
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
+		  struct in6_addr *saddr,
+		  struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+	struct rt6_info *rt, *nrt = NULL;
+	struct netevent_redirect netevent;
+
+	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+
+	if (rt == &ip6_null_entry) {
 		if (net_ratelimit())
 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
 			       "for redirect target\n");
-		return;
+		goto out;
 	}
 
 	/*
-- 
GitLab


From af184765848c280c7e6190f45c827c5ea3881126 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:18:57 -0700
Subject: [PATCH 0526/1063] [IPV6] NDISC: Initialize fl with outbound interface
 to lookup rules properly.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 32f28dec399e1..ed01f9a330d6d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -412,7 +412,8 @@ static void pndisc_destructor(struct pneigh_entry *n)
  */
 
 static inline void ndisc_flow_init(struct flowi *fl, u8 type,
-			    struct in6_addr *saddr, struct in6_addr *daddr)
+			    struct in6_addr *saddr, struct in6_addr *daddr,
+			    int oif)
 {
 	memset(fl, 0, sizeof(*fl));
 	ipv6_addr_copy(&fl->fl6_src, saddr);
@@ -420,6 +421,7 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
 	fl->proto	 	= IPPROTO_ICMPV6;
 	fl->fl_icmp_type	= type;
 	fl->fl_icmp_code	= 0;
+	fl->oif			= oif;
 	security_sk_classify_flow(ndisc_socket->sk, fl);
 }
 
@@ -452,7 +454,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 		src_addr = &tmpaddr;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
 	if (!dst)
@@ -542,7 +545,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		saddr = &addr_buf;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
+			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
 	if (!dst)
@@ -617,7 +621,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
         int len;
 	int err;
 
-	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
+	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
+			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
 	if (!dst)
@@ -1383,7 +1388,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
  		return;
  	}
 
-	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
+	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+			dev->ifindex);
 
 	dst = ip6_route_output(NULL, &fl);
 	if (dst == NULL)
-- 
GitLab


From cf6b1982599cbb60f410adeda659b0b29cdf7ad7 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:19:18 -0700
Subject: [PATCH 0527/1063] [IPV6] ROUTE: Introduce a helper to check route
 validity.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Acked-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 65514f21c1864..0a18cb6b1cbb9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -726,6 +726,14 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	return err;
 }
 
+static inline int ip6_rt_check(struct rt6key *rt_key,
+			       struct in6_addr *fl_addr,
+			       struct in6_addr *addr_cache)
+{
+	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
+		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
+}
+
 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 					  struct dst_entry *dst,
 					  struct flowi *fl)
@@ -741,8 +749,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 	 * that we do not support routing by source, TOS,
 	 * and MSG_DONTROUTE 		--ANK (980726)
 	 *
-	 * 1. If route was host route, check that
-	 *    cached destination is current.
+	 * 1. ip6_rt_check(): If route was host route,
+	 *    check that cached destination is current.
 	 *    If it is network route, we still may
 	 *    check its validity using saved pointer
 	 *    to the last used address: daddr_cache.
@@ -753,11 +761,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 	 *    sockets.
 	 * 2. oif also should be the same.
 	 */
-	if (((rt->rt6i_dst.plen != 128 ||
-	      !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
-	     && (np->daddr_cache == NULL ||
-		 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
-	    || (fl->oif && fl->oif != dst->dev->ifindex)) {
+	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
+	    (fl->oif && fl->oif != dst->dev->ifindex)) {
 		dst_release(dst);
 		dst = NULL;
 	}
-- 
GitLab


From 8e1ef0a95b87e8b4292b2ba733e8cb854ea2d2fe Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 29 Aug 2006 17:15:09 -0700
Subject: [PATCH 0528/1063] [IPV6]: Cache source address as well in
 ipv6_pinfo{}.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             | 3 +++
 include/net/ip6_route.h          | 9 ++++++---
 net/dccp/ipv6.c                  | 4 ++--
 net/ipv6/af_inet6.c              | 2 +-
 net/ipv6/datagram.c              | 7 ++++++-
 net/ipv6/inet6_connection_sock.c | 2 +-
 net/ipv6/ip6_output.c            | 3 +++
 net/ipv6/tcp_ipv6.c              | 4 ++--
 net/ipv6/udp.c                   | 7 ++++++-
 9 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 297853c841b4b..02d14a3ff2aff 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -242,6 +242,9 @@ struct ipv6_pinfo {
 	struct in6_addr 	rcv_saddr;
 	struct in6_addr		daddr;
 	struct in6_addr		*daddr_cache;
+#ifdef CONFIG_IPV6_SUBTREES
+	struct in6_addr		*saddr_cache;
+#endif
 
 	__u32			flow_label;
 	__u32			frag_size;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 249ce4545ef0b..0d40f84df21b0 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -144,21 +144,24 @@ extern rwlock_t rt6_lock;
  *	Store a destination cache entry in a socket
  */
 static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				   struct in6_addr *daddr)
+				   struct in6_addr *daddr, struct in6_addr *saddr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct rt6_info *rt = (struct rt6_info *) dst;
 
 	sk_setup_caps(sk, dst);
 	np->daddr_cache = daddr;
+#ifdef CONFIG_IPV6_SUBTREES
+	np->saddr_cache = saddr;
+#endif
 	np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
 }
 
 static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				 struct in6_addr *daddr)
+				 struct in6_addr *daddr, struct in6_addr *saddr)
 {
 	write_lock(&sk->sk_dst_lock);
-	__ip6_dst_store(sk, dst, daddr);
+	__ip6_dst_store(sk, dst, daddr, saddr);
 	write_unlock(&sk->sk_dst_lock);
 }
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 231bc7c7e749c..f9c5e12d70388 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -231,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	ipv6_addr_copy(&np->saddr, saddr);
 	inet->rcv_saddr = LOOPBACK4_IPV6;
 
-	__ip6_dst_store(sk, dst, NULL);
+	__ip6_dst_store(sk, dst, NULL, NULL);
 
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt != NULL)
@@ -872,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	 * comment in that function for the gory details. -acme
 	 */
 
-	__ip6_dst_store(newsk, dst, NULL);
+	__ip6_dst_store(newsk, dst, NULL, NULL);
 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
 						      NETIF_F_TSO);
 	newdp6 = (struct dccp6_sock *)newsk;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2ff600cfe3a47..57ee5ddea96f8 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -659,7 +659,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 			return err;
 		}
 
-		__ip6_dst_store(sk, dst, NULL);
+		__ip6_dst_store(sk, dst, NULL, NULL);
 	}
 
 	return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c73508e090a64..8561b9da6db61 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -193,7 +193,12 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	ip6_dst_store(sk, dst,
 		      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
-		      &np->daddr : NULL);
+		      &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+		      ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
+		      &np->saddr :
+#endif
+		      NULL);
 
 	sk->sk_state = TCP_ESTABLISHED;
 out:
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 7a51a258615d5..827f41d1478b3 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -186,7 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 			return err;
 		}
 
-		__ip6_dst_store(sk, dst, NULL);
+		__ip6_dst_store(sk, dst, NULL, NULL);
 	}
 
 	skb->dst = dst_clone(dst);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0a18cb6b1cbb9..2a376b7d91b49 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -762,6 +762,9 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 	 * 2. oif also should be the same.
 	 */
 	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
+#ifdef CONFIG_IPV6_SUBTREES
+	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
+#endif
 	    (fl->oif && fl->oif != dst->dev->ifindex)) {
 		dst_release(dst);
 		dst = NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7f1b660493b78..2b18918f30111 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -272,7 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	inet->rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(sk, dst, NULL);
+	__ip6_dst_store(sk, dst, NULL, NULL);
 
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
@@ -954,7 +954,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	 */
 
 	newsk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(newsk, dst, NULL);
+	__ip6_dst_store(newsk, dst, NULL, NULL);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index eb9e1b39c8f84..b9cc55ccb0004 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -847,7 +847,12 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (connected) {
 			ip6_dst_store(sk, dst,
 				      ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
-				      &np->daddr : NULL);
+				      &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+				      ipv6_addr_equal(&fl->fl6_src, &np->saddr) ?
+				      &np->saddr :
+#endif
+				      NULL);
 		} else {
 			dst_release(dst);
 		}
-- 
GitLab


From 66729e18df08ee20a9824148236b89f56371659e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:20:34 -0700
Subject: [PATCH 0529/1063] [IPV6] ROUTE: Make sure we have fn->leaf when
 adding a node on subtree.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 667b1b1ea25d0..11f9660a4796d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -80,6 +80,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
 #endif
 
 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
+static struct rt6_info * fib6_find_prefix(struct fib6_node *fn);
 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
 static int fib6_walk(struct fib6_walker_t *w);
 static int fib6_walk_continue(struct fib6_walker_t *w);
@@ -697,7 +698,7 @@ void fib6_force_start_gc(void)
 
 int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 {
-	struct fib6_node *fn;
+	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
 
 	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
@@ -706,6 +707,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 	if (fn == NULL)
 		goto out;
 
+	pn = fn;
+
 #ifdef CONFIG_IPV6_SUBTREES
 	if (rt->rt6i_src.plen) {
 		struct fib6_node *sn;
@@ -751,10 +754,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 			/* Now link new subtree to main tree */
 			sfn->parent = fn;
 			fn->subtree = sfn;
-			if (fn->leaf == NULL) {
-				fn->leaf = rt;
-				atomic_inc(&rt->rt6i_ref);
-			}
 		} else {
 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
 					sizeof(struct in6_addr), rt->rt6i_src.plen,
@@ -764,6 +763,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 				goto st_failure;
 		}
 
+		if (fn->leaf == NULL) {
+			fn->leaf = rt;
+			atomic_inc(&rt->rt6i_ref);
+		}
 		fn = sn;
 	}
 #endif
@@ -777,8 +780,25 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 	}
 
 out:
-	if (err)
+	if (err) {
+#ifdef CONFIG_IPV6_SUBTREES
+		/*
+		 * If fib6_add_1 has cleared the old leaf pointer in the
+		 * super-tree leaf node we have to find a new one for it.
+		 */
+		if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
+			pn->leaf = fib6_find_prefix(pn);
+#if RT6_DEBUG >= 2
+			if (!pn->leaf) {
+				BUG_TRAP(pn->leaf != NULL);
+				pn->leaf = &ip6_null_entry;
+			}
+#endif
+			atomic_inc(&pn->leaf->rt6i_ref);
+		}
+#endif
 		dst_free(&rt->u.dst);
+	}
 	return err;
 
 #ifdef CONFIG_IPV6_SUBTREES
-- 
GitLab


From 2285adc1e6c9f964f9625e7edcd233fccd7a7c92 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:20:54 -0700
Subject: [PATCH 0530/1063] [IPV6] ROUTE: Prune clones from main tree as well.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 11f9660a4796d..35b91ff95db2c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -776,7 +776,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 	if (err == 0) {
 		fib6_start_gc(rt);
 		if (!(rt->rt6i_flags&RTF_CACHE))
-			fib6_prune_clones(fn, rt);
+			fib6_prune_clones(pn, rt);
 	}
 
 out:
-- 
GitLab


From 3fc5e0440be7fab3abae4e801b0ef17e9b3b58c4 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:21:12 -0700
Subject: [PATCH 0531/1063] [IPV6] ROUTE: Fix looking up a route on subtree.

Even on RTN_ROOT node, we need to process its subtree first.
Fix NULL pointer dereference in fib6_locate().

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 35b91ff95db2c..5408b64f3b5fa 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -850,33 +850,26 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 		break;
 	}
 
-	while ((fn->fn_flags & RTN_ROOT) == 0) {
-#ifdef CONFIG_IPV6_SUBTREES
-		if (fn->subtree) {
-			struct fib6_node *st;
-			struct lookup_args *narg;
-
-			narg = args + 1;
-
-			if (narg->addr) {
-				st = fib6_lookup_1(fn->subtree, narg);
-
-				if (st && !(st->fn_flags & RTN_ROOT))
-					return st;
-			}
-		}
-#endif
-
-		if (fn->fn_flags & RTN_RTINFO) {
+	while(fn) {
+		if (SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
 			struct rt6key *key;
 
 			key = (struct rt6key *) ((u8 *) fn->leaf +
 						 args->offset);
 
-			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
-				return fn;
+			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+#ifdef CONFIG_IPV6_SUBTREES
+				if (fn->subtree)
+					fn = fib6_lookup_1(fn->subtree, args + 1);
+#endif
+				if (!fn || fn->fn_flags & RTN_RTINFO)
+					return fn;
+			}
 		}
 
+		if (fn->fn_flags & RTN_ROOT)
+			break;
+
 		fn = fn->parent;
 	}
 
@@ -953,10 +946,8 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src_len) {
 		BUG_TRAP(saddr!=NULL);
-		if (fn == NULL)
-			fn = fn->subtree;
-		if (fn)
-			fn = fib6_locate_1(fn, saddr, src_len,
+		if (fn && fn->subtree)
+			fn = fib6_locate_1(fn->subtree, saddr, src_len,
 					   offsetof(struct rt6_info, rt6i_src));
 	}
 #endif
-- 
GitLab


From 825e288ef4c55a379a97e104c825eb9b74874099 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:21:29 -0700
Subject: [PATCH 0532/1063] [IPV6] ROUTE: Make sure we do not exceed args in
 fib6_lookup_1().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Acked-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5408b64f3b5fa..19ee7375daa9d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -829,6 +829,9 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 	struct fib6_node *fn;
 	int dir;
 
+	if (unlikely(args->offset == 0))
+		return NULL;
+
 	/*
 	 *	Descend on a tree
 	 */
@@ -879,16 +882,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
 			       struct in6_addr *saddr)
 {
-	struct lookup_args args[2];
 	struct fib6_node *fn;
-
-	args[0].offset = offsetof(struct rt6_info, rt6i_dst);
-	args[0].addr = daddr;
-
+	struct lookup_args args[] = {
+		{
+			.offset = offsetof(struct rt6_info, rt6i_dst),
+			.addr = daddr,
+		},
 #ifdef CONFIG_IPV6_SUBTREES
-	args[1].offset = offsetof(struct rt6_info, rt6i_src);
-	args[1].addr = saddr;
+		{
+			.offset = offsetof(struct rt6_info, rt6i_src),
+			.addr = saddr,
+		},
 #endif
+		{
+			.offset = 0,	/* sentinel */
+		}
+	};
 
 	fn = fib6_lookup_1(root, args);
 
-- 
GitLab


From fefc2a6c201aeafc1d0329a140de502d49f69d04 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:21:50 -0700
Subject: [PATCH 0533/1063] [IPV6] ROUTE: Allow searching subtree only.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Acked-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 19ee7375daa9d..b706424e70b87 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -899,7 +899,7 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
 		}
 	};
 
-	fn = fib6_lookup_1(root, args);
+	fn = fib6_lookup_1(root, daddr ? args : args + 1);
 
 	if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
 		fn = root;
-- 
GitLab


From 7fc33165a74301b2c5c90b2f2a1f6907cbd5c6f1 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:22:24 -0700
Subject: [PATCH 0534/1063] [IPV6] ROUTE: Put SUBTREE() as FIB6_SUBTREE() into
 ip6_fib.h for future use.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h |  5 +++++
 net/ipv6/ip6_fib.c    | 20 +++++++++-----------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 9610b887ffb55..6a3f26a04509c 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -60,6 +60,11 @@ struct fib6_node
 	__u32			fn_sernum;
 };
 
+#ifndef CONFIG_IPV6_SUBTREES
+#define FIB6_SUBTREE(fn)	NULL
+#else
+#define FIB6_SUBTREE(fn)	((fn)->subtree)
+#endif
 
 /*
  *	routing information
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b706424e70b87..6536e33d8353c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -73,10 +73,8 @@ static DEFINE_RWLOCK(fib6_walker_lock);
 
 #ifdef CONFIG_IPV6_SUBTREES
 #define FWS_INIT FWS_S
-#define SUBTREE(fn) ((fn)->subtree)
 #else
 #define FWS_INIT FWS_L
-#define SUBTREE(fn) NULL
 #endif
 
 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
@@ -854,7 +852,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 	}
 
 	while(fn) {
-		if (SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
+		if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
 			struct rt6key *key;
 
 			key = (struct rt6key *) ((u8 *) fn->leaf +
@@ -985,7 +983,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
 		if(fn->right)
 			return fn->right->leaf;
 
-		fn = SUBTREE(fn);
+		fn = FIB6_SUBTREE(fn);
 	}
 	return NULL;
 }
@@ -1016,7 +1014,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 		if (fn->right) child = fn->right, children |= 1;
 		if (fn->left) child = fn->left, children |= 2;
 
-		if (children == 3 || SUBTREE(fn) 
+		if (children == 3 || FIB6_SUBTREE(fn)
 #ifdef CONFIG_IPV6_SUBTREES
 		    /* Subtree root (i.e. fn) may have one child */
 		    || (children && fn->fn_flags&RTN_ROOT)
@@ -1035,9 +1033,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 
 		pn = fn->parent;
 #ifdef CONFIG_IPV6_SUBTREES
-		if (SUBTREE(pn) == fn) {
+		if (FIB6_SUBTREE(pn) == fn) {
 			BUG_TRAP(fn->fn_flags&RTN_ROOT);
-			SUBTREE(pn) = NULL;
+			FIB6_SUBTREE(pn) = NULL;
 			nstate = FWS_L;
 		} else {
 			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
@@ -1085,7 +1083,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 		read_unlock(&fib6_walker_lock);
 
 		node_free(fn);
-		if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+		if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
 			return pn;
 
 		rt6_release(pn->leaf);
@@ -1228,8 +1226,8 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
 		switch (w->state) {
 #ifdef CONFIG_IPV6_SUBTREES
 		case FWS_S:
-			if (SUBTREE(fn)) {
-				w->node = SUBTREE(fn);
+			if (FIB6_SUBTREE(fn)) {
+				w->node = FIB6_SUBTREE(fn);
 				continue;
 			}
 			w->state = FWS_L;
@@ -1263,7 +1261,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
 			pn = fn->parent;
 			w->node = pn;
 #ifdef CONFIG_IPV6_SUBTREES
-			if (SUBTREE(pn) == fn) {
+			if (FIB6_SUBTREE(pn) == fn) {
 				BUG_TRAP(fn->fn_flags&RTN_ROOT);
 				w->state = FWS_L;
 				continue;
-- 
GitLab


From 982f56f3a9be4651520c0fdd3d80a5d02e95a178 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:22:39 -0700
Subject: [PATCH 0535/1063] [IPV6] ROUTE: Search subtree when backtracking.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8d00a9d77f019..bd4cf175ff10c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -481,17 +481,23 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 }
 #endif
 
-#define BACKTRACK() \
-if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
-	while ((fn = fn->parent) != NULL) { \
-		if (fn->fn_flags & RTN_TL_ROOT) { \
-			dst_hold(&rt->u.dst); \
-			goto out; \
+#define BACKTRACK(saddr) \
+do { \
+	if (rt == &ip6_null_entry) { \
+		struct fib6_node *pn; \
+		while (fn) { \
+			if (fn->fn_flags & RTN_TL_ROOT) \
+				goto out; \
+			pn = fn->parent; \
+			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
+				fn = fib6_lookup(pn->subtree, NULL, saddr); \
+			else \
+				fn = pn; \
+			if (fn->fn_flags & RTN_RTINFO) \
+				goto restart; \
 		} \
-		if (fn->fn_flags & RTN_RTINFO) \
-			goto restart; \
 	} \
-}
+} while(0)
 
 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
 					     struct flowi *fl, int flags)
@@ -504,7 +510,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
 restart:
 	rt = fn->leaf;
 	rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
-	BACKTRACK();
+	BACKTRACK(&fl->fl6_src);
 	dst_hold(&rt->u.dst);
 out:
 	read_unlock_bh(&table->tb6_lock);
@@ -638,7 +644,7 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
 
 restart:
 	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
-	BACKTRACK();
+	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
@@ -733,7 +739,7 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
 
 restart:
 	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
-	BACKTRACK();
+	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
-- 
GitLab


From 150730d5a53b1bbb486101b2a5fb82ff0d3f916e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:22:55 -0700
Subject: [PATCH 0536/1063] [IPV6] ROUTE: Purge clones on other trees when
 deleting a route.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6536e33d8353c..f0fdaf182b3f3 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1169,8 +1169,18 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 
 	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
 
-	if (!(rt->rt6i_flags&RTF_CACHE))
-		fib6_prune_clones(fn, rt);
+	if (!(rt->rt6i_flags&RTF_CACHE)) {
+		struct fib6_node *pn = fn;
+#ifdef CONFIG_IPV6_SUBTREES
+		/* clones of this route might be in another subtree */
+		if (rt->rt6i_src.plen) {
+			while (!(pn->fn_flags&RTN_ROOT))
+				pn = pn->parent;
+			pn = pn->parent;
+		}
+#endif
+		fib6_prune_clones(pn, rt);
+	}
 
 	/*
 	 *	Walk the leaf entries looking for ourself
-- 
GitLab


From cb15d9c224fcc03b32396c1c7416e777c2dcca34 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:23:11 -0700
Subject: [PATCH 0537/1063] [IPV6] NDISC: Search subtrees when backtracking on
 receipt of redirects.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Acked-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bd4cf175ff10c..fd626d420cd1d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1332,17 +1332,10 @@ static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
 		break;
 	}
 
-	if (!rt) {
-		if (rt6_need_strict(&fl->fl6_dst)) {
-			while ((fn = fn->parent) != NULL) {
-				if (fn->fn_flags & RTN_ROOT)
-					break;
-				if (fn->fn_flags & RTN_RTINFO)
-					goto restart;
-			}
-		}
+	if (!rt)
 		rt = &ip6_null_entry;
-	}
+	BACKTRACK(&fl->fl6_src);
+out:
 	dst_hold(&rt->u.dst);
 
 	read_unlock_bh(&table->tb6_lock);
-- 
GitLab


From c0bece9f2aec546c3750ae3972f80e024a923f34 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:23:25 -0700
Subject: [PATCH 0538/1063] [IPV6] ROUTE: Add credits about subtree fixes.

Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 1 +
 net/ipv6/route.c   | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f0fdaf182b3f3..fbca60950b147 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -18,6 +18,7 @@
  * 	Yuji SEKIYA @USAGI:	Support default route on router node;
  * 				remove ip6_null_entry from the top of
  * 				routing table.
+ * 	Ville Nuorvala:		Fixed routing subtrees.
  */
 #include <linux/errno.h>
 #include <linux/types.h>
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fd626d420cd1d..fd6f2ec4fa099 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -22,6 +22,8 @@
  *		routers in REACHABLE, STALE, DELAY or PROBE states).
  *		- always select the same router if it is (probably)
  *		reachable.  otherwise, round-robin the list.
+ *	Ville Nuorvala
+ *		Fixed routing subtrees.
  */
 
 #include <linux/capability.h>
-- 
GitLab


From 4e96c2b4180aff4f080b77314712073c6ca430e7 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:23:39 -0700
Subject: [PATCH 0539/1063] [IPV6] KCONFIG: Add subtrees support.

This is for developers only.
Based on MIPL2 kernel patch.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 36a6c2b79889e..14f0b336519f4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -136,6 +136,20 @@ config IPV6_TUNNEL
 
 	  If unsure, say N.
 
+config IPV6_SUBTREES
+	bool "IPv6: source address based routing"
+	depends on IPV6 && EXPERIMENTAL
+	---help---
+	  Enable routing by source address or prefix.
+
+	  The destination address is still the primary routing key, so mixing
+	  normal and source prefix specific routes in the same routing table
+	  may sometimes lead to unintended routing behavior.  This can be
+	  avoided by defining different routing tables for the normal and
+	  source prefix specific routes.
+
+	  If unsure, say N.
+
 config IPV6_MULTIPLE_TABLES
 	bool "IPv6: Multiple Routing Tables"
 	depends on IPV6 && EXPERIMENTAL
-- 
GitLab


From 77d16f450ae0452d7d4b009f78debb1294fb435c Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:25:05 -0700
Subject: [PATCH 0540/1063] [IPV6] ROUTE: Unify RT6_F_xxx and RT6_SELECT_F_xxx
 flags

Unify RT6_F_xxx and RT6_SELECT_F_xxx flags into
RT6_LOOKUP_F_xxx flags, and put them into ip6_route.h

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Acked-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h   |  3 ---
 include/net/ip6_route.h |  4 ++++
 net/ipv6/fib6_rules.c   |  2 +-
 net/ipv6/route.c        | 32 ++++++++++++--------------------
 4 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6a3f26a04509c..e4438de3bd6b8 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -173,9 +173,6 @@ struct fib6_table {
 #define RT6_TABLE_LOCAL		RT6_TABLE_MAIN
 #endif
 
-#define RT6_F_STRICT		1
-#define RT6_F_HAS_SADDR		2
-
 typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *,
 					 struct flowi *, int);
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 0d40f84df21b0..2979095700419 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -32,6 +32,10 @@ struct route_info {
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 
+#define RT6_LOOKUP_F_IFACE	0x1
+#define RT6_LOOKUP_F_REACHABLE	0x2
+#define RT6_LOOKUP_F_HAS_SADDR	0x4
+
 struct pol_chain {
 	int			type;
 	int			priority;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2c4fbc855e6c7..7b4908cc52b36 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -117,7 +117,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
 		return 0;
 
-	if ((flags & RT6_F_HAS_SADDR) &&
+	if ((flags & RT6_LOOKUP_F_HAS_SADDR) &&
 	    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
 		return 0;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fd6f2ec4fa099..20691285aee51 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -76,9 +76,6 @@
 
 #define CLONE_OFFLINK_ROUTE 0
 
-#define RT6_SELECT_F_IFACE	0x1
-#define RT6_SELECT_F_REACHABLE	0x2
-
 static int ip6_rt_max_size = 4096;
 static int ip6_rt_gc_min_interval = HZ / 2;
 static int ip6_rt_gc_timeout = 60*HZ;
@@ -340,7 +337,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	int m, n;
 		
 	m = rt6_check_dev(rt, oif);
-	if (!m && (strict & RT6_SELECT_F_IFACE))
+	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 		return -1;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
@@ -348,7 +345,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	n = rt6_check_neigh(rt);
 	if (n > 1)
 		m |= 16;
-	else if (!n && strict & RT6_SELECT_F_REACHABLE)
+	else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
 		return -1;
 	return m;
 }
@@ -388,7 +385,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
 	}
 
 	if (!match &&
-	    (strict & RT6_SELECT_F_REACHABLE) &&
+	    (strict & RT6_LOOKUP_F_REACHABLE) &&
 	    last && last != rt0) {
 		/* no entries matched; do round-robin */
 		static DEFINE_SPINLOCK(lock);
@@ -511,7 +508,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
 	rt = fn->leaf;
-	rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
+	rt = rt6_device_match(rt, fl->oif, flags);
 	BACKTRACK(&fl->fl6_src);
 	dst_hold(&rt->u.dst);
 out:
@@ -537,7 +534,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
 		},
 	};
 	struct dst_entry *dst;
-	int flags = strict ? RT6_F_STRICT : 0;
+	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 
 	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
 	if (dst->error == 0)
@@ -633,10 +630,9 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
 	int strict = 0;
 	int attempts = 3;
 	int err;
-	int reachable = RT6_SELECT_F_REACHABLE;
+	int reachable = RT6_LOOKUP_F_REACHABLE;
 
-	if (flags & RT6_F_STRICT)
-		strict = RT6_SELECT_F_IFACE;
+	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 relookup:
 	read_lock_bh(&table->tb6_lock);
@@ -712,10 +708,7 @@ void ip6_route_input(struct sk_buff *skb)
 		},
 		.proto = iph->nexthdr,
 	};
-	int flags = 0;
-
-	if (rt6_need_strict(&iph->daddr))
-		flags |= RT6_F_STRICT;
+	int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
 
 	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
 }
@@ -728,10 +721,9 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
 	int strict = 0;
 	int attempts = 3;
 	int err;
-	int reachable = RT6_SELECT_F_REACHABLE;
+	int reachable = RT6_LOOKUP_F_REACHABLE;
 
-	if (flags & RT6_F_STRICT)
-		strict = RT6_SELECT_F_IFACE;
+	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 relookup:
 	read_lock_bh(&table->tb6_lock);
@@ -797,7 +789,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 	int flags = 0;
 
 	if (rt6_need_strict(&fl->fl6_dst))
-		flags |= RT6_F_STRICT;
+		flags |= RT6_LOOKUP_F_IFACE;
 
 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 }
@@ -1362,7 +1354,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
 		},
 		.gateway = *gateway,
 	};
-	int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
+	int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
 
 	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
 }
-- 
GitLab


From 7e49e6de30efa716614e280d97963c570f3acf29 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Fri, 22 Sep 2006 15:05:15 -0700
Subject: [PATCH 0541/1063] [XFRM]: Add XFRM_MODE_xxx for future use.

The transformation mode currently selects either IPsec transport or tunnel.
Mobile IPv6 requires two more modes to be added: route optimization and
inbound trigger.
Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h    |  6 ++++--
 include/net/xfrm.h      |  2 +-
 net/ipv4/ah4.c          |  2 +-
 net/ipv4/esp4.c         |  6 +++---
 net/ipv4/ipcomp.c       |  8 ++++----
 net/ipv4/xfrm4_input.c  |  2 +-
 net/ipv4/xfrm4_output.c |  4 ++--
 net/ipv4/xfrm4_policy.c |  2 +-
 net/ipv4/xfrm4_state.c  |  2 +-
 net/ipv4/xfrm4_tunnel.c |  2 +-
 net/ipv6/ah6.c          |  2 +-
 net/ipv6/esp6.c         |  4 ++--
 net/ipv6/ipcomp6.c      |  6 +++---
 net/ipv6/xfrm6_input.c  |  2 +-
 net/ipv6/xfrm6_output.c |  4 ++--
 net/ipv6/xfrm6_policy.c |  2 +-
 net/ipv6/xfrm6_state.c  |  2 +-
 net/ipv6/xfrm6_tunnel.c |  2 +-
 net/key/af_key.c        |  6 +++---
 net/xfrm/xfrm_policy.c  | 11 ++++++-----
 net/xfrm/xfrm_user.c    |  4 ++--
 21 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 46a15c7a1a13e..5154064b6d958 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -120,7 +120,9 @@ enum
 
 #define XFRM_MODE_TRANSPORT 0
 #define XFRM_MODE_TUNNEL 1
-#define XFRM_MODE_MAX 2
+#define XFRM_MODE_ROUTEOPTIMIZATION 2
+#define XFRM_MODE_IN_TRIGGER 3
+#define XFRM_MODE_MAX 4
 
 /* Netlink configuration messages.  */
 enum {
@@ -247,7 +249,7 @@ struct xfrm_usersa_info {
 	__u32				seq;
 	__u32				reqid;
 	__u16				family;
-	__u8				mode; /* 0=transport,1=tunnel */
+	__u8				mode;		/* XFRM_MODE_xxx */
 	__u8				replay_window;
 	__u8				flags;
 #define XFRM_STATE_NOECN	1
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 00bf86e6e82b1..762795624b10a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -298,7 +298,7 @@ struct xfrm_tmpl
 
 	__u32			reqid;
 
-/* Mode: transport/tunnel */
+/* Mode: transport, tunnel etc. */
 	__u8			mode;
 
 /* Sharing mode: unique, this session only, this user only etc. */
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 008e69d2e4239..99542977e47e1 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x)
 		goto error;
 	
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	x->data = ahp;
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b428489f6ccd9..e87377e1d6b64 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -248,7 +248,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 		 *    as per draft-ietf-ipsec-udp-encaps-06,
 		 *    section 3.1.2
 		 */
-		if (!x->props.mode)
+		if (x->props.mode == XFRM_MODE_TRANSPORT)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
@@ -267,7 +267,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		mtu = ALIGN(mtu + 2, blksize);
 	} else {
 		/* The worst case. */
@@ -383,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x)
 	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
 	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5bb9c9f03fb6d..17342430a843b 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 
 out_ok:
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		ip_send_check(iph);
 	return 0;
 }
@@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	t->id.daddr.a4 = x->id.daddr.a4;
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET;
-	t->props.mode = 1;
+	t->props.mode = XFRM_MODE_TUNNEL;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
 
@@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 
 	mutex_lock(&ipcomp_resource_mutex);
@@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
 		goto error;
 	mutex_unlock(&ipcomp_resource_mutex);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 817ed84511a6f..040e8475f295c 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 		if (x->mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode) {
+		if (x->props.mode == XFRM_MODE_TUNNEL) {
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 4a96a9e3ef3bc..5fd115f0c5478 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -54,7 +54,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
 			goto error_nolock;
 	}
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = xfrm4_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
@@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
 		}
 		dst = skb->dst;
 		x = dst->xfrm;
-	} while (x && !x->props.mode);
+	} while (x && (x->props.mode != XFRM_MODE_TUNNEL));
 
 	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
 	err = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8f50eae47d039..a5bed741de2c1 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -96,7 +96,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-		if (xfrm[i]->props.mode) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			remote = xfrm[i]->id.daddr.a4;
 			local  = xfrm[i]->props.saddr.a4;
 			tunnel = 1;
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 81e1751c966e1..97b0c7589711c 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,7 +42,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.saddr = tmpl->saddr;
 	if (x->props.saddr.a4 == 0)
 		x->props.saddr.a4 = saddr->a4;
-	if (tmpl->mode && x->props.saddr.a4 == 0) {
+	if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) {
 		struct rtable *rt;
 	        struct flowi fl_tunnel = {
         	        .nl_u = {
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index f8ceaa127c836..f110af5b1319f 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
 
 static int ipip_init_state(struct xfrm_state *x)
 {
-	if (!x->props.mode)
+	if (x->props.mode != XFRM_MODE_TUNNEL)
 		return -EINVAL;
 
 	if (x->encap)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 00ffa7bc6c9f4..60954fc7eb36e 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -398,7 +398,7 @@ static int ah6_init_state(struct xfrm_state *x)
 		goto error;
 	
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct ipv6hdr);
 	x->data = ahp;
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2ebfd281e7218..2b8e52e1d0ab8 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -237,7 +237,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		mtu = ALIGN(mtu + 2, blksize);
 	} else {
 		/* The worst case. */
@@ -358,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x)
 	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
 	x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct ipv6hdr);
 	x->data = esp;
 	return 0;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index a81e9e9d93bd2..19eba8d9f851e 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET6;
-	t->props.mode = 1;
+	t->props.mode = XFRM_MODE_TUNNEL;
 	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
 
 	if (xfrm_init_state(t))
@@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct ipv6hdr);
 	
 	mutex_lock(&ipcomp6_resource_mutex);
@@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto error;
 	mutex_unlock(&ipcomp6_resource_mutex);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp6_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0405d74ff9100..ee2f6b3908b63 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi)
 		if (x->mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode) { /* XXX */
+		if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6d111743e5083..26f18869f77b1 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -47,7 +47,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
 			goto error_nolock;
 	}
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = xfrm6_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
@@ -80,7 +80,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
 		}
 		dst = skb->dst;
 		x = dst->xfrm;
-	} while (x && !x->props.mode);
+	} while (x && (x->props.mode != XFRM_MODE_TUNNEL));
 
 	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
 	err = 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 73cd250aecbb7..81355bb503282 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -114,7 +114,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-		if (xfrm[i]->props.mode) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			remote = (struct in6_addr*)&xfrm[i]->id.daddr;
 			local  = (struct in6_addr*)&xfrm[i]->props.saddr;
 			tunnel = 1;
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b33296b3f6de5..a1a1f54764424 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -42,7 +42,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
 	if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
 		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
-	if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) {
+	if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) {
 		struct rt6_info *rt;
 		struct flowi fl_tunnel = {
 			.nl_u = {
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index c8f9369c2a876..59685ee8f700d 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 static int xfrm6_tunnel_init_state(struct xfrm_state *x)
 {
-	if (!x->props.mode)
+	if (x->props.mode != XFRM_MODE_TUNNEL)
 		return -EINVAL;
 
 	if (x->encap)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 797c744a84382..19e047b0e6786 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1765,7 +1765,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
 	}
 
 	/* addresses present only in tunnel mode */
-	if (t->mode) {
+	if (t->mode == XFRM_MODE_TUNNEL) {
 		switch (xp->family) {
 		case AF_INET:
 			sin = (void*)(rq+1);
@@ -1997,7 +1997,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 		int req_size;
 
 		req_size = sizeof(struct sadb_x_ipsecrequest);
-		if (t->mode)
+		if (t->mode == XFRM_MODE_TUNNEL)
 			req_size += 2*socklen;
 		else
 			size -= 2*socklen;
@@ -2013,7 +2013,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 		if (t->optional)
 			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
 		rq->sadb_x_ipsecrequest_reqid = t->reqid;
-		if (t->mode) {
+		if (t->mode == XFRM_MODE_TUNNEL) {
 			switch (xp->family) {
 			case AF_INET:
 				sin = (void*)(rq+1);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 32c963c905735..a0d58971391d0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -779,7 +779,7 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
 		xfrm_address_t *local  = saddr;
 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
-		if (tmpl->mode) {
+		if (tmpl->mode == XFRM_MODE_TUNNEL) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
 		}
@@ -1005,7 +1005,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
 		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
 		x->props.mode == tmpl->mode &&
 		(tmpl->aalgos & (1<<x->props.aalgo)) &&
-		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
+		!(x->props.mode != XFRM_MODE_TRANSPORT &&
+		  xfrm_state_addr_cmp(tmpl, x, family));
 }
 
 static inline int
@@ -1015,14 +1016,14 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	int idx = start;
 
 	if (tmpl->optional) {
-		if (!tmpl->mode)
+		if (tmpl->mode == XFRM_MODE_TRANSPORT)
 			return start;
 	} else
 		start = -1;
 	for (; idx < sp->len; idx++) {
 		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
 			return ++idx;
-		if (sp->xvec[idx]->props.mode)
+		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT)
 			break;
 	}
 	return start;
@@ -1047,7 +1048,7 @@ EXPORT_SYMBOL(xfrm_decode_session);
 static inline int secpath_has_tunnel(struct sec_path *sp, int k)
 {
 	for (; k < sp->len; k++) {
-		if (sp->xvec[k]->props.mode)
+		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT)
 			return 1;
 	}
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f70e158874d26..0d580ac197715 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -174,8 +174,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	err = -EINVAL;
 	switch (p->mode) {
-	case 0:
-	case 1:
+	case XFRM_MODE_TRANSPORT:
+	case XFRM_MODE_TUNNEL:
 		break;
 
 	default:
-- 
GitLab


From 5794708f11551b6d19b10673abf4b0202f66b44d Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Fri, 22 Sep 2006 15:06:24 -0700
Subject: [PATCH 0542/1063] [XFRM]: Introduce a helper to compare id protocol.

Put the helper in the header for future use.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    | 6 ++++++
 net/xfrm/xfrm_state.c | 6 +++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 762795624b10a..5b364b0a6a281 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -9,6 +9,7 @@
 #include <linux/skbuff.h>
 #include <linux/socket.h>
 #include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
 #include <linux/in6.h>
 #include <linux/mutex.h>
 
@@ -835,6 +836,11 @@ static inline int xfrm_state_kern(struct xfrm_state *x)
 	return atomic_read(&x->tunnel_users);
 }
 
+static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
+{
+	return (userproto == IPSEC_PROTO_ANY || proto == userproto);
+}
+
 /*
  * xfrm algorithm information
  */
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1c796087ee786..34c038cbdf464 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -294,7 +294,7 @@ void xfrm_state_flush(u8 proto)
 restart:
 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
 			if (!xfrm_state_kern(x) &&
-			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
+			    xfrm_id_proto_match(x->id.proto, proto)) {
 				xfrm_state_hold(x);
 				spin_unlock_bh(&xfrm_state_lock);
 
@@ -772,7 +772,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 	spin_lock_bh(&xfrm_state_lock);
 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+			if (xfrm_id_proto_match(x->id.proto, proto))
 				count++;
 		}
 	}
@@ -783,7 +783,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 
 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+			if (!xfrm_id_proto_match(x->id.proto, proto))
 				continue;
 			err = func(x, --count, data);
 			if (err)
-- 
GitLab


From dc00a525603650a1471c823a1e48c6505c2f9765 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:49:52 -0700
Subject: [PATCH 0543/1063] [XFRM] STATE: Allow non IPsec protocol.

Two more transformation protocols (routing header and destination
options header) will be added for Mobile IPv6.
xfrm_id_proto_match() now treats zero as "any protocol",
IPSEC_PROTO_ANY as "any IPsec protocol" (AH, ESP or IPComp), and any
other value as an exact match.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h   | 5 ++++-
 net/xfrm/xfrm_user.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5b364b0a6a281..2a7d2132a1ae5 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -838,7 +838,10 @@ static inline int xfrm_state_kern(struct xfrm_state *x)
 
 static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
 {
-	return (userproto == IPSEC_PROTO_ANY || proto == userproto);
+	return (!userproto || proto == userproto ||
+		(userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH ||
+						  proto == IPPROTO_ESP ||
+						  proto == IPPROTO_COMP)));
 }
 
 /*
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0d580ac197715..41f3d51ffc332 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -542,7 +542,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 	info.nlmsg_flags = NLM_F_MULTI;
 	info.this_idx = 0;
 	info.start_idx = cb->args[0];
-	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
+	(void) xfrm_state_walk(0, dump_one_state, &info);
 	cb->args[0] = info.this_idx;
 
 	return skb->len;
-- 
GitLab


From 622dc8281a80374873686514e46f852093d91106 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:52:01 -0700
Subject: [PATCH 0544/1063] [XFRM]: Expand XFRM_MAX_DEPTH for route
 optimization.

XFRM_MAX_DEPTH is the limit on the number of transformation states that
can be applied to the same flow. Mobile IPv6 transformations use two more
extension headers, so raise the limit from 4 to 6.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2a7d2132a1ae5..aa3be68041be3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -314,7 +314,7 @@ struct xfrm_tmpl
 	__u32			calgos;
 };
 
-#define XFRM_MAX_DEPTH		4
+#define XFRM_MAX_DEPTH		6
 
 struct xfrm_policy
 {
-- 
GitLab


From 6c44e6b7ab500d7e3e3f406c83325671be51a752 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:53:57 -0700
Subject: [PATCH 0545/1063] [XFRM] STATE: Add source address list.

Support source address based searching.
Mobile IPv6 will use it.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     | 26 ++++++++++++++++++++++++++
 net/ipv4/xfrm4_state.c |  3 +++
 net/ipv6/xfrm6_state.c |  3 +++
 net/xfrm/xfrm_state.c  | 21 +++++++++++++++++++--
 4 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index aa3be68041be3..88145e3348d05 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -95,6 +95,7 @@ struct xfrm_state
 {
 	/* Note: bydst is re-used during gc */
 	struct list_head	bydst;
+	struct list_head	bysrc;
 	struct list_head	byspi;
 
 	atomic_t		refcnt;
@@ -236,6 +237,7 @@ extern int __xfrm_state_delete(struct xfrm_state *x);
 struct xfrm_state_afinfo {
 	unsigned short		family;
 	struct list_head	*state_bydst;
+	struct list_head	*state_bysrc;
 	struct list_head	*state_byspi;
 	int			(*init_flags)(struct xfrm_state *x);
 	void			(*init_tempsel)(struct xfrm_state *x, struct flowi *fl,
@@ -420,6 +422,30 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
 	return 0;
 }
 
+static __inline__
+unsigned __xfrm4_src_hash(xfrm_address_t *addr)
+{
+	return __xfrm4_dst_hash(addr);
+}
+
+static __inline__
+unsigned __xfrm6_src_hash(xfrm_address_t *addr)
+{
+	return __xfrm6_dst_hash(addr);
+}
+
+static __inline__
+unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_src_hash(addr);
+	case AF_INET6:
+		return __xfrm6_src_hash(addr);
+	}
+	return 0;
+}
+
 static __inline__
 unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
 {
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 97b0c7589711c..c56b258fad735 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -122,6 +122,9 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto,
 		add_timer(&x0->timer);
 		xfrm_state_hold(x0);
 		list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
+		h = __xfrm4_src_hash(saddr);
+		xfrm_state_hold(x0);
+		list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h);
 		wake_up(&km_waitq);
 	}
 	if (x0)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index a1a1f54764424..2fb07850449f5 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -126,6 +126,9 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto,
 		add_timer(&x0->timer);
 		xfrm_state_hold(x0);
 		list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
+		h = __xfrm6_src_hash(saddr);
+		xfrm_state_hold(x0);
+		list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h);
 		wake_up(&km_waitq);
 	}
 	if (x0)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 34c038cbdf464..2a9992894e69d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -45,6 +45,7 @@ static DEFINE_SPINLOCK(xfrm_state_lock);
  * Also, it can be used by ah/esp icmp error handler to find offending SA.
  */
 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
+static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
 
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
@@ -200,6 +201,7 @@ struct xfrm_state *xfrm_state_alloc(void)
 		atomic_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
 		INIT_LIST_HEAD(&x->bydst);
+		INIT_LIST_HEAD(&x->bysrc);
 		INIT_LIST_HEAD(&x->byspi);
 		init_timer(&x->timer);
 		x->timer.function = xfrm_timer_handler;
@@ -240,6 +242,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		spin_lock(&xfrm_state_lock);
 		list_del(&x->bydst);
 		__xfrm_state_put(x);
+		list_del(&x->bysrc);
+		__xfrm_state_put(x);
 		if (x->id.spi) {
 			list_del(&x->byspi);
 			__xfrm_state_put(x);
@@ -415,6 +419,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			x->km.state = XFRM_STATE_ACQ;
 			list_add_tail(&x->bydst, xfrm_state_bydst+h);
 			xfrm_state_hold(x);
+			list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
+			xfrm_state_hold(x);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
 				list_add(&x->byspi, xfrm_state_byspi+h);
@@ -448,11 +454,19 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 	list_add(&x->bydst, xfrm_state_bydst+h);
 	xfrm_state_hold(x);
 
-	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+	h = xfrm_src_hash(&x->props.saddr, x->props.family);
 
-	list_add(&x->byspi, xfrm_state_byspi+h);
+	list_add(&x->bysrc, xfrm_state_bysrc+h);
 	xfrm_state_hold(x);
 
+	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
+		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
+				  x->props.family);
+
+		list_add(&x->byspi, xfrm_state_byspi+h);
+		xfrm_state_hold(x);
+	}
+
 	if (!mod_timer(&x->timer, jiffies + HZ))
 		xfrm_state_hold(x);
 
@@ -1075,6 +1089,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 		err = -ENOBUFS;
 	else {
 		afinfo->state_bydst = xfrm_state_bydst;
+		afinfo->state_bysrc = xfrm_state_bysrc;
 		afinfo->state_byspi = xfrm_state_byspi;
 		xfrm_state_afinfo[afinfo->family] = afinfo;
 	}
@@ -1097,6 +1112,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 		else {
 			xfrm_state_afinfo[afinfo->family] = NULL;
 			afinfo->state_byspi = NULL;
+			afinfo->state_bysrc = NULL;
 			afinfo->state_bydst = NULL;
 		}
 	}
@@ -1218,6 +1234,7 @@ void __init xfrm_state_init(void)
 
 	for (i=0; i<XFRM_DST_HSIZE; i++) {
 		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
+		INIT_LIST_HEAD(&xfrm_state_bysrc[i]);
 		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
 	}
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
-- 
GitLab


From eb2971b68a7d17a7d0fa2c7fc6fbc4bfe41cd694 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:56:04 -0700
Subject: [PATCH 0546/1063] [XFRM] STATE: Search by address using source
 address list.

Add support for searching transformation states by their addresses,
using the source address list, for Mobile IPv6 usage.
To make this usable from user-space, a new attribute type for the
source address (XFRMA_SRCADDR) is also added as an xfrm state option.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h   |  1 +
 include/net/xfrm.h     |  2 ++
 net/ipv4/xfrm4_state.c |  9 +++++++
 net/ipv6/xfrm6_state.c | 21 +++++++++++++++
 net/xfrm/xfrm_state.c  | 37 +++++++++++++++++++++++---
 net/xfrm/xfrm_user.c   | 59 +++++++++++++++++++++++++++++++++++++-----
 6 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 5154064b6d958..66343d3d4b91e 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -234,6 +234,7 @@ enum xfrm_attr_type_t {
 	XFRMA_REPLAY_VAL,
 	XFRMA_REPLAY_THRESH,
 	XFRMA_ETIMER_THRESH,
+	XFRMA_SRCADDR,		/* xfrm_address_t */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 88145e3348d05..d9c40e713184b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -244,6 +244,7 @@ struct xfrm_state_afinfo {
 						struct xfrm_tmpl *tmpl,
 						xfrm_address_t *daddr, xfrm_address_t *saddr);
 	struct xfrm_state	*(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto);
+	struct xfrm_state	*(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto);
 	struct xfrm_state	*(*find_acq)(u8 mode, u32 reqid, u8 proto, 
 					     xfrm_address_t *daddr, xfrm_address_t *saddr, 
 					     int create);
@@ -937,6 +938,7 @@ extern void xfrm_state_insert(struct xfrm_state *x);
 extern int xfrm_state_add(struct xfrm_state *x);
 extern int xfrm_state_update(struct xfrm_state *x);
 extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family);
+extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family);
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
 extern void xfrm_state_flush(u8 proto);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index c56b258fad735..616be131b4e36 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -80,6 +80,14 @@ __xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
 	return NULL;
 }
 
+/* placeholder until ipv4's code is written */
+static struct xfrm_state *
+__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
+			    u8 proto)
+{
+	return NULL;
+}
+
 static struct xfrm_state *
 __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, 
 		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
@@ -137,6 +145,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
 	.state_lookup		= __xfrm4_state_lookup,
+	.state_lookup_byaddr	= __xfrm4_state_lookup_byaddr,
 	.find_acq		= __xfrm4_find_acq,
 };
 
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 2fb07850449f5..9c95b9d3e1108 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -63,6 +63,26 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.family = AF_INET6;
 }
 
+static struct xfrm_state *
+__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
+			    u8 proto)
+{
+	struct xfrm_state *x = NULL;
+	unsigned h;
+
+	h = __xfrm6_src_hash(saddr);
+	list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) {
+		if (x->props.family == AF_INET6 &&
+		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
+		    ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
+		    proto == x->id.proto) {
+			xfrm_state_hold(x);
+			return x;
+		}
+	}
+	return NULL;
+}
+
 static struct xfrm_state *
 __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
 {
@@ -140,6 +160,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.state_lookup		= __xfrm6_state_lookup,
+	.state_lookup_byaddr	= __xfrm6_state_lookup_byaddr,
 	.find_acq		= __xfrm6_find_acq,
 };
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2a9992894e69d..11f480b129527 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -487,6 +487,16 @@ void xfrm_state_insert(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
+static inline struct xfrm_state *
+__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x,
+		    int use_spi)
+{
+	if (use_spi)
+		return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	else
+		return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto);
+}
+
 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
 
 int xfrm_state_add(struct xfrm_state *x)
@@ -495,6 +505,7 @@ int xfrm_state_add(struct xfrm_state *x)
 	struct xfrm_state *x1;
 	int family;
 	int err;
+	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	family = x->props.family;
 	afinfo = xfrm_state_get_afinfo(family);
@@ -503,7 +514,7 @@ int xfrm_state_add(struct xfrm_state *x)
 
 	spin_lock_bh(&xfrm_state_lock);
 
-	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	x1 = __xfrm_state_locate(afinfo, x, use_spi);
 	if (x1) {
 		xfrm_state_put(x1);
 		x1 = NULL;
@@ -511,7 +522,7 @@ int xfrm_state_add(struct xfrm_state *x)
 		goto out;
 	}
 
-	if (x->km.seq) {
+	if (use_spi && x->km.seq) {
 		x1 = __xfrm_find_acq_byseq(x->km.seq);
 		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
 			xfrm_state_put(x1);
@@ -519,7 +530,7 @@ int xfrm_state_add(struct xfrm_state *x)
 		}
 	}
 
-	if (!x1)
+	if (use_spi && !x1)
 		x1 = afinfo->find_acq(
 			x->props.mode, x->props.reqid, x->id.proto,
 			&x->id.daddr, &x->props.saddr, 0);
@@ -548,13 +559,14 @@ int xfrm_state_update(struct xfrm_state *x)
 	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_state *x1;
 	int err;
+	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	afinfo = xfrm_state_get_afinfo(x->props.family);
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	x1 = __xfrm_state_locate(afinfo, x, use_spi);
 
 	err = -ESRCH;
 	if (!x1)
@@ -674,6 +686,23 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
 }
 EXPORT_SYMBOL(xfrm_state_lookup);
 
+struct xfrm_state *
+xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
+			 u8 proto, unsigned short family)
+{
+	struct xfrm_state *x;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return NULL;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = afinfo->state_lookup_byaddr(daddr, saddr, proto);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
+
 struct xfrm_state *
 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
 	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 41f3d51ffc332..b5f8ab71aa549 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -87,6 +87,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma)
 	return 0;
 }
 
+static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type,
+			   xfrm_address_t **addrp)
+{
+	struct rtattr *rt = xfrma[type - 1];
+
+	if (!rt)
+		return 0;
+
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp))
+		return -EINVAL;
+
+	if (addrp)
+		*addrp = RTA_DATA(rt);
+
+	return 0;
+}
 
 static inline int verify_sec_ctx_len(struct rtattr **xfrma)
 {
@@ -418,16 +434,48 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 	return err;
 }
 
+static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
+						 struct rtattr **xfrma,
+						 int *errp)
+{
+	struct xfrm_state *x = NULL;
+	int err;
+
+	if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) {
+		err = -ESRCH;
+		x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	} else {
+		xfrm_address_t *saddr = NULL;
+
+		err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr);
+		if (err)
+			goto out;
+
+		if (!saddr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto,
+					     p->family);
+	}
+
+ out:
+	if (!x && errp)
+		*errp = err;
+	return x;
+}
+
 static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 {
 	struct xfrm_state *x;
-	int err;
+	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
 
-	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
 	if (x == NULL)
-		return -ESRCH;
+		return err;
 
 	if ((err = security_xfrm_state_delete(x)) != 0)
 		goto out;
@@ -578,10 +626,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
 	struct xfrm_state *x;
 	struct sk_buff *resp_skb;
-	int err;
+	int err = -ESRCH;
 
-	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
-	err = -ESRCH;
+	x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
 	if (x == NULL)
 		goto out_noput;
 
-- 
GitLab


From aee5adb4307c4c63a4dc5f3b49984d76f8a71b5b Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:57:28 -0700
Subject: [PATCH 0547/1063] [XFRM] STATE: Add a hook to find offset to be
 inserted header in outbound.

In the current kernel, IPv6 IPsec uses ip6_find_1stfragopt() to find
the offset at which a header is inserted on outbound in transport mode.
(BTW, this may not be needed in the IPv4 case.)  Mobile IPv6 requires
different logic for the routing header and the destination options
header respectively. This patch adds a common hook for finding the
offset and adapts IPsec to use it.

Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h              | 3 +++
 net/ipv6/ah6.c                  | 3 ++-
 net/ipv6/esp6.c                 | 3 ++-
 net/ipv6/ipcomp6.c              | 1 +
 net/ipv6/ipv6_syms.c            | 1 +
 net/ipv6/xfrm6_mode_transport.c | 2 +-
 net/ipv6/xfrm6_output.c         | 6 ++++++
 7 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d9c40e713184b..eed48f832ce15 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -265,6 +265,7 @@ struct xfrm_type
 	void			(*destructor)(struct xfrm_state *);
 	int			(*input)(struct xfrm_state *, struct sk_buff *skb);
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
+	int			(*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **);
 	/* Estimate maximal size of result of transformation of a dgram */
 	u32			(*get_max_size)(struct xfrm_state *, int size);
 };
@@ -960,6 +961,8 @@ extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr);
 extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr);
 extern u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr);
 extern int xfrm6_output(struct sk_buff *skb);
+extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
+				 u8 **prevhdr);
 
 #ifdef CONFIG_XFRM
 extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 60954fc7eb36e..6c0aa51319a55 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -435,7 +435,8 @@ static struct xfrm_type ah6_type =
 	.init_state	= ah6_init_state,
 	.destructor	= ah6_destroy,
 	.input		= ah6_input,
-	.output		= ah6_output
+	.output		= ah6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol ah6_protocol = {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2b8e52e1d0ab8..ae50b95111510 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -379,7 +379,8 @@ static struct xfrm_type esp6_type =
 	.destructor	= esp6_destroy,
 	.get_max_size	= esp6_get_max_size,
 	.input		= esp6_input,
-	.output		= esp6_output
+	.output		= esp6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol esp6_protocol = {
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 19eba8d9f851e..ad9c6e824e629 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -461,6 +461,7 @@ static struct xfrm_type ipcomp6_type =
 	.destructor	= ipcomp6_destroy,
 	.input		= ipcomp6_input,
 	.output		= ipcomp6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol ipcomp6_protocol = 
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index dd4d1ce777699..e1a741612888a 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr);
 EXPORT_SYMBOL(in6_dev_finish_destroy);
 #ifdef CONFIG_XFRM
 EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
 #endif
 EXPORT_SYMBOL(rt6_lookup);
 EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 711d713e36d84..a5dce216024d5 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -35,7 +35,7 @@ static int xfrm6_transport_output(struct sk_buff *skb)
 	skb_push(skb, x->props.header_len);
 	iph = skb->nh.ipv6h;
 
-	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
 	skb->h.raw = skb->data + hdr_len;
 	memmove(skb->data, iph, hdr_len);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 26f18869f77b1..b4628fbf8ff59 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,12 @@
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
+int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
+			  u8 **prevhdr)
+{
+	return ip6_find_1stfragopt(skb, prevhdr);
+}
+
 static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
-- 
GitLab


From 1d71627d699eca831c1fbfb66ea67bb1fba41415 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 17:59:44 -0700
Subject: [PATCH 0548/1063] [XFRM] STATE: Introduce route optimization mode.

Route optimization is used with routing header and destination options
header for Mobile IPv6.

On outbound it makes header space in the same way as IPsec transport
mode. On inbound it does nothing, because the exthdrs.c functions are
responsible for updating the skbuff information for these headers.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig         |  7 +++
 net/ipv6/Makefile        |  1 +
 net/ipv6/xfrm6_mode_ro.c | 94 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 net/ipv6/xfrm6_mode_ro.c

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 14f0b336519f4..1188d9560242d 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -127,6 +127,13 @@ config INET6_XFRM_MODE_TUNNEL
 
 	  If unsure, say Y.
 
+config INET6_XFRM_MODE_ROUTEOPTIMIZATION
+	tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	select XFRM
+	---help---
+	  Support for MIPv6 route optimization mode.
+
 config IPV6_TUNNEL
 	tristate "IPv6: IPv6-in-IPv6 tunnel"
 	select INET6_TUNNEL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 9eebf6091279d..87e912e319223 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
 obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
 obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
 obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
 
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
new file mode 100644
index 0000000000000..c11c335312f9c
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -0,0 +1,94 @@
+/*
+ * xfrm6_mode_ro.c - Route optimization mode for IPv6.
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add route optimization header space.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the route optimization header.
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.  The value of skb->data will always
+ * point to the top IP header.
+ */
+static int xfrm6_ro_output(struct sk_buff *skb)
+{
+	struct xfrm_state *x = skb->dst->xfrm;
+	struct ipv6hdr *iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	skb_push(skb, x->props.header_len);
+	iph = skb->nh.ipv6h;
+
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	skb->nh.raw = prevhdr - x->props.header_len;
+	skb->h.raw = skb->data + hdr_len;
+	memmove(skb->data, iph, hdr_len);
+	return 0;
+}
+
+/*
+ * Unlike IPsec, nothing needs to be done for route optimization headers here.
+ */
+static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct xfrm_mode xfrm6_ro_mode = {
+	.input = xfrm6_ro_input,
+	.output = xfrm6_ro_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_ROUTEOPTIMIZATION,
+};
+
+static int __init xfrm6_ro_init(void)
+{
+	return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
+}
+
+static void __exit xfrm6_ro_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_ro_init);
+module_exit(xfrm6_ro_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
-- 
GitLab


From f3bd484021d9486b826b422a017d75dd0bd258ad Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 18:00:48 -0700
Subject: [PATCH 0549/1063] [XFRM]: Restrict authentication algorithm only when
 inbound transformation protocol is IPsec.

For Mobile IPv6 usage, routing header or destination options header is
used and it doesn't require this comparison. It is checked only for
IPsec template.

Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a0d58971391d0..f1cdcfb90959f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1004,7 +1004,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
 		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
 		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
 		x->props.mode == tmpl->mode &&
-		(tmpl->aalgos & (1<<x->props.aalgo)) &&
+		((tmpl->aalgos & (1<<x->props.aalgo)) ||
+		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
 		!(x->props.mode != XFRM_MODE_TRANSPORT &&
 		  xfrm_state_addr_cmp(tmpl, x, family));
 }
-- 
GitLab


From fbd9a5b47ee9c319ff0cae584391241ce78ffd6b Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 18:08:21 -0700
Subject: [PATCH 0550/1063] [XFRM] STATE: Common receive function for route
 optimization extension headers.

The XFRM_STATE_WILDRECV flag is introduced. It is set on the last-resort
state, which receives packets that are not route optimized but still use
such extension headers, i.e. Mobile IPv6 signaling (binding update and
acknowledgement).  A node with Mobile IPv6 enabled adds this state.

Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h   |   1 +
 include/net/xfrm.h     |   2 +
 net/ipv6/ipv6_syms.c   |   1 +
 net/ipv6/xfrm6_input.c | 108 +++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_state.c  |   1 +
 5 files changed, 113 insertions(+)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 66343d3d4b91e..a7c9e4cfb15b3 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -256,6 +256,7 @@ struct xfrm_usersa_info {
 #define XFRM_STATE_NOECN	1
 #define XFRM_STATE_DECAP_DSCP	2
 #define XFRM_STATE_NOPMTUDISC	4
+#define XFRM_STATE_WILDRECV	8
 };
 
 struct xfrm_usersa_id {
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index eed48f832ce15..0d735a5aba614 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -955,6 +955,8 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
 extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
 extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi);
 extern int xfrm6_rcv(struct sk_buff **pskb);
+extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
+			    xfrm_address_t *saddr, u8 proto);
 extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler);
 extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler);
 extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr);
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index e1a741612888a..7b7b90d9c3d06 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr);
 EXPORT_SYMBOL(in6_dev_finish_destroy);
 #ifdef CONFIG_XFRM
 EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_input_addr);
 EXPORT_SYMBOL(xfrm6_find_1stfragopt);
 #endif
 EXPORT_SYMBOL(rt6_lookup);
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index ee2f6b3908b63..a40a057890133 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb)
 {
 	return xfrm6_rcv_spi(*pskb, 0);
 }
+
+int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
+		     xfrm_address_t *saddr, u8 proto)
+{
+ 	struct xfrm_state *x = NULL;
+ 	int wildcard = 0;
+	struct in6_addr any;
+	xfrm_address_t *xany;
+	struct xfrm_state *xfrm_vec_one = NULL;
+ 	int nh = 0;
+	int i = 0;
+
+	ipv6_addr_set(&any, 0, 0, 0, 0);
+	xany = (xfrm_address_t *)&any;
+
+	for (i = 0; i < 3; i++) {
+		xfrm_address_t *dst, *src;
+		switch (i) {
+		case 0:
+			dst = daddr;
+			src = saddr;
+			break;
+		case 1:
+			/* lookup state with wild-card source address */
+			wildcard = 1;
+			dst = daddr;
+			src = xany;
+			break;
+		case 2:
+		default:
+ 			/* lookup state with wild-card addresses */
+			wildcard = 1; /* XXX */
+			dst = xany;
+			src = xany;
+			break;
+ 		}
+
+		x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6);
+		if (!x)
+			continue;
+
+		spin_lock(&x->lock);
+
+		if (wildcard) {
+			if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) {
+				spin_unlock(&x->lock);
+				xfrm_state_put(x);
+				x = NULL;
+				continue;
+			}
+		}
+
+		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+			spin_unlock(&x->lock);
+			xfrm_state_put(x);
+ 			x = NULL;
+ 			continue;
+		}
+		if (xfrm_state_check_expire(x)) {
+			spin_unlock(&x->lock);
+			xfrm_state_put(x);
+			x = NULL;
+			continue;
+		}
+
+		nh = x->type->input(x, skb);
+		if (nh <= 0) {
+			spin_unlock(&x->lock);
+			xfrm_state_put(x);
+			x = NULL;
+			continue;
+		}
+
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
+
+		spin_unlock(&x->lock);
+
+		xfrm_vec_one = x;
+		break;
+	}
+
+	if (!xfrm_vec_one)
+		goto drop;
+
+	/* Allocate new secpath or COW existing one. */
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+
+	if (1 + skb->sp->len > XFRM_MAX_DEPTH)
+		goto drop;
+
+	skb->sp->xvec[skb->sp->len] = xfrm_vec_one;
+	skb->sp->len ++;
+
+	return 1;
+drop:
+	if (xfrm_vec_one)
+		xfrm_state_put(xfrm_vec_one);
+	return -1;
+}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 11f480b129527..f05371556cced 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -352,6 +352,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
-- 
GitLab


From 9e51fd371a022318c5b64b831c43026e89bc4f75 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 18:09:09 -0700
Subject: [PATCH 0551/1063] [XFRM]: Rename secpath_has_tunnel to
 secpath_has_nontransport.

In the current kernel, when a mismatch occurs between templates and states,
an inbound transformation state is allowed if it is in transport mode and
disallowed if it is in tunnel mode.
Now that Mobile IPv6 adds two more modes, the function name is misleading:
inbound transformation can allow only transport mode when a mismatch occurs
between template and secpath.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f1cdcfb90959f..56abb5c057d4a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1046,7 +1046,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family
 }
 EXPORT_SYMBOL(xfrm_decode_session);
 
-static inline int secpath_has_tunnel(struct sec_path *sp, int k)
+static inline int secpath_has_nontransport(struct sec_path *sp, int k)
 {
 	for (; k < sp->len; k++) {
 		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT)
@@ -1087,7 +1087,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 					xfrm_policy_lookup);
 
 	if (!pol)
-		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
+		return !skb->sp || !secpath_has_nontransport(skb->sp, 0);
 
 	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
 
@@ -1111,7 +1111,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 				goto reject;
 		}
 
-		if (secpath_has_tunnel(sp, k))
+		if (secpath_has_nontransport(sp, k))
 			goto reject;
 
 		xfrm_pol_put(pol);
-- 
GitLab


From 99505a843673faeae962a8cde128c7c034ba6b5e Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 18:10:33 -0700
Subject: [PATCH 0552/1063] [XFRM] STATE: Add a hook to obtain local/remote
 outbound address.

In IPsec tunnel mode, outbound transformation replaces both the source and
destination addresses with the state's end-point addresses at the same time.
The addresses must also be changed for Mobile IPv6 route optimization, but
we should take care of the following differences:
 - the resulting address is not an end-point but a care-of address
 - only one of source or destination is replaced for each state
This hook is a common mechanism for changing the outbound address.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      |  2 ++
 net/ipv6/xfrm6_policy.c | 20 ++++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0d735a5aba614..aa3ac994477b9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -266,6 +266,8 @@ struct xfrm_type
 	int			(*input)(struct xfrm_state *, struct sk_buff *skb);
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
 	int			(*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **);
+	xfrm_address_t		*(*local_addr)(struct xfrm_state *, xfrm_address_t *);
+	xfrm_address_t		*(*remote_addr)(struct xfrm_state *, xfrm_address_t *);
 	/* Estimate maximal size of result of transformation of a dgram */
 	u32			(*get_max_size)(struct xfrm_state *, int size);
 };
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 81355bb503282..9328fc88708ab 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -59,6 +59,22 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 	return dst;
 }
 
+static inline struct in6_addr*
+__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr)
+{
+	return (x->type->remote_addr) ?
+		(struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) :
+		(struct in6_addr*)&x->id.daddr;
+}
+
+static inline struct in6_addr*
+__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr)
+{
+	return (x->type->local_addr) ?
+		(struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) :
+		(struct in6_addr*)&x->props.saddr;
+}
+
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -115,8 +131,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst1->next = dst_prev;
 		dst_prev = dst1;
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			remote = (struct in6_addr*)&xfrm[i]->id.daddr;
-			local  = (struct in6_addr*)&xfrm[i]->props.saddr;
+			remote = __xfrm6_bundle_addr_remote(xfrm[i], remote);
+			local  = __xfrm6_bundle_addr_local(xfrm[i], local);
 			tunnel = 1;
 		}
 		header_len += xfrm[i]->props.header_len;
-- 
GitLab


From 1b5c229987dc4d0c92a38fac0cde2aeec08cd775 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 18:11:50 -0700
Subject: [PATCH 0553/1063] [XFRM] STATE: Support non-fragment outbound
 transformation headers.

For locally originated outbound IPv6 packets that will be fragmented,
ip6_append_data() needs to know the length of the extension headers before
sending them; that length is carried by dst_entry.
IPv6 IPsec headers belong to the fragmentable part, so transformation was
designed to place all headers after the fragment header.
Mobile IPv6 extension headers, on the other hand, are not fragmented, so
it is a good idea to make dst_entry carry a non-fragment header length and
pass it to ip6_append_data().

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h       |  1 +
 include/net/xfrm.h      |  2 ++
 net/ipv4/xfrm4_policy.c |  1 +
 net/ipv6/ip6_output.c   |  2 +-
 net/ipv6/xfrm6_policy.c | 24 ++++++++++++++++++++++--
 5 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 36d54fc248b00..a8d825f90305a 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -54,6 +54,7 @@ struct dst_entry
 	unsigned long		expires;
 
 	unsigned short		header_len;	/* more space at head required */
+	unsigned short		nfheader_len;	/* more non-fragment space at head required */
 	unsigned short		trailer_len;	/* space to reserve at tail */
 
 	u32			metrics[RTAX_MAX];
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index aa3ac994477b9..aa93cc1f62990 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -260,6 +260,8 @@ struct xfrm_type
 	char			*description;
 	struct module		*owner;
 	__u8			proto;
+	__u8			flags;
+#define XFRM_TYPE_NON_FRAGMENT	1
 
 	int			(*init_state)(struct xfrm_state *x);
 	void			(*destructor)(struct xfrm_state *);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a5bed741de2c1..e517981ceadd4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -135,6 +135,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
 		dst_prev->header_len	= header_len;
+		dst_prev->nfheader_len	= 0;
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2a376b7d91b49..258e3e45f5e02 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -971,7 +971,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
-	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
 
 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 9328fc88708ab..a3f68c8b737e8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -75,6 +75,24 @@ __xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr)
 		(struct in6_addr*)&x->props.saddr;
 }
 
+static inline void
+__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x)
+{
+	if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
+		*nflen += x->props.header_len;
+	else
+		*len += x->props.header_len;
+}
+
+static inline void
+__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x)
+{
+	if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
+		*nflen -= x->props.header_len;
+	else
+		*len -= x->props.header_len;
+}
+
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -99,6 +117,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 	int i;
 	int err = 0;
 	int header_len = 0;
+	int nfheader_len = 0;
 	int trailer_len = 0;
 
 	dst = dst_prev = NULL;
@@ -135,7 +154,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 			local  = __xfrm6_bundle_addr_local(xfrm[i], local);
 			tunnel = 1;
 		}
-		header_len += xfrm[i]->props.header_len;
+		__xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
 		trailer_len += xfrm[i]->props.trailer_len;
 
 		if (tunnel) {
@@ -170,6 +189,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
 		dst_prev->header_len	= header_len;
+		dst_prev->nfheader_len	= nfheader_len;
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
@@ -188,7 +208,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
 		x->u.rt6.rt6i_idev     = rt0->rt6i_idev;
 		in6_dev_hold(rt0->rt6i_idev);
-		header_len -= x->u.dst.xfrm->props.header_len;
+		__xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm);
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
 
-- 
GitLab


From 060f02a3bdd4d9ba8aa3c48e9b470672b1f3a585 Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Wed, 23 Aug 2006 18:18:55 -0700
Subject: [PATCH 0554/1063] [XFRM] STATE: Introduce care-of address.

Care-of address is carried by state as a transformation option like
IPsec encryption/authentication algorithm.

Based on MIPL2 kernel patch.

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/xfrm.h  |  1 +
 include/net/xfrm.h    |  3 +++
 net/xfrm/xfrm_state.c |  6 ++++++
 net/xfrm/xfrm_user.c  | 28 +++++++++++++++++++++++++++-
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index a7c9e4cfb15b3..b53f799189af9 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -235,6 +235,7 @@ enum xfrm_attr_type_t {
 	XFRMA_REPLAY_THRESH,
 	XFRMA_ETIMER_THRESH,
 	XFRMA_SRCADDR,		/* xfrm_address_t */
+	XFRMA_COADDR,		/* xfrm_address_t */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index aa93cc1f62990..872a2a4022b25 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -134,6 +134,9 @@ struct xfrm_state
 	/* Data for encapsulator */
 	struct xfrm_encap_tmpl	*encap;
 
+	/* Data for care-of address */
+	xfrm_address_t	*coaddr;
+
 	/* IPComp needs an IPIP tunnel for handling uncompressed packets */
 	struct xfrm_state	*tunnel;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f05371556cced..3da89c01ea711 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -78,6 +78,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 	kfree(x->ealg);
 	kfree(x->calg);
 	kfree(x->encap);
+	kfree(x->coaddr);
 	if (x->mode)
 		xfrm_put_mode(x->mode);
 	if (x->type) {
@@ -603,6 +604,11 @@ int xfrm_state_update(struct xfrm_state *x)
 	if (likely(x1->km.state == XFRM_STATE_VALID)) {
 		if (x->encap && x1->encap)
 			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+		if (x->coaddr && x1->coaddr) {
+			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
+		}
+		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
+			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
 		x1->km.dying = 0;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b5f8ab71aa549..939808de9e204 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -187,11 +187,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		goto out;
 	if ((err = verify_sec_ctx_len(xfrma)))
 		goto out;
+	if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL)))
+		goto out;
 
 	err = -EINVAL;
 	switch (p->mode) {
 	case XFRM_MODE_TRANSPORT:
 	case XFRM_MODE_TUNNEL:
+	case XFRM_MODE_ROUTEOPTIMIZATION:
 		break;
 
 	default:
@@ -276,6 +279,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg)
 	return security_xfrm_state_alloc(x, uctx);
 }
 
+static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg)
+{
+	struct rtattr *rta = u_arg;
+	xfrm_address_t *p, *uaddrp;
+
+	if (!rta)
+		return 0;
+
+	uaddrp = RTA_DATA(rta);
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, uaddrp, sizeof(*p));
+	*addrpp = p;
+	return 0;
+}
+
 static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
 {
 	memcpy(&x->id, &p->id, sizeof(x->id));
@@ -365,7 +386,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
 		goto error;
 	if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
 		goto error;
-
+	if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1])))
+		goto error;
 	err = xfrm_init_state(x);
 	if (err)
 		goto error;
@@ -569,6 +591,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 		uctx->ctx_len = x->security->ctx_len;
 		memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len);
 	}
+
+	if (x->coaddr)
+		RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
+
 	nlh->nlmsg_len = skb->tail - b;
 out:
 	sp->this_idx++;
-- 
GitLab


From 9afaca057980c02771f4657c455cc7592fcd7373 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 18:20:16 -0700
Subject: [PATCH 0555/1063] [XFRM] IPV6: Update outbound state timestamp for
 each sending.

With this patch, the last-used time of a transformation state is updated
each time a packet is sent. xtime is used for it, as for the other state
lifetime expirations.
Mobile IPv6 enabled nodes will want to know the traffic status of each
binding (e.g. for a correspondent node to decide whether to request a
binding refresh, or for a mobile node to keep the home/care-of nonce alive).
The last-used timestamp is an important hint for this.
Based on MIPL2 kernel patch.

This patch was also written by: Henrik Petander <petander@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h    | 1 +
 include/net/xfrm.h      | 3 +++
 net/ipv6/xfrm6_output.c | 2 ++
 net/xfrm/xfrm_user.c    | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index b53f799189af9..1d8c1f22c12d1 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -236,6 +236,7 @@ enum xfrm_attr_type_t {
 	XFRMA_ETIMER_THRESH,
 	XFRMA_SRCADDR,		/* xfrm_address_t */
 	XFRMA_COADDR,		/* xfrm_address_t */
+	XFRMA_LASTUSED,
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 872a2a4022b25..248874ecf8dff 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -167,6 +167,9 @@ struct xfrm_state
 	struct xfrm_lifetime_cur curlft;
 	struct timer_list	timer;
 
+	/* Last used time */
+	u64			lastused;
+
 	/* Reference to data common to all the instances of this
 	 * transformer. */
 	struct xfrm_type	*type;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index b4628fbf8ff59..db58104e710b4 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -75,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb)
 
 		x->curlft.bytes += skb->len;
 		x->curlft.packets++;
+		if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
+			x->lastused = (u64)xtime.tv_sec;
 
 		spin_unlock_bh(&x->lock);
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 939808de9e204..f643063a1cbd2 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -595,6 +595,9 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	if (x->coaddr)
 		RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
 
+	if (x->lastused)
+		RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
+
 	nlh->nlmsg_len = skb->tail - b;
 out:
 	sp->this_idx++;
-- 
GitLab


From e53820de0f81da1429048634cadc6ef5f50c2f8b Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:12:01 -0700
Subject: [PATCH 0556/1063] [XFRM] IPV6: Restrict bundle reusing

For outbound transformation, a bundle is checked to see whether it is
suitable for reuse by the current flow. In an IPv6 case such as the one
below, transformation may apply an incorrect bundle to the flow instead
of creating another bundle:

- The policy selector has a destination prefix length < 128
  (two or more addresses can match it)
- Its bundle holds the dst entry of a default route whose prefix length < 128
  (previous traffic used such a route as next hop)
- The policy and the bundle were used with a transport mode state, and
  this time the flow address does not match the bundled state.

This issue was found when using IPsec to protect Mobile IPv6 mobility
signaling, but it is not specific to Mobile IPv6.
This patch adds a strict check to xfrm_bundle_ok() for each
state's mode and address when the prefix length is less than 128.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 19 ++++++++++++++++++-
 net/ipv4/xfrm4_policy.c |  2 +-
 net/ipv6/xfrm6_policy.c |  4 +++-
 net/xfrm/xfrm_policy.c  |  8 ++++++--
 4 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 248874ecf8dff..7f1630630dcf3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -869,6 +869,23 @@ xfrm_state_addr_check(struct xfrm_state *x,
 	return 0;
 }
 
+static __inline__ int
+xfrm_state_addr_flow_check(struct xfrm_state *x, struct flowi *fl,
+			   unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_state_addr_check(x,
+						(xfrm_address_t *)&fl->fl4_dst,
+						(xfrm_address_t *)&fl->fl4_src);
+	case AF_INET6:
+		return __xfrm6_state_addr_check(x,
+						(xfrm_address_t *)&fl->fl6_dst,
+						(xfrm_address_t *)&fl->fl6_src);
+	}
+	return 0;
+}
+
 static inline int xfrm_state_kern(struct xfrm_state *x)
 {
 	return atomic_read(&x->tunnel_users);
@@ -1014,7 +1031,7 @@ extern void xfrm_policy_flush(void);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
 extern int xfrm_flush_bundles(void);
 extern void xfrm_flush_all_bundles(void);
-extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family);
+extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict);
 extern void xfrm_init_pmtu(struct dst_entry *dst);
 
 extern wait_queue_head_t km_waitq;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e517981ceadd4..42d8ded0f96a8 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -33,7 +33,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
 	    	    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
 	    	    xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
-		    xfrm_bundle_ok(xdst, fl, AF_INET)) {
+		    xfrm_bundle_ok(xdst, fl, AF_INET, 0)) {
 			dst_clone(dst);
 			break;
 		}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index a3f68c8b737e8..729b4748d6d3d 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -50,7 +50,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 				 xdst->u.rt6.rt6i_src.plen);
 		if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
 		    ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
-		    xfrm_bundle_ok(xdst, fl, AF_INET6)) {
+		    xfrm_bundle_ok(xdst, fl, AF_INET6,
+				   (xdst->u.rt6.rt6i_dst.plen != 128 ||
+				    xdst->u.rt6.rt6i_src.plen != 128))) {
 			dst_clone(dst);
 			break;
 		}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 56abb5c057d4a..ad2a5cba1f5b0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1167,7 +1167,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 
 static int stale_bundle(struct dst_entry *dst)
 {
-	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
+	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
 }
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
@@ -1282,7 +1282,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu);
  * still valid.
  */
 
-int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict)
 {
 	struct dst_entry *dst = &first->u.dst;
 	struct xfrm_dst *last;
@@ -1304,6 +1304,10 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
 
+		if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
+		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
+			return 0;
+
 		mtu = dst_mtu(dst->child);
 		if (xdst->child_mtu_cached != mtu) {
 			last = xdst;
-- 
GitLab


From 654b32c6aad19d2fd363813cd8a1a1e64daf611b Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:12:56 -0700
Subject: [PATCH 0557/1063] [XFRM]: Fix message about transformation user
 interface.

Transformation user interface is not only for IPsec.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/Kconfig     | 6 +++---
 net/xfrm/xfrm_user.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 0c1c04322baf5..43228f7fd3a05 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -6,11 +6,11 @@ config XFRM
        depends on NET
 
 config XFRM_USER
-	tristate "IPsec user configuration interface"
+	tristate "Transformation user configuration interface"
 	depends on INET && XFRM
 	---help---
-	  Support for IPsec user configuration interface used
-	  by native Linux tools.
+	  Support for Transformation(XFRM) user configuration interface
+	  like IPsec used by native Linux tools.
 
 	  If unsure, say Y.
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f643063a1cbd2..3a83c5987c267 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2054,7 +2054,7 @@ static int __init xfrm_user_init(void)
 {
 	struct sock *nlsk;
 
-	printk(KERN_INFO "Initializing IPsec netlink socket\n");
+	printk(KERN_INFO "Initializing XFRM netlink socket\n");
 
 	nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
 	                             xfrm_netlink_rcv, THIS_MODULE);
-- 
GitLab


From ee53826801a8fa7a0e333895421ef6d0e5fbfbf0 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:13:46 -0700
Subject: [PATCH 0558/1063] [IPV6]: Add Kconfig to enable Mobile IPv6.

Add Kconfig to enable Mobile IPv6.
Based on MIPL2 kernel patch.

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/Kconfig | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 1188d9560242d..21e0cc808f44e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -98,6 +98,15 @@ config INET6_IPCOMP
 
 	  If unsure, say Y.
 
+config IPV6_MIP6
+	bool "IPv6: Mobility (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	select XFRM
+	---help---
+	  Support for IPv6 Mobility described in RFC 3775.
+
+	  If unsure, say N.
+
 config INET6_XFRM_TUNNEL
 	tristate
 	select INET6_TUNNEL
-- 
GitLab


From 642ec62eee5bdc158e01029220c8a23c685778fb Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Wed, 23 Aug 2006 19:15:07 -0700
Subject: [PATCH 0559/1063] [IPV6] MIP6: Add routing header type 2 definition.

Add routing header type 2 definition for Mobile IPv6.
Based on MIPL2 kernel patch.

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/ipv6.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 02d14a3ff2aff..d995662e94c46 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -29,6 +29,7 @@ struct in6_ifreq {
 
 #define IPV6_SRCRT_STRICT	0x01	/* this hop must be a neighbor	*/
 #define IPV6_SRCRT_TYPE_0	0	/* IPv6 type 0 Routing Header	*/
+#define IPV6_SRCRT_TYPE_2	2	/* IPv6 type 2 Routing Header	*/
 
 /*
  *	routing header
@@ -73,6 +74,18 @@ struct rt0_hdr {
 #define rt0_type		rt_hdr.type
 };
 
+/*
+ *	routing header type 2
+ */
+
+struct rt2_hdr {
+	struct ipv6_rt_hdr	rt_hdr;
+	__u32			reserved;
+	struct in6_addr		addr;
+
+#define rt2_type		rt_hdr.type
+};
+
 struct ipv6_auth_hdr {
 	__u8  nexthdr;
 	__u8  hdrlen;           /* This one is measured in 32 bit units! */
-- 
GitLab


From 65d4ed92219b28875efb52de5700da8c3dfa83e1 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:16:22 -0700
Subject: [PATCH 0560/1063] [IPV6] MIP6: Add inbound interface of routing
 header type 2.

Add inbound interface of routing header type 2 for Mobile IPv6.
Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h |  7 +++++
 net/ipv6/exthdrs.c     | 69 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 3d71251b3eca7..5fc8627435eb3 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -61,6 +61,13 @@ extern int			addrconf_set_dstaddr(void __user *arg);
 extern int			ipv6_chk_addr(struct in6_addr *addr,
 					      struct net_device *dev,
 					      int strict);
+/* XXX: placeholder until addrconf supports checking home addresses */
+#ifdef CONFIG_IPV6_MIP6
+static inline int ipv6_chk_home_addr(struct in6_addr *addr)
+{
+	return 0;
+}
+#endif
 extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
 						struct net_device *dev,
 						int strict);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 05afa6b1912b7..8d3a0e17314de 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,6 +43,9 @@
 #include <net/ndisc.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/xfrm.h>
+#endif
 
 #include <asm/uaccess.h>
 
@@ -219,7 +222,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	struct in6_addr *addr;
+	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
 	int n, i;
 
@@ -244,6 +247,23 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 looped_back:
 	if (hdr->segments_left == 0) {
+		switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+		case IPV6_SRCRT_TYPE_2:
+			/* Silently discard type 2 header unless it was
+			 * processed by own
+			 */
+			if (!addr) {
+				IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+				kfree_skb(skb);
+				return -1;
+			}
+			break;
+#endif
+		default:
+			break;
+		}
+
 		opt->lastopt = skb->h.raw - skb->nh.raw;
 		opt->srcrt = skb->h.raw - skb->nh.raw;
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
@@ -253,17 +273,29 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 		return 1;
 	}
 
-	if (hdr->type != IPV6_SRCRT_TYPE_0) {
+	switch (hdr->type) {
+	case IPV6_SRCRT_TYPE_0:
+		if (hdr->hdrlen & 0x01) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+			return -1;
+		}
+		break;
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
+		/* Silently discard invalid RTH type 2 */
+		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
 		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
 		return -1;
 	}
-	
-	if (hdr->hdrlen & 0x01) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
-		return -1;
-	}
 
 	/*
 	 *	This is the routing header forwarding algorithm from
@@ -303,6 +335,27 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 	addr = rthdr->addr;
 	addr += i - 1;
 
+	switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
+		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+				     (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+				     IPPROTO_ROUTING) < 0) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		if (!ipv6_chk_home_addr(addr)) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
+		break;
+	}
+
 	if (ipv6_addr_is_multicast(addr)) {
 		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
-- 
GitLab


From 280a9d340057ce1b3cca63084df22f4ef5b35fba Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:17:12 -0700
Subject: [PATCH 0561/1063] [IPV6] MIP6: Add socket option and ancillary data
 interface of routing header type 2.

Add socket option and ancillary data interfaces for routing header
type 2.  A Mobile IPv6 application will use them to send a binding
acknowledgement carrying this header, independently of any confirmed
route optimization (binding).

Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c      | 11 +++++++----
 net/ipv6/ipv6_sockglue.c | 10 +++++++++-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 8561b9da6db61..7206747022fcf 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -648,10 +648,13 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 			rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
 
-			/*
-			 *	TYPE 0
-			 */
-			if (rthdr->type) {
+			switch (rthdr->type) {
+			case IPV6_SRCRT_TYPE_0:
+#ifdef CONFIG_IPV6_MIP6
+			case IPV6_SRCRT_TYPE_2:
+#endif
+				break;
+			default:
 				err = -EINVAL;
 				goto exit_f;
 			}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5eaaf693abf7..4f3bb7fcc8b50 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -407,8 +407,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		/* routing header option needs extra check */
 		if (optname == IPV6_RTHDR && opt->srcrt) {
 			struct ipv6_rt_hdr *rthdr = opt->srcrt;
-			if (rthdr->type)
+			switch (rthdr->type) {
+			case IPV6_SRCRT_TYPE_0:
+#ifdef CONFIG_IPV6_MIP6
+			case IPV6_SRCRT_TYPE_2:
+#endif
+				break;
+			default:
 				goto sticky_done;
+			}
+
 			if ((rthdr->hdrlen & 1) ||
 			    (rthdr->hdrlen >> 1) != rthdr->segments_left)
 				goto sticky_done;
-- 
GitLab


From c61a404325093250b676f40ad8f4dd00f3bcab5f Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:18:35 -0700
Subject: [PATCH 0562/1063] [IPV6]: Find option offset by type.

This is a helper to find the offset of an option within an extension
header that can carry TLV options, such as the destination options header.

Mobile IPv6 home address option will use it.

Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h |  2 ++
 net/ipv6/exthdrs.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ece7e8a84ffd0..c4ea127105760 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -506,6 +506,8 @@ extern int			ipv6_skip_exthdr(const struct sk_buff *, int start,
 
 extern int 			ipv6_ext_hdr(u8 nexthdr);
 
+extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
+
 extern struct ipv6_txoptions *	ipv6_invert_rthdr(struct sock *sk,
 						  struct ipv6_rt_hdr *hdr);
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 8d3a0e17314de..50ff49e518bcd 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -49,6 +49,49 @@
 
 #include <asm/uaccess.h>
 
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+	int packet_len = skb->tail - skb->nh.raw;
+	struct ipv6_opt_hdr *hdr;
+	int len;
+
+	if (offset + 2 > packet_len)
+		goto bad;
+	hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	len = ((hdr->hdrlen + 1) << 3);
+
+	if (offset + len > packet_len)
+		goto bad;
+
+	offset += 2;
+	len -= 2;
+
+	while (len > 0) {
+		int opttype = skb->nh.raw[offset];
+		int optlen;
+
+		if (opttype == type)
+			return offset;
+
+		switch (opttype) {
+		case IPV6_TLV_PAD0:
+			optlen = 1;
+			break;
+		default:
+			optlen = skb->nh.raw[offset + 1] + 2;
+			if (optlen > len)
+				goto bad;
+			break;
+		}
+		offset += optlen;
+		len -= optlen;
+	}
+	/* not_found */
+	return -1;
+ bad:
+	return -1;
+}
+
 /*
  *	Parsing tlv encoded headers.
  *
-- 
GitLab


From a80ff03e05e4343d647780c116b02ec86078fd24 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:19:50 -0700
Subject: [PATCH 0563/1063] [IPV6]: Allow to replace skbuff by TLV parser.

When receiving a Mobile IPv6 home address option, which is a TLV carried
by the destination options header, the kernel will try to mangle the
source address of the packet. Since the skbuff may be cloned, the parser
must be able to replace it, just like in the routing header case.

This is a framework to achieve that by allowing a TLV parser to replace
the inbound skbuff pointer.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h   |  2 +-
 net/ipv6/exthdrs.c   | 29 +++++++++++++++++++----------
 net/ipv6/ip6_input.c |  2 +-
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c4ea127105760..8e6ec6063f8ce 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -229,7 +229,7 @@ extern int 			ip6_ra_control(struct sock *sk, int sel,
 					       void (*destructor)(struct sock *));
 
 
-extern int			ipv6_parse_hopopts(struct sk_buff *skb);
+extern int			ipv6_parse_hopopts(struct sk_buff **skbp);
 
 extern struct ipv6_txoptions *  ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt);
 extern struct ipv6_txoptions *	ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 50ff49e518bcd..1cdd0f0b5d34d 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -102,7 +102,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 
 struct tlvtype_proc {
 	int	type;
-	int	(*func)(struct sk_buff *skb, int offset);
+	int	(*func)(struct sk_buff **skbp, int offset);
 };
 
 /*********************
@@ -111,8 +111,10 @@ struct tlvtype_proc {
 
 /* An unknown option is detected, decide what to do */
 
-static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
+
 	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
@@ -137,8 +139,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
 
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 {
+	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
 	int off = skb->h.raw - skb->nh.raw;
 	int len = ((skb->h.raw[1]+1)<<3);
@@ -168,13 +171,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
 					/* type specific length/alignment 
 					   checks will be performed in the 
 					   func(). */
-					if (curr->func(skb, off) == 0)
+					if (curr->func(skbp, off) == 0)
 						return 0;
 					break;
 				}
 			}
 			if (curr->type < 0) {
-				if (ip6_tlvopt_unknown(skb, off) == 0)
+				if (ip6_tlvopt_unknown(skbp, off) == 0)
 					return 0;
 			}
 			break;
@@ -213,7 +216,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	opt->lastopt = skb->h.raw - skb->nh.raw;
 	opt->dst1 = skb->h.raw - skb->nh.raw;
 
-	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+		skb = *skbp;
 		skb->h.raw += ((skb->h.raw[1]+1)<<3);
 		opt->nhoff = opt->dst1;
 		return 1;
@@ -517,8 +521,10 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
 
 /* Router Alert as of RFC 2711 */
 
-static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
+
 	if (skb->nh.raw[optoff+1] == 2) {
 		IP6CB(skb)->ra = optoff;
 		return 1;
@@ -531,8 +537,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
 
 /* Jumbo payload */
 
-static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
 	u32 pkt_len;
 
 	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
@@ -581,8 +588,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
 	{ -1, }
 };
 
-int ipv6_parse_hopopts(struct sk_buff *skb)
+int ipv6_parse_hopopts(struct sk_buff **skbp)
 {
+	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
@@ -598,7 +606,8 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
 	}
 
 	opt->hop = sizeof(struct ipv6hdr);
-	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
+		skb = *skbp;
 		skb->h.raw += (skb->h.raw[1]+1)<<3;
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 25c2a9e038954..6b8e6d76a58bf 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
-		if (ipv6_parse_hopopts(skb) < 0) {
+		if (ipv6_parse_hopopts(&skb) < 0) {
 			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
 			return 0;
 		}
-- 
GitLab


From 842426e719f86cd5709617208efae93ff1a1e2d8 Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Wed, 23 Aug 2006 19:21:34 -0700
Subject: [PATCH 0564/1063] [IPV6] MIP6: Add home address option definition.

Add home address option definition for Mobile IPv6.
Based on MIPL2 kernel patch.

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h  |  1 +
 include/linux/ipv6.h | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 304aaedea305d..086ec2ac8c5fa 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -142,6 +142,7 @@ struct in6_flowlabel_req
 #define IPV6_TLV_PADN		1
 #define IPV6_TLV_ROUTERALERT	5
 #define IPV6_TLV_JUMBO		194
+#define IPV6_TLV_HAO		201	/* home address option */
 
 /*
  *	IPV6 socket options
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index d995662e94c46..5bf4406e26d47 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -86,6 +86,16 @@ struct rt2_hdr {
 #define rt2_type		rt_hdr.type
 };
 
+/*
+ *	home address option in destination options header
+ */
+
+struct ipv6_destopt_hao {
+	__u8			type;
+	__u8			length;
+	struct in6_addr		addr;
+} __attribute__ ((__packed__));
+
 struct ipv6_auth_hdr {
 	__u8  nexthdr;
 	__u8  hdrlen;           /* This one is measured in 32 bit units! */
-- 
GitLab


From a831f5bbc89a9978795504be9e1ff412043f8f77 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:24:48 -0700
Subject: [PATCH 0565/1063] [IPV6] MIP6: Add inbound interface of home address
 option.

Add inbound function of home address option by registering it to TLV
table for destination options header.

Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h |  3 ++
 net/ipv6/exthdrs.c   | 84 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5bf4406e26d47..db3b2ba0f4f88 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -226,6 +226,9 @@ struct inet6_skb_parm {
 	__u16			dst0;
 	__u16			srcrt;
 	__u16			dst1;
+#ifdef CONFIG_IPV6_MIP6
+	__u16			dsthao;
+#endif
 	__u16			lastopt;
 	__u32			nhoff;
 	__u16			flags;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 1cdd0f0b5d34d..6a6466bb5f26e 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -196,8 +196,80 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
   Destination options header.
  *****************************/
 
+#ifdef CONFIG_IPV6_MIP6
+static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+{
+	struct sk_buff *skb = *skbp;
+	struct ipv6_destopt_hao *hao;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+	struct in6_addr tmp_addr;
+	int ret;
+
+	if (opt->dsthao) {
+		LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
+		goto discard;
+	}
+	opt->dsthao = opt->dst1;
+	opt->dst1 = 0;
+
+	hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+
+	if (hao->length != 16) {
+		LIMIT_NETDEBUG(
+			KERN_DEBUG "hao invalid option length = %d\n", hao->length);
+		goto discard;
+	}
+
+	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
+		LIMIT_NETDEBUG(
+			KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr));
+		goto discard;
+	}
+
+	ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
+			       (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
+	if (unlikely(ret < 0))
+		goto discard;
+
+	if (skb_cloned(skb)) {
+		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+		if (skb2 == NULL)
+			goto discard;
+
+		kfree_skb(skb);
+
+		/* update all variable using below by copied skbuff */
+		*skbp = skb = skb2;
+		hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
+		ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+	}
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
+	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
+	ipv6_addr_copy(&hao->addr, &tmp_addr);
+
+	if (skb->tstamp.off_sec == 0)
+		__net_timestamp(skb);
+
+	return 1;
+
+ discard:
+	kfree_skb(skb);
+	return 0;
+}
+#endif
+
 static struct tlvtype_proc tlvprocdestopt_lst[] = {
-	/* No destination options are defined now */
+#ifdef CONFIG_IPV6_MIP6
+	{
+		.type	= IPV6_TLV_HAO,
+		.func	= ipv6_dest_hao,
+	},
+#endif
 	{-1,			NULL}
 };
 
@@ -205,6 +277,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
+#ifdef CONFIG_IPV6_MIP6
+	__u16 dstbuf;
+#endif
 
 	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
 	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
@@ -215,11 +290,18 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 
 	opt->lastopt = skb->h.raw - skb->nh.raw;
 	opt->dst1 = skb->h.raw - skb->nh.raw;
+#ifdef CONFIG_IPV6_MIP6
+	dstbuf = opt->dst1;
+#endif
 
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
 		skb = *skbp;
 		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+#ifdef CONFIG_IPV6_MIP6
+		opt->nhoff = dstbuf;
+#else
 		opt->nhoff = opt->dst1;
+#endif
 		return 1;
 	}
 
-- 
GitLab


From 8dd7368dd97def967bbb3aec67b882e8dfd1a528 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 23 Aug 2006 19:25:55 -0700
Subject: [PATCH 0566/1063] [IPV6]: Put dsthao after flags in order to pack
 inet6_skb_parm better.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index db3b2ba0f4f88..1d6d3ccc9413c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -226,12 +226,12 @@ struct inet6_skb_parm {
 	__u16			dst0;
 	__u16			srcrt;
 	__u16			dst1;
-#ifdef CONFIG_IPV6_MIP6
-	__u16			dsthao;
-#endif
 	__u16			lastopt;
 	__u32			nhoff;
 	__u16			flags;
+#ifdef CONFIG_IPV6_MIP6
+	__u16			dsthao;
+#endif
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 };
-- 
GitLab


From 793832361fe7e9c3fcae2edd1d293c583a0a095c Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:27:25 -0700
Subject: [PATCH 0567/1063] [IPV6] MIP6: Revert address to send ICMPv6 error.

When a packet carrying a home address option in its destination options
header is received, its IPv6 source address is replaced with the home
address. To send an ICMPv6 error back, the original address, i.e. the
one received on the wire, should be used. This function checks whether
such a header is included and, if so, swaps the addresses back.
Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e3a8e27af950c..4ec876066b3fd 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -273,6 +273,29 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
 	return 0;
 }
 
+#ifdef CONFIG_IPV6_MIP6
+static void mip6_addr_swap(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct ipv6_destopt_hao *hao;
+	struct in6_addr tmp;
+	int off;
+
+	if (opt->dsthao) {
+		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+		if (likely(off >= 0)) {
+			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+			ipv6_addr_copy(&tmp, &iph->saddr);
+			ipv6_addr_copy(&iph->saddr, &hao->addr);
+			ipv6_addr_copy(&hao->addr, &tmp);
+		}
+	}
+}
+#else
+static inline void mip6_addr_swap(struct sk_buff *skb) {}
+#endif
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
@@ -350,6 +373,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		return;
 	}
 
+	mip6_addr_swap(skb);
+
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
 	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
-- 
GitLab


From 27637df92e25dfb45dd71a93a2f4bf9c080fa627 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 19:29:47 -0700
Subject: [PATCH 0568/1063] [IPV6] IPSEC: Support sending with Mobile IPv6
 extension headers.

Mobile IPv6 defines the home address option as an option of the
destination options header. It is placed before the fragment header, so
ip6_find_1stfragopt() is fixed to know about it.

The home address option also carries the final source address of the
flow, so the outbound AH calculation should take care of it, as in the
routing header case.  Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ah6.c        | 109 ++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_output.c |  18 +++++--
 2 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 6c0aa51319a55..0f2b4e330aa90 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -74,6 +74,68 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
 	return 0;
 }
 
+#ifdef CONFIG_IPV6_MIP6
+/**
+ *	ipv6_rearrange_destopt - rearrange IPv6 destination options header
+ *	@iph: IPv6 header
+ *	@destopt: destination options header
+ */
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt)
+{
+	u8 *opt = (u8 *)destopt;
+	int len = ipv6_optlen(destopt);
+	int off = 0;
+	int optlen = 0;
+
+	off += 2;
+	len -= 2;
+
+	while (len > 0) {
+
+		switch (opt[off]) {
+
+		case IPV6_TLV_PAD0:
+			optlen = 1;
+			break;
+		default:
+			if (len < 2)
+				goto bad;
+			optlen = opt[off+1]+2;
+			if (len < optlen)
+				goto bad;
+
+			/* Rearrange the source address in @iph and the
+			 * addresses in home address option for final source.
+			 * See 11.3.2 of RFC 3775 for details.
+			 */
+			if (opt[off] == IPV6_TLV_HAO) {
+				struct in6_addr final_addr;
+				struct ipv6_destopt_hao *hao;
+
+				hao = (struct ipv6_destopt_hao *)&opt[off];
+				if (hao->length != sizeof(hao->addr)) {
+					if (net_ratelimit())
+						printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
+					goto bad;
+				}
+				ipv6_addr_copy(&final_addr, &hao->addr);
+				ipv6_addr_copy(&hao->addr, &iph->saddr);
+				ipv6_addr_copy(&iph->saddr, &final_addr);
+			}
+			break;
+		}
+
+		off += optlen;
+		len -= optlen;
+	}
+	if (len == 0)
+		return;
+
+bad:
+	return;
+}
+#endif
+
 /**
  *	ipv6_rearrange_rthdr - rearrange IPv6 routing header
  *	@iph: IPv6 header
@@ -113,7 +175,11 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
 	ipv6_addr_copy(&iph->daddr, &final_addr);
 }
 
+#ifdef CONFIG_IPV6_MIP6
+static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
+#else
 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
+#endif
 {
 	union {
 		struct ipv6hdr *iph;
@@ -128,6 +194,28 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
 
 	while (exthdr.raw < end) {
 		switch (nexthdr) {
+#ifdef CONFIG_IPV6_MIP6
+		case NEXTHDR_HOP:
+			if (!zero_out_mutable_opts(exthdr.opth)) {
+				LIMIT_NETDEBUG(
+					KERN_WARNING "overrun %sopts\n",
+					nexthdr == NEXTHDR_HOP ?
+						"hop" : "dest");
+				return -EINVAL;
+			}
+			break;
+		case NEXTHDR_DEST:
+			if (dir == XFRM_POLICY_OUT)
+				ipv6_rearrange_destopt(iph, exthdr.opth);
+			if (!zero_out_mutable_opts(exthdr.opth)) {
+				LIMIT_NETDEBUG(
+					KERN_WARNING "overrun %sopts\n",
+					nexthdr == NEXTHDR_HOP ?
+						"hop" : "dest");
+				return -EINVAL;
+			}
+			break;
+#else
 		case NEXTHDR_HOP:
 		case NEXTHDR_DEST:
 			if (!zero_out_mutable_opts(exthdr.opth)) {
@@ -138,6 +226,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
 				return -EINVAL;
 			}
 			break;
+#endif
 
 		case NEXTHDR_ROUTING:
 			ipv6_rearrange_rthdr(iph, exthdr.rth);
@@ -164,6 +253,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	u8 nexthdr;
 	char tmp_base[8];
 	struct {
+#ifdef CONFIG_IPV6_MIP6
+		struct in6_addr saddr;
+#endif
 		struct in6_addr daddr;
 		char hdrs[0];
 	} *tmp_ext;
@@ -188,10 +280,18 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 			err = -ENOMEM;
 			goto error;
 		}
+#ifdef CONFIG_IPV6_MIP6
+		memcpy(tmp_ext, &top_iph->saddr, extlen);
+		err = ipv6_clear_mutable_options(top_iph,
+						 extlen - sizeof(*tmp_ext) +
+						 sizeof(*top_iph),
+						 XFRM_POLICY_OUT);
+#else
 		memcpy(tmp_ext, &top_iph->daddr, extlen);
 		err = ipv6_clear_mutable_options(top_iph,
 						 extlen - sizeof(*tmp_ext) +
 						 sizeof(*top_iph));
+#endif
 		if (err)
 			goto error_free_iph;
 	}
@@ -222,7 +322,11 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	memcpy(top_iph, tmp_base, sizeof(tmp_base));
 	if (tmp_ext) {
+#ifdef CONFIG_IPV6_MIP6
+		memcpy(&top_iph->saddr, tmp_ext, extlen);
+#else
 		memcpy(&top_iph->daddr, tmp_ext, extlen);
+#endif
 error_free_iph:
 		kfree(tmp_ext);
 	}
@@ -282,8 +386,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!tmp_hdr)
 		goto out;
 	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
+#ifdef CONFIG_IPV6_MIP6
+	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
+		goto free_out;
+#else
 	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
 		goto free_out;
+#endif
 	skb->nh.ipv6h->priority    = 0;
 	skb->nh.ipv6h->flow_lbl[0] = 0;
 	skb->nh.ipv6h->flow_lbl[1] = 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 258e3e45f5e02..c14ea1ecf3792 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -475,17 +475,25 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 		switch (**nexthdr) {
 
 		case NEXTHDR_HOP:
+			break;
 		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
 		case NEXTHDR_DEST:
-			if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
-			if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
-			offset += ipv6_optlen(exthdr);
-			*nexthdr = &exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+#ifdef CONFIG_IPV6_MIP6
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				break;
+#endif
+			if (found_rhdr)
+				return offset;
 			break;
 		default :
 			return offset;
 		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
 	}
 
 	return offset;
-- 
GitLab


From 2c8d7ca0f76103855ad1f2a930e05683b64a00eb Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Wed, 23 Aug 2006 20:31:11 -0700
Subject: [PATCH 0569/1063] [IPV6] MIP6: Add routing header type 2
 transformation.

Add routing header type 2 transformation for Mobile IPv6.
Based on MIPL2 kernel patch.

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mip6.h  |  31 ++++++++
 net/ipv6/Makefile   |   2 +
 net/ipv6/af_inet6.c |   9 +++
 net/ipv6/mip6.c     | 181 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 223 insertions(+)
 create mode 100644 include/net/mip6.h
 create mode 100644 net/ipv6/mip6.c

diff --git a/include/net/mip6.h b/include/net/mip6.h
new file mode 100644
index 0000000000000..644b8b673048f
--- /dev/null
+++ b/include/net/mip6.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ *	YOSHIFUJI Hideaki @USAGI
+ */
+#ifndef _NET_MIP6_H
+#define _NET_MIP6_H
+
+extern int mip6_init(void);
+extern void mip6_fini(void);
+
+#endif
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 87e912e319223..0213c6612b58d 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -14,6 +14,8 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
 ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+
 ipv6-objs += $(ipv6-y)
 
 obj-$(CONFIG_INET6_AH) += ah6.o
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 57ee5ddea96f8..fc9c8a99bea66 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -59,6 +59,9 @@
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
 #endif
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -857,6 +860,9 @@ static int __init inet6_init(void)
 	ipv6_frag_init();
 	ipv6_nodata_init();
 	ipv6_destopt_init();
+#ifdef CONFIG_IPV6_MIP6
+	mip6_init();
+#endif
 
 	/* Init v6 transport protocols. */
 	udpv6_init();
@@ -919,6 +925,9 @@ static void __exit inet6_exit(void)
  	udp6_proc_exit();
  	tcp6_proc_exit();
  	raw6_proc_exit();
+#endif
+#ifdef CONFIG_IPV6_MIP6
+	mip6_fini();
 #endif
 	/* Cleanup code parts. */
 	sit_cleanup();
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
new file mode 100644
index 0000000000000..63e548b6f81ef
--- /dev/null
+++ b/net/ipv6/mip6.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+#include <net/mip6.h>
+
+static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr)
+{
+	return x->coaddr;
+}
+
+static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
+
+	if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		return -ENOENT;
+
+	return rt2->rt_hdr.nexthdr;
+}
+
+/* Routing Header type 2 is inserted.
+ * IP Header's dst address is replaced with Routing Header's Home Address.
+ */
+static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct rt2_hdr *rt2;
+	u8 nexthdr;
+
+	iph = (struct ipv6hdr *)skb->data;
+	iph->payload_len = htons(skb->len - sizeof(*iph));
+
+	nexthdr = *skb->nh.raw;
+	*skb->nh.raw = IPPROTO_ROUTING;
+
+	rt2 = (struct rt2_hdr *)skb->h.raw;
+	rt2->rt_hdr.nexthdr = nexthdr;
+	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
+	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
+	rt2->rt_hdr.segments_left = 1;
+	memset(&rt2->reserved, 0, sizeof(rt2->reserved));
+
+	BUG_TRAP(rt2->rt_hdr.hdrlen == 2);
+
+	memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
+	memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
+
+	return 0;
+}
+
+static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
+			     u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	int found_rhdr = 0;
+
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			if (offset + 3 <= packet_len) {
+				struct ipv6_rt_hdr *rt;
+				rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+				if (rt->type != 0)
+					return offset;
+			}
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				return offset;
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	}
+
+	return offset;
+}
+
+static int mip6_rthdr_init_state(struct xfrm_state *x)
+{
+	if (x->id.spi) {
+		printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+		       x->id.spi);
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+		printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+		       __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+		return -EINVAL;
+	}
+
+	x->props.header_len = sizeof(struct rt2_hdr);
+
+	return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for routing
+ * header type 2 unlike IPsec protocols.
+ */
+static void mip6_rthdr_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type mip6_rthdr_type =
+{
+	.description	= "MIP6RT",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_ROUTING,
+	.flags		= XFRM_TYPE_NON_FRAGMENT,
+	.init_state	= mip6_rthdr_init_state,
+	.destructor	= mip6_rthdr_destroy,
+	.input		= mip6_rthdr_input,
+	.output		= mip6_rthdr_output,
+	.hdr_offset	= mip6_rthdr_offset,
+	.remote_addr	= mip6_xfrm_addr,
+};
+
+int __init mip6_init(void)
+{
+	printk(KERN_INFO "Mobile IPv6\n");
+
+	if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
+		printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
+		goto mip6_rthdr_xfrm_fail;
+	}
+	return 0;
+
+ mip6_rthdr_xfrm_fail:
+	return -EAGAIN;
+}
+
+void __exit mip6_fini(void)
+{
+	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
+		printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
+}
-- 
GitLab


From 3d126890dd67beffec27c1b6f51c040fc8d0b526 Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Wed, 23 Aug 2006 20:32:34 -0700
Subject: [PATCH 0570/1063] [IPV6] MIP6: Add destination options header
 transformation.

Add destination options header transformation for Mobile IPv6.
Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mip6.h |   3 +
 net/ipv6/mip6.c    | 167 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+)

diff --git a/include/net/mip6.h b/include/net/mip6.h
index 644b8b673048f..42b65bace1227 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -25,6 +25,9 @@
 #ifndef _NET_MIP6_H
 #define _NET_MIP6_H
 
+#define MIP6_OPT_PAD_1	0
+#define MIP6_OPT_PAD_N	1
+
 extern int mip6_init(void);
 extern void mip6_fini(void);
 
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 63e548b6f81ef..a8adf891fe0e4 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -35,6 +35,165 @@ static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr
 	return x->coaddr;
 }
 
+static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
+{
+	return (n - len + 16) & 0x7;
+}
+
+static inline void *mip6_padn(__u8 *data, __u8 padlen)
+{
+	if (!data)
+		return NULL;
+	if (padlen == 1) {
+		data[0] = MIP6_OPT_PAD_1;
+	} else if (padlen > 1) {
+		data[0] = MIP6_OPT_PAD_N;
+		data[1] = padlen - 2;
+		if (padlen > 2)
+			memset(data+2, 0, data[1]);
+	}
+	return data + padlen;
+}
+
+static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
+
+	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		return -ENOENT;
+
+	return destopt->nexthdr;
+}
+
+/* Destination Option Header is inserted.
+ * IP Header's src address is replaced with Home Address Option in
+ * Destination Option Header.
+ */
+static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct ipv6_destopt_hdr *dstopt;
+	struct ipv6_destopt_hao *hao;
+	u8 nexthdr;
+	int len;
+
+	iph = (struct ipv6hdr *)skb->data;
+	iph->payload_len = htons(skb->len - sizeof(*iph));
+
+	nexthdr = *skb->nh.raw;
+	*skb->nh.raw = IPPROTO_DSTOPTS;
+
+	dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+	dstopt->nexthdr = nexthdr;
+
+	hao = mip6_padn((char *)(dstopt + 1),
+			calc_padlen(sizeof(*dstopt), 6));
+
+	hao->type = IPV6_TLV_HAO;
+	hao->length = sizeof(*hao) - 2;
+	BUG_TRAP(hao->length == 16);
+
+	len = ((char *)hao - (char *)dstopt) + sizeof(*hao);
+
+	memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
+	memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
+
+	BUG_TRAP(len == x->props.header_len);
+	dstopt->hdrlen = (x->props.header_len >> 3) - 1;
+
+	return 0;
+}
+
+static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
+			       u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	int found_rhdr = 0;
+
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			/*
+			 * HAO MUST NOT appear more than once.
+			 * XXX: It is better to try to find by the end of
+			 * XXX: packet if HAO exists.
+			 */
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+				LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
+				return offset;
+			}
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	}
+
+	return offset;
+}
+
+static int mip6_destopt_init_state(struct xfrm_state *x)
+{
+	if (x->id.spi) {
+		printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+		       x->id.spi);
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+		printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+		       __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+		return -EINVAL;
+	}
+
+	x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
+		calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
+		sizeof(struct ipv6_destopt_hao);
+	BUG_TRAP(x->props.header_len == 24);
+
+	return 0;
+}
+
+/*
+ * Do nothing about destroying since it has no specific operation for
+ * destination options header unlike IPsec protocols.
+ */
+static void mip6_destopt_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type mip6_destopt_type =
+{
+	.description	= "MIP6DESTOPT",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_DSTOPTS,
+	.flags		= XFRM_TYPE_NON_FRAGMENT,
+	.init_state	= mip6_destopt_init_state,
+	.destructor	= mip6_destopt_destroy,
+	.input		= mip6_destopt_input,
+	.output		= mip6_destopt_output,
+	.hdr_offset	= mip6_destopt_offset,
+	.local_addr	= mip6_xfrm_addr,
+};
+
 static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
@@ -164,6 +323,10 @@ int __init mip6_init(void)
 {
 	printk(KERN_INFO "Mobile IPv6\n");
 
+	if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
+		printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__);
+		goto mip6_destopt_xfrm_fail;
+	}
 	if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
 		printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
 		goto mip6_rthdr_xfrm_fail;
@@ -171,6 +334,8 @@ int __init mip6_init(void)
 	return 0;
 
  mip6_rthdr_xfrm_fail:
+	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
+ mip6_destopt_xfrm_fail:
 	return -EAGAIN;
 }
 
@@ -178,4 +343,6 @@ void __exit mip6_fini(void)
 {
 	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
 		printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
+	if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
+		printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
 }
-- 
GitLab


From e23c7194a8a21e96b99106bdabde94614c4b84d6 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:33:28 -0700
Subject: [PATCH 0571/1063] [XFRM] STATE: Add Mobile IPv6 route optimization
 protocols to netlink interface.

Add Mobile IPv6 route optimization protocols to netlink interface.
Route optimization states carry care-of address.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3a83c5987c267..770bd2410749a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -28,6 +28,9 @@
 #include <net/xfrm.h>
 #include <net/netlink.h>
 #include <asm/uaccess.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <linux/in6.h>
+#endif
 
 static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 {
@@ -173,6 +176,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			goto out;
 		break;
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case IPPROTO_DSTOPTS:
+	case IPPROTO_ROUTING:
+		if (xfrma[XFRMA_ALG_COMP-1]	||
+		    xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1]	||
+		    xfrma[XFRMA_ENCAP-1]	||
+		    xfrma[XFRMA_SEC_CTX-1]	||
+		    !xfrma[XFRMA_COADDR-1])
+			goto out;
+		break;
+#endif
+
 	default:
 		goto out;
 	};
-- 
GitLab


From 2b741653b6c824fe7520ee92b6795f11c5f24b24 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:34:26 -0700
Subject: [PATCH 0572/1063] [IPV6] MIP6: Add Mobility header definition.

Add Mobility header definition for Mobile IPv6.
Based on MIPL2 kernel patch.

This patch was also written by: Antti Tuominen <anttit@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h |  1 +
 include/net/flow.h  |  9 +++++++++
 include/net/ipv6.h  |  1 +
 include/net/mip6.h  | 23 +++++++++++++++++++++++
 4 files changed, 34 insertions(+)

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 086ec2ac8c5fa..d776829b443f5 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -134,6 +134,7 @@ struct in6_flowlabel_req
 #define IPPROTO_ICMPV6		58	/* ICMPv6			*/
 #define IPPROTO_NONE		59	/* IPv6 no next header		*/
 #define IPPROTO_DSTOPTS		60	/* IPv6 destination options	*/
+#define IPPROTO_MH		135	/* IPv6 mobility header		*/
 
 /*
  *	IPv6 TLV options.
diff --git a/include/net/flow.h b/include/net/flow.h
index 21d988b2058ab..e0522914316e3 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -72,12 +72,21 @@ struct flowi {
 		} dnports;
 
 		__u32		spi;
+
+#ifdef CONFIG_IPV6_MIP6
+		struct {
+			__u8	type;
+		} mht;
+#endif
 	} uli_u;
 #define fl_ip_sport	uli_u.ports.sport
 #define fl_ip_dport	uli_u.ports.dport
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+#ifdef CONFIG_IPV6_MIP6
+#define fl_mh_type	uli_u.mht.type
+#endif
 	__u32           secid;	/* used by xfrm; see secid.txt */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8e6ec6063f8ce..72bf47b2a4e0c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -40,6 +40,7 @@
 #define NEXTHDR_ICMP		58	/* ICMP for IPv6. */
 #define NEXTHDR_NONE		59	/* No next header */
 #define NEXTHDR_DEST		60	/* Destination options header. */
+#define NEXTHDR_MOBILITY	135	/* Mobility header. */
 
 #define NEXTHDR_MAX		255
 
diff --git a/include/net/mip6.h b/include/net/mip6.h
index 42b65bace1227..fd43178faace1 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -28,6 +28,29 @@
 #define MIP6_OPT_PAD_1	0
 #define MIP6_OPT_PAD_N	1
 
+/*
+ * Mobility Header
+ */
+struct ip6_mh {
+	__u8	ip6mh_proto;
+	__u8	ip6mh_hdrlen;
+	__u8	ip6mh_type;
+	__u8	ip6mh_reserved;
+	__u16	ip6mh_cksum;
+	/* Followed by type specific messages */
+	__u8	data[0];
+} __attribute__ ((__packed__));
+
+#define IP6_MH_TYPE_BRR		0   /* Binding Refresh Request */
+#define IP6_MH_TYPE_HOTI	1   /* HOTI Message   */
+#define IP6_MH_TYPE_COTI	2   /* COTI Message  */
+#define IP6_MH_TYPE_HOT		3   /* HOT Message   */
+#define IP6_MH_TYPE_COT		4   /* COT Message  */
+#define IP6_MH_TYPE_BU		5   /* Binding Update */
+#define IP6_MH_TYPE_BACK	6   /* Binding ACK */
+#define IP6_MH_TYPE_BERROR	7   /* Binding Error */
+#define IP6_MH_TYPE_MAX		IP6_MH_TYPE_BERROR
+
 extern int mip6_init(void);
 extern void mip6_fini(void);
 
-- 
GitLab


From 7be96f7628469e56f91d51f13b03e9bcff113c7f Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:35:31 -0700
Subject: [PATCH 0573/1063] [IPV6] MIP6: Add receiving mobility header
 functions through raw socket.

Like ICMPv6, mobility header is handled through raw socket.
In the inbound case, the kernel only checks whether an ICMPv6 error should
be sent as a reply.
Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>
This patch was also written by: Antti Tuominen <anttit@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mip6.h |  4 +++
 net/ipv6/mip6.c    | 83 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/raw.c     | 29 +++++++++++++++-
 3 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/include/net/mip6.h b/include/net/mip6.h
index fd43178faace1..68263c6d99960 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -25,6 +25,9 @@
 #ifndef _NET_MIP6_H
 #define _NET_MIP6_H
 
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
 #define MIP6_OPT_PAD_1	0
 #define MIP6_OPT_PAD_N	1
 
@@ -53,5 +56,6 @@ struct ip6_mh {
 
 extern int mip6_init(void);
 extern void mip6_fini(void);
+extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb);
 
 #endif
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index a8adf891fe0e4..7b5f893214826 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -26,7 +26,10 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <net/sock.h>
 #include <net/ipv6.h>
+#include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 #include <net/mip6.h>
 
@@ -55,6 +58,86 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
 	return data + padlen;
 }
 
+static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos)
+{
+	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
+}
+
+static int mip6_mh_len(int type)
+{
+	int len = 0;
+
+	switch (type) {
+	case IP6_MH_TYPE_BRR:
+		len = 0;
+		break;
+	case IP6_MH_TYPE_HOTI:
+	case IP6_MH_TYPE_COTI:
+	case IP6_MH_TYPE_BU:
+	case IP6_MH_TYPE_BACK:
+		len = 1;
+		break;
+	case IP6_MH_TYPE_HOT:
+	case IP6_MH_TYPE_COT:
+	case IP6_MH_TYPE_BERROR:
+		len = 2;
+		break;
+	}
+	return len;
+}
+
+int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+{
+	struct ip6_mh *mh;
+	int mhlen;
+
+	if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
+	    !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
+		return -1;
+
+	mh = (struct ip6_mh *)skb->h.raw;
+
+	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
+		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
+			       mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
+		mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+		return -1;
+	}
+	mhlen = (mh->ip6mh_hdrlen + 1) << 3;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+				    &skb->nh.ipv6h->daddr,
+				    mhlen, IPPROTO_MH,
+				    skb->csum)) {
+			LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n");
+			skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+				    &skb->nh.ipv6h->daddr,
+				    mhlen, IPPROTO_MH,
+				    skb_checksum(skb, 0, mhlen, 0))) {
+			LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
+				       NIP6(skb->nh.ipv6h->saddr),
+				       NIP6(skb->nh.ipv6h->daddr));
+			return -1;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	if (mh->ip6mh_proto != IPPROTO_NONE) {
+		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
+			       mh->ip6mh_proto);
+		mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = skb->nh.ipv6h;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d4af1cb5e19fe..ecca8aae3c4b2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,6 +50,9 @@
 #include <net/udp.h>
 #include <net/inet_common.h>
 #include <net/tcp_states.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
 
 #include <net/rawv6.h>
 #include <net/xfrm.h>
@@ -169,8 +172,32 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
 
 	while (sk) {
+		int filtered;
+
 		delivered = 1;
-		if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
+		switch (nexthdr) {
+		case IPPROTO_ICMPV6:
+			filtered = icmpv6_filter(sk, skb);
+			break;
+#ifdef CONFIG_IPV6_MIP6
+		case IPPROTO_MH:
+			/* XXX: To validate MH only once for each packet,
+			 * this is placed here. It should be after checking
+			 * xfrm policy, however it doesn't. The checking xfrm
+			 * policy is placed in rawv6_rcv() because it is
+			 * required for each socket.
+			 */
+			filtered = mip6_mh_filter(sk, skb);
+			break;
+#endif
+		default:
+			filtered = 0;
+			break;
+		}
+
+		if (filtered < 0)
+			break;
+		if (filtered == 0) {
 			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 
 			/* Not releasing hash table! */
-- 
GitLab


From 6e8f4d48b265225bdf437bbf3151b0d6700dda22 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:36:47 -0700
Subject: [PATCH 0574/1063] [IPV6] MIP6: Add sending mobility header functions
 through raw socket.

Mobility header is built by user-space and sent through raw socket.
Kernel just extracts its type to flow.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ecca8aae3c4b2..d09329ca32670 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -609,6 +609,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 	struct iovec *iov;
 	u8 __user *type = NULL;
 	u8 __user *code = NULL;
+#ifdef CONFIG_IPV6_MIP6
+	u8 len = 0;
+#endif
 	int probed = 0;
 	int i;
 
@@ -640,6 +643,20 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 				probed = 1;
 			}
 			break;
+#ifdef CONFIG_IPV6_MIP6
+		case IPPROTO_MH:
+			if (iov->iov_base && iov->iov_len < 1)
+				break;
+			/* check if type field is readable or not. */
+			if (iov->iov_len > 2 - len) {
+				u8 __user *p = iov->iov_base;
+				get_user(fl->fl_mh_type, &p[2 - len]);
+				probed = 1;
+			} else
+				len += iov->iov_len;
+
+			break;
+#endif
 		default:
 			probed = 1;
 			break;
-- 
GitLab


From 2ce4272a699c731b9736d76126dc742353e381db Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:39:03 -0700
Subject: [PATCH 0575/1063] [IPV6] MIP6: Transformation support mobility
 header.

Transformation support mobility header.
Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      |  5 +++++
 net/ipv6/xfrm6_policy.c | 15 +++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7f1630630dcf3..13488e7ba68cc 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -546,6 +546,11 @@ u16 xfrm_flowi_sport(struct flowi *fl)
 	case IPPROTO_ICMPV6:
 		port = htons(fl->fl_icmp_type);
 		break;
+#ifdef CONFIG_IPV6_MIP6
+	case IPPROTO_MH:
+		port = htons(fl->fl_mh_type);
+		break;
+#endif
 	default:
 		port = 0;	/*XXX*/
 	}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 729b4748d6d3d..98c2fe449b3f1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -18,6 +18,9 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
 
 static struct dst_ops xfrm6_dst_ops;
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
@@ -270,6 +273,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			fl->proto = nexthdr;
 			return;
 
+#ifdef CONFIG_IPV6_MIP6
+		case IPPROTO_MH:
+			if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+				struct ip6_mh *mh;
+				mh = (struct ip6_mh *)exthdr;
+
+				fl->fl_mh_type = mh->ip6mh_type;
+			}
+			fl->proto = nexthdr;
+			return;
+#endif
+
 		/* XXX Why are there these headers? */
 		case IPPROTO_AH:
 		case IPPROTO_ESP:
-- 
GitLab


From df0ba92a99ca757039dfa84a929281ea3f7a50e8 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:41:00 -0700
Subject: [PATCH 0576/1063] [XFRM]: Trace which secpath state is reject factor.

For Mobile IPv6 usage, it is required to trace which secpath state is the
reject factor in order to notify user space of it (to know the address
which cannot be used for route optimized communication).

Based on MIPL2 kernel patch.

This patch was also written by: Henrik Petander <petander@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  1 +
 net/xfrm/xfrm_policy.c | 55 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 13488e7ba68cc..9ebbdc1dd4717 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -273,6 +273,7 @@ struct xfrm_type
 	void			(*destructor)(struct xfrm_state *);
 	int			(*input)(struct xfrm_state *, struct sk_buff *skb);
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
+	int			(*reject)(struct xfrm_state *, struct sk_buff *, struct flowi *);
 	int			(*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **);
 	xfrm_address_t		*(*local_addr)(struct xfrm_state *, xfrm_address_t *);
 	xfrm_address_t		*(*remote_addr)(struct xfrm_state *, xfrm_address_t *);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ad2a5cba1f5b0..d125a2649037c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -988,6 +988,23 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 }
 EXPORT_SYMBOL(xfrm_lookup);
 
+static inline int
+xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
+{
+	struct xfrm_state *x;
+	int err;
+
+	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
+		return 0;
+	x = skb->sp->xvec[idx];
+	if (!x->type->reject)
+		return 0;
+	xfrm_state_hold(x);
+	err = x->type->reject(x, skb, fl);
+	xfrm_state_put(x);
+	return err;
+}
+
 /* When skb is transformed back to its "native" form, we have to
  * check policy restrictions. At the moment we make this in maximally
  * stupid way. Shame on me. :-) Of course, connected sockets must
@@ -1010,6 +1027,13 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
 		  xfrm_state_addr_cmp(tmpl, x, family));
 }
 
+/*
+ * 0 or more than 0 is returned when validation is succeeded (either bypass
+ * because of optional transport mode, or next index of the mathced secpath
+ * state with the template.
+ * -1 is returned when no matching template is found.
+ * Otherwise "-2 - errored_index" is returned.
+ */
 static inline int
 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	       unsigned short family)
@@ -1024,8 +1048,11 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	for (; idx < sp->len; idx++) {
 		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
 			return ++idx;
-		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT)
+		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+			if (start == -1)
+				start = -2-idx;
 			break;
+		}
 	}
 	return start;
 }
@@ -1046,11 +1073,14 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family
 }
 EXPORT_SYMBOL(xfrm_decode_session);
 
-static inline int secpath_has_nontransport(struct sec_path *sp, int k)
+static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
 {
 	for (; k < sp->len; k++) {
-		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT)
+		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
+			if (idxp)
+				*idxp = k;
 			return 1;
+		}
 	}
 
 	return 0;
@@ -1062,6 +1092,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	struct xfrm_policy *pol;
 	struct flowi fl;
 	u8 fl_dir = policy_to_flow_dir(dir);
+	int xerr_idx = -1;
+	int *xerr_idxp = &xerr_idx;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
@@ -1086,8 +1118,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		pol = flow_cache_lookup(&fl, family, fl_dir,
 					xfrm_policy_lookup);
 
-	if (!pol)
-		return !skb->sp || !secpath_has_nontransport(skb->sp, 0);
+	if (!pol) {
+		if (skb->sp && secpath_has_nontransport(skb->sp, 0, xerr_idxp)) {
+			xfrm_secpath_reject(xerr_idx, skb, &fl);
+			return 0;
+		}
+		return 1;
+	}
 
 	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
 
@@ -1107,11 +1144,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		 */
 		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
 			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
-			if (k < 0)
+			if (k < 0) {
+				if (k < -1 && xerr_idxp)
+					*xerr_idxp = -(2+k);
 				goto reject;
+			}
 		}
 
-		if (secpath_has_nontransport(sp, k))
+		if (secpath_has_nontransport(sp, k, xerr_idxp))
 			goto reject;
 
 		xfrm_pol_put(pol);
@@ -1119,6 +1159,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	}
 
 reject:
+	xfrm_secpath_reject(xerr_idx, skb, &fl);
 	xfrm_pol_put(pol);
 	return 0;
 }
-- 
GitLab


From 97a64b4577ae2bc5599dbd008a3cd9e25de9b9f5 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:44:06 -0700
Subject: [PATCH 0577/1063] [XFRM]: Introduce XFRM_MSG_REPORT.

XFRM_MSG_REPORT is a notification message that carries a state protocol
and selector from the kernel to user-space.

Mobile IPv6 will use it when an inbound reject occurs during route
optimization, to let user-space know that a binding error is required.

Based on MIPL2 kernel patch.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h  | 12 +++++++++++
 include/net/xfrm.h    |  2 ++
 net/xfrm/xfrm_state.c | 19 ++++++++++++++++++
 net/xfrm/xfrm_user.c  | 46 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 1d8c1f22c12d1..4009f4445fa9f 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -166,6 +166,10 @@ enum {
 #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE
 	XFRM_MSG_GETAE,
 #define XFRM_MSG_GETAE XFRM_MSG_GETAE
+
+	XFRM_MSG_REPORT,
+#define XFRM_MSG_REPORT XFRM_MSG_REPORT
+
 	__XFRM_MSG_MAX
 };
 #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -325,12 +329,18 @@ struct xfrm_usersa_flush {
 	__u8				proto;
 };
 
+struct xfrm_user_report {
+	__u8				proto;
+	struct xfrm_selector		sel;
+};
+
 #ifndef __KERNEL__
 /* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE		1
 #define XFRMGRP_EXPIRE		2
 #define XFRMGRP_SA		4
 #define XFRMGRP_POLICY		8
+#define XFRMGRP_REPORT		0x10
 #endif
 
 enum xfrm_nlgroups {
@@ -346,6 +356,8 @@ enum xfrm_nlgroups {
 #define XFRMNLGRP_POLICY	XFRMNLGRP_POLICY
 	XFRMNLGRP_AEVENTS,
 #define XFRMNLGRP_AEVENTS	XFRMNLGRP_AEVENTS
+	XFRMNLGRP_REPORT,
+#define XFRMNLGRP_REPORT	XFRMNLGRP_REPORT
 	__XFRMNLGRP_MAX
 };
 #define XFRMNLGRP_MAX	(__XFRMNLGRP_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9ebbdc1dd4717..0b223eed4c9ba 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -381,6 +381,7 @@ struct xfrm_mgr
 	struct xfrm_policy	*(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c);
+	int			(*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
 };
 
 extern int xfrm_register_km(struct xfrm_mgr *km);
@@ -1043,6 +1044,7 @@ extern void xfrm_init_pmtu(struct dst_entry *dst);
 extern wait_queue_head_t km_waitq;
 extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
 extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid);
+extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
 
 extern void xfrm_input_init(void);
 extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3da89c01ea711..a26ef6952c304 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1055,6 +1055,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 }
 EXPORT_SYMBOL(km_policy_expired);
 
+int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+	int err = -EINVAL;
+	int ret;
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		if (km->report) {
+			ret = km->report(proto, sel, addr);
+			if (!ret)
+				err = ret;
+		}
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+EXPORT_SYMBOL(km_report);
+
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
 	int err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 770bd2410749a..7303b820bea42 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1491,6 +1491,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0),
 	[XFRM_MSG_NEWAE       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
+	[XFRM_MSG_REPORT      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
 };
 
 #undef XMSGSIZE
@@ -2058,12 +2059,57 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev
 
 }
 
+static int build_report(struct sk_buff *skb, u8 proto,
+			struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+	struct xfrm_user_report *ur;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
+	ur = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	ur->proto = proto;
+	memcpy(&ur->sel, sel, sizeof(ur->sel));
+
+	if (addr)
+		RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
+			    xfrm_address_t *addr)
+{
+	struct sk_buff *skb;
+	size_t len;
+
+	len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report)));
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_report(skb, proto, sel, addr) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+}
+
 static struct xfrm_mgr netlink_mgr = {
 	.id		= "netlink",
 	.notify		= xfrm_send_state_notify,
 	.acquire	= xfrm_send_acquire,
 	.compile_policy	= xfrm_compile_policy,
 	.notify_policy	= xfrm_send_policy_notify,
+	.report		= xfrm_send_report,
 };
 
 static int __init xfrm_user_init(void)
-- 
GitLab


From 70182ed23d2559345aadb3cfb6a68a7c1cc0aa39 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:45:55 -0700
Subject: [PATCH 0578/1063] [IPV6] MIP6: Report to user-space when home address
 option is rejected.

Report to user-space when the home address option is rejected.
On receiving this message, the user-space application will send a Mobile IPv6
binding error. The report is rate-limited by the kernel.
Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mip6.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 7b5f893214826..31445d09261e4 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -25,6 +25,7 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/time.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <net/sock.h>
@@ -138,6 +139,18 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
+struct mip6_report_rate_limiter {
+	spinlock_t lock;
+	struct timeval stamp;
+	int iif;
+	struct in6_addr src;
+	struct in6_addr dst;
+};
+
+static struct mip6_report_rate_limiter mip6_report_rl = {
+	.lock = SPIN_LOCK_UNLOCKED
+};
+
 static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = skb->nh.ipv6h;
@@ -189,6 +202,75 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
+static inline int mip6_report_rl_allow(struct timeval *stamp,
+				       struct in6_addr *dst,
+				       struct in6_addr *src, int iif)
+{
+	int allow = 0;
+
+	spin_lock_bh(&mip6_report_rl.lock);
+	if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
+	    mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
+	    mip6_report_rl.iif != iif ||
+	    !ipv6_addr_equal(&mip6_report_rl.src, src) ||
+	    !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
+		mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
+		mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
+		mip6_report_rl.iif = iif;
+		ipv6_addr_copy(&mip6_report_rl.src, src);
+		ipv6_addr_copy(&mip6_report_rl.dst, dst);
+		allow = 1;
+	}
+	spin_unlock_bh(&mip6_report_rl.lock);
+	return allow;
+}
+
+static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
+{
+	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+	struct ipv6_destopt_hao *hao = NULL;
+	struct xfrm_selector sel;
+	int offset;
+	struct timeval stamp;
+	int err = 0;
+
+	if (likely(opt->dsthao)) {
+		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+		if (likely(offset >= 0))
+			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+	}
+
+	skb_get_timestamp(skb, &stamp);
+
+	if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
+				  hao ? &hao->addr : &skb->nh.ipv6h->saddr,
+				  opt->iif))
+		goto out;
+
+	memset(&sel, 0, sizeof(sel));
+	memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
+	       sizeof(sel.daddr));
+	sel.prefixlen_d = 128;
+	memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+	       sizeof(sel.saddr));
+	sel.prefixlen_s = 128;
+	sel.family = AF_INET6;
+	sel.proto = fl->proto;
+	sel.dport = xfrm_flowi_dport(fl);
+	if (sel.dport)
+		sel.dport_mask = ~((__u16)0);
+	sel.sport = xfrm_flowi_sport(fl);
+	if (sel.sport)
+		sel.sport_mask = ~((__u16)0);
+	sel.ifindex = fl->oif;
+
+	err = km_report(IPPROTO_DSTOPTS, &sel,
+			(hao ? (xfrm_address_t *)&hao->addr : NULL));
+
+ out:
+	return err;
+}
+
 static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 			       u8 **nexthdr)
 {
@@ -273,6 +355,7 @@ static struct xfrm_type mip6_destopt_type =
 	.destructor	= mip6_destopt_destroy,
 	.input		= mip6_destopt_input,
 	.output		= mip6_destopt_output,
+ 	.reject		= mip6_destopt_reject,
 	.hdr_offset	= mip6_destopt_offset,
 	.local_addr	= mip6_xfrm_addr,
 };
-- 
GitLab


From 01be8e5d59d7e6da5c425a31b43709c2a4a69b5d Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 20:47:44 -0700
Subject: [PATCH 0579/1063] [IPV6] MIP6: Ignore to report if mobility headers
 is rejected.

Do not report to user-space when known mobility headers are rejected by
the destination options header transformation.
The Mobile IPv6 specification (RFC3775) says that a mobility header is
used with a destination options header carrying a home address option
only for the binding update message. Other message types cannot be used
this way, and a node receiving such a packet must drop it silently (and
must not send a binding error).
To achieve this, (1) the application should use a transformation policy
and wild-card states to catch binding update messages before other
packets, and (2) the kernel does not report the reject to user-space, so
that the application does not send a binding error message.
This patch is for (2).
Based on MIPL2 kernel patch.

This patch was also written by: Ville Nuorvala <vnuorval@tcs.hut.fi>

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mip6.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 31445d09261e4..70854035c1314 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -234,6 +234,9 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
 	struct timeval stamp;
 	int err = 0;
 
+	if (unlikely(fl->proto == IPPROTO_MH && fl->fl_mh_type <= IP6_MH_TYPE_MAX))
+		goto out;
+
 	if (likely(opt->dsthao)) {
 		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(offset >= 0))
-- 
GitLab


From c11f1a15c522ddd3bbd2c32b5ce3e0b1831b22f2 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 22:38:14 -0700
Subject: [PATCH 0580/1063] [XFRM] POLICY: Add Kconfig to support sub policy.

Add Kconfig to support sub policy.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/Kconfig | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 43228f7fd3a05..0faab6332586a 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -14,6 +14,16 @@ config XFRM_USER
 
 	  If unsure, say Y.
 
+config XFRM_SUB_POLICY
+	bool "Transformation sub policy support (EXPERIMENTAL)"
+	depends on XFRM && EXPERIMENTAL
+	---help---
+	  Support sub policy for developers. By using sub policy with main
+	  one, two policies can be applied to the same packet at once.
+	  Policy which lives shorter time in kernel should be a sub.
+
+	  If unsure, say N.
+
 config NET_KEY
 	tristate "PF_KEY sockets"
 	select XFRM
-- 
GitLab


From 4e81bb8336a0ac50289d4d4c7a55e559b994ee8f Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 22:43:30 -0700
Subject: [PATCH 0581/1063] [XFRM] POLICY: sub policy support.

Sub policy is introduced. Main and sub policies are applied to the same flow.
(The policy that the current kernel uses is named main.)
Another transformation policy management is required to keep the
lifetimes of IPsec and Mobile IPv6 separate.
The policy that lives for a shorter time in the kernel should be the sub,
i.e. normally main is for IPsec and sub is for Mobile IPv6.
(Using two IPsec policies in different databases is possible, too.)

Limitation or TODOs:
 - Sub policy is not supported for per socket one (it is always inserted as main).
 - Current kernel makes cached outbound with flowi to skip searching database.
   However this patch makes it disabled only when "two policies are used and
   the first matched one is bypass case" because neither flowi nor bundle
   information knows about transformation template size.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/xfrm.h   |   7 ++
 include/net/xfrm.h     |  45 ++++++--
 net/xfrm/xfrm_policy.c | 252 +++++++++++++++++++++++++++++++++++------
 3 files changed, 260 insertions(+), 44 deletions(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 4009f4445fa9f..492fb9818747a 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -102,6 +102,13 @@ struct xfrm_stats {
 	__u32	integrity_failed;
 };
 
+enum
+{
+	XFRM_POLICY_TYPE_MAIN	= 0,
+	XFRM_POLICY_TYPE_SUB	= 1,
+	XFRM_POLICY_TYPE_MAX	= 2
+};
+
 enum
 {
 	XFRM_POLICY_IN	= 0,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0b223eed4c9ba..4655ca25f8082 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -341,6 +341,7 @@ struct xfrm_policy
 	atomic_t		refcnt;
 	struct timer_list	timer;
 
+	u8			type;
 	u32			priority;
 	u32			index;
 	struct xfrm_selector	selector;
@@ -389,6 +390,19 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km);
 
 
 extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+#ifdef CONFIG_XFRM_SUB_POLICY
+extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2];
+
+static inline int xfrm_policy_lists_empty(int dir)
+{
+	return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]);
+}
+#else
+static inline int xfrm_policy_lists_empty(int dir)
+{
+	return (!xfrm_policy_list[dir]);
+}
+#endif
 
 static inline void xfrm_pol_hold(struct xfrm_policy *policy)
 {
@@ -404,6 +418,20 @@ static inline void xfrm_pol_put(struct xfrm_policy *policy)
 		__xfrm_policy_destroy(policy);
 }
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
+{
+	int i;
+	for (i = npols - 1; i >= 0; --i)
+		xfrm_pol_put(pols[i]);
+}
+#else
+static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
+{
+	xfrm_pol_put(pols[0]);
+}
+#endif
+
 #define XFRM_DST_HSIZE		1024
 
 static __inline__
@@ -737,8 +765,8 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
 {
 	if (sk && sk->sk_policy[XFRM_POLICY_IN])
 		return __xfrm_policy_check(sk, dir, skb, family);
-		
-	return	(!xfrm_policy_list[dir] && !skb->sp) ||
+
+	return	(xfrm_policy_lists_empty(dir) && !skb->sp) ||
 		(skb->dst->flags & DST_NOPOLICY) ||
 		__xfrm_policy_check(sk, dir, skb, family);
 }
@@ -758,7 +786,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
 
 static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
-	return	!xfrm_policy_list[XFRM_POLICY_OUT] ||
+	return	xfrm_policy_lists_empty(XFRM_POLICY_OUT) ||
 		(skb->dst->flags & DST_NOXFRM) ||
 		__xfrm_route_forward(skb, family);
 }
@@ -1023,18 +1051,19 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig
 #endif
 
 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp);
-extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
+extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
+					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete);
-struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete);
-void xfrm_policy_flush(void);
+struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete);
+void xfrm_policy_flush(u8 type);
 u32 xfrm_get_acqseq(void);
 void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
 				  xfrm_address_t *daddr, xfrm_address_t *saddr, 
 				  int create, unsigned short family);
-extern void xfrm_policy_flush(void);
+extern void xfrm_policy_flush(u8 type);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
 extern int xfrm_flush_bundles(void);
 extern void xfrm_flush_all_bundles(void);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d125a2649037c..96de6c76ed571 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -32,6 +32,24 @@ static DEFINE_RWLOCK(xfrm_policy_lock);
 
 struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
 EXPORT_SYMBOL(xfrm_policy_list);
+#ifdef CONFIG_XFRM_SUB_POLICY
+struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2];
+EXPORT_SYMBOL(xfrm_policy_list_sub);
+
+#define XFRM_POLICY_LISTS(type) \
+	((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \
+	 xfrm_policy_list)
+#define XFRM_POLICY_LISTHEAD(type, dir) \
+	((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \
+	 xfrm_policy_list[dir])
+#define XFRM_POLICY_LISTHEADP(type, dir) \
+	((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \
+	 &xfrm_policy_list[dir])
+#else
+#define XFRM_POLICY_LISTS(type)              xfrm_policy_list
+#define XFRM_POLICY_LISTHEAD(type, dif)      xfrm_policy_list[dir]
+#define XFRM_POLICY_LISTHEADP(type, dif)     &xfrm_policy_list[dir]
+#endif
 
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
@@ -397,7 +415,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 
 /* Generate new index... KAME seems to generate them ordered by cost
  * of an absolute inpredictability of ordering of rules. This will not pass. */
-static u32 xfrm_gen_index(int dir)
+static u32 xfrm_gen_index(u8 type, int dir)
 {
 	u32 idx;
 	struct xfrm_policy *p;
@@ -408,7 +426,7 @@ static u32 xfrm_gen_index(int dir)
 		idx_generator += 8;
 		if (idx == 0)
 			idx = 8;
-		for (p = xfrm_policy_list[dir]; p; p = p->next) {
+		for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) {
 			if (p->index == idx)
 				break;
 		}
@@ -425,7 +443,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct dst_entry *gc_list;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
+	for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) {
 		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
 		    xfrm_sec_ctx_match(pol->security, policy->security)) {
 			if (excl) {
@@ -452,7 +470,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	policy->next = *p;
 	*p = policy;
 	atomic_inc(&flow_cache_genid);
-	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
 	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
@@ -493,13 +511,14 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
+					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete)
 {
 	struct xfrm_policy *pol, **p;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+	for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) {
 		if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
 		    (xfrm_sec_ctx_match(ctx, pol->security))) {
 			xfrm_pol_hold(pol);
@@ -518,12 +537,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
-struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
 {
 	struct xfrm_policy *pol, **p;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+	for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) {
 		if (pol->index == id) {
 			xfrm_pol_hold(pol);
 			if (delete)
@@ -541,15 +560,16 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
 
-void xfrm_policy_flush(void)
+void xfrm_policy_flush(u8 type)
 {
 	struct xfrm_policy *xp;
+	struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type);
 	int dir;
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
-		while ((xp = xfrm_policy_list[dir]) != NULL) {
-			xfrm_policy_list[dir] = xp->next;
+		while ((xp = p_list[dir]) != NULL) {
+			p_list[dir] = xp->next;
 			write_unlock_bh(&xfrm_policy_lock);
 
 			xfrm_policy_kill(xp);
@@ -562,7 +582,7 @@ void xfrm_policy_flush(void)
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
 
-int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
 		     void *data)
 {
 	struct xfrm_policy *xp;
@@ -572,7 +592,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
 
 	read_lock_bh(&xfrm_policy_lock);
 	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
+		for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next)
 			count++;
 	}
 
@@ -582,7 +602,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
 	}
 
 	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
+		for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) {
 			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
 			if (error)
 				goto out;
@@ -597,13 +617,13 @@ EXPORT_SYMBOL(xfrm_policy_walk);
 
 /* Find policy to apply to this flow. */
 
-static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
-			       void **objp, atomic_t **obj_refp)
+static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
+						     u16 family, u8 dir)
 {
 	struct xfrm_policy *pol;
 
 	read_lock_bh(&xfrm_policy_lock);
-	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
+	for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) {
 		struct xfrm_selector *sel = &pol->selector;
 		int match;
 
@@ -620,6 +640,25 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 		}
 	}
 	read_unlock_bh(&xfrm_policy_lock);
+
+	return pol;
+}
+
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+			       void **objp, atomic_t **obj_refp)
+{
+	struct xfrm_policy *pol;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
+	if (pol)
+		goto end;
+#endif
+	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+ end:
+#endif
 	if ((*objp = (void *) pol) != NULL)
 		*obj_refp = &pol->refcnt;
 }
@@ -665,8 +704,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
 
 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 {
-	pol->next = xfrm_policy_list[dir];
-	xfrm_policy_list[dir] = pol;
+	struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type);
+
+	pol->next = p_list[dir];
+	p_list[dir] = pol;
 	xfrm_pol_hold(pol);
 }
 
@@ -675,7 +716,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 {
 	struct xfrm_policy **polp;
 
-	for (polp = &xfrm_policy_list[dir];
+	for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir);
 	     *polp != NULL; polp = &(*polp)->next) {
 		if (*polp == pol) {
 			*polp = pol->next;
@@ -704,12 +745,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 {
 	struct xfrm_policy *old_pol;
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
+		return -EINVAL;
+#endif
+
 	write_lock_bh(&xfrm_policy_lock);
 	old_pol = sk->sk_policy[dir];
 	sk->sk_policy[dir] = pol;
 	if (pol) {
 		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
-		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
 	if (old_pol)
@@ -738,6 +784,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 		newp->flags = old->flags;
 		newp->xfrm_nr = old->xfrm_nr;
 		newp->index = old->index;
+		newp->type = old->type;
 		memcpy(newp->xfrm_vec, old->xfrm_vec,
 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 		write_lock_bh(&xfrm_policy_lock);
@@ -764,9 +811,9 @@ int __xfrm_sk_clone_policy(struct sock *sk)
 /* Resolve list of templates for the flow, given policy. */
 
 static int
-xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
-		  struct xfrm_state **xfrm,
-		  unsigned short family)
+xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
+		      struct xfrm_state **xfrm,
+		      unsigned short family)
 {
 	int nx;
 	int i, error;
@@ -809,6 +856,38 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
 	return error;
 }
 
+static int
+xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
+		  struct xfrm_state **xfrm,
+		  unsigned short family)
+{
+	int cnx = 0;
+	int error;
+	int ret;
+	int i;
+
+	for (i = 0; i < npols; i++) {
+		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
+			error = -ENOBUFS;
+			goto fail;
+		}
+		ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family);
+		if (ret < 0) {
+			error = ret;
+			goto fail;
+		} else
+			cnx += ret;
+	}
+
+	return cnx;
+
+ fail:
+	for (cnx--; cnx>=0; cnx--)
+		xfrm_state_put(xfrm[cnx]);
+	return error;
+
+}
+
 /* Check that the bundle accepts the flow and its components are
  * still valid.
  */
@@ -855,6 +934,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 		struct sock *sk, int flags)
 {
 	struct xfrm_policy *policy;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	int npols;
+	int pol_dead;
+	int xfrm_nr;
+	int pi;
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
 	struct dst_entry *dst, *dst_orig = *dst_p;
 	int nx = 0;
@@ -866,12 +950,18 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
+	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
+		pols[pi] = NULL;
+	npols = 0;
+	pol_dead = 0;
+	xfrm_nr = 0;
+
 	if (sk && sk->sk_policy[1])
 		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
 
 	if (!policy) {
 		/* To accelerate a bit...  */
-		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+		if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT))
 			return 0;
 
 		policy = flow_cache_lookup(fl, dst_orig->ops->family,
@@ -883,6 +973,9 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 
 	family = dst_orig->ops->family;
 	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+	pols[0] = policy;
+	npols ++;
+	xfrm_nr += pols[0]->xfrm_nr;
 
 	switch (policy->action) {
 	case XFRM_POLICY_BLOCK:
@@ -891,11 +984,13 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 		goto error;
 
 	case XFRM_POLICY_ALLOW:
+#ifndef CONFIG_XFRM_SUB_POLICY
 		if (policy->xfrm_nr == 0) {
 			/* Flow passes not transformed. */
 			xfrm_pol_put(policy);
 			return 0;
 		}
+#endif
 
 		/* Try to find matching bundle.
 		 *
@@ -911,7 +1006,36 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 		if (dst)
 			break;
 
-		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+#ifdef CONFIG_XFRM_SUB_POLICY
+		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+							    fl, family,
+							    XFRM_POLICY_OUT);
+			if (pols[1]) {
+				if (pols[1]->action == XFRM_POLICY_BLOCK) {
+					err = -EPERM;
+					goto error;
+				}
+				npols ++;
+				xfrm_nr += pols[1]->xfrm_nr;
+			}
+		}
+
+		/*
+		 * Neither flowi nor bundle information knows the transformation
+		 * template size, so with more than one policy we can only tell
+		 * whether all of them are bypass after they have been searched.
+		 * This is also why the not-transformed bypass above is compiled
+		 * only when sub policy support is not configured.
+		 */
+		if (xfrm_nr == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pols_put(pols, npols);
+			return 0;
+		}
+
+#endif
+		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 		if (unlikely(nx<0)) {
 			err = nx;
@@ -924,7 +1048,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 				set_current_state(TASK_RUNNING);
 				remove_wait_queue(&km_waitq, &wait);
 
-				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 				if (nx == -EAGAIN && signal_pending(current)) {
 					err = -ERESTART;
@@ -932,7 +1056,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 				}
 				if (nx == -EAGAIN ||
 				    genid != atomic_read(&flow_cache_genid)) {
-					xfrm_pol_put(policy);
+					xfrm_pols_put(pols, npols);
 					goto restart;
 				}
 				err = nx;
@@ -942,7 +1066,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 		}
 		if (nx == 0) {
 			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
+			xfrm_pols_put(pols, npols);
 			return 0;
 		}
 
@@ -956,8 +1080,14 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 			goto error;
 		}
 
+		for (pi = 0; pi < npols; pi++) {
+			read_lock_bh(&pols[pi]->lock);
+			pol_dead |= pols[pi]->dead;
+			read_unlock_bh(&pols[pi]->lock);
+		}
+
 		write_lock_bh(&policy->lock);
-		if (unlikely(policy->dead || stale_bundle(dst))) {
+		if (unlikely(pol_dead || stale_bundle(dst))) {
 			/* Wow! While we worked on resolving, this
 			 * policy has gone. Retry. It is not paranoia,
 			 * we just cannot enlist new bundle to dead object.
@@ -977,12 +1107,12 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 	}
 	*dst_p = dst;
 	dst_release(dst_orig);
-	xfrm_pol_put(policy);
+ 	xfrm_pols_put(pols, npols);
 	return 0;
 
 error:
 	dst_release(dst_orig);
-	xfrm_pol_put(policy);
+	xfrm_pols_put(pols, npols);
 	*dst_p = NULL;
 	return err;
 }
@@ -1090,6 +1220,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
 	struct xfrm_policy *pol;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	int npols = 0;
+	int xfrm_nr;
+	int pi;
 	struct flowi fl;
 	u8 fl_dir = policy_to_flow_dir(dir);
 	int xerr_idx = -1;
@@ -1128,22 +1262,50 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
 
+	pols[0] = pol;
+	npols ++;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+						    &fl, family,
+						    XFRM_POLICY_IN);
+		if (pols[1]) {
+			pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+			npols ++;
+		}
+	}
+#endif
+
 	if (pol->action == XFRM_POLICY_ALLOW) {
 		struct sec_path *sp;
 		static struct sec_path dummy;
+		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
+		struct xfrm_tmpl **tpp = tp;
+		int ti = 0;
 		int i, k;
 
 		if ((sp = skb->sp) == NULL)
 			sp = &dummy;
 
+		for (pi = 0; pi < npols; pi++) {
+			if (pols[pi] != pol &&
+			    pols[pi]->action != XFRM_POLICY_ALLOW)
+				goto reject;
+			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
+				goto reject_error;
+			for (i = 0; i < pols[pi]->xfrm_nr; i++)
+				tpp[ti++] = &pols[pi]->xfrm_vec[i];
+		}
+		xfrm_nr = ti;
+
 		/* For each tunnel xfrm, find the first matching tmpl.
 		 * For each tmpl before that, find corresponding xfrm.
 		 * Order is _important_. Later we will implement
 		 * some barriers, but at the moment barriers
 		 * are implied between each two transformations.
 		 */
-		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
-			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
+		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
+			k = xfrm_policy_ok(tpp[i], sp, k, family);
 			if (k < 0) {
 				if (k < -1 && xerr_idxp)
 					*xerr_idxp = -(2+k);
@@ -1154,13 +1316,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		if (secpath_has_nontransport(sp, k, xerr_idxp))
 			goto reject;
 
-		xfrm_pol_put(pol);
+		xfrm_pols_put(pols, npols);
 		return 1;
 	}
 
 reject:
 	xfrm_secpath_reject(xerr_idx, skb, &fl);
-	xfrm_pol_put(pol);
+reject_error:
+	xfrm_pols_put(pols, npols);
 	return 0;
 }
 EXPORT_SYMBOL(__xfrm_policy_check);
@@ -1246,6 +1409,23 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
 
 	read_lock_bh(&xfrm_policy_lock);
 	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
+#ifdef CONFIG_XFRM_SUB_POLICY
+		for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) {
+			write_lock(&pol->lock);
+			dstp = &pol->bundles;
+			while ((dst=*dstp) != NULL) {
+				if (func(dst)) {
+					*dstp = dst->next;
+					dst->next = gc_list;
+					gc_list = dst;
+				} else {
+					dstp = &dst->next;
+				}
+			}
+			write_unlock(&pol->lock);
+		}
+
+#endif
 		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
 			write_lock(&pol->lock);
 			dstp = &pol->bundles;
-- 
GitLab


From 41a49cc3c02ace59d4dddae91ea211c330970ee3 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 22:48:31 -0700
Subject: [PATCH 0582/1063] [XFRM]: Add sorting interface for state and
 template.

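When two transformation policies (main and sub) apply to the same flow,
their transformations have to be merged into a single ordered list.
This adds the infrastructure for that: a per-family hook to sort states
for outbound processing and one to sort templates for inbound checking.
It will be used when Mobile IPv6 and IPsec are used at the same time.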

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     | 20 ++++++++++++++++++++
 net/xfrm/xfrm_policy.c | 16 ++++++++++++++--
 net/xfrm/xfrm_state.c  | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4655ca25f8082..d341603e4ba8e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -254,6 +254,8 @@ struct xfrm_state_afinfo {
 	struct xfrm_state	*(*find_acq)(u8 mode, u32 reqid, u8 proto, 
 					     xfrm_address_t *daddr, xfrm_address_t *saddr, 
 					     int create);
+	int			(*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
+	int			(*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
 };
 
 extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -1002,6 +1004,24 @@ extern int xfrm_state_add(struct xfrm_state *x);
 extern int xfrm_state_update(struct xfrm_state *x);
 extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family);
 extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family);
+#ifdef CONFIG_XFRM_SUB_POLICY
+extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src,
+			  int n, unsigned short family);
+extern int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src,
+			   int n, unsigned short family);
+#else
+static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src,
+				 int n, unsigned short family)
+{
+	return -ENOSYS;
+}
+
+static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src,
+				  int n, unsigned short family)
+{
+	return -ENOSYS;
+}
+#endif
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
 extern void xfrm_state_flush(u8 proto);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 96de6c76ed571..1732159ffd011 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -861,6 +861,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
 		  struct xfrm_state **xfrm,
 		  unsigned short family)
 {
+	struct xfrm_state *tp[XFRM_MAX_DEPTH];
+	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
 	int cnx = 0;
 	int error;
 	int ret;
@@ -871,7 +873,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
 			error = -ENOBUFS;
 			goto fail;
 		}
-		ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family);
+
+		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
 		if (ret < 0) {
 			error = ret;
 			goto fail;
@@ -879,11 +882,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
 			cnx += ret;
 	}
 
+	/* found states are sorted for outbound processing */
+	if (npols > 1)
+		xfrm_state_sort(xfrm, tpp, cnx, family);
+
 	return cnx;
 
  fail:
 	for (cnx--; cnx>=0; cnx--)
-		xfrm_state_put(xfrm[cnx]);
+		xfrm_state_put(tpp[cnx]);
 	return error;
 
 }
@@ -1280,6 +1287,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		struct sec_path *sp;
 		static struct sec_path dummy;
 		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
+		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
 		struct xfrm_tmpl **tpp = tp;
 		int ti = 0;
 		int i, k;
@@ -1297,6 +1305,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 				tpp[ti++] = &pols[pi]->xfrm_vec[i];
 		}
 		xfrm_nr = ti;
+		if (npols > 1) {
+			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
+			tpp = stp;
+		}
 
 		/* For each tunnel xfrm, find the first matching tmpl.
 		 * For each tmpl before that, find corresponding xfrm.
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a26ef6952c304..622e92a08d0ba 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -728,6 +728,44 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
 }
 EXPORT_SYMBOL(xfrm_find_acq);
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+int
+xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
+	       unsigned short family)
+{
+	int err = 0;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	if (afinfo->tmpl_sort)
+		err = afinfo->tmpl_sort(dst, src, n);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_tmpl_sort);
+
+int
+xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
+		unsigned short family)
+{
+	int err = 0;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	if (afinfo->state_sort)
+		err = afinfo->state_sort(dst, src, n);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_sort);
+#endif
+
 /* Silly enough, but I'm lazy to build resolution list */
 
 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
-- 
GitLab


From f7b6983f0feeefcd2a594138adcffe640593d8de Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 22:49:28 -0700
Subject: [PATCH 0583/1063] [XFRM] POLICY: Support netlink socket interface for
 sub policy.

Sub policies can now be configured through the netlink socket interface.
PF_KEY handles main policies only; supporting sub policies there is a TODO.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h |   7 +++
 include/net/xfrm.h   |   1 +
 net/key/af_key.c     |  18 ++++--
 net/xfrm/xfrm_user.c | 134 ++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 142 insertions(+), 18 deletions(-)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 492fb9818747a..14ecd19f4cdc5 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -230,6 +230,12 @@ enum xfrm_ae_ftype_t {
 #define XFRM_AE_MAX (__XFRM_AE_MAX - 1)
 };
 
+struct xfrm_userpolicy_type {
+	__u8		type;
+	__u16		reserved1;
+	__u8		reserved2;
+};
+
 /* Netlink message attributes.  */
 enum xfrm_attr_type_t {
 	XFRMA_UNSPEC,
@@ -248,6 +254,7 @@ enum xfrm_attr_type_t {
 	XFRMA_SRCADDR,		/* xfrm_address_t */
 	XFRMA_COADDR,		/* xfrm_address_t */
 	XFRMA_LASTUSED,
+	XFRMA_POLICY_TYPE,	/* struct xfrm_userpolicy_type */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d341603e4ba8e..c75b3287d8f8a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -203,6 +203,7 @@ struct km_event
 		u32 proto;
 		u32 byid;
 		u32 aevent;
+		u32 type;
 	} data;
 
 	u32	seq;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 19e047b0e6786..83b443ddc72f5 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1731,7 +1731,8 @@ static u32 gen_reqid(void)
 		++reqid;
 		if (reqid == 0)
 			reqid = IPSEC_MANUAL_REQID_MAX+1;
-		if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST)
+		if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid,
+				     (void*)&reqid) != -EEXIST)
 			return reqid;
 	} while (reqid != start);
 	return 0;
@@ -2268,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 			return err;
 	}
 
-	xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1);
+	xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1,
+				   &sel, tmp.security, 1);
 	security_xfrm_policy_free(&tmp);
 	if (xp == NULL)
 		return -ENOENT;
@@ -2330,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	if (dir >= XFRM_POLICY_MAX)
 		return -EINVAL;
 
-	xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id,
+	xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id,
 			      hdr->sadb_msg_type == SADB_X_SPDDELETE2);
 	if (xp == NULL)
 		return -ENOENT;
@@ -2378,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
 {
 	struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
 
-	return xfrm_policy_walk(dump_sp, &data);
+	return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data);
 }
 
 static int key_notify_policy_flush(struct km_event *c)
@@ -2405,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 {
 	struct km_event c;
 
-	xfrm_policy_flush();
+	xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN);
+	c.data.type = XFRM_POLICY_TYPE_MAIN;
 	c.event = XFRM_MSG_FLUSHPOLICY;
 	c.pid = hdr->sadb_msg_pid;
 	c.seq = hdr->sadb_msg_seq;
@@ -2667,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
 
 static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
 {
+	if (xp && xp->type != XFRM_POLICY_TYPE_MAIN)
+		return 0;
+
 	switch (c->event) {
 	case XFRM_MSG_POLEXPIRE:
 		return key_notify_policy_expire(xp, c);
@@ -2675,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e
 	case XFRM_MSG_UPDPOLICY:
 		return key_notify_policy(xp, dir, c);
 	case XFRM_MSG_FLUSHPOLICY:
+		if (c->data.type != XFRM_POLICY_TYPE_MAIN)
+			break;
 		return key_notify_policy_flush(c);
 	default:
 		printk("pfkey: Unknown policy event %d\n", c->event);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7303b820bea42..c59a78d2923a5 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -786,6 +786,22 @@ static int verify_policy_dir(__u8 dir)
 	return 0;
 }
 
+static int verify_policy_type(__u8 type)
+{
+	switch (type) {
+	case XFRM_POLICY_TYPE_MAIN:
+#ifdef CONFIG_XFRM_SUB_POLICY
+	case XFRM_POLICY_TYPE_SUB:
+#endif
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	return 0;
+}
+
 static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 {
 	switch (p->share) {
@@ -879,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
 	return 0;
 }
 
+static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1];
+	struct xfrm_userpolicy_type *upt;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
+	int err;
+
+	if (rt) {
+		if (rt->rta_len < sizeof(*upt))
+			return -EINVAL;
+
+		upt = RTA_DATA(rt);
+		type = upt->type;
+	}
+
+	err = verify_policy_type(type);
+	if (err)
+		return err;
+
+	*tp = type;
+	return 0;
+}
+
 static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
 {
 	xp->priority = p->priority;
@@ -917,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
 
 	copy_from_user_policy(xp, p);
 
+	err = copy_from_user_policy_type(&xp->type, xfrma);
+	if (err)
+		goto error;
+
 	if (!(err = copy_from_user_tmpl(xp, xfrma)))
 		err = copy_from_user_sec_ctx(xp, xfrma);
-
-	if (err) {
-		*errp = err;
-		kfree(xp);
-		xp = NULL;
-	}
+	if (err)
+		goto error;
 
 	return xp;
+ error:
+	*errp = err;
+	kfree(xp);
+	return NULL;
 }
 
 static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
@@ -1037,6 +1080,29 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s
 	return 0;
 }
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	struct xfrm_userpolicy_type upt;
+
+	memset(&upt, 0, sizeof(upt));
+	upt.type = xp->type;
+
+	RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+#else
+static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	return 0;
+}
+#endif
+
 static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
 {
 	struct xfrm_dump_info *sp = ptr;
@@ -1060,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 		goto nlmsg_failure;
 	if (copy_to_user_sec_ctx(xp, skb))
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 out:
@@ -1081,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
 	info.nlmsg_flags = NLM_F_MULTI;
 	info.this_idx = 0;
 	info.start_idx = cb->args[0];
-	(void) xfrm_policy_walk(dump_one_policy, &info);
+	(void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info);
+#ifdef CONFIG_XFRM_SUB_POLICY
+	(void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info);
+#endif
 	cb->args[0] = info.this_idx;
 
 	return skb->len;
@@ -1117,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 {
 	struct xfrm_policy *xp;
 	struct xfrm_userpolicy_id *p;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err;
 	struct km_event c;
 	int delete;
@@ -1124,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 	p = NLMSG_DATA(nlh);
 	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
 
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
+
 	err = verify_policy_dir(p->dir);
 	if (err)
 		return err;
 
 	if (p->index)
-		xp = xfrm_policy_byid(p->dir, p->index, delete);
+		xp = xfrm_policy_byid(type, p->dir, p->index, delete);
 	else {
 		struct rtattr **rtattrs = (struct rtattr **)xfrma;
 		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1146,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete);
+		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
 		security_xfrm_policy_free(&tmp);
 	}
 	if (xp == NULL)
@@ -1329,9 +1405,16 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 
 static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 {
-struct km_event c;
+	struct km_event c;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
+	int err;
+
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
 
-	xfrm_policy_flush();
+	xfrm_policy_flush(type);
+	c.data.type = type;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
 	c.pid = nlh->nlmsg_pid;
@@ -1344,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
 	struct xfrm_policy *xp;
 	struct xfrm_user_polexpire *up = NLMSG_DATA(nlh);
 	struct xfrm_userpolicy_info *p = &up->pol;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err = -ENOENT;
 
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
+
 	if (p->index)
-		xp = xfrm_policy_byid(p->dir, p->index, 0);
+		xp = xfrm_policy_byid(type, p->dir, p->index, 0);
 	else {
 		struct rtattr **rtattrs = (struct rtattr **)xfrma;
 		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1364,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
 			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0);
+		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0);
 		security_xfrm_policy_free(&tmp);
 	}
 
@@ -1818,6 +1906,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 		goto nlmsg_failure;
 	if (copy_to_user_state_sec_ctx(x, skb))
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 	return skb->len;
@@ -1898,6 +1988,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 	}
 
 	copy_from_user_policy(xp, p);
+	xp->type = XFRM_POLICY_TYPE_MAIN;
 	copy_templates(xp, ut, nr);
 
 	if (!xp->security) {
@@ -1931,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 		goto nlmsg_failure;
 	if (copy_to_user_sec_ctx(xp, skb))
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 	upe->hard = !!hard;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -2002,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
 	copy_to_user_policy(xp, p, dir);
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 
@@ -2019,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
 	unsigned char *b;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_userpolicy_type upt;
+#endif
 	int len = NLMSG_LENGTH(0);
 
 	skb = alloc_skb(len, GFP_ATOMIC);
@@ -2028,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 
 
 	nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
+	nlh->nlmsg_flags = 0;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	memset(&upt, 0, sizeof(upt));
+	upt.type = c->data.type;
+	RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+#endif
 
 	nlh->nlmsg_len = skb->tail - b;
 
@@ -2035,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
 
 nlmsg_failure:
+#ifdef CONFIG_XFRM_SUB_POLICY
+rtattr_failure:
+#endif
 	kfree_skb(skb);
 	return -1;
 }
-- 
GitLab


From 58c949d1b9551f3e4ba9dde4aeda341ecf5e42b5 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 22:51:02 -0700
Subject: [PATCH 0584/1063] [XFRM] IPV6: Add sort functions to combine
 templates/states for IPsec.

Add sort functions to combine templates/states for IPsec.
For the outbound transformation order, transport-mode AH needs special care:
it must come last among the transport-mode transformations.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_state.c | 97 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 9c95b9d3e1108..e0b8f3c5caa24 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -156,12 +156,109 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto,
 	return x0;
 }
 
+static int
+__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
+{
+	int i;
+	int j = 0;
+
+	/* Rule 1: select IPsec transport except AH */
+	for (i = 0; i < n; i++) {
+		if (src[i]->props.mode == XFRM_MODE_TRANSPORT &&
+		    src[i]->id.proto != IPPROTO_AH) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
+
+	/* XXX: Rule 2: select MIPv6 RO or inbound trigger */
+
+	/* Rule 3: select IPsec transport AH */
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    src[i]->props.mode == XFRM_MODE_TRANSPORT &&
+		    src[i]->id.proto == IPPROTO_AH) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
+
+	/* Rule 4: select IPsec tunnel */
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    src[i]->props.mode == XFRM_MODE_TUNNEL) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (likely(j == n))
+		goto end;
+
+	/* Final rule */
+	for (i = 0; i < n; i++) {
+		if (src[i]) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+
+ end:
+	return 0;
+}
+
+static int
+__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
+{
+	int i;
+	int j = 0;
+
+	/* Rule 1: select IPsec transport */
+	for (i = 0; i < n; i++) {
+		if (src[i]->mode == XFRM_MODE_TRANSPORT) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
+
+	/* XXX: Rule 2: select MIPv6 RO or inbound trigger */
+
+	/* Rule 3: select IPsec tunnel */
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    src[i]->mode == XFRM_MODE_TUNNEL) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (likely(j == n))
+		goto end;
+
+	/* Final rule */
+	for (i = 0; i < n; i++) {
+		if (src[i]) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+
+ end:
+	return 0;
+}
+
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.state_lookup		= __xfrm6_state_lookup,
 	.state_lookup_byaddr	= __xfrm6_state_lookup_byaddr,
 	.find_acq		= __xfrm6_find_acq,
+	.tmpl_sort		= __xfrm6_tmpl_sort,
+	.state_sort		= __xfrm6_state_sort,
 };
 
 void __init xfrm6_state_init(void)
-- 
GitLab


From 64d9fdda8e1bdf416b2d9203c3ad9c249ea301be Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Wed, 23 Aug 2006 22:54:07 -0700
Subject: [PATCH 0585/1063] [XFRM] IPV6: Support Mobile IPv6 extension headers
 sorting.

Support sorting of Mobile IPv6 extension headers when two transformation
policies are in use. On outbound, Mobile IPv6 extension headers should be
placed after IPsec transport mode (other than AH), but before transport AH.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_state.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index e0b8f3c5caa24..6269584e610e4 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -173,7 +173,19 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
 	if (j == n)
 		goto end;
 
-	/* XXX: Rule 2: select MIPv6 RO or inbound trigger */
+	/* Rule 2: select MIPv6 RO or inbound trigger */
+#ifdef CONFIG_IPV6_MIP6
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+		     src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
+#endif
 
 	/* Rule 3: select IPsec transport AH */
 	for (i = 0; i < n; i++) {
@@ -226,7 +238,19 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 	if (j == n)
 		goto end;
 
-	/* XXX: Rule 2: select MIPv6 RO or inbound trigger */
+	/* Rule 2: select MIPv6 RO or inbound trigger */
+#ifdef CONFIG_IPV6_MIP6
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+		     src[i]->mode == XFRM_MODE_IN_TRIGGER)) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
+#endif
 
 	/* Rule 3: select IPsec tunnel */
 	for (i = 0; i < n; i++) {
-- 
GitLab


From 2770834c9f44afd1bfa13914c7285470775af657 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 00:13:10 -0700
Subject: [PATCH 0586/1063] [XFRM]: Pull xfrm_state_bydst hash table knowledge
 out of afinfo.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  16 ------
 net/ipv4/xfrm4_state.c |  53 --------------------
 net/ipv6/xfrm6_state.c |  56 ---------------------
 net/xfrm/xfrm_state.c  | 110 +++++++++++++++++++++++++++++++++++++----
 4 files changed, 100 insertions(+), 135 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c75b3287d8f8a..cc83443f301ed 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -243,7 +243,6 @@ extern int __xfrm_state_delete(struct xfrm_state *x);
 
 struct xfrm_state_afinfo {
 	unsigned short		family;
-	struct list_head	*state_bydst;
 	struct list_head	*state_bysrc;
 	struct list_head	*state_byspi;
 	int			(*init_flags)(struct xfrm_state *x);
@@ -252,9 +251,6 @@ struct xfrm_state_afinfo {
 						xfrm_address_t *daddr, xfrm_address_t *saddr);
 	struct xfrm_state	*(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto);
 	struct xfrm_state	*(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto);
-	struct xfrm_state	*(*find_acq)(u8 mode, u32 reqid, u8 proto, 
-					     xfrm_address_t *daddr, xfrm_address_t *saddr, 
-					     int create);
 	int			(*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
 	int			(*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
 };
@@ -455,18 +451,6 @@ unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
 	return h;
 }
 
-static __inline__
-unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
-{
-	switch (family) {
-	case AF_INET:
-		return __xfrm4_dst_hash(addr);
-	case AF_INET6:
-		return __xfrm6_dst_hash(addr);
-	}
-	return 0;
-}
-
 static __inline__
 unsigned __xfrm4_src_hash(xfrm_address_t *addr)
 {
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 616be131b4e36..9dc1afc17b6db 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -88,65 +88,12 @@ __xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	return NULL;
 }
 
-static struct xfrm_state *
-__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, 
-		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
-		 int create)
-{
-	struct xfrm_state *x, *x0;
-	unsigned h = __xfrm4_dst_hash(daddr);
-
-	x0 = NULL;
-
-	list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
-		if (x->props.family == AF_INET &&
-		    daddr->a4 == x->id.daddr.a4 &&
-		    mode == x->props.mode &&
-		    proto == x->id.proto &&
-		    saddr->a4 == x->props.saddr.a4 &&
-		    reqid == x->props.reqid &&
-		    x->km.state == XFRM_STATE_ACQ &&
-		    !x->id.spi) {
-			    x0 = x;
-			    break;
-		    }
-	}
-	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
-		x0->sel.daddr.a4 = daddr->a4;
-		x0->sel.saddr.a4 = saddr->a4;
-		x0->sel.prefixlen_d = 32;
-		x0->sel.prefixlen_s = 32;
-		x0->props.saddr.a4 = saddr->a4;
-		x0->km.state = XFRM_STATE_ACQ;
-		x0->id.daddr.a4 = daddr->a4;
-		x0->id.proto = proto;
-		x0->props.family = AF_INET;
-		x0->props.mode = mode;
-		x0->props.reqid = reqid;
-		x0->props.family = AF_INET;
-		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-		xfrm_state_hold(x0);
-		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
-		add_timer(&x0->timer);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
-		h = __xfrm4_src_hash(saddr);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h);
-		wake_up(&km_waitq);
-	}
-	if (x0)
-		xfrm_state_hold(x0);
-	return x0;
-}
-
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
 	.state_lookup		= __xfrm4_state_lookup,
 	.state_lookup_byaddr	= __xfrm4_state_lookup_byaddr,
-	.find_acq		= __xfrm4_find_acq,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 6269584e610e4..40fcaab7e0288 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -101,61 +101,6 @@ __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
 	return NULL;
 }
 
-static struct xfrm_state *
-__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, 
-		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
-		 int create)
-{
-	struct xfrm_state *x, *x0;
-	unsigned h = __xfrm6_dst_hash(daddr);
-
-	x0 = NULL;
-
-	list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
-		if (x->props.family == AF_INET6 &&
-		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
-		    mode == x->props.mode &&
-		    proto == x->id.proto &&
-		    ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
-		    reqid == x->props.reqid &&
-		    x->km.state == XFRM_STATE_ACQ &&
-		    !x->id.spi) {
-			    x0 = x;
-			    break;
-		    }
-	}
-	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
-		ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6,
-			       (struct in6_addr *)daddr);
-		ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6,
-			       (struct in6_addr *)saddr);
-		x0->sel.prefixlen_d = 128;
-		x0->sel.prefixlen_s = 128;
-		ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6,
-			       (struct in6_addr *)saddr);
-		x0->km.state = XFRM_STATE_ACQ;
-		ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6,
-			       (struct in6_addr *)daddr);
-		x0->id.proto = proto;
-		x0->props.family = AF_INET6;
-		x0->props.mode = mode;
-		x0->props.reqid = reqid;
-		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-		xfrm_state_hold(x0);
-		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
-		add_timer(&x0->timer);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
-		h = __xfrm6_src_hash(saddr);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h);
-		wake_up(&km_waitq);
-	}
-	if (x0)
-		xfrm_state_hold(x0);
-	return x0;
-}
-
 static int
 __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
 {
@@ -280,7 +225,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.state_lookup		= __xfrm6_state_lookup,
 	.state_lookup_byaddr	= __xfrm6_state_lookup_byaddr,
-	.find_acq		= __xfrm6_find_acq,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
 };
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 622e92a08d0ba..80f5f9dc2b9e1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -48,6 +48,18 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
 static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
 
+static __inline__
+unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_dst_hash(addr);
+	case AF_INET6:
+		return __xfrm6_dst_hash(addr);
+	}
+	return 0;
+}
+
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
 
@@ -489,6 +501,89 @@ void xfrm_state_insert(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
+/* xfrm_state_lock is held */
+static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
+{
+	unsigned int h = xfrm_dst_hash(daddr, family);
+	struct xfrm_state *x;
+
+	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+		if (x->props.reqid  != reqid ||
+		    x->props.mode   != mode ||
+		    x->props.family != family ||
+		    x->km.state     != XFRM_STATE_ACQ ||
+		    x->id.spi       != 0)
+			continue;
+
+		switch (family) {
+		case AF_INET:
+			if (x->id.daddr.a4    != daddr->a4 ||
+			    x->props.saddr.a4 != saddr->a4)
+				continue;
+			break;
+		case AF_INET6:
+			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
+					     (struct in6_addr *)daddr) ||
+			    !ipv6_addr_equal((struct in6_addr *)
+					     x->props.saddr.a6,
+					     (struct in6_addr *)saddr))
+				continue;
+			break;
+		};
+
+		xfrm_state_hold(x);
+		return x;
+	}
+
+	if (!create)
+		return NULL;
+
+	x = xfrm_state_alloc();
+	if (likely(x)) {
+		switch (family) {
+		case AF_INET:
+			x->sel.daddr.a4 = daddr->a4;
+			x->sel.saddr.a4 = saddr->a4;
+			x->sel.prefixlen_d = 32;
+			x->sel.prefixlen_s = 32;
+			x->props.saddr.a4 = saddr->a4;
+			x->id.daddr.a4 = daddr->a4;
+			break;
+
+		case AF_INET6:
+			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
+				       (struct in6_addr *)daddr);
+			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
+				       (struct in6_addr *)saddr);
+			x->sel.prefixlen_d = 128;
+			x->sel.prefixlen_s = 128;
+			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
+				       (struct in6_addr *)saddr);
+			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
+				       (struct in6_addr *)daddr);
+			break;
+		};
+
+		x->km.state = XFRM_STATE_ACQ;
+		x->id.proto = proto;
+		x->props.family = family;
+		x->props.mode = mode;
+		x->props.reqid = reqid;
+		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+		xfrm_state_hold(x);
+		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+		add_timer(&x->timer);
+		xfrm_state_hold(x);
+		list_add_tail(&x->bydst, xfrm_state_bydst+h);
+		h = xfrm_src_hash(saddr, family);
+		xfrm_state_hold(x);
+		list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
+		wake_up(&km_waitq);
+	}
+
+	return x;
+}
+
 static inline struct xfrm_state *
 __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x,
 		    int use_spi)
@@ -533,9 +628,9 @@ int xfrm_state_add(struct xfrm_state *x)
 	}
 
 	if (use_spi && !x1)
-		x1 = afinfo->find_acq(
-			x->props.mode, x->props.reqid, x->id.proto,
-			&x->id.daddr, &x->props.saddr, 0);
+		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
+				     x->id.proto,
+				     &x->id.daddr, &x->props.saddr, 0);
 
 	__xfrm_state_insert(x);
 	err = 0;
@@ -716,14 +811,11 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
 	      int create, unsigned short family)
 {
 	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
+	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
+
 	return x;
 }
 EXPORT_SYMBOL(xfrm_find_acq);
@@ -1181,7 +1273,6 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
 		err = -ENOBUFS;
 	else {
-		afinfo->state_bydst = xfrm_state_bydst;
 		afinfo->state_bysrc = xfrm_state_bysrc;
 		afinfo->state_byspi = xfrm_state_byspi;
 		xfrm_state_afinfo[afinfo->family] = afinfo;
@@ -1206,7 +1297,6 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 			xfrm_state_afinfo[afinfo->family] = NULL;
 			afinfo->state_byspi = NULL;
 			afinfo->state_bysrc = NULL;
-			afinfo->state_bydst = NULL;
 		}
 	}
 	write_unlock_bh(&xfrm_state_afinfo_lock);
-- 
GitLab


From edcd582152090bfb0ccb4ad444c151798a73eda8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 00:42:45 -0700
Subject: [PATCH 0587/1063] [XFRM]: Pull xfrm_state_by{spi,src} hash table
 knowledge out of afinfo.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  78 ---------------
 net/ipv4/xfrm4_state.c |  28 ------
 net/ipv6/xfrm6_state.c |  40 --------
 net/xfrm/xfrm_state.c  | 210 +++++++++++++++++++++++++++++++----------
 4 files changed, 159 insertions(+), 197 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index cc83443f301ed..dd3b84b9c04e8 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -243,14 +243,10 @@ extern int __xfrm_state_delete(struct xfrm_state *x);
 
 struct xfrm_state_afinfo {
 	unsigned short		family;
-	struct list_head	*state_bysrc;
-	struct list_head	*state_byspi;
 	int			(*init_flags)(struct xfrm_state *x);
 	void			(*init_tempsel)(struct xfrm_state *x, struct flowi *fl,
 						struct xfrm_tmpl *tmpl,
 						xfrm_address_t *daddr, xfrm_address_t *saddr);
-	struct xfrm_state	*(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto);
-	struct xfrm_state	*(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto);
 	int			(*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
 	int			(*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
 };
@@ -431,80 +427,6 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
 }
 #endif
 
-#define XFRM_DST_HSIZE		1024
-
-static __inline__
-unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
-{
-	unsigned h;
-	h = ntohl(addr->a4);
-	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
-	return h;
-}
-
-static __inline__
-unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
-{
-	unsigned h;
-	h = ntohl(addr->a6[2]^addr->a6[3]);
-	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
-	return h;
-}
-
-static __inline__
-unsigned __xfrm4_src_hash(xfrm_address_t *addr)
-{
-	return __xfrm4_dst_hash(addr);
-}
-
-static __inline__
-unsigned __xfrm6_src_hash(xfrm_address_t *addr)
-{
-	return __xfrm6_dst_hash(addr);
-}
-
-static __inline__
-unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
-{
-	switch (family) {
-	case AF_INET:
-		return __xfrm4_src_hash(addr);
-	case AF_INET6:
-		return __xfrm6_src_hash(addr);
-	}
-	return 0;
-}
-
-static __inline__
-unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
-{
-	unsigned h;
-	h = ntohl(addr->a4^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
-	return h;
-}
-
-static __inline__
-unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
-{
-	unsigned h;
-	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
-	return h;
-}
-
-static __inline__
-unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
-{
-	switch (family) {
-	case AF_INET:
-		return __xfrm4_spi_hash(addr, spi, proto);
-	case AF_INET6:
-		return __xfrm6_spi_hash(addr, spi, proto);
-	}
-	return 0;	/*XXX*/
-}
-
 extern void __xfrm_state_destroy(struct xfrm_state *);
 
 static inline void __xfrm_state_put(struct xfrm_state *x)
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 9dc1afc17b6db..6a2a4ab42772a 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -62,38 +62,10 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.family = AF_INET;
 }
 
-static struct xfrm_state *
-__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
-{
-	unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
-	struct xfrm_state *x;
-
-	list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
-		if (x->props.family == AF_INET &&
-		    spi == x->id.spi &&
-		    daddr->a4 == x->id.daddr.a4 &&
-		    proto == x->id.proto) {
-			xfrm_state_hold(x);
-			return x;
-		}
-	}
-	return NULL;
-}
-
-/* placeholder until ipv4's code is written */
-static struct xfrm_state *
-__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
-			    u8 proto)
-{
-	return NULL;
-}
-
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
-	.state_lookup		= __xfrm4_state_lookup,
-	.state_lookup_byaddr	= __xfrm4_state_lookup_byaddr,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 40fcaab7e0288..d88cd92c864e6 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -63,44 +63,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.family = AF_INET6;
 }
 
-static struct xfrm_state *
-__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
-			    u8 proto)
-{
-	struct xfrm_state *x = NULL;
-	unsigned h;
-
-	h = __xfrm6_src_hash(saddr);
-	list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) {
-		if (x->props.family == AF_INET6 &&
-		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
-		    ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
-		    proto == x->id.proto) {
-			xfrm_state_hold(x);
-			return x;
-		}
-	}
-	return NULL;
-}
-
-static struct xfrm_state *
-__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
-{
-	unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
-	struct xfrm_state *x;
-
-	list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
-		if (x->props.family == AF_INET6 &&
-		    spi == x->id.spi &&
-		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
-		    proto == x->id.proto) {
-			xfrm_state_hold(x);
-			return x;
-		}
-	}
-	return NULL;
-}
-
 static int
 __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
 {
@@ -223,8 +185,6 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.init_tempsel		= __xfrm6_init_tempsel,
-	.state_lookup		= __xfrm6_state_lookup,
-	.state_lookup_byaddr	= __xfrm6_state_lookup_byaddr,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
 };
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 80f5f9dc2b9e1..4a3832f81c374 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -38,6 +38,8 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
 
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
+#define XFRM_DST_HSIZE		1024
+
 /* Hash table to find appropriate SA towards given target (endpoint
  * of tunnel or destination of transport mode) allowed by selector.
  *
@@ -48,6 +50,48 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
 static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
 
+static __inline__
+unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
+{
+	unsigned h;
+	h = ntohl(addr->a4);
+	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
+{
+	unsigned h;
+	h = ntohl(addr->a6[2]^addr->a6[3]);
+	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned __xfrm4_src_hash(xfrm_address_t *addr)
+{
+	return __xfrm4_dst_hash(addr);
+}
+
+static __inline__
+unsigned __xfrm6_src_hash(xfrm_address_t *addr)
+{
+	return __xfrm6_dst_hash(addr);
+}
+
+static __inline__
+unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_src_hash(addr);
+	case AF_INET6:
+		return __xfrm6_src_hash(addr);
+	}
+	return 0;
+}
+
 static __inline__
 unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
 {
@@ -60,6 +104,36 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
 	return 0;
 }
 
+static __inline__
+unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
+{
+	unsigned h;
+	h = ntohl(addr->a4^spi^proto);
+	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
+{
+	unsigned h;
+	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
+	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
+	return h;
+}
+
+static __inline__
+unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_spi_hash(addr, spi, proto);
+	case AF_INET6:
+		return __xfrm6_spi_hash(addr, spi, proto);
+	}
+	return 0;	/*XXX*/
+}
+
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
 
@@ -342,6 +416,83 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	return 0;
 }
 
+static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
+{
+	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
+	struct xfrm_state *x;
+
+	list_for_each_entry(x, xfrm_state_byspi+h, byspi) {
+		if (x->props.family != family ||
+		    x->id.spi       != spi ||
+		    x->id.proto     != proto)
+			continue;
+
+		switch (family) {
+		case AF_INET:
+			if (x->id.daddr.a4 != daddr->a4)
+				continue;
+			break;
+		case AF_INET6:
+			if (!ipv6_addr_equal((struct in6_addr *)daddr,
+					     (struct in6_addr *)
+					     x->id.daddr.a6))
+				continue;
+			break;
+		};
+
+		xfrm_state_hold(x);
+		return x;
+	}
+
+	return NULL;
+}
+
+static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
+{
+	unsigned int h = xfrm_src_hash(saddr, family);
+	struct xfrm_state *x;
+
+	list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) {
+		if (x->props.family != family ||
+		    x->id.proto     != proto)
+			continue;
+
+		switch (family) {
+		case AF_INET:
+			if (x->id.daddr.a4 != daddr->a4 ||
+			    x->props.saddr.a4 != saddr->a4)
+				continue;
+			break;
+		case AF_INET6:
+			if (!ipv6_addr_equal((struct in6_addr *)daddr,
+					     (struct in6_addr *)
+					     x->id.daddr.a6) ||
+			    !ipv6_addr_equal((struct in6_addr *)saddr,
+					     (struct in6_addr *)
+					     x->props.saddr.a6))
+				continue;
+			break;
+		};
+
+		xfrm_state_hold(x);
+		return x;
+	}
+
+	return NULL;
+}
+
+static inline struct xfrm_state *
+__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
+{
+	if (use_spi)
+		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
+					   x->id.proto, family);
+	else
+		return __xfrm_state_lookup_byaddr(&x->id.daddr,
+						  &x->props.saddr,
+						  x->id.proto, family);
+}
+
 struct xfrm_state *
 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
 		struct flowi *fl, struct xfrm_tmpl *tmpl,
@@ -353,14 +504,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	int acquire_in_progress = 0;
 	int error = 0;
 	struct xfrm_state *best = NULL;
-	struct xfrm_state_afinfo *afinfo;
 	
-	afinfo = xfrm_state_get_afinfo(family);
-	if (afinfo == NULL) {
-		*err = -EAFNOSUPPORT;
-		return NULL;
-	}
-
 	spin_lock_bh(&xfrm_state_lock);
 	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
 		if (x->props.family == family &&
@@ -406,8 +550,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	x = best;
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
-		    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
-		                               tmpl->id.proto)) != NULL) {
+		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
+					      tmpl->id.proto, family)) != NULL) {
 			xfrm_state_put(x0);
 			error = -EEXIST;
 			goto out;
@@ -457,7 +601,6 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	else
 		*err = acquire_in_progress ? -EAGAIN : error;
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 	return x;
 }
 
@@ -584,34 +727,20 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 	return x;
 }
 
-static inline struct xfrm_state *
-__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x,
-		    int use_spi)
-{
-	if (use_spi)
-		return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
-	else
-		return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto);
-}
-
 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
-	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_state *x1;
 	int family;
 	int err;
 	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	family = x->props.family;
-	afinfo = xfrm_state_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
 
 	spin_lock_bh(&xfrm_state_lock);
 
-	x1 = __xfrm_state_locate(afinfo, x, use_spi);
+	x1 = __xfrm_state_locate(x, use_spi, family);
 	if (x1) {
 		xfrm_state_put(x1);
 		x1 = NULL;
@@ -637,7 +766,6 @@ int xfrm_state_add(struct xfrm_state *x)
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 
 	if (!err)
 		xfrm_flush_all_bundles();
@@ -653,17 +781,12 @@ EXPORT_SYMBOL(xfrm_state_add);
 
 int xfrm_state_update(struct xfrm_state *x)
 {
-	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_state *x1;
 	int err;
 	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
-	afinfo = xfrm_state_get_afinfo(x->props.family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
-
 	spin_lock_bh(&xfrm_state_lock);
-	x1 = __xfrm_state_locate(afinfo, x, use_spi);
+	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
 
 	err = -ESRCH;
 	if (!x1)
@@ -683,7 +806,6 @@ int xfrm_state_update(struct xfrm_state *x)
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 
 	if (err)
 		return err;
@@ -776,14 +898,10 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
 		  unsigned short family)
 {
 	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->state_lookup(daddr, spi, proto);
+	x = __xfrm_state_lookup(daddr, spi, proto, family);
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 	return x;
 }
 EXPORT_SYMBOL(xfrm_state_lookup);
@@ -793,14 +911,10 @@ xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			 u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->state_lookup_byaddr(daddr, saddr, proto);
+	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 	return x;
 }
 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
@@ -1272,11 +1386,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 	write_lock_bh(&xfrm_state_afinfo_lock);
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
 		err = -ENOBUFS;
-	else {
-		afinfo->state_bysrc = xfrm_state_bysrc;
-		afinfo->state_byspi = xfrm_state_byspi;
+	else
 		xfrm_state_afinfo[afinfo->family] = afinfo;
-	}
 	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
 }
@@ -1293,11 +1404,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
 			err = -EINVAL;
-		else {
+		else
 			xfrm_state_afinfo[afinfo->family] = NULL;
-			afinfo->state_byspi = NULL;
-			afinfo->state_bysrc = NULL;
-		}
 	}
 	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
-- 
GitLab


From 8f126e37c0b250310a48a609bedf92a19a5559ec Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 02:45:07 -0700
Subject: [PATCH 0588/1063] [XFRM]: Convert xfrm_state hash linkage to hlists.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    |  6 +--
 net/xfrm/xfrm_state.c | 92 ++++++++++++++++++++++++-------------------
 2 files changed, 54 insertions(+), 44 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dd3b84b9c04e8..3405e5d9d51c2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -94,9 +94,9 @@ extern struct mutex xfrm_cfg_mutex;
 struct xfrm_state
 {
 	/* Note: bydst is re-used during gc */
-	struct list_head	bydst;
-	struct list_head	bysrc;
-	struct list_head	byspi;
+	struct hlist_node	bydst;
+	struct hlist_node	bysrc;
+	struct hlist_node	byspi;
 
 	atomic_t		refcnt;
 	spinlock_t		lock;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 4a3832f81c374..fe3c8c38d5e12 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -46,9 +46,9 @@ static DEFINE_SPINLOCK(xfrm_state_lock);
  * Main use is finding SA after policy selected tunnel or transport mode.
  * Also, it can be used by ah/esp icmp error handler to find offending SA.
  */
-static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
-static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
-static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
+static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE];
+static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE];
+static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE];
 
 static __inline__
 unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
@@ -141,7 +141,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 static int xfrm_state_gc_flush_bundles;
@@ -178,8 +178,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 static void xfrm_state_gc_task(void *data)
 {
 	struct xfrm_state *x;
-	struct list_head *entry, *tmp;
-	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
 	if (xfrm_state_gc_flush_bundles) {
 		xfrm_state_gc_flush_bundles = 0;
@@ -187,13 +187,13 @@ static void xfrm_state_gc_task(void *data)
 	}
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_splice_init(&xfrm_state_gc_list, &gc_list);
+	gc_list.first = xfrm_state_gc_list.first;
+	INIT_HLIST_HEAD(&xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	list_for_each_safe(entry, tmp, &gc_list) {
-		x = list_entry(entry, struct xfrm_state, bydst);
+	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
 		xfrm_state_gc_destroy(x);
-	}
+
 	wake_up(&km_waitq);
 }
 
@@ -287,9 +287,9 @@ struct xfrm_state *xfrm_state_alloc(void)
 	if (x) {
 		atomic_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
-		INIT_LIST_HEAD(&x->bydst);
-		INIT_LIST_HEAD(&x->bysrc);
-		INIT_LIST_HEAD(&x->byspi);
+		INIT_HLIST_NODE(&x->bydst);
+		INIT_HLIST_NODE(&x->bysrc);
+		INIT_HLIST_NODE(&x->byspi);
 		init_timer(&x->timer);
 		x->timer.function = xfrm_timer_handler;
 		x->timer.data	  = (unsigned long)x;
@@ -314,7 +314,7 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_add(&x->bydst, &xfrm_state_gc_list);
+	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
 }
@@ -327,12 +327,12 @@ int __xfrm_state_delete(struct xfrm_state *x)
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
-		list_del(&x->bydst);
+		hlist_del(&x->bydst);
 		__xfrm_state_put(x);
-		list_del(&x->bysrc);
+		hlist_del(&x->bysrc);
 		__xfrm_state_put(x);
 		if (x->id.spi) {
-			list_del(&x->byspi);
+			hlist_del(&x->byspi);
 			__xfrm_state_put(x);
 		}
 		spin_unlock(&xfrm_state_lock);
@@ -378,12 +378,13 @@ EXPORT_SYMBOL(xfrm_state_delete);
 void xfrm_state_flush(u8 proto)
 {
 	int i;
-	struct xfrm_state *x;
 
 	spin_lock_bh(&xfrm_state_lock);
 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		struct hlist_node *entry;
+		struct xfrm_state *x;
 restart:
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
 			if (!xfrm_state_kern(x) &&
 			    xfrm_id_proto_match(x->id.proto, proto)) {
 				xfrm_state_hold(x);
@@ -420,8 +421,9 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8
 {
 	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
 	struct xfrm_state *x;
+	struct hlist_node *entry;
 
-	list_for_each_entry(x, xfrm_state_byspi+h, byspi) {
+	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi       != spi ||
 		    x->id.proto     != proto)
@@ -451,8 +453,9 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
 {
 	unsigned int h = xfrm_src_hash(saddr, family);
 	struct xfrm_state *x;
+	struct hlist_node *entry;
 
-	list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) {
+	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto     != proto)
 			continue;
@@ -499,14 +502,15 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
-	unsigned h = xfrm_dst_hash(daddr, family);
+	unsigned int h = xfrm_dst_hash(daddr, family);
+	struct hlist_node *entry;
 	struct xfrm_state *x, *x0;
 	int acquire_in_progress = 0;
 	int error = 0;
 	struct xfrm_state *best = NULL;
 	
 	spin_lock_bh(&xfrm_state_lock);
-	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
@@ -575,13 +579,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
-			list_add_tail(&x->bydst, xfrm_state_bydst+h);
+			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 			xfrm_state_hold(x);
-			list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
+			h = xfrm_src_hash(saddr, family);
+			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 			xfrm_state_hold(x);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
-				list_add(&x->byspi, xfrm_state_byspi+h);
+				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
 				xfrm_state_hold(x);
 			}
 			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
@@ -608,19 +613,19 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 {
 	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
 
-	list_add(&x->bydst, xfrm_state_bydst+h);
+	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 	xfrm_state_hold(x);
 
 	h = xfrm_src_hash(&x->props.saddr, x->props.family);
 
-	list_add(&x->bysrc, xfrm_state_bysrc+h);
+	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 	xfrm_state_hold(x);
 
 	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
 				  x->props.family);
 
-		list_add(&x->byspi, xfrm_state_byspi+h);
+		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
 		xfrm_state_hold(x);
 	}
 
@@ -648,9 +653,10 @@ EXPORT_SYMBOL(xfrm_state_insert);
 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
 {
 	unsigned int h = xfrm_dst_hash(daddr, family);
+	struct hlist_node *entry;
 	struct xfrm_state *x;
 
-	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 		if (x->props.reqid  != reqid ||
 		    x->props.mode   != mode ||
 		    x->props.family != family ||
@@ -717,10 +723,10 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
 		add_timer(&x->timer);
 		xfrm_state_hold(x);
-		list_add_tail(&x->bydst, xfrm_state_bydst+h);
+		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 		h = xfrm_src_hash(saddr, family);
 		xfrm_state_hold(x);
-		list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
+		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 		wake_up(&km_waitq);
 	}
 
@@ -977,11 +983,14 @@ EXPORT_SYMBOL(xfrm_state_sort);
 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
 {
 	int i;
-	struct xfrm_state *x;
 
 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
+		struct hlist_node *entry;
+		struct xfrm_state *x;
+
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+			if (x->km.seq == seq &&
+			    x->km.state == XFRM_STATE_ACQ) {
 				xfrm_state_hold(x);
 				return x;
 			}
@@ -1047,7 +1056,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
 	if (x->id.spi) {
 		spin_lock_bh(&xfrm_state_lock);
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		list_add(&x->byspi, xfrm_state_byspi+h);
+		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
 		xfrm_state_hold(x);
 		spin_unlock_bh(&xfrm_state_lock);
 		wake_up(&km_waitq);
@@ -1060,12 +1069,13 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 {
 	int i;
 	struct xfrm_state *x;
+	struct hlist_node *entry;
 	int count = 0;
 	int err = 0;
 
 	spin_lock_bh(&xfrm_state_lock);
 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
 			if (xfrm_id_proto_match(x->id.proto, proto))
 				count++;
 		}
@@ -1076,7 +1086,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 	}
 
 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
 			if (!xfrm_id_proto_match(x->id.proto, proto))
 				continue;
 			err = func(x, --count, data);
@@ -1524,9 +1534,9 @@ void __init xfrm_state_init(void)
 	int i;
 
 	for (i=0; i<XFRM_DST_HSIZE; i++) {
-		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
-		INIT_LIST_HEAD(&xfrm_state_bysrc[i]);
-		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
+		INIT_HLIST_HEAD(&xfrm_state_bydst[i]);
+		INIT_HLIST_HEAD(&xfrm_state_bysrc[i]);
+		INIT_HLIST_HEAD(&xfrm_state_byspi[i]);
 	}
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
 }
-- 
GitLab


From f034b5d4efdfe0fb9e2a1ce1d95fa7914f24de49 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 03:08:07 -0700
Subject: [PATCH 0589/1063] [XFRM]: Dynamic xfrm_state hash table sizing.

The grow algorithm is simple. We grow if:

1) we see a hash chain collision at insert, and
2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and
3) the number of xfrm_state objects is > the current hash mask

All of this needs some tweaking.

Remove __initdata from "hashdist" so we can use it safely at run time.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bootmem.h |   2 +-
 mm/page_alloc.c         |   2 +-
 net/xfrm/xfrm_state.c   | 247 +++++++++++++++++++++++++++++++---------
 3 files changed, 197 insertions(+), 54 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1021f508d82ce..e319c649e4fdc 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -114,7 +114,7 @@ extern void *__init alloc_large_system_hash(const char *tablename,
 #else
 #define HASHDIST_DEFAULT 0
 #endif
-extern int __initdata hashdist;		/* Distribute hashes across NUMA nodes? */
+extern int hashdist;		/* Distribute hashes across NUMA nodes? */
 
 
 #endif /* _LINUX_BOOTMEM_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 54a4f5375bbae..3b5358a0561fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2363,7 +2363,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	return 0;
 }
 
-__initdata int hashdist = HASHDIST_DEFAULT;
+int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA
 static int __init set_hashdist(char *str)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fe3c8c38d5e12..445263c54c94e 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -18,6 +18,9 @@
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
 #include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
+#include <linux/cache.h>
 #include <asm/uaccess.h>
 
 struct sock *xfrm_nl;
@@ -38,102 +41,230 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
 
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
-#define XFRM_DST_HSIZE		1024
-
 /* Hash table to find appropriate SA towards given target (endpoint
  * of tunnel or destination of transport mode) allowed by selector.
  *
  * Main use is finding SA after policy selected tunnel or transport mode.
  * Also, it can be used by ah/esp icmp error handler to find offending SA.
  */
-static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE];
-static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE];
-static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE];
-
-static __inline__
-unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
+static struct hlist_head *xfrm_state_bydst __read_mostly;
+static struct hlist_head *xfrm_state_bysrc __read_mostly;
+static struct hlist_head *xfrm_state_byspi __read_mostly;
+static unsigned int xfrm_state_hmask __read_mostly;
+static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static unsigned int xfrm_state_num;
+
+static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
 {
-	unsigned h;
+	unsigned int h;
 	h = ntohl(addr->a4);
-	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
+	h = (h ^ (h>>16)) & hmask;
 	return h;
 }
 
-static __inline__
-unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
+static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
 {
-	unsigned h;
+	unsigned int h;
 	h = ntohl(addr->a6[2]^addr->a6[3]);
-	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
+	h = (h ^ (h>>16)) & hmask;
 	return h;
 }
 
-static __inline__
-unsigned __xfrm4_src_hash(xfrm_address_t *addr)
+static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
 {
-	return __xfrm4_dst_hash(addr);
+	return __xfrm4_dst_hash(addr, hmask);
 }
 
-static __inline__
-unsigned __xfrm6_src_hash(xfrm_address_t *addr)
+static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
 {
-	return __xfrm6_dst_hash(addr);
+	return __xfrm6_dst_hash(addr, hmask);
 }
 
-static __inline__
-unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
+static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,  unsigned int hmask)
 {
 	switch (family) {
 	case AF_INET:
-		return __xfrm4_src_hash(addr);
+		return __xfrm4_src_hash(addr, hmask);
 	case AF_INET6:
-		return __xfrm6_src_hash(addr);
+		return __xfrm6_src_hash(addr, hmask);
 	}
 	return 0;
 }
 
-static __inline__
-unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
+static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
+{
+	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
+}
+
+static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
 {
 	switch (family) {
 	case AF_INET:
-		return __xfrm4_dst_hash(addr);
+		return __xfrm4_dst_hash(addr, hmask);
 	case AF_INET6:
-		return __xfrm6_dst_hash(addr);
+		return __xfrm6_dst_hash(addr, hmask);
 	}
 	return 0;
 }
 
-static __inline__
-unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
+static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
+{
+	return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
+}
+
+static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
+					unsigned int hmask)
 {
-	unsigned h;
+	unsigned int h;
 	h = ntohl(addr->a4^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
+	h = (h ^ (h>>10) ^ (h>>20)) & hmask;
 	return h;
 }
 
-static __inline__
-unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
+static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
+					    unsigned int hmask)
 {
-	unsigned h;
+	unsigned int h;
 	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
+	h = (h ^ (h>>10) ^ (h>>20)) & hmask;
 	return h;
 }
 
-static __inline__
-unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
+static inline
+unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
+			 unsigned int hmask)
 {
 	switch (family) {
 	case AF_INET:
-		return __xfrm4_spi_hash(addr, spi, proto);
+		return __xfrm4_spi_hash(addr, spi, proto, hmask);
 	case AF_INET6:
-		return __xfrm6_spi_hash(addr, spi, proto);
+		return __xfrm6_spi_hash(addr, spi, proto, hmask);
 	}
 	return 0;	/*XXX*/
 }
 
+static inline unsigned int
+xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
+{
+	return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
+}
+
+static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
+{
+	struct hlist_head *n;
+
+	if (sz <= PAGE_SIZE)
+		n = kmalloc(sz, GFP_KERNEL);
+	else if (hashdist)
+		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
+	else
+		n = (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(sz));
+
+	if (n)
+		memset(n, 0, sz);
+
+	return n;
+}
+
+static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
+{
+	if (sz <= PAGE_SIZE)
+		kfree(n);
+	else if (hashdist)
+		vfree(n);
+	else
+		free_pages((unsigned long)n, get_order(sz));
+}
+
+static void xfrm_hash_transfer(struct hlist_head *list,
+			       struct hlist_head *ndsttable,
+			       struct hlist_head *nsrctable,
+			       struct hlist_head *nspitable,
+			       unsigned int nhashmask)
+{
+	struct hlist_node *entry, *tmp;
+	struct xfrm_state *x;
+
+	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
+		unsigned int h;
+
+		h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
+		hlist_add_head(&x->bydst, ndsttable+h);
+
+		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
+				    nhashmask);
+		hlist_add_head(&x->bysrc, nsrctable+h);
+
+		h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
+				    x->props.family, nhashmask);
+		hlist_add_head(&x->byspi, nspitable+h);
+	}
+}
+
+static unsigned long xfrm_hash_new_size(void)
+{
+	return ((xfrm_state_hmask + 1) << 1) *
+		sizeof(struct hlist_head);
+}
+
+static DEFINE_MUTEX(hash_resize_mutex);
+
+static void xfrm_hash_resize(void *__unused)
+{
+	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
+	unsigned long nsize, osize;
+	unsigned int nhashmask, ohashmask;
+	int i;
+
+	mutex_lock(&hash_resize_mutex);
+
+	nsize = xfrm_hash_new_size();
+	ndst = xfrm_state_hash_alloc(nsize);
+	if (!ndst)
+		goto out_unlock;
+	nsrc = xfrm_state_hash_alloc(nsize);
+	if (!nsrc) {
+		xfrm_state_hash_free(ndst, nsize);
+		goto out_unlock;
+	}
+	nspi = xfrm_state_hash_alloc(nsize);
+	if (!nspi) {
+		xfrm_state_hash_free(ndst, nsize);
+		xfrm_state_hash_free(nsrc, nsize);
+		goto out_unlock;
+	}
+
+	spin_lock_bh(&xfrm_state_lock);
+
+	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+	for (i = xfrm_state_hmask; i >= 0; i--)
+		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
+				   nhashmask);
+
+	odst = xfrm_state_bydst;
+	osrc = xfrm_state_bysrc;
+	ospi = xfrm_state_byspi;
+	ohashmask = xfrm_state_hmask;
+
+	xfrm_state_bydst = ndst;
+	xfrm_state_bysrc = nsrc;
+	xfrm_state_byspi = nspi;
+	xfrm_state_hmask = nhashmask;
+
+	spin_unlock_bh(&xfrm_state_lock);
+
+	osize = (ohashmask + 1) * sizeof(struct hlist_head);
+	xfrm_state_hash_free(odst, osize);
+	xfrm_state_hash_free(osrc, osize);
+	xfrm_state_hash_free(ospi, osize);
+
+out_unlock:
+	mutex_unlock(&hash_resize_mutex);
+}
+
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
 
@@ -335,6 +466,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
 			hlist_del(&x->byspi);
 			__xfrm_state_put(x);
 		}
+		xfrm_state_num--;
 		spin_unlock(&xfrm_state_lock);
 		if (del_timer(&x->timer))
 			__xfrm_state_put(x);
@@ -380,7 +512,7 @@ void xfrm_state_flush(u8 proto)
 	int i;
 
 	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+	for (i = 0; i < xfrm_state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 restart:
@@ -611,7 +743,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
-	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+	unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);
 
 	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 	xfrm_state_hold(x);
@@ -637,6 +769,13 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 		xfrm_state_hold(x);
 
 	wake_up(&km_waitq);
+
+	xfrm_state_num++;
+
+	if (x->bydst.next != NULL &&
+	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
+	    xfrm_state_num > xfrm_state_hmask)
+		schedule_work(&xfrm_hash_work);
 }
 
 void xfrm_state_insert(struct xfrm_state *x)
@@ -984,7 +1123,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
 {
 	int i;
 
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+	for (i = 0; i <= xfrm_state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 
@@ -1026,7 +1165,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq);
 void
 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
 {
-	u32 h;
+	unsigned int h;
 	struct xfrm_state *x0;
 
 	if (x->id.spi)
@@ -1074,7 +1213,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 	int err = 0;
 
 	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+	for (i = 0; i <= xfrm_state_hmask; i++) {
 		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
 			if (xfrm_id_proto_match(x->id.proto, proto))
 				count++;
@@ -1085,7 +1224,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 		goto out;
 	}
 
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+	for (i = 0; i <= xfrm_state_hmask; i++) {
 		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
 			if (!xfrm_id_proto_match(x->id.proto, proto))
 				continue;
@@ -1531,13 +1670,17 @@ EXPORT_SYMBOL(xfrm_init_state);
  
 void __init xfrm_state_init(void)
 {
-	int i;
+	unsigned int sz;
+
+	sz = sizeof(struct hlist_head) * 8;
+
+	xfrm_state_bydst = xfrm_state_hash_alloc(sz);
+	xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
+	xfrm_state_byspi = xfrm_state_hash_alloc(sz);
+	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
+		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
+	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
-	for (i=0; i<XFRM_DST_HSIZE; i++) {
-		INIT_HLIST_HEAD(&xfrm_state_bydst[i]);
-		INIT_HLIST_HEAD(&xfrm_state_bysrc[i]);
-		INIT_HLIST_HEAD(&xfrm_state_byspi[i]);
-	}
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
 }
 
-- 
GitLab


From 9d4a706d852411154d0c91b9ffb3bec68b94b25c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 03:18:09 -0700
Subject: [PATCH 0590/1063] [XFRM]: Add generation count to xfrm_state and
 xfrm_dst.

Each xfrm_state inserted gets a new generation counter
value.  When a bundle is created, the xfrm_dst objects
get the current generation counter of the xfrm_state
they will attach to at dst->xfrm.

xfrm_bundle_ok() will return false if it sees an
xfrm_dst with a generation count different from the
generation count of the xfrm_state that dst points to.

This provides a facility by which to passively and
cheaply invalidate cached IPSEC routes during SA
database changes.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 3 +++
 net/ipv4/xfrm4_policy.c | 1 +
 net/ipv6/xfrm6_policy.c | 1 +
 net/xfrm/xfrm_policy.c  | 2 ++
 net/xfrm/xfrm_state.c   | 3 +++
 5 files changed, 10 insertions(+)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3405e5d9d51c2..fd4a300b5bafa 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -104,6 +104,8 @@ struct xfrm_state
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 
+	u32			genid;
+
 	/* Key manger bits */
 	struct {
 		u8		state;
@@ -590,6 +592,7 @@ struct xfrm_dst
 		struct rt6_info		rt6;
 	} u;
 	struct dst_entry *route;
+	u32 genid;
 	u32 route_mtu_cached;
 	u32 child_mtu_cached;
 	u32 route_cookie;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 42d8ded0f96a8..479598566f1de 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -93,6 +93,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 		xdst = (struct xfrm_dst *)dst1;
 		xdst->route = &rt->u.dst;
+		xdst->genid = xfrm[i]->genid;
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 98c2fe449b3f1..9391c4c94febe 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -149,6 +149,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 		xdst = (struct xfrm_dst *)dst1;
 		xdst->route = &rt->u.dst;
+		xdst->genid = xfrm[i]->genid;
 		if (rt->rt6i_node)
 			xdst->route_cookie = rt->rt6i_node->fn_sernum;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1732159ffd011..7fc6944ee36f4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1536,6 +1536,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int str
 			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
+		if (xdst->genid != dst->xfrm->genid)
+			return 0;
 
 		if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
 		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 445263c54c94e..535d43c147200 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -53,6 +53,7 @@ static struct hlist_head *xfrm_state_byspi __read_mostly;
 static unsigned int xfrm_state_hmask __read_mostly;
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
+static unsigned int xfrm_state_genid;
 
 static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
 {
@@ -745,6 +746,8 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 {
 	unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);
 
+	x->genid = ++xfrm_state_genid;
+
 	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 	xfrm_state_hold(x);
 
-- 
GitLab


From a624c108e5595b5827796c253481436929cd5344 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 03:24:33 -0700
Subject: [PATCH 0591/1063] [XFRM]: Put more keys into destination hash
 function.

Besides the daddr, key the hash on family and reqid too.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 75 ++++++++++++++++++++-----------------------
 1 file changed, 35 insertions(+), 40 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 535d43c147200..7e5daafc18631 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
-   2. Hash table by daddr to find what SAs exist for given
+   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
       destination/tunnel endpoint. (output)
  */
 
@@ -55,62 +55,56 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
-static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
+static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
 {
-	unsigned int h;
-	h = ntohl(addr->a4);
-	h = (h ^ (h>>16)) & hmask;
-	return h;
-}
-
-static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
-{
-	unsigned int h;
-	h = ntohl(addr->a6[2]^addr->a6[3]);
-	h = (h ^ (h>>16)) & hmask;
-	return h;
+	return ntohl(addr->a4);
 }
 
-static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
+static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
 {
-	return __xfrm4_dst_hash(addr, hmask);
+	return ntohl(addr->a6[2]^addr->a6[3]);
 }
 
-static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
-{
-	return __xfrm6_dst_hash(addr, hmask);
-}
-
-static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,  unsigned int hmask)
+static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
+					   u32 reqid, unsigned short family,
+					   unsigned int hmask)
 {
+	unsigned int h = family ^ reqid;
 	switch (family) {
 	case AF_INET:
-		return __xfrm4_src_hash(addr, hmask);
+		h ^= __xfrm4_addr_hash(addr);
+		break;
 	case AF_INET6:
-		return __xfrm6_src_hash(addr, hmask);
-	}
-	return 0;
+		h ^= __xfrm6_addr_hash(addr);
+		break;
+	};
+	return (h ^ (h >> 16)) & hmask;
 }
 
-static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
+static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
+					 unsigned short family)
 {
-	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
+	return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
 }
 
-static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
+static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
+				       unsigned int hmask)
 {
+	unsigned int h = family;
 	switch (family) {
 	case AF_INET:
-		return __xfrm4_dst_hash(addr, hmask);
+		h ^= __xfrm4_addr_hash(addr);
+		break;
 	case AF_INET6:
-		return __xfrm6_dst_hash(addr, hmask);
-	}
-	return 0;
+		h ^= __xfrm6_addr_hash(addr);
+		break;
+	};
+	return (h ^ (h >> 16)) & hmask;
 }
 
-static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
+static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
 {
-	return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
+	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
 }
 
 static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
@@ -190,7 +184,8 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
 		unsigned int h;
 
-		h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
+		h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
+				    x->props.family, nhashmask);
 		hlist_add_head(&x->bydst, ndsttable+h);
 
 		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
@@ -635,7 +630,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
-	unsigned int h = xfrm_dst_hash(daddr, family);
+	unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
 	struct hlist_node *entry;
 	struct xfrm_state *x, *x0;
 	int acquire_in_progress = 0;
@@ -744,15 +739,15 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
-	unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+	unsigned int h;
 
 	x->genid = ++xfrm_state_genid;
 
+	h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
 	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 	xfrm_state_hold(x);
 
 	h = xfrm_src_hash(&x->props.saddr, x->props.family);
-
 	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 	xfrm_state_hold(x);
 
@@ -794,7 +789,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
 /* xfrm_state_lock is held */
 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
 {
-	unsigned int h = xfrm_dst_hash(daddr, family);
+	unsigned int h = xfrm_dst_hash(daddr, reqid, family);
 	struct hlist_node *entry;
 	struct xfrm_state *x;
 
-- 
GitLab


From 2575b65434d56559bd03854450b9b6aaf19b9c90 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 03:26:44 -0700
Subject: [PATCH 0592/1063] [XFRM]: Simplify xfrm_spi_hash

It can use __xfrm{4,6}_addr_hash().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 33 +++++++++------------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7e5daafc18631..98200397e098d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -107,35 +107,20 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family
 	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
 }
 
-static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
-					unsigned int hmask)
-{
-	unsigned int h;
-	h = ntohl(addr->a4^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) & hmask;
-	return h;
-}
-
-static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
-					    unsigned int hmask)
-{
-	unsigned int h;
-	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) & hmask;
-	return h;
-}
-
-static inline
-unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
-			 unsigned int hmask)
+static inline unsigned int
+__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
+		unsigned int hmask)
 {
+	unsigned int h = spi ^ proto;
 	switch (family) {
 	case AF_INET:
-		return __xfrm4_spi_hash(addr, spi, proto, hmask);
+		h ^= __xfrm4_addr_hash(addr);
+		break;
 	case AF_INET6:
-		return __xfrm6_spi_hash(addr, spi, proto, hmask);
+		h ^= __xfrm6_addr_hash(addr);
+		break;
 	}
-	return 0;	/*XXX*/
+	return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
 }
 
 static inline unsigned int
-- 
GitLab


From c7f5ea3a4d1ae6b3b426e113358fdc57494bc754 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 03:29:04 -0700
Subject: [PATCH 0593/1063] [XFRM]: Do not flush all bundles on SA insert.

Instead, simply set all potentially aliasing existing xfrm_state
objects to have the current generation counter value.

This causes routes to be looked up again the next time an existing
route that references one of these aliased xfrm_state objects is
used, via xfrm_dst_check().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  1 -
 net/xfrm/xfrm_policy.c | 10 ----------
 net/xfrm/xfrm_state.c  | 25 ++++++++++++++++++++-----
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index fd4a300b5bafa..a620a43c9eeb9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -996,7 +996,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
 extern void xfrm_policy_flush(u8 type);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
 extern int xfrm_flush_bundles(void);
-extern void xfrm_flush_all_bundles(void);
 extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict);
 extern void xfrm_init_pmtu(struct dst_entry *dst);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7fc6944ee36f4..cfa5c692f2e81 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1478,16 +1478,6 @@ int xfrm_flush_bundles(void)
 	return 0;
 }
 
-static int always_true(struct dst_entry *dst)
-{
-	return 1;
-}
-
-void xfrm_flush_all_bundles(void)
-{
-	xfrm_prune_bundles(always_true);
-}
-
 void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 98200397e098d..77ef796c9d0dc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -761,13 +761,30 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 		schedule_work(&xfrm_hash_work);
 }
 
+/* xfrm_state_lock is held */
+static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
+{
+	unsigned short family = xnew->props.family;
+	u32 reqid = xnew->props.reqid;
+	struct xfrm_state *x;
+	struct hlist_node *entry;
+	unsigned int h;
+
+	h = xfrm_dst_hash(&xnew->id.daddr, reqid, family);
+	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+		if (x->props.family	== family &&
+		    x->props.reqid	== reqid &&
+		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family))
+			x->genid = xfrm_state_genid;
+	}
+}
+
 void xfrm_state_insert(struct xfrm_state *x)
 {
 	spin_lock_bh(&xfrm_state_lock);
+	__xfrm_state_bump_genids(x);
 	__xfrm_state_insert(x);
 	spin_unlock_bh(&xfrm_state_lock);
-
-	xfrm_flush_all_bundles();
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
@@ -889,15 +906,13 @@ int xfrm_state_add(struct xfrm_state *x)
 				     x->id.proto,
 				     &x->id.daddr, &x->props.saddr, 0);
 
+	__xfrm_state_bump_genids(x);
 	__xfrm_state_insert(x);
 	err = 0;
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
 
-	if (!err)
-		xfrm_flush_all_bundles();
-
 	if (x1) {
 		xfrm_state_delete(x1);
 		xfrm_state_put(x1);
-- 
GitLab


From 1c0953997567b22e32fdf85d3b4bc0f2461fd161 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 03:30:28 -0700
Subject: [PATCH 0594/1063] [XFRM]: Purge dst references to deleted SAs
 passively.

Just let GC and other normal mechanisms take care of getting
rid of DST cache references to deleted xfrm_state objects
instead of walking all the policy bundles.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  1 -
 net/xfrm/xfrm_policy.c |  2 +-
 net/xfrm/xfrm_state.c  | 17 -----------------
 3 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a620a43c9eeb9..c7870b6eae018 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -995,7 +995,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
 				  int create, unsigned short family);
 extern void xfrm_policy_flush(u8 type);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
-extern int xfrm_flush_bundles(void);
 extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict);
 extern void xfrm_init_pmtu(struct dst_entry *dst);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cfa5c692f2e81..1bcaae4adf3ae 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1472,7 +1472,7 @@ static void __xfrm_garbage_collect(void)
 	xfrm_prune_bundles(unused_bundle);
 }
 
-int xfrm_flush_bundles(void)
+static int xfrm_flush_bundles(void)
 {
 	xfrm_prune_bundles(stale_bundle);
 	return 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 77ef796c9d0dc..9ff00b7d6ad35 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -256,8 +256,6 @@ static struct work_struct xfrm_state_gc_work;
 static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
-static int xfrm_state_gc_flush_bundles;
-
 int __xfrm_state_delete(struct xfrm_state *x);
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
@@ -293,11 +291,6 @@ static void xfrm_state_gc_task(void *data)
 	struct hlist_node *entry, *tmp;
 	struct hlist_head gc_list;
 
-	if (xfrm_state_gc_flush_bundles) {
-		xfrm_state_gc_flush_bundles = 0;
-		xfrm_flush_bundles();
-	}
-
 	spin_lock_bh(&xfrm_state_gc_lock);
 	gc_list.first = xfrm_state_gc_list.first;
 	INIT_HLIST_HEAD(&xfrm_state_gc_list);
@@ -454,16 +447,6 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		if (del_timer(&x->rtimer))
 			__xfrm_state_put(x);
 
-		/* The number two in this test is the reference
-		 * mentioned in the comment below plus the reference
-		 * our caller holds.  A larger value means that
-		 * there are DSTs attached to this xfrm_state.
-		 */
-		if (atomic_read(&x->refcnt) > 2) {
-			xfrm_state_gc_flush_bundles = 1;
-			schedule_work(&xfrm_state_gc_work);
-		}
-
 		/* All xfrm_state objects are created by xfrm_state_alloc.
 		 * The xfrm_state_alloc call gives a reference, and that
 		 * is what we are dropping here.
-- 
GitLab


From a47f0ce05ae12ce9acad62896ff703175764104e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 03:54:22 -0700
Subject: [PATCH 0595/1063] [XFRM]: Kill excessive refcounting of xfrm_state
 objects.

The refcounting done for timers and hash table insertions
is just wasted cycles.  We can eliminate all of this
refcounting because:

1) The implicit refcount when the xfrm_state object is active
   will always be held while the object is in the hash tables.
   We never kfree() the xfrm_state until long after we've made
   sure that it has been unhashed.

2) Timers are even easier.  Once we mark x->km.state as
   anything other than XFRM_STATE_VALID (__xfrm_state_delete
   sets it to XFRM_STATE_DEAD), any timer that fires will
   do nothing and return without rearming the timer.

   Therefore we can defer the del_timer calls until when the
   object is about to be freed up during GC.  We have to use
   del_timer_sync() and defer it to GC because we can't do
   a del_timer_sync() while holding x->lock which all callers
   of __xfrm_state_delete hold.

This makes SA changes even more light-weight.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 53 ++++++++++---------------------------------
 1 file changed, 12 insertions(+), 41 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9ff00b7d6ad35..0bc6a4b1ceae0 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -266,10 +266,8 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
 
 static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
-	if (del_timer(&x->timer))
-		BUG();
-	if (del_timer(&x->rtimer))
-		BUG();
+	del_timer_sync(&x->timer);
+	del_timer_sync(&x->rtimer);
 	kfree(x->aalg);
 	kfree(x->ealg);
 	kfree(x->calg);
@@ -361,9 +359,9 @@ static void xfrm_timer_handler(unsigned long data)
 	if (warn)
 		km_state_expired(x, 0, 0);
 resched:
-	if (next != LONG_MAX &&
-	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
-		xfrm_state_hold(x);
+	if (next != LONG_MAX)
+		mod_timer(&x->timer, jiffies + make_jiffies(next));
+
 	goto out;
 
 expired:
@@ -378,7 +376,6 @@ static void xfrm_timer_handler(unsigned long data)
 
 out:
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 }
 
 static void xfrm_replay_timer_handler(unsigned long data);
@@ -433,19 +430,11 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
 		hlist_del(&x->bydst);
-		__xfrm_state_put(x);
 		hlist_del(&x->bysrc);
-		__xfrm_state_put(x);
-		if (x->id.spi) {
+		if (x->id.spi)
 			hlist_del(&x->byspi);
-			__xfrm_state_put(x);
-		}
 		xfrm_state_num--;
 		spin_unlock(&xfrm_state_lock);
-		if (del_timer(&x->timer))
-			__xfrm_state_put(x);
-		if (del_timer(&x->rtimer))
-			__xfrm_state_put(x);
 
 		/* All xfrm_state objects are created by xfrm_state_alloc.
 		 * The xfrm_state_alloc call gives a reference, and that
@@ -676,17 +665,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
 			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-			xfrm_state_hold(x);
 			h = xfrm_src_hash(saddr, family);
 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
-			xfrm_state_hold(x);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
 				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
-				xfrm_state_hold(x);
 			}
 			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-			xfrm_state_hold(x);
 			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
 			add_timer(&x->timer);
 		} else {
@@ -713,26 +698,20 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
 	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-	xfrm_state_hold(x);
 
 	h = xfrm_src_hash(&x->props.saddr, x->props.family);
 	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
-	xfrm_state_hold(x);
 
 	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
 				  x->props.family);
 
 		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
-		xfrm_state_hold(x);
 	}
 
-	if (!mod_timer(&x->timer, jiffies + HZ))
-		xfrm_state_hold(x);
-
-	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
-		xfrm_state_hold(x);
+	mod_timer(&x->timer, jiffies + HZ);
+	if (x->replay_maxage)
+		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 
 	wake_up(&km_waitq);
 
@@ -844,10 +823,8 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		xfrm_state_hold(x);
 		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
 		add_timer(&x->timer);
-		xfrm_state_hold(x);
 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 		h = xfrm_src_hash(saddr, family);
-		xfrm_state_hold(x);
 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 		wake_up(&km_waitq);
 	}
@@ -955,8 +932,7 @@ int xfrm_state_update(struct xfrm_state *x)
 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
 		x1->km.dying = 0;
 
-		if (!mod_timer(&x1->timer, jiffies + HZ))
-			xfrm_state_hold(x1);
+		mod_timer(&x1->timer, jiffies + HZ);
 		if (x1->curlft.use_time)
 			xfrm_state_check_expire(x1);
 
@@ -981,8 +957,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
-		if (!mod_timer(&x->timer, jiffies))
-			xfrm_state_hold(x);
+		mod_timer(&x->timer, jiffies);
 		return -EINVAL;
 	}
 
@@ -1177,7 +1152,6 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
 		spin_lock_bh(&xfrm_state_lock);
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
 		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
-		xfrm_state_hold(x);
 		spin_unlock_bh(&xfrm_state_lock);
 		wake_up(&km_waitq);
 	}
@@ -1264,10 +1238,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event)
 	km_state_notify(x, &c);
 
 	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
-		xfrm_state_hold(x);
+	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
 		x->xflags &= ~XFRM_TIME_DEFER;
-	}
 }
 EXPORT_SYMBOL(xfrm_replay_notify);
 
@@ -1285,7 +1257,6 @@ static void xfrm_replay_timer_handler(unsigned long data)
 	}
 
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 }
 
 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
-- 
GitLab


From c1969f294e624d5b642fc8e6ab9468b7c7791fa8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 04:00:03 -0700
Subject: [PATCH 0596/1063] [XFRM]: Hash xfrm_state objects by source address
 too.

The source address is always non-prefixed so we should use
it to help give entropy to the bydst hash.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 53 ++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0bc6a4b1ceae0..37213f9f6a02c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -65,26 +65,40 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
 	return ntohl(addr->a6[2]^addr->a6[3]);
 }
 
-static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
+static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	return ntohl(daddr->a4 ^ saddr->a4);
+}
+
+static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
+		     saddr->a6[2] ^ saddr->a6[3]);
+}
+
+static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr,
+					   xfrm_address_t *saddr,
 					   u32 reqid, unsigned short family,
 					   unsigned int hmask)
 {
 	unsigned int h = family ^ reqid;
 	switch (family) {
 	case AF_INET:
-		h ^= __xfrm4_addr_hash(addr);
+		h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
 		break;
 	case AF_INET6:
-		h ^= __xfrm6_addr_hash(addr);
+		h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
 	};
 	return (h ^ (h >> 16)) & hmask;
 }
 
-static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
+static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
+					 xfrm_address_t *saddr,
+					 u32 reqid,
 					 unsigned short family)
 {
-	return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
+	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
 }
 
 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
@@ -108,25 +122,25 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family
 }
 
 static inline unsigned int
-__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
-		unsigned int hmask)
+__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto,
+		unsigned short family, unsigned int hmask)
 {
 	unsigned int h = spi ^ proto;
 	switch (family) {
 	case AF_INET:
-		h ^= __xfrm4_addr_hash(addr);
+		h ^= __xfrm4_addr_hash(daddr);
 		break;
 	case AF_INET6:
-		h ^= __xfrm6_addr_hash(addr);
+		h ^= __xfrm6_addr_hash(daddr);
 		break;
 	}
 	return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
 }
 
 static inline unsigned int
-xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
+xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
 {
-	return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
+	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
 }
 
 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
@@ -169,8 +183,9 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
 		unsigned int h;
 
-		h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
-				    x->props.family, nhashmask);
+		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+				    x->props.reqid, x->props.family,
+				    nhashmask);
 		hlist_add_head(&x->bydst, ndsttable+h);
 
 		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
@@ -587,7 +602,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
-	unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
+	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
 	struct hlist_node *entry;
 	struct xfrm_state *x, *x0;
 	int acquire_in_progress = 0;
@@ -696,7 +711,8 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	x->genid = ++xfrm_state_genid;
 
-	h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
+	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+			  x->props.reqid, x->props.family);
 	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 
 	h = xfrm_src_hash(&x->props.saddr, x->props.family);
@@ -732,11 +748,12 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 	struct hlist_node *entry;
 	unsigned int h;
 
-	h = xfrm_dst_hash(&xnew->id.daddr, reqid, family);
+	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
 	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 		if (x->props.family	== family &&
 		    x->props.reqid	== reqid &&
-		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family))
+		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
+		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
 			x->genid = xfrm_state_genid;
 	}
 }
@@ -753,7 +770,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
 /* xfrm_state_lock is held */
 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
 {
-	unsigned int h = xfrm_dst_hash(daddr, reqid, family);
+	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
 	struct hlist_node *entry;
 	struct xfrm_state *x;
 
-- 
GitLab


From 2518c7c2b3d7f0a6b302b4efe17c911f8dd4049f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 04:45:07 -0700
Subject: [PATCH 0597/1063] [XFRM]: Hash policies when non-prefixed.

This idea is from Alexey Kuznetsov.

It is common for policies to be non-prefixed.  And for
that case we can optimize lookups, insert, etc. quite
a bit.

For each direction, we have a dynamically sized policy
hash table for non-prefixed policies.  We also have a
hash table on policy->index.

For prefixed policies, we have a list per-direction which
we will consult on lookups when a non-prefix hashtable
lookup fails.

This still isn't as efficient as I would like it.  There
are four immediate problems:

1) Lots of excessive refcounting, which can be fixed just
   like xfrm_state was
2) We do 2 hash probes on insert, one to look for dups and
   one to allocate a unique policy->index.  Although I wonder
   how much this matters since xfrm_state inserts do up to
   3 hash probes and that seems to perform fine.
3) xfrm_policy_insert() is very complex because of the priority
   ordering and entry replacement logic.
4) Lots of counter bumping, in addition to policy refcounts,
   in the form of xfrm_policy_count[].  This is merely used
   to let code path(s) know that some IPSEC rules exist.  So
   this count is indexed per-direction, maybe that is overkill.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  23 +-
 net/xfrm/xfrm_policy.c | 681 ++++++++++++++++++++++++++++++++---------
 2 files changed, 546 insertions(+), 158 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c7870b6eae018..0acabf2a0a8f2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -331,7 +331,8 @@ struct xfrm_tmpl
 struct xfrm_policy
 {
 	struct xfrm_policy	*next;
-	struct list_head	list;
+	struct hlist_node	bydst;
+	struct hlist_node	byidx;
 
 	/* This lock only affects elements except for entry. */
 	rwlock_t		lock;
@@ -385,21 +386,7 @@ struct xfrm_mgr
 extern int xfrm_register_km(struct xfrm_mgr *km);
 extern int xfrm_unregister_km(struct xfrm_mgr *km);
 
-
-extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
-#ifdef CONFIG_XFRM_SUB_POLICY
-extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2];
-
-static inline int xfrm_policy_lists_empty(int dir)
-{
-	return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]);
-}
-#else
-static inline int xfrm_policy_lists_empty(int dir)
-{
-	return (!xfrm_policy_list[dir]);
-}
-#endif
+extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
 
 static inline void xfrm_pol_hold(struct xfrm_policy *policy)
 {
@@ -678,7 +665,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
 	if (sk && sk->sk_policy[XFRM_POLICY_IN])
 		return __xfrm_policy_check(sk, dir, skb, family);
 
-	return	(xfrm_policy_lists_empty(dir) && !skb->sp) ||
+	return	(!xfrm_policy_count[dir] && !skb->sp) ||
 		(skb->dst->flags & DST_NOPOLICY) ||
 		__xfrm_policy_check(sk, dir, skb, family);
 }
@@ -698,7 +685,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
 
 static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
-	return	xfrm_policy_lists_empty(XFRM_POLICY_OUT) ||
+	return	!xfrm_policy_count[XFRM_POLICY_OUT] ||
 		(skb->dst->flags & DST_NOXFRM) ||
 		__xfrm_route_forward(skb, family);
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1bcaae4adf3ae..087a5443b0514 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -22,6 +22,9 @@
 #include <linux/netdevice.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
+#include <linux/cache.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
@@ -30,26 +33,8 @@ EXPORT_SYMBOL(xfrm_cfg_mutex);
 
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
-struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
-EXPORT_SYMBOL(xfrm_policy_list);
-#ifdef CONFIG_XFRM_SUB_POLICY
-struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2];
-EXPORT_SYMBOL(xfrm_policy_list_sub);
-
-#define XFRM_POLICY_LISTS(type) \
-	((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \
-	 xfrm_policy_list)
-#define XFRM_POLICY_LISTHEAD(type, dir) \
-	((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \
-	 xfrm_policy_list[dir])
-#define XFRM_POLICY_LISTHEADP(type, dir) \
-	((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \
-	 &xfrm_policy_list[dir])
-#else
-#define XFRM_POLICY_LISTS(type)              xfrm_policy_list
-#define XFRM_POLICY_LISTHEAD(type, dif)      xfrm_policy_list[dir]
-#define XFRM_POLICY_LISTHEADP(type, dif)     &xfrm_policy_list[dir]
-#endif
+unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
+EXPORT_SYMBOL(xfrm_policy_count);
 
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
@@ -57,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 static kmem_cache_t *xfrm_dst_cache __read_mostly;
 
 static struct work_struct xfrm_policy_gc_work;
-static struct list_head xfrm_policy_gc_list =
-	LIST_HEAD_INIT(xfrm_policy_gc_list);
+static HLIST_HEAD(xfrm_policy_gc_list);
 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
@@ -328,8 +312,10 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
 	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 
 	if (policy) {
-		atomic_set(&policy->refcnt, 1);
+		INIT_HLIST_NODE(&policy->bydst);
+		INIT_HLIST_NODE(&policy->byidx);
 		rwlock_init(&policy->lock);
+		atomic_set(&policy->refcnt, 1);
 		init_timer(&policy->timer);
 		policy->timer.data = (unsigned long)policy;
 		policy->timer.function = xfrm_policy_timer;
@@ -375,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 static void xfrm_policy_gc_task(void *data)
 {
 	struct xfrm_policy *policy;
-	struct list_head *entry, *tmp;
-	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_policy_gc_lock);
-	list_splice_init(&xfrm_policy_gc_list, &gc_list);
+	gc_list.first = xfrm_policy_gc_list.first;
+	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
 	spin_unlock_bh(&xfrm_policy_gc_lock);
 
-	list_for_each_safe(entry, tmp, &gc_list) {
-		policy = list_entry(entry, struct xfrm_policy, list);
+	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
 		xfrm_policy_gc_kill(policy);
-	}
 }
 
 /* Rule must be locked. Release descentant resources, announce
@@ -407,70 +392,354 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 	}
 
 	spin_lock(&xfrm_policy_gc_lock);
-	list_add(&policy->list, &xfrm_policy_gc_list);
+	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
 	spin_unlock(&xfrm_policy_gc_lock);
 
 	schedule_work(&xfrm_policy_gc_work);
 }
 
+struct xfrm_policy_hash {
+	struct hlist_head	*table;
+	unsigned int		hmask;
+};
+
+static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
+static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
+static struct hlist_head *xfrm_policy_byidx __read_mostly;
+static unsigned int xfrm_idx_hmask __read_mostly;
+static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
+
+static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
+{
+	return (index ^ (index >> 8)) & hmask;
+}
+
+static inline unsigned int idx_hash(u32 index)
+{
+	return __idx_hash(index, xfrm_idx_hmask);
+}
+
+static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
+{
+	xfrm_address_t *daddr = &sel->daddr;
+	xfrm_address_t *saddr = &sel->saddr;
+	unsigned int h = 0;
+
+	switch (family) {
+	case AF_INET:
+		if (sel->prefixlen_d != 32 ||
+		    sel->prefixlen_s != 32)
+			return hmask + 1;
+
+		h = ntohl(daddr->a4 ^ saddr->a4);
+		break;
+
+	case AF_INET6:
+		if (sel->prefixlen_d != 128 ||
+		    sel->prefixlen_s != 128)
+			return hmask + 1;
+
+		h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^
+			  saddr->a6[2] ^ saddr->a6[3]);
+		break;
+	};
+	h ^= (h >> 16);
+	return h & hmask;
+}
+
+static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
+{
+	unsigned int h = 0;
+
+	switch (family) {
+	case AF_INET:
+		h = ntohl(daddr->a4 ^ saddr->a4);
+		break;
+
+	case AF_INET6:
+		h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^
+			  saddr->a6[2] ^ saddr->a6[3]);
+		break;
+	};
+	h ^= (h >> 16);
+	return h & hmask;
+}
+
+static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
+{
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hash = __sel_hash(sel, family, hmask);
+
+	return (hash == hmask + 1 ?
+		&xfrm_policy_inexact[dir] :
+		xfrm_policy_bydst[dir].table + hash);
+}
+
+static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
+{
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
+
+	return xfrm_policy_bydst[dir].table + hash;
+}
+
+static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz)
+{
+	struct hlist_head *n;
+
+	if (sz <= PAGE_SIZE)
+		n = kmalloc(sz, GFP_KERNEL);
+	else if (hashdist)
+		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
+	else
+		n = (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(sz));
+
+	if (n)
+		memset(n, 0, sz);
+
+	return n;
+}
+
+static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz)
+{
+	if (sz <= PAGE_SIZE)
+		kfree(n);
+	else if (hashdist)
+		vfree(n);
+	else
+		free_pages((unsigned long)n, get_order(sz));
+}
+
+static void xfrm_dst_hash_transfer(struct hlist_head *list,
+				   struct hlist_head *ndsttable,
+				   unsigned int nhashmask)
+{
+	struct hlist_node *entry, *tmp;
+	struct xfrm_policy *pol;
+
+	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
+		unsigned int h;
+
+		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
+				pol->family, nhashmask);
+		hlist_add_head(&pol->bydst, ndsttable+h);
+	}
+}
+
+static void xfrm_idx_hash_transfer(struct hlist_head *list,
+				   struct hlist_head *nidxtable,
+				   unsigned int nhashmask)
+{
+	struct hlist_node *entry, *tmp;
+	struct xfrm_policy *pol;
+
+	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
+		unsigned int h;
+
+		h = __idx_hash(pol->index, nhashmask);
+		hlist_add_head(&pol->byidx, nidxtable+h);
+	}
+}
+
+static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
+{
+	return ((old_hmask + 1) << 1) - 1;
+}
+
+static void xfrm_bydst_resize(int dir)
+{
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
+	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
+	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
+	struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize);
+	int i;
+
+	if (!ndst)
+		return;
+
+	write_lock_bh(&xfrm_policy_lock);
+
+	for (i = hmask; i >= 0; i--)
+		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
+
+	xfrm_policy_bydst[dir].table = ndst;
+	xfrm_policy_bydst[dir].hmask = nhashmask;
+
+	write_unlock_bh(&xfrm_policy_lock);
+
+	xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
+}
+
+static void xfrm_byidx_resize(int total)
+{
+	unsigned int hmask = xfrm_idx_hmask;
+	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
+	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
+	struct hlist_head *oidx = xfrm_policy_byidx;
+	struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize);
+	int i;
+
+	if (!nidx)
+		return;
+
+	write_lock_bh(&xfrm_policy_lock);
+
+	for (i = hmask; i >= 0; i--)
+		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
+
+	xfrm_policy_byidx = nidx;
+	xfrm_idx_hmask = nhashmask;
+
+	write_unlock_bh(&xfrm_policy_lock);
+
+	xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
+}
+
+static inline int xfrm_bydst_should_resize(int dir, int *total)
+{
+	unsigned int cnt = xfrm_policy_count[dir];
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+
+	if (total)
+		*total += cnt;
+
+	if ((hmask + 1) < xfrm_policy_hashmax &&
+	    cnt > hmask)
+		return 1;
+
+	return 0;
+}
+
+static inline int xfrm_byidx_should_resize(int total)
+{
+	unsigned int hmask = xfrm_idx_hmask;
+
+	if ((hmask + 1) < xfrm_policy_hashmax &&
+	    total > hmask)
+		return 1;
+
+	return 0;
+}
+
+static DEFINE_MUTEX(hash_resize_mutex);
+
+static void xfrm_hash_resize(void *__unused)
+{
+	int dir, total;
+
+	mutex_lock(&hash_resize_mutex);
+
+	total = 0;
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		if (xfrm_bydst_should_resize(dir, &total))
+			xfrm_bydst_resize(dir);
+	}
+	if (xfrm_byidx_should_resize(total))
+		xfrm_byidx_resize(total);
+
+	mutex_unlock(&hash_resize_mutex);
+}
+
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+
 /* Generate new index... KAME seems to generate them ordered by cost
  * of an absolute inpredictability of ordering of rules. This will not pass. */
 static u32 xfrm_gen_index(u8 type, int dir)
 {
-	u32 idx;
-	struct xfrm_policy *p;
 	static u32 idx_generator;
 
 	for (;;) {
+		struct hlist_node *entry;
+		struct hlist_head *list;
+		struct xfrm_policy *p;
+		u32 idx;
+		int found;
+
 		idx = (idx_generator | dir);
 		idx_generator += 8;
 		if (idx == 0)
 			idx = 8;
-		for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) {
-			if (p->index == idx)
+		list = xfrm_policy_byidx + idx_hash(idx);
+		found = 0;
+		hlist_for_each_entry(p, entry, list, byidx) {
+			if (p->index == idx) {
+				found = 1;
 				break;
+			}
 		}
-		if (!p)
+		if (!found)
 			return idx;
 	}
 }
 
+static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
+{
+	u32 *p1 = (u32 *) s1;
+	u32 *p2 = (u32 *) s2;
+	int len = sizeof(struct xfrm_selector) / sizeof(u32);
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (p1[i] != p2[i])
+			return 1;
+	}
+
+	return 0;
+}
+
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 {
-	struct xfrm_policy *pol, **p;
-	struct xfrm_policy *delpol = NULL;
-	struct xfrm_policy **newpos = NULL;
+	struct xfrm_policy *pol;
+	struct xfrm_policy *delpol;
+	struct hlist_head *chain;
+	struct hlist_node *entry, *newpos, *last;
 	struct dst_entry *gc_list;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) {
-		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
+	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
+	delpol = NULL;
+	newpos = NULL;
+	last = NULL;
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (!delpol &&
+		    pol->type == policy->type &&
+		    !selector_cmp(&pol->selector, &policy->selector) &&
 		    xfrm_sec_ctx_match(pol->security, policy->security)) {
 			if (excl) {
 				write_unlock_bh(&xfrm_policy_lock);
 				return -EEXIST;
 			}
-			*p = pol->next;
 			delpol = pol;
 			if (policy->priority > pol->priority)
 				continue;
 		} else if (policy->priority >= pol->priority) {
-			p = &pol->next;
+			last = &pol->bydst;
 			continue;
 		}
 		if (!newpos)
-			newpos = p;
+			newpos = &pol->bydst;
 		if (delpol)
 			break;
-		p = &pol->next;
+		last = &pol->bydst;
 	}
+	if (!newpos)
+		newpos = last;
 	if (newpos)
-		p = newpos;
+		hlist_add_after(newpos, &policy->bydst);
+	else
+		hlist_add_head(&policy->bydst, chain);
 	xfrm_pol_hold(policy);
-	policy->next = *p;
-	*p = policy;
+	xfrm_policy_count[dir]++;
 	atomic_inc(&flow_cache_genid);
+	if (delpol) {
+		hlist_del(&delpol->bydst);
+		hlist_del(&delpol->byidx);
+		xfrm_policy_count[dir]--;
+	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
+	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
 	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
@@ -479,10 +748,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 
 	if (delpol)
 		xfrm_policy_kill(delpol);
+	else if (xfrm_bydst_should_resize(dir, NULL))
+		schedule_work(&xfrm_hash_work);
 
 	read_lock_bh(&xfrm_policy_lock);
 	gc_list = NULL;
-	for (policy = policy->next; policy; policy = policy->next) {
+	entry = &policy->bydst;
+	hlist_for_each_entry_continue(policy, entry, bydst) {
 		struct dst_entry *dst;
 
 		write_lock(&policy->lock);
@@ -515,67 +787,112 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete)
 {
-	struct xfrm_policy *pol, **p;
+	struct xfrm_policy *pol, *ret;
+	struct hlist_head *chain;
+	struct hlist_node *entry;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) {
-		if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
-		    (xfrm_sec_ctx_match(ctx, pol->security))) {
+	chain = policy_hash_bysel(sel, sel->family, dir);
+	ret = NULL;
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (pol->type == type &&
+		    !selector_cmp(sel, &pol->selector) &&
+		    xfrm_sec_ctx_match(ctx, pol->security)) {
 			xfrm_pol_hold(pol);
-			if (delete)
-				*p = pol->next;
+			if (delete) {
+				hlist_del(&pol->bydst);
+				hlist_del(&pol->byidx);
+				xfrm_policy_count[dir]--;
+			}
+			ret = pol;
 			break;
 		}
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (pol && delete) {
+	if (ret && delete) {
 		atomic_inc(&flow_cache_genid);
-		xfrm_policy_kill(pol);
+		xfrm_policy_kill(ret);
 	}
-	return pol;
+	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
 struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
 {
-	struct xfrm_policy *pol, **p;
+	struct xfrm_policy *pol, *ret;
+	struct hlist_head *chain;
+	struct hlist_node *entry;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) {
-		if (pol->index == id) {
+	chain = xfrm_policy_byidx + idx_hash(id);
+	ret = NULL;
+	hlist_for_each_entry(pol, entry, chain, byidx) {
+		if (pol->type == type && pol->index == id) {
 			xfrm_pol_hold(pol);
-			if (delete)
-				*p = pol->next;
+			if (delete) {
+				hlist_del(&pol->bydst);
+				hlist_del(&pol->byidx);
+				xfrm_policy_count[dir]--;
+			}
+			ret = pol;
 			break;
 		}
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (pol && delete) {
+	if (ret && delete) {
 		atomic_inc(&flow_cache_genid);
-		xfrm_policy_kill(pol);
+		xfrm_policy_kill(ret);
 	}
-	return pol;
+	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
 
 void xfrm_policy_flush(u8 type)
 {
-	struct xfrm_policy *xp;
-	struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type);
 	int dir;
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
-		while ((xp = p_list[dir]) != NULL) {
-			p_list[dir] = xp->next;
+		struct xfrm_policy *pol;
+		struct hlist_node *entry;
+		int i;
+
+	again1:
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst) {
+			if (pol->type != type)
+				continue;
+			hlist_del(&pol->bydst);
+			hlist_del(&pol->byidx);
 			write_unlock_bh(&xfrm_policy_lock);
 
-			xfrm_policy_kill(xp);
+			xfrm_policy_kill(pol);
 
 			write_lock_bh(&xfrm_policy_lock);
+			goto again1;
+		}
+
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+	again2:
+			hlist_for_each_entry(pol, entry,
+					     xfrm_policy_bydst[dir].table + i,
+					     bydst) {
+				if (pol->type != type)
+					continue;
+				hlist_del(&pol->bydst);
+				hlist_del(&pol->byidx);
+				write_unlock_bh(&xfrm_policy_lock);
+
+				xfrm_policy_kill(pol);
+
+				write_lock_bh(&xfrm_policy_lock);
+				goto again2;
+			}
 		}
+
+		xfrm_policy_count[dir] = 0;
 	}
 	atomic_inc(&flow_cache_genid);
 	write_unlock_bh(&xfrm_policy_lock);
@@ -585,15 +902,27 @@ EXPORT_SYMBOL(xfrm_policy_flush);
 int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
 		     void *data)
 {
-	struct xfrm_policy *xp;
-	int dir;
-	int count = 0;
-	int error = 0;
+	struct xfrm_policy *pol;
+	struct hlist_node *entry;
+	int dir, count, error;
 
 	read_lock_bh(&xfrm_policy_lock);
+	count = 0;
 	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next)
-			count++;
+		struct hlist_head *table = xfrm_policy_bydst[dir].table;
+		int i;
+
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst) {
+			if (pol->type == type)
+				count++;
+		}
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry, table + i, bydst) {
+				if (pol->type == type)
+					count++;
+			}
+		}
 	}
 
 	if (count == 0) {
@@ -602,13 +931,28 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*)
 	}
 
 	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) {
-			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
+		struct hlist_head *table = xfrm_policy_bydst[dir].table;
+		int i;
+
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst) {
+			if (pol->type != type)
+				continue;
+			error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
 			if (error)
 				goto out;
 		}
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry, table + i, bydst) {
+				if (pol->type != type)
+					continue;
+				error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
+				if (error)
+					goto out;
+			}
+		}
 	}
-
+	error = 0;
 out:
 	read_unlock_bh(&xfrm_policy_lock);
 	return error;
@@ -617,31 +961,61 @@ EXPORT_SYMBOL(xfrm_policy_walk);
 
 /* Find policy to apply to this flow. */
 
-static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
-						     u16 family, u8 dir)
+static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
+			     u8 type, u16 family, int dir)
 {
-	struct xfrm_policy *pol;
+	struct xfrm_selector *sel = &pol->selector;
+	int match;
 
-	read_lock_bh(&xfrm_policy_lock);
-	for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) {
-		struct xfrm_selector *sel = &pol->selector;
-		int match;
+	if (pol->family != family ||
+	    pol->type != type)
+		return 0;
 
-		if (pol->family != family)
-			continue;
+	match = xfrm_selector_match(sel, fl, family);
+	if (match) {
+		if (!security_xfrm_policy_lookup(pol, fl->secid, dir))
+			return 1;
+	}
+
+	return 0;
+}
 
-		match = xfrm_selector_match(sel, fl, family);
+static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
+						     u16 family, u8 dir)
+{
+	struct xfrm_policy *pol, *ret;
+	xfrm_address_t *daddr, *saddr;
+	struct hlist_node *entry;
+	struct hlist_head *chain;
 
-		if (match) {
- 			if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) {
+	daddr = xfrm_flowi_daddr(fl, family);
+	saddr = xfrm_flowi_saddr(fl, family);
+	if (unlikely(!daddr || !saddr))
+		return NULL;
+
+	read_lock_bh(&xfrm_policy_lock);
+	chain = policy_hash_direct(daddr, saddr, family, dir);
+	ret = NULL;
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (xfrm_policy_match(pol, fl, type, family, dir)) {
+			xfrm_pol_hold(pol);
+			ret = pol;
+			break;
+		}
+	}
+	if (!ret) {
+		chain = &xfrm_policy_inexact[dir];
+		hlist_for_each_entry(pol, entry, chain, bydst) {
+			if (xfrm_policy_match(pol, fl, type, family, dir)) {
 				xfrm_pol_hold(pol);
+				ret = pol;
 				break;
 			}
 		}
 	}
 	read_unlock_bh(&xfrm_policy_lock);
 
-	return pol;
+	return ret;
 }
 
 static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
@@ -657,7 +1031,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 
 #ifdef CONFIG_XFRM_SUB_POLICY
- end:
+end:
 #endif
 	if ((*objp = (void *) pol) != NULL)
 		*obj_refp = &pol->refcnt;
@@ -704,26 +1078,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
 
 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 {
-	struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type);
+	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
+						     pol->family, dir);
 
-	pol->next = p_list[dir];
-	p_list[dir] = pol;
+	hlist_add_head(&pol->bydst, chain);
+	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
+	xfrm_policy_count[dir]++;
 	xfrm_pol_hold(pol);
+
+	if (xfrm_bydst_should_resize(dir, NULL))
+		schedule_work(&xfrm_hash_work);
 }
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir)
 {
-	struct xfrm_policy **polp;
+	if (hlist_unhashed(&pol->bydst))
+		return NULL;
 
-	for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir);
-	     *polp != NULL; polp = &(*polp)->next) {
-		if (*polp == pol) {
-			*polp = pol->next;
-			return pol;
-		}
-	}
-	return NULL;
+	hlist_del(&pol->bydst);
+	hlist_del(&pol->byidx);
+	xfrm_policy_count[dir]--;
+
+	return pol;
 }
 
 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
@@ -968,7 +1345,8 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 
 	if (!policy) {
 		/* To accelerate a bit...  */
-		if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT))
+		if ((dst_orig->flags & DST_NOXFRM) ||
+		    !xfrm_policy_count[XFRM_POLICY_OUT])
 			return 0;
 
 		policy = flow_cache_lookup(fl, dst_orig->ops->family,
@@ -1413,50 +1791,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
+static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
+{
+	struct dst_entry *dst, **dstp;
+
+	write_lock(&pol->lock);
+	dstp = &pol->bundles;
+	while ((dst=*dstp) != NULL) {
+		if (func(dst)) {
+			*dstp = dst->next;
+			dst->next = *gc_list_p;
+			*gc_list_p = dst;
+		} else {
+			dstp = &dst->next;
+		}
+	}
+	write_unlock(&pol->lock);
+}
+
 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
 {
-	int i;
-	struct xfrm_policy *pol;
-	struct dst_entry *dst, **dstp, *gc_list = NULL;
+	struct dst_entry *gc_list = NULL;
+	int dir;
 
 	read_lock_bh(&xfrm_policy_lock);
-	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
-#ifdef CONFIG_XFRM_SUB_POLICY
-		for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) {
-			write_lock(&pol->lock);
-			dstp = &pol->bundles;
-			while ((dst=*dstp) != NULL) {
-				if (func(dst)) {
-					*dstp = dst->next;
-					dst->next = gc_list;
-					gc_list = dst;
-				} else {
-					dstp = &dst->next;
-				}
-			}
-			write_unlock(&pol->lock);
-		}
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		struct xfrm_policy *pol;
+		struct hlist_node *entry;
+		struct hlist_head *table;
+		int i;
 
-#endif
-		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
-			write_lock(&pol->lock);
-			dstp = &pol->bundles;
-			while ((dst=*dstp) != NULL) {
-				if (func(dst)) {
-					*dstp = dst->next;
-					dst->next = gc_list;
-					gc_list = dst;
-				} else {
-					dstp = &dst->next;
-				}
-			}
-			write_unlock(&pol->lock);
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst)
+			prune_one_bundle(pol, func, &gc_list);
+
+		table = xfrm_policy_bydst[dir].table;
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry, table + i, bydst)
+				prune_one_bundle(pol, func, &gc_list);
 		}
 	}
 	read_unlock_bh(&xfrm_policy_lock);
 
 	while (gc_list) {
-		dst = gc_list;
+		struct dst_entry *dst = gc_list;
 		gc_list = dst->next;
 		dst_free(dst);
 	}
@@ -1680,6 +2058,9 @@ static struct notifier_block xfrm_dev_notifier = {
 
 static void __init xfrm_policy_init(void)
 {
+	unsigned int hmask, sz;
+	int dir;
+
 	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
 					   sizeof(struct xfrm_dst),
 					   0, SLAB_HWCACHE_ALIGN,
@@ -1687,6 +2068,26 @@ static void __init xfrm_policy_init(void)
 	if (!xfrm_dst_cache)
 		panic("XFRM: failed to allocate xfrm_dst_cache\n");
 
+	hmask = 8 - 1;
+	sz = (hmask+1) * sizeof(struct hlist_head);
+
+	xfrm_policy_byidx = xfrm_policy_hash_alloc(sz);
+	xfrm_idx_hmask = hmask;
+	if (!xfrm_policy_byidx)
+		panic("XFRM: failed to allocate byidx hash\n");
+
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		struct xfrm_policy_hash *htab;
+
+		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
+
+		htab = &xfrm_policy_bydst[dir];
+		htab->table = xfrm_policy_hash_alloc(sz);
+		htab->hmask = hmask;
+		if (!htab->table)
+			panic("XFRM: failed to allocate bydst hash\n");
+	}
+
 	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
 	register_netdevice_notifier(&xfrm_dev_notifier);
 }
-- 
GitLab


From 44e36b42a8378be1dcf7e6f8a1cb2710a8903387 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 24 Aug 2006 04:50:50 -0700
Subject: [PATCH 0598/1063] [XFRM]: Extract common hashing code into
 xfrm_hash.[ch]

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/Makefile      |   3 +-
 net/xfrm/xfrm_hash.c   |  41 +++++++++++++
 net/xfrm/xfrm_hash.h   | 128 +++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_policy.c |  95 +++---------------------------
 net/xfrm/xfrm_state.c  | 128 ++++++-----------------------------------
 5 files changed, 195 insertions(+), 200 deletions(-)
 create mode 100644 net/xfrm/xfrm_hash.c
 create mode 100644 net/xfrm/xfrm_hash.h

diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 693aac1aa8335..de3c1a625a46f 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -2,6 +2,7 @@
 # Makefile for the XFRM subsystem.
 #
 
-obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o
+obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
+		      xfrm_input.o xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
new file mode 100644
index 0000000000000..37643bb8768a8
--- /dev/null
+++ b/net/xfrm/xfrm_hash.c
@@ -0,0 +1,41 @@
+/* xfrm_hash.c: Common hash table code.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/xfrm.h>
+
+#include "xfrm_hash.h"
+
+struct hlist_head *xfrm_hash_alloc(unsigned int sz)
+{
+	struct hlist_head *n;
+
+	if (sz <= PAGE_SIZE)
+		n = kmalloc(sz, GFP_KERNEL);
+	else if (hashdist)
+		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
+	else
+		n = (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(sz));
+
+	if (n)
+		memset(n, 0, sz);
+
+	return n;
+}
+
+void xfrm_hash_free(struct hlist_head *n, unsigned int sz)
+{
+	if (sz <= PAGE_SIZE)
+		kfree(n);
+	else if (hashdist)
+		vfree(n);
+	else
+		free_pages((unsigned long)n, get_order(sz));
+}
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
new file mode 100644
index 0000000000000..d3abb0b7dc621
--- /dev/null
+++ b/net/xfrm/xfrm_hash.h
@@ -0,0 +1,128 @@
+#ifndef _XFRM_HASH_H
+#define _XFRM_HASH_H
+
+#include <linux/xfrm.h>
+#include <linux/socket.h>
+
+static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
+{
+	return ntohl(addr->a4);
+}
+
+static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
+{
+	return ntohl(addr->a6[2] ^ addr->a6[3]);
+}
+
+static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	return ntohl(daddr->a4 ^ saddr->a4);
+}
+
+static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
+		     saddr->a6[2] ^ saddr->a6[3]);
+}
+
+static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr,
+					   u32 reqid, unsigned short family,
+					   unsigned int hmask)
+{
+	unsigned int h = family ^ reqid;
+	switch (family) {
+	case AF_INET:
+		h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
+		break;
+	case AF_INET6:
+		h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
+		break;
+	}
+	return (h ^ (h >> 16)) & hmask;
+}
+
+static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr,
+				       unsigned short family,
+				       unsigned int hmask)
+{
+	unsigned int h = family;
+	switch (family) {
+	case AF_INET:
+		h ^= __xfrm4_addr_hash(saddr);
+		break;
+	case AF_INET6:
+		h ^= __xfrm6_addr_hash(saddr);
+		break;
+	};
+	return (h ^ (h >> 16)) & hmask;
+}
+
+static inline unsigned int
+__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family,
+		unsigned int hmask)
+{
+	unsigned int h = spi ^ proto;
+	switch (family) {
+	case AF_INET:
+		h ^= __xfrm4_addr_hash(daddr);
+		break;
+	case AF_INET6:
+		h ^= __xfrm6_addr_hash(daddr);
+		break;
+	}
+	return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
+}
+
+static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
+{
+	return (index ^ (index >> 8)) & hmask;
+}
+
+static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
+{
+	xfrm_address_t *daddr = &sel->daddr;
+	xfrm_address_t *saddr = &sel->saddr;
+	unsigned int h = 0;
+
+	switch (family) {
+	case AF_INET:
+		if (sel->prefixlen_d != 32 ||
+		    sel->prefixlen_s != 32)
+			return hmask + 1;
+
+		h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+		break;
+
+	case AF_INET6:
+		if (sel->prefixlen_d != 128 ||
+		    sel->prefixlen_s != 128)
+			return hmask + 1;
+
+		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+		break;
+	};
+	h ^= (h >> 16);
+	return h & hmask;
+}
+
+static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
+{
+	unsigned int h = 0;
+
+	switch (family) {
+	case AF_INET:
+		h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+		break;
+
+	case AF_INET6:
+		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+		break;
+	};
+	h ^= (h >> 16);
+	return h & hmask;
+}
+
+extern struct hlist_head *xfrm_hash_alloc(unsigned int sz);
+extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz);
+
+#endif /* _XFRM_HASH_H */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 087a5443b0514..b446ca31fecc8 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -22,12 +22,12 @@
 #include <linux/netdevice.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
-#include <linux/vmalloc.h>
 #include <linux/cache.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
+#include "xfrm_hash.h"
+
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
@@ -409,62 +409,11 @@ static struct hlist_head *xfrm_policy_byidx __read_mostly;
 static unsigned int xfrm_idx_hmask __read_mostly;
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 
-static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
-{
-	return (index ^ (index >> 8)) & hmask;
-}
-
 static inline unsigned int idx_hash(u32 index)
 {
 	return __idx_hash(index, xfrm_idx_hmask);
 }
 
-static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
-{
-	xfrm_address_t *daddr = &sel->daddr;
-	xfrm_address_t *saddr = &sel->saddr;
-	unsigned int h = 0;
-
-	switch (family) {
-	case AF_INET:
-		if (sel->prefixlen_d != 32 ||
-		    sel->prefixlen_s != 32)
-			return hmask + 1;
-
-		h = ntohl(daddr->a4 ^ saddr->a4);
-		break;
-
-	case AF_INET6:
-		if (sel->prefixlen_d != 128 ||
-		    sel->prefixlen_s != 128)
-			return hmask + 1;
-
-		h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^
-			  saddr->a6[2] ^ saddr->a6[3]);
-		break;
-	};
-	h ^= (h >> 16);
-	return h & hmask;
-}
-
-static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
-{
-	unsigned int h = 0;
-
-	switch (family) {
-	case AF_INET:
-		h = ntohl(daddr->a4 ^ saddr->a4);
-		break;
-
-	case AF_INET6:
-		h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^
-			  saddr->a6[2] ^ saddr->a6[3]);
-		break;
-	};
-	h ^= (h >> 16);
-	return h & hmask;
-}
-
 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
 {
 	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
@@ -483,34 +432,6 @@ static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address
 	return xfrm_policy_bydst[dir].table + hash;
 }
 
-static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz)
-{
-	struct hlist_head *n;
-
-	if (sz <= PAGE_SIZE)
-		n = kmalloc(sz, GFP_KERNEL);
-	else if (hashdist)
-		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
-	else
-		n = (struct hlist_head *)
-			__get_free_pages(GFP_KERNEL, get_order(sz));
-
-	if (n)
-		memset(n, 0, sz);
-
-	return n;
-}
-
-static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz)
-{
-	if (sz <= PAGE_SIZE)
-		kfree(n);
-	else if (hashdist)
-		vfree(n);
-	else
-		free_pages((unsigned long)n, get_order(sz));
-}
-
 static void xfrm_dst_hash_transfer(struct hlist_head *list,
 				   struct hlist_head *ndsttable,
 				   unsigned int nhashmask)
@@ -553,7 +474,7 @@ static void xfrm_bydst_resize(int dir)
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
-	struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize);
+	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
 	int i;
 
 	if (!ndst)
@@ -569,7 +490,7 @@ static void xfrm_bydst_resize(int dir)
 
 	write_unlock_bh(&xfrm_policy_lock);
 
-	xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
+	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
 }
 
 static void xfrm_byidx_resize(int total)
@@ -578,7 +499,7 @@ static void xfrm_byidx_resize(int total)
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 	struct hlist_head *oidx = xfrm_policy_byidx;
-	struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize);
+	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
 	int i;
 
 	if (!nidx)
@@ -594,7 +515,7 @@ static void xfrm_byidx_resize(int total)
 
 	write_unlock_bh(&xfrm_policy_lock);
 
-	xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
+	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
 }
 
 static inline int xfrm_bydst_should_resize(int dir, int *total)
@@ -2071,7 +1992,7 @@ static void __init xfrm_policy_init(void)
 	hmask = 8 - 1;
 	sz = (hmask+1) * sizeof(struct hlist_head);
 
-	xfrm_policy_byidx = xfrm_policy_hash_alloc(sz);
+	xfrm_policy_byidx = xfrm_hash_alloc(sz);
 	xfrm_idx_hmask = hmask;
 	if (!xfrm_policy_byidx)
 		panic("XFRM: failed to allocate byidx hash\n");
@@ -2082,7 +2003,7 @@ static void __init xfrm_policy_init(void)
 		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
 
 		htab = &xfrm_policy_bydst[dir];
-		htab->table = xfrm_policy_hash_alloc(sz);
+		htab->table = xfrm_hash_alloc(sz);
 		htab->hmask = hmask;
 		if (!htab->table)
 			panic("XFRM: failed to allocate bydst hash\n");
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 37213f9f6a02c..4341795eb2441 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -18,11 +18,11 @@
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
-#include <linux/vmalloc.h>
 #include <linux/cache.h>
 #include <asm/uaccess.h>
 
+#include "xfrm_hash.h"
+
 struct sock *xfrm_nl;
 EXPORT_SYMBOL(xfrm_nl);
 
@@ -55,44 +55,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
-static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
-{
-	return ntohl(addr->a4);
-}
-
-static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
-{
-	return ntohl(addr->a6[2]^addr->a6[3]);
-}
-
-static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
-{
-	return ntohl(daddr->a4 ^ saddr->a4);
-}
-
-static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
-{
-	return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
-		     saddr->a6[2] ^ saddr->a6[3]);
-}
-
-static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr,
-					   xfrm_address_t *saddr,
-					   u32 reqid, unsigned short family,
-					   unsigned int hmask)
-{
-	unsigned int h = family ^ reqid;
-	switch (family) {
-	case AF_INET:
-		h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
-		break;
-	case AF_INET6:
-		h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
-		break;
-	};
-	return (h ^ (h >> 16)) & hmask;
-}
-
 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
 					 xfrm_address_t *saddr,
 					 u32 reqid,
@@ -101,76 +63,18 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
 	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
 }
 
-static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
-				       unsigned int hmask)
-{
-	unsigned int h = family;
-	switch (family) {
-	case AF_INET:
-		h ^= __xfrm4_addr_hash(addr);
-		break;
-	case AF_INET6:
-		h ^= __xfrm6_addr_hash(addr);
-		break;
-	};
-	return (h ^ (h >> 16)) & hmask;
-}
-
-static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
+static inline unsigned int xfrm_src_hash(xfrm_address_t *addr,
+					 unsigned short family)
 {
 	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
 }
 
-static inline unsigned int
-__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto,
-		unsigned short family, unsigned int hmask)
-{
-	unsigned int h = spi ^ proto;
-	switch (family) {
-	case AF_INET:
-		h ^= __xfrm4_addr_hash(daddr);
-		break;
-	case AF_INET6:
-		h ^= __xfrm6_addr_hash(daddr);
-		break;
-	}
-	return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
-}
-
 static inline unsigned int
 xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
 {
 	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
 }
 
-static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
-{
-	struct hlist_head *n;
-
-	if (sz <= PAGE_SIZE)
-		n = kmalloc(sz, GFP_KERNEL);
-	else if (hashdist)
-		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
-	else
-		n = (struct hlist_head *)
-			__get_free_pages(GFP_KERNEL, get_order(sz));
-
-	if (n)
-		memset(n, 0, sz);
-
-	return n;
-}
-
-static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
-{
-	if (sz <= PAGE_SIZE)
-		kfree(n);
-	else if (hashdist)
-		vfree(n);
-	else
-		free_pages((unsigned long)n, get_order(sz));
-}
-
 static void xfrm_hash_transfer(struct hlist_head *list,
 			       struct hlist_head *ndsttable,
 			       struct hlist_head *nsrctable,
@@ -216,18 +120,18 @@ static void xfrm_hash_resize(void *__unused)
 	mutex_lock(&hash_resize_mutex);
 
 	nsize = xfrm_hash_new_size();
-	ndst = xfrm_state_hash_alloc(nsize);
+	ndst = xfrm_hash_alloc(nsize);
 	if (!ndst)
 		goto out_unlock;
-	nsrc = xfrm_state_hash_alloc(nsize);
+	nsrc = xfrm_hash_alloc(nsize);
 	if (!nsrc) {
-		xfrm_state_hash_free(ndst, nsize);
+		xfrm_hash_free(ndst, nsize);
 		goto out_unlock;
 	}
-	nspi = xfrm_state_hash_alloc(nsize);
+	nspi = xfrm_hash_alloc(nsize);
 	if (!nspi) {
-		xfrm_state_hash_free(ndst, nsize);
-		xfrm_state_hash_free(nsrc, nsize);
+		xfrm_hash_free(ndst, nsize);
+		xfrm_hash_free(nsrc, nsize);
 		goto out_unlock;
 	}
 
@@ -251,9 +155,9 @@ static void xfrm_hash_resize(void *__unused)
 	spin_unlock_bh(&xfrm_state_lock);
 
 	osize = (ohashmask + 1) * sizeof(struct hlist_head);
-	xfrm_state_hash_free(odst, osize);
-	xfrm_state_hash_free(osrc, osize);
-	xfrm_state_hash_free(ospi, osize);
+	xfrm_hash_free(odst, osize);
+	xfrm_hash_free(osrc, osize);
+	xfrm_hash_free(ospi, osize);
 
 out_unlock:
 	mutex_unlock(&hash_resize_mutex);
@@ -1643,9 +1547,9 @@ void __init xfrm_state_init(void)
 
 	sz = sizeof(struct hlist_head) * 8;
 
-	xfrm_state_bydst = xfrm_state_hash_alloc(sz);
-	xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
-	xfrm_state_byspi = xfrm_state_hash_alloc(sz);
+	xfrm_state_bydst = xfrm_hash_alloc(sz);
+	xfrm_state_bysrc = xfrm_hash_alloc(sz);
+	xfrm_state_byspi = xfrm_hash_alloc(sz);
 	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
 		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
-- 
GitLab


From b27075735e371f481940a4920b329202334d2259 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 19 Sep 2006 11:13:24 +0300
Subject: [PATCH 0599/1063] IB/mthca: Fix lid used for sending traps

The destination LID used for sending traps to the SM is incorrectly
set to the port's own LID.  This is a regression from 2.6.17 -- after
a PortInfo MAD is received, no traps are sent to the SM LID.  The
traps go to the loopback interface instead, and are dropped there.
The SM LID should be taken from the sm_lid field of the PortInfo
response.

The bug was introduced by commit 12bbb2b7be7f5564952ebe0196623e97464b8ac5:
	IB/mthca: Add client reregister event generation

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_mad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index d9bc030bcccc7..45e106f148071 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *ibdev,
 
 			mthca_update_rate(to_mdev(ibdev), port_num);
 			update_sm_ah(to_mdev(ibdev), port_num,
-				     be16_to_cpu(pinfo->lid),
+				     be16_to_cpu(pinfo->sm_lid),
 				     pinfo->neighbormtu_mastersmsl & 0xf);
 
 			event.device           = ibdev;
-- 
GitLab


From 9fd558f454b666aca218a990d44f9e1ffac6ed4d Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Fri, 22 Sep 2006 15:17:18 -0700
Subject: [PATCH 0600/1063] IB/mthca: Don't use privileged UAR for kernel
 access

Make kernel use UAR2 instead of UAR1 for hardware access: this adds
sanity checking from the hardware side, without any performance cost.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_uar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 8e9219842be42..8b728486410d1 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -60,7 +60,7 @@ int mthca_init_uar_table(struct mthca_dev *dev)
 	ret = mthca_alloc_init(&dev->uar_table.alloc,
 			       dev->limits.num_uars,
 			       dev->limits.num_uars - 1,
-			       dev->limits.reserved_uars);
+			       dev->limits.reserved_uars + 1);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 9217b27b12eb5ab910d14b3376c2b6cd13d87711 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Thu, 3 Aug 2006 22:16:06 +0300
Subject: [PATCH 0601/1063] IB/ipoib: Fix flush/start xmit race (from code
 review)

Prevent the flush task from freeing an ipoib_neigh while
ipoib_start_xmit() is still accessing it through the pointer it has
loaded from the skb's hardware address.  To do this,
ipoib_flush_paths() now takes priv->tx_lock around priv->lock.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cf71d2a5515c1..36d76987a4811 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -336,7 +336,8 @@ void ipoib_flush_paths(struct net_device *dev)
 	struct ipoib_path *path, *tp;
 	LIST_HEAD(remove_list);
 
-	spin_lock_irq(&priv->lock);
+	spin_lock_irq(&priv->tx_lock);
+	spin_lock(&priv->lock);
 
 	list_splice(&priv->path_list, &remove_list);
 	INIT_LIST_HEAD(&priv->path_list);
@@ -347,12 +348,15 @@ void ipoib_flush_paths(struct net_device *dev)
 	list_for_each_entry_safe(path, tp, &remove_list, list) {
 		if (path->query)
 			ib_sa_cancel_query(path->query_id, path->query);
-		spin_unlock_irq(&priv->lock);
+		spin_unlock(&priv->lock);
+		spin_unlock_irq(&priv->tx_lock);
 		wait_for_completion(&path->done);
 		path_free(dev, path);
-		spin_lock_irq(&priv->lock);
+		spin_lock_irq(&priv->tx_lock);
+		spin_lock(&priv->lock);
 	}
-	spin_unlock_irq(&priv->lock);
+	spin_unlock(&priv->lock);
+	spin_unlock_irq(&priv->tx_lock);
 }
 
 static void path_rec_completion(int status,
-- 
GitLab


From ab10867621a96230757eb4a2a19d560b85f62ce9 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 22 Sep 2006 15:17:19 -0700
Subject: [PATCH 0602/1063] IB/uverbs: Use idr_read_cq() where appropriate

There were two functions that open-coded idr_read_cq() in terms of
idr_read_uobj() rather than using the helper.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 30923eb68ec74..b81307b625a6d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -894,7 +894,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_poll_cq       cmd;
 	struct ib_uverbs_poll_cq_resp *resp;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 	struct ib_wc                  *wc;
 	int                            ret = 0;
@@ -915,16 +914,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		goto out_wc;
 	}
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj) {
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	if (!cq) {
 		ret = -EINVAL;
 		goto out;
 	}
-	cq = uobj->object;
 
 	resp->count = ib_poll_cq(cq, cmd.ne, wc);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	for (i = 0; i < resp->count; i++) {
 		resp->wc[i].wr_id 	   = wc[i].wr_id;
@@ -959,21 +957,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 				int out_len)
 {
 	struct ib_uverbs_req_notify_cq cmd;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj)
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	if (!cq)
 		return -EINVAL;
-	cq = uobj->object;
 
 	ib_req_notify_cq(cq, cmd.solicited_only ?
 			 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	return in_len;
 }
-- 
GitLab


From 1ccf6aa19aabc11587d6d7818a5729adfed0e3de Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 22 Sep 2006 15:17:20 -0700
Subject: [PATCH 0603/1063] IB/uverbs: Fix lockdep warning when QP is created
 with 2 CQs

Lockdep warns when userspace creates a QP that uses different CQs for
send completions and receive completions, because both CQs are locked
and their mutexes belong to the same lock class.  However, we know
that the mutexes are distinct and the nesting is safe (there is no
possibility of AB-BA deadlock because the mutexes are locked with
down_read()), so annotate the situation with SINGLE_DEPTH_NESTING to
get rid of the lockdep warning.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 34 ++++++++++++++++------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b81307b625a6d..8b6df7cec0bf9 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 }
 
 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
-					struct ib_ucontext *context)
+					struct ib_ucontext *context, int nested)
 {
 	struct ib_uobject *uobj;
 
@@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
 	if (!uobj)
 		return NULL;
 
-	down_read(&uobj->mutex);
+	if (nested)
+		down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+	else
+		down_read(&uobj->mutex);
 	if (!uobj->live) {
 		put_uobj_read(uobj);
 		return NULL;
@@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
 	return uobj;
 }
 
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+			  int nested)
 {
 	struct ib_uobject *uobj;
 
-	uobj = idr_read_uobj(idr, id, context);
+	uobj = idr_read_uobj(idr, id, context, nested);
 	return uobj ? uobj->object : NULL;
 }
 
 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
 }
 
 static void put_pd_read(struct ib_pd *pd)
@@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd)
 	put_uobj_read(pd->uobject);
 }
 
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
 {
-	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
 }
 
 static void put_cq_read(struct ib_cq *cq)
@@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq)
 
 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
 }
 
 static void put_ah_read(struct ib_ah *ah)
@@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah)
 
 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
 }
 
 static void put_qp_read(struct ib_qp *qp)
@@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp)
 
 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
 }
 
 static void put_srq_read(struct ib_srq *srq)
@@ -867,7 +871,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
 	if (!cq)
 		return -EINVAL;
 
@@ -914,7 +918,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		goto out_wc;
 	}
 
-	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
 	if (!cq) {
 		ret = -EINVAL;
 		goto out;
@@ -962,7 +966,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
 	if (!cq)
 		return -EINVAL;
 
@@ -1060,9 +1064,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 
 	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
 	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
 		ret = -EINVAL;
-- 
GitLab


From 2a214182d23594915d6ae517b6368ba2eae055d2 Mon Sep 17 00:00:00 2001
From: James Lentini <jlentini@netapp.com>
Date: Fri, 22 Sep 2006 15:17:20 -0700
Subject: [PATCH 0604/1063] IB/mthca: Include the header we really want

Signed-off-by: James Lentini <jlentini@netapp.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index deabc14b4ea4c..99a94d7109350 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -34,7 +34,7 @@
  * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/sched.h>
+#include <linux/completion.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <asm/io.h>
-- 
GitLab


From ded7f1a16d50527359be02f8b04f9ba56bc923e6 Mon Sep 17 00:00:00 2001
From: Ishai Rabinovitz <ishai@mellanox.co.il>
Date: Tue, 15 Aug 2006 17:34:52 +0300
Subject: [PATCH 0605/1063] IB/srp: Add port/device attributes

Add local_ib_device and local_ib_port attributes to srp scsi_host.
These are needed when we want to connect to the same target through
multiple distinct ports.

Signed-off-by: Ishai Rabinovitz <ishai@mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/srp/ib_srp.c | 30 +++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index fd8344cdc0dbc..249a98c06aeb0 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1449,12 +1449,28 @@ static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf)
 	return sprintf(buf, "%d\n", target->zero_req_lim);
 }
 
-static CLASS_DEVICE_ATTR(id_ext,	S_IRUGO, show_id_ext,		NULL);
-static CLASS_DEVICE_ATTR(ioc_guid,	S_IRUGO, show_ioc_guid,		NULL);
-static CLASS_DEVICE_ATTR(service_id,	S_IRUGO, show_service_id,	NULL);
-static CLASS_DEVICE_ATTR(pkey,		S_IRUGO, show_pkey,		NULL);
-static CLASS_DEVICE_ATTR(dgid,		S_IRUGO, show_dgid,		NULL);
-static CLASS_DEVICE_ATTR(zero_req_lim,	S_IRUGO, show_zero_req_lim,	NULL);
+static ssize_t show_local_ib_port(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	return sprintf(buf, "%d\n", target->srp_host->port);
+}
+
+static ssize_t show_local_ib_device(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	return sprintf(buf, "%s\n", target->srp_host->dev->dev->name);
+}
+
+static CLASS_DEVICE_ATTR(id_ext,	  S_IRUGO, show_id_ext,		 NULL);
+static CLASS_DEVICE_ATTR(ioc_guid,	  S_IRUGO, show_ioc_guid,	 NULL);
+static CLASS_DEVICE_ATTR(service_id,	  S_IRUGO, show_service_id,	 NULL);
+static CLASS_DEVICE_ATTR(pkey,		  S_IRUGO, show_pkey,		 NULL);
+static CLASS_DEVICE_ATTR(dgid,		  S_IRUGO, show_dgid,		 NULL);
+static CLASS_DEVICE_ATTR(zero_req_lim,	  S_IRUGO, show_zero_req_lim,	 NULL);
+static CLASS_DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,	 NULL);
+static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
 
 static struct class_device_attribute *srp_host_attrs[] = {
 	&class_device_attr_id_ext,
@@ -1463,6 +1479,8 @@ static struct class_device_attribute *srp_host_attrs[] = {
 	&class_device_attr_pkey,
 	&class_device_attr_dgid,
 	&class_device_attr_zero_req_lim,
+	&class_device_attr_local_ib_port,
+	&class_device_attr_local_ib_device,
 	NULL
 };
 
-- 
GitLab


From e4bec827feda76d5e7417a2696a75424834d564f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 22 Sep 2006 15:17:35 -0700
Subject: [PATCH 0606/1063] [IPSEC] esp: Defer output IV initialization to
 first use.

First of all, if the xfrm_state only gets used for input
packets this entropy is a complete waste.

Secondly, it is often the case that a configuration loads
many rules (perhaps even dynamically) and they don't all
necessarily ever get used.

This get_random_bytes() call was showing up in the profiles
for xfrm_state inserts which is how I noticed this.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/esp.h | 5 +++--
 net/ipv4/esp4.c   | 9 +++++++--
 net/ipv6/esp6.c   | 9 +++++++--
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/net/esp.h b/include/net/esp.h
index 064366d66eead..713d039f4af7a 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -15,13 +15,14 @@ struct esp_data
 	struct {
 		u8			*key;		/* Key */
 		int			key_len;	/* Key length */
-		u8			*ivec;		/* ivec buffer */
+		int			padlen;		/* 0..255 */
 		/* ivlen is offset from enc_data, where encrypted data start.
 		 * It is logically different of crypto_tfm_alg_ivsize(tfm).
 		 * We assume that it is either zero (no ivec), or
 		 * >= crypto_tfm_alg_ivsize(tfm). */
 		int			ivlen;
-		int			padlen;		/* 0..255 */
+		int			ivinitted;
+		u8			*ivec;		/* ivec buffer */
 		struct crypto_blkcipher	*tfm;		/* crypto handle */
 	} conf;
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index e87377e1d6b64..13b29360d102f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -95,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
 
-	if (esp->conf.ivlen)
+	if (esp->conf.ivlen) {
+		if (unlikely(!esp->conf.ivinitted)) {
+			get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+			esp->conf.ivinitted = 1;
+		}
 		crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
+	}
 
 	do {
 		struct scatterlist *sg = &esp->sgbuf[0];
@@ -378,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x)
 		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
 		if (unlikely(esp->conf.ivec == NULL))
 			goto error;
-		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+		esp->conf.ivinitted = 0;
 	}
 	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ae50b95111510..e78680a9985b2 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -99,8 +99,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
 
-	if (esp->conf.ivlen)
+	if (esp->conf.ivlen) {
+		if (unlikely(!esp->conf.ivinitted)) {
+			get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+			esp->conf.ivinitted = 1;
+		}
 		crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
+	}
 
 	do {
 		struct scatterlist *sg = &esp->sgbuf[0];
@@ -353,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x)
 		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
 		if (unlikely(esp->conf.ivec == NULL))
 			goto error;
-		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+		esp->conf.ivinitted = 0;
 	}
 	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
-- 
GitLab


From e731c248ba9e8c7025ae8b4a3fa48e4236b82e52 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 24 Aug 2006 23:18:12 +0900
Subject: [PATCH 0607/1063] [IPV6] MIP6: Several obvious clean-ups.

- Remove redundant code.  Pointed out by Brian Haley <brian.haley@hp.com>.
- Unify code paths with/without CONFIG_IPV6_MIP.
- Use NIP6_FMT for IPv6 address textual presentation.
- Fold long line.  Pointed out by David Miller <davem@davemloft.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ah6.c     | 45 ++++++---------------------------------------
 net/ipv6/exthdrs.c |  1 -
 net/ipv6/mip6.c    |  6 ++++--
 3 files changed, 10 insertions(+), 42 deletions(-)

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0f2b4e330aa90..b0d83e8e42527 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -128,9 +128,7 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
 		off += optlen;
 		len -= optlen;
 	}
-	if (len == 0)
-		return;
-
+	/* Note: ok if len == 0 */
 bad:
 	return;
 }
@@ -175,11 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
 	ipv6_addr_copy(&iph->daddr, &final_addr);
 }
 
-#ifdef CONFIG_IPV6_MIP6
 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
-#else
-static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
-#endif
 {
 	union {
 		struct ipv6hdr *iph;
@@ -194,30 +188,12 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
 
 	while (exthdr.raw < end) {
 		switch (nexthdr) {
-#ifdef CONFIG_IPV6_MIP6
-		case NEXTHDR_HOP:
-			if (!zero_out_mutable_opts(exthdr.opth)) {
-				LIMIT_NETDEBUG(
-					KERN_WARNING "overrun %sopts\n",
-					nexthdr == NEXTHDR_HOP ?
-						"hop" : "dest");
-				return -EINVAL;
-			}
-			break;
 		case NEXTHDR_DEST:
+#ifdef CONFIG_IPV6_MIP6
 			if (dir == XFRM_POLICY_OUT)
 				ipv6_rearrange_destopt(iph, exthdr.opth);
-			if (!zero_out_mutable_opts(exthdr.opth)) {
-				LIMIT_NETDEBUG(
-					KERN_WARNING "overrun %sopts\n",
-					nexthdr == NEXTHDR_HOP ?
-						"hop" : "dest");
-				return -EINVAL;
-			}
-			break;
-#else
+#endif
 		case NEXTHDR_HOP:
-		case NEXTHDR_DEST:
 			if (!zero_out_mutable_opts(exthdr.opth)) {
 				LIMIT_NETDEBUG(
 					KERN_WARNING "overrun %sopts\n",
@@ -226,7 +202,6 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
 				return -EINVAL;
 			}
 			break;
-#endif
 
 		case NEXTHDR_ROUTING:
 			ipv6_rearrange_rthdr(iph, exthdr.rth);
@@ -282,16 +257,13 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 		}
 #ifdef CONFIG_IPV6_MIP6
 		memcpy(tmp_ext, &top_iph->saddr, extlen);
-		err = ipv6_clear_mutable_options(top_iph,
-						 extlen - sizeof(*tmp_ext) +
-						 sizeof(*top_iph),
-						 XFRM_POLICY_OUT);
 #else
 		memcpy(tmp_ext, &top_iph->daddr, extlen);
+#endif
 		err = ipv6_clear_mutable_options(top_iph,
 						 extlen - sizeof(*tmp_ext) +
-						 sizeof(*top_iph));
-#endif
+						 sizeof(*top_iph),
+						 XFRM_POLICY_OUT);
 		if (err)
 			goto error_free_iph;
 	}
@@ -386,13 +358,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!tmp_hdr)
 		goto out;
 	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
-#ifdef CONFIG_IPV6_MIP6
 	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
 		goto free_out;
-#else
-	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
-		goto free_out;
-#endif
 	skb->nh.ipv6h->priority    = 0;
 	skb->nh.ipv6h->flow_lbl[0] = 0;
 	skb->nh.ipv6h->flow_lbl[1] = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6a6466bb5f26e..084f78c3479b6 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -87,7 +87,6 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 		len -= optlen;
 	}
 	/* not_found */
-	return -1;
  bad:
 	return -1;
 }
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 70854035c1314..99d116caecda4 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -121,7 +121,8 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 				    &skb->nh.ipv6h->daddr,
 				    mhlen, IPPROTO_MH,
 				    skb_checksum(skb, 0, mhlen, 0))) {
-			LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
+			LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed "
+				       "[" NIP6_FMT " > " NIP6_FMT "]\n",
 				       NIP6(skb->nh.ipv6h->saddr),
 				       NIP6(skb->nh.ipv6h->daddr));
 			return -1;
@@ -234,7 +235,8 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
 	struct timeval stamp;
 	int err = 0;
 
-	if (unlikely(fl->proto == IPPROTO_MH && fl->fl_mh_type <= IP6_MH_TYPE_MAX))
+	if (unlikely(fl->proto == IPPROTO_MH &&
+		     fl->fl_mh_type <= IP6_MH_TYPE_MAX))
 		goto out;
 
 	if (likely(opt->dsthao)) {
-- 
GitLab


From 2cc67cc731d9b693a08e781e98fec0e3a6d6ba44 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 21 Aug 2006 19:18:57 +0900
Subject: [PATCH 0608/1063] [IPV6] ROUTE: Routing by Traffic Class.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/fib6_rules.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 7b4908cc52b36..91f6233d8efd4 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -121,6 +121,9 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
 		return 0;
 
+	if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
+		return 0;
+
 	return 1;
 }
 
-- 
GitLab


From 75bff8f023e02b045a8f68f36fa7da98dca124b8 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 21 Aug 2006 19:22:01 +0900
Subject: [PATCH 0609/1063] [IPV6] ROUTE: Routing by FWMARK.

Based on patch by Jean Lorchat <lorchat@sfc.wide.ad.jp>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/fib_rules.h |  2 +-
 include/net/flow.h        |  2 ++
 net/ipv6/Kconfig          |  7 +++++++
 net/ipv6/fib6_rules.c     | 23 +++++++++++++++++++++++
 net/ipv6/route.c          |  1 +
 5 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 19a82b6c1c1f5..2987549d60444 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -34,7 +34,7 @@ enum
 	FRA_UNUSED3,
 	FRA_UNUSED4,
 	FRA_UNUSED5,
-	FRA_FWMARK,	/* netfilter mark (IPv4) */
+	FRA_FWMARK,	/* netfilter mark (IPv4/IPv6) */
 	FRA_FLOW,	/* flow/class id */
 	FRA_UNUSED6,
 	FRA_UNUSED7,
diff --git a/include/net/flow.h b/include/net/flow.h
index e0522914316e3..3ca210ec1379f 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,6 +26,7 @@ struct flowi {
 		struct {
 			struct in6_addr		daddr;
 			struct in6_addr		saddr;
+			__u32			fwmark;
 			__u32			flowlabel;
 		} ip6_u;
 
@@ -42,6 +43,7 @@ struct flowi {
 #define fld_scope	nl_u.dn_u.scope
 #define fl6_dst		nl_u.ip6_u.daddr
 #define fl6_src		nl_u.ip6_u.saddr
+#define fl6_fwmark	nl_u.ip6_u.fwmark
 #define fl6_flowlabel	nl_u.ip6_u.flowlabel
 #define fl4_dst		nl_u.ip4_u.daddr
 #define fl4_src		nl_u.ip4_u.saddr
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 21e0cc808f44e..a2d211da2abac 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -173,3 +173,10 @@ config IPV6_MULTIPLE_TABLES
 	---help---
 	  Support multiple routing tables.
 
+config IPV6_ROUTE_FWMARK
+	bool "IPv6: use netfilter MARK value as routing key"
+	depends on IPV6_MULTIPLE_TABLES && NETFILTER
+	---help---
+	  If you say Y here, you will be able to specify different routes for
+	  packets with different mark values (see iptables(8), MARK target).
+
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 91f6233d8efd4..aebd9e2b85a88 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -26,6 +26,9 @@ struct fib6_rule
 	struct fib_rule		common;
 	struct rt6key		src;
 	struct rt6key		dst;
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	u8			fwmark;
+#endif
 	u8			tclass;
 };
 
@@ -124,6 +127,11 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
 		return 0;
 
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if (r->fwmark && (r->fwmark != fl->fl6_fwmark))
+		return 0;
+#endif
+
 	return 1;
 }
 
@@ -164,6 +172,11 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
 			   sizeof(struct in6_addr));
 
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if (tb[FRA_FWMARK])
+		rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+#endif
+
 	rule6->src.plen = frh->src_len;
 	rule6->dst.plen = frh->dst_len;
 	rule6->tclass = frh->tos;
@@ -195,6 +208,11 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	    nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
 		return 0;
 
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
+#endif
+
 	return 1;
 }
 
@@ -216,6 +234,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 		NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
 			&rule6->src.addr);
 
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if (rule6->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark);
+#endif
+
 	return 0;
 
 nla_put_failure:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 20691285aee51..649350bd92999 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -703,6 +703,7 @@ void ip6_route_input(struct sk_buff *skb)
 			.ip6_u = {
 				.daddr = iph->daddr,
 				.saddr = iph->saddr,
+				.fwmark = skb->nfmark,
 				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
 			},
 		},
-- 
GitLab


From 1aaec67f9335a17856dfacdd3e5cc6f4c18faeec Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 25 Jun 2006 23:54:55 +0900
Subject: [PATCH 0610/1063] [NET]: Add common helper functions to convert
 IPv6/IPv4 address string to network address structure.

These helpers can be used in netfilter, cifs etc.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/inet.h |   2 +
 net/core/utils.c     | 215 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 217 insertions(+)

diff --git a/include/linux/inet.h b/include/linux/inet.h
index 6c5587af118d4..b7c6da7d6d324 100644
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -46,5 +46,7 @@
 #include <linux/types.h>
 
 extern __be32 in_aton(const char *str);
+extern int in4_pton(const char *src, int srclen, u8 *dst, char delim, const char **end);
+extern int in6_pton(const char *src, int srclen, u8 *dst, char delim, const char **end);
 #endif
 #endif	/* _LINUX_INET_H */
diff --git a/net/core/utils.c b/net/core/utils.c
index e31c90e055941..5a06e8a72c17f 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -4,6 +4,7 @@
  *	Authors:
  *	net_random Alan Cox
  *	net_ratelimit Andy Kleen
+ *	in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
  *
  *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  *
@@ -191,3 +192,217 @@ __be32 in_aton(const char *str)
 }
 
 EXPORT_SYMBOL(in_aton);
+
+#define IN6PTON_XDIGIT		0x00010000
+#define IN6PTON_DIGIT		0x00020000
+#define IN6PTON_COLON_MASK	0x00700000
+#define IN6PTON_COLON_1		0x00100000	/* single : requested */
+#define IN6PTON_COLON_2		0x00200000	/* second : requested */
+#define IN6PTON_COLON_1_2	0x00400000	/* :: requested */
+#define IN6PTON_DOT		0x00800000	/* . */
+#define IN6PTON_DELIM		0x10000000
+#define IN6PTON_NULL		0x20000000	/* first/tail */
+#define IN6PTON_UNKNOWN		0x40000000
+
+static inline int digit2bin(char c, char delim)
+{
+	if (c == delim || c == '\0')
+		return IN6PTON_DELIM;
+	if (c == '.')
+		return IN6PTON_DOT;
+	if (c >= '0' && c <= '9')
+		return (IN6PTON_DIGIT | (c - '0'));
+	return IN6PTON_UNKNOWN;
+}
+
+static inline int xdigit2bin(char c, char delim)
+{
+	if (c == delim || c == '\0')
+		return IN6PTON_DELIM;
+	if (c == ':')
+		return IN6PTON_COLON_MASK;
+	if (c == '.')
+		return IN6PTON_DOT;
+	if (c >= '0' && c <= '9')
+		return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
+	if (c >= 'a' && c <= 'f')
+		return (IN6PTON_XDIGIT | (c - 'a' + 10));
+	if (c >= 'A' && c <= 'F')
+		return (IN6PTON_XDIGIT | (c - 'A' + 10));
+	return IN6PTON_UNKNOWN;
+}
+
+int in4_pton(const char *src, int srclen,
+	     u8 *dst,
+	     char delim, const char **end)
+{
+	const char *s;
+	u8 *d;
+	u8 dbuf[4];
+	int ret = 0;
+	int i;
+	int w = 0;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+	s = src;
+	d = dbuf;
+	i = 0;
+	while(1) {
+		int c;
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+		if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) {
+			goto out;
+		}
+		if (c & (IN6PTON_DOT | IN6PTON_DELIM)) {
+			if (w == 0)
+				goto out;
+			*d++ = w & 0xff;
+			w = 0;
+			i++;
+			if (c & IN6PTON_DELIM) {
+				if (i != 4)
+					goto out;
+				break;
+			}
+			goto cont;
+		}
+		w = (w * 10) + c;
+		if ((w & 0xffff) > 255) {
+			goto out;
+		}
+cont:
+		if (i >= 4)
+			goto out;
+		s++;
+		srclen--;
+	}
+	ret = 1;
+	memcpy(dst, dbuf, sizeof(dbuf));
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+
+EXPORT_SYMBOL(in4_pton);
+
+int in6_pton(const char *src, int srclen,
+	     u8 *dst,
+	     char delim, const char **end)
+{
+	const char *s, *tok = NULL;
+	u8 *d, *dc = NULL;
+	u8 dbuf[16];
+	int ret = 0;
+	int i;
+	int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;
+	int w = 0;
+
+	memset(dbuf, 0, sizeof(dbuf));
+
+	s = src;
+	d = dbuf;
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	printf("srclen=%d\n", srclen);
+
+	while (1) {
+		int c;
+
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+		if (!(c & state))
+			goto out;
+		if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+			/* process one 16-bit word */
+			if (!(state & IN6PTON_NULL)) {
+				*d++ = (w >> 8) & 0xff;
+				*d++ = w & 0xff;
+			}
+			w = 0;
+			if (c & IN6PTON_DELIM) {
+				/* We've processed last word */
+				break;
+			}
+			/*
+			 * COLON_1 => XDIGIT
+			 * COLON_2 => XDIGIT|DELIM
+			 * COLON_1_2 => COLON_2
+			 */
+			switch (state & IN6PTON_COLON_MASK) {
+			case IN6PTON_COLON_2:
+				dc = d;
+				state = IN6PTON_XDIGIT | IN6PTON_DELIM;
+				if (dc - dbuf >= sizeof(dbuf))
+					state |= IN6PTON_NULL;
+				break;
+			case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
+				state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
+				break;
+			case IN6PTON_COLON_1:
+				state = IN6PTON_XDIGIT;
+				break;
+			case IN6PTON_COLON_1_2:
+				state = IN6PTON_COLON_2;
+				break;
+			default:
+				state = 0;
+			}
+			tok = s + 1;
+			goto cont;
+		}
+
+		if (c & IN6PTON_DOT) {
+			ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);
+			if (ret > 0) {
+				d += 4;
+				break;
+			}
+			goto out;
+		}
+
+		w = (w << 4) | (0xff & c);
+		state = IN6PTON_COLON_1 | IN6PTON_DELIM;
+		if (!(w & 0xf000)) {
+			state |= IN6PTON_XDIGIT;
+		}
+		if (!dc && d + 2 < dbuf + sizeof(dbuf)) {
+			state |= IN6PTON_COLON_1_2;
+			state &= ~IN6PTON_DELIM;
+		}
+		if (d + 2 >= dbuf + sizeof(dbuf)) {
+			state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
+		}
+cont:
+		if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
+		    d + 4 == dbuf + sizeof(dbuf)) {
+			state |= IN6PTON_DOT;
+		}
+		if (d >= dbuf + sizeof(dbuf)) {
+			state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
+		}
+		s++;
+		srclen--;
+	}
+
+	i = 15; d--;
+
+	if (dc) {
+		while(d >= dc)
+			dst[i--] = *d--;
+		while(i >= dc - dbuf)
+			dst[i--] = 0;
+		while(i >= 0)
+			dst[i--] = *d--;
+	} else
+		memcpy(dst, dbuf, sizeof(dbuf));
+
+	ret = 1;
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+
+EXPORT_SYMBOL(in6_pton);
-- 
GitLab


From 1884f78c7a8b456c654338e3eb2874a99688ea10 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 19 Jun 2006 03:20:32 +0900
Subject: [PATCH 0611/1063] [NETFILTER] NF_CONNTRACK_FTP: Use in6_pton() to
 convert address string.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/netfilter/nf_conntrack_ftp.c | 96 ++------------------------------
 1 file changed, 4 insertions(+), 92 deletions(-)

diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 960972d225f97..9dccb40398890 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -111,101 +111,13 @@ static struct ftp_search {
 	},
 };
 
-/* This code is based on inet_pton() in glibc-2.2.4 */
 static int
 get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
 {
-	static const char xdigits[] = "0123456789abcdef";
-	u_int8_t tmp[16], *tp, *endp, *colonp;
-	int ch, saw_xdigit;
-	u_int32_t val;
-	size_t clen = 0;
-
-	tp = memset(tmp, '\0', sizeof(tmp));
-	endp = tp + sizeof(tmp);
-	colonp = NULL;
-
-	/* Leading :: requires some special handling. */
-	if (*src == ':'){
-		if (*++src != ':') {
-			DEBUGP("invalid \":\" at the head of addr\n");
-			return 0;
-		}
-		clen++;
-	}
-
-	saw_xdigit = 0;
-	val = 0;
-	while ((clen < dlen) && (*src != term)) {
-		const char *pch;
-
-		ch = tolower(*src++);
-		clen++;
-
-                pch = strchr(xdigits, ch);
-                if (pch != NULL) {
-                        val <<= 4;
-                        val |= (pch - xdigits);
-                        if (val > 0xffff)
-                                return 0;
-
-			saw_xdigit = 1;
-                        continue;
-                }
-		if (ch != ':') {
-			DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
-			return 0;
-		}
-
-		if (!saw_xdigit) {
-			if (colonp) {
-				DEBUGP("invalid location of \"::\".\n");
-				return 0;
-			}
-			colonp = tp;
-			continue;
-		} else if (*src == term) {
-			DEBUGP("trancated IPv6 addr\n");
-			return 0;
-		}
-
-		if (tp + 2 > endp)
-			return 0;
-		*tp++ = (u_int8_t) (val >> 8) & 0xff;
-		*tp++ = (u_int8_t) val & 0xff;
-
-		saw_xdigit = 0;
-		val = 0;
-		continue;
-        }
-        if (saw_xdigit) {
-                if (tp + 2 > endp)
-                        return 0;
-                *tp++ = (u_int8_t) (val >> 8) & 0xff;
-                *tp++ = (u_int8_t) val & 0xff;
-        }
-        if (colonp != NULL) {
-                /*
-                 * Since some memmove()'s erroneously fail to handle
-                 * overlapping regions, we'll do the shift by hand.
-                 */
-                const int n = tp - colonp;
-                int i;
-
-                if (tp == endp)
-                        return 0;
-
-                for (i = 1; i <= n; i++) {
-                        endp[- i] = colonp[n - i];
-                        colonp[n - i] = 0;
-                }
-                tp = endp;
-        }
-        if (tp != endp || (*src != term))
-                return 0;
-
-        memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
-        return clen;
+	int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), dst, term, &end);
+	if (ret > 0)
+		return (int)(end - src);
+	return 0;
 }
 
 static int try_number(const char *data, size_t dlen, u_int32_t array[],
-- 
GitLab


From d4f3e9b735c72823aab597bfa4860d184658a609 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 25 Aug 2006 00:27:09 -0700
Subject: [PATCH 0612/1063] [NET] in6_pton: Kill errant printf statement.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/utils.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/core/utils.c b/net/core/utils.c
index 5a06e8a72c17f..2682490777dec 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -306,8 +306,6 @@ int in6_pton(const char *src, int srclen,
 	if (srclen < 0)
 		srclen = strlen(src);
 
-	printf("srclen=%d\n", srclen);
-
 	while (1) {
 		int c;
 
-- 
GitLab


From 298969727e7b855d53f3becfa92c055914082ec4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Aug 2006 00:37:24 -0700
Subject: [PATCH 0613/1063] [TCP] tcp_lp: use BUILD_BUG_ON

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_lp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 649ebaed1df1d..308fb7e071c56 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -327,7 +327,7 @@ static struct tcp_congestion_ops tcp_lp = {
 
 static int __init tcp_lp_register(void)
 {
-	BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_lp);
 }
 
-- 
GitLab


From 65e3d72654d9a33cdccd5c19777a5515ae9dd37d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Aug 2006 00:38:03 -0700
Subject: [PATCH 0614/1063] [TCP] tcp_bic: use BUILD_BUG_ON

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b0134ab083794..5730333cd0ac9 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = {
 
 static int __init bictcp_register(void)
 {
-	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&bictcp);
 }
 
-- 
GitLab


From acba48e1a3c95082af1e12c5efaaca3506103a92 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 25 Aug 2006 15:46:46 -0700
Subject: [PATCH 0615/1063] [XFRM]: Respect priority in policy lookups.

Even if we find an exact match in the hash table,
we must inspect the inexact list to look for a match
with a better priority.

Noticed by Masahide NAKAMURA <nakam@linux-ipv6.org>.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b446ca31fecc8..1cf3209cdf4b3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -908,6 +908,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
 	xfrm_address_t *daddr, *saddr;
 	struct hlist_node *entry;
 	struct hlist_head *chain;
+	u32 priority = ~0U;
 
 	daddr = xfrm_flowi_daddr(fl, family);
 	saddr = xfrm_flowi_saddr(fl, family);
@@ -919,21 +920,21 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
 	ret = NULL;
 	hlist_for_each_entry(pol, entry, chain, bydst) {
 		if (xfrm_policy_match(pol, fl, type, family, dir)) {
-			xfrm_pol_hold(pol);
 			ret = pol;
+			priority = ret->priority;
 			break;
 		}
 	}
-	if (!ret) {
-		chain = &xfrm_policy_inexact[dir];
-		hlist_for_each_entry(pol, entry, chain, bydst) {
-			if (xfrm_policy_match(pol, fl, type, family, dir)) {
-				xfrm_pol_hold(pol);
-				ret = pol;
-				break;
-			}
+	chain = &xfrm_policy_inexact[dir];
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (xfrm_policy_match(pol, fl, type, family, dir) &&
+		    pol->priority < priority) {
+			ret = pol;
+			break;
 		}
 	}
+	if (ret)
+		xfrm_pol_hold(ret);
 	read_unlock_bh(&xfrm_policy_lock);
 
 	return ret;
-- 
GitLab


From 6c5eb6a50741b882fd99fbb8178942ca2f74b724 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 25 Aug 2006 16:04:29 -0700
Subject: [PATCH 0616/1063] [IPV6] ROUTE: Fix FWMARK support.

- Add missing nla_policy entry.
- type of fwmark is u32, not u8.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index aebd9e2b85a88..b4cd5c03b0b61 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -27,7 +27,7 @@ struct fib6_rule
 	struct rt6key		src;
 	struct rt6key		dst;
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
-	u8			fwmark;
+	u32			fwmark;
 #endif
 	u8			tclass;
 };
@@ -140,6 +140,7 @@ static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = {
 	[FRA_PRIORITY]	= { .type = NLA_U32 },
 	[FRA_SRC]	= { .minlen = sizeof(struct in6_addr) },
 	[FRA_DST]	= { .minlen = sizeof(struct in6_addr) },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
 	[FRA_TABLE]	= { .type = NLA_U32 },
 };
 
-- 
GitLab


From 2613aad5ab28579687519918cdc353af0eed5a3f Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 25 Aug 2006 16:05:00 -0700
Subject: [PATCH 0617/1063] [IPV6] ROUTE: Fix size of fib6_rule_policy.

It should not be RTA_MAX+1 but FRA_MAX+1.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b4cd5c03b0b61..3d64c71f52dee 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -135,7 +135,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	return 1;
 }
 
-static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = {
+static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
 	[FRA_IFNAME]	= { .type = NLA_STRING },
 	[FRA_PRIORITY]	= { .type = NLA_U32 },
 	[FRA_SRC]	= { .minlen = sizeof(struct in6_addr) },
-- 
GitLab


From cd9d742622fbc2190221e0b2aca80596bfd17733 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 25 Aug 2006 16:05:43 -0700
Subject: [PATCH 0618/1063] [IPV6] ROUTE: Add support for fwmask in routing
 rules.

Add support for fwmark masks.
A mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask.

Based on patch for net/ipv4/fib_rules.c by Patrick McHardy <kaber@trash.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 3d64c71f52dee..ee4aa43ad9735 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -28,6 +28,7 @@ struct fib6_rule
 	struct rt6key		dst;
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
 	u32			fwmark;
+	u32			fwmask;
 #endif
 	u8			tclass;
 };
@@ -128,7 +129,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 		return 0;
 
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
-	if (r->fwmark && (r->fwmark != fl->fl6_fwmark))
+	if ((r->fwmark ^ fl->fl6_fwmark) / r->fwmask)
 		return 0;
 #endif
 
@@ -141,6 +142,7 @@ static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
 	[FRA_SRC]	= { .minlen = sizeof(struct in6_addr) },
 	[FRA_DST]	= { .minlen = sizeof(struct in6_addr) },
 	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FWMASK]	= { .type = NLA_U32 },
 	[FRA_TABLE]	= { .type = NLA_U32 },
 };
 
@@ -174,8 +176,20 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			   sizeof(struct in6_addr));
 
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
-	if (tb[FRA_FWMARK])
+	if (tb[FRA_FWMARK]) {
 		rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+		if (rule6->fwmark) {
+			/*
+			 * if the mark value is non-zero,
+			 * all bits are compared by default
+			 * unless a mask is explicitly specified.
+			 */
+			rule6->fwmask = 0xFFFFFFFF;
+		}
+	}
+
+	if (tb[FRA_FWMASK])
+		rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]);
 #endif
 
 	rule6->src.plen = frh->src_len;
@@ -212,6 +226,9 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
 	if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK])))
 		return 0;
+
+	if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+		return 0;
 #endif
 
 	return 1;
@@ -238,6 +255,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
 	if (rule6->fwmark)
 		NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark);
+
+	if (rule6->fwmask)
+		NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask);
 #endif
 
 	return 0;
-- 
GitLab


From 267935b197d2a6e6924f9de2841f0470bfe63acd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 25 Aug 2006 16:07:48 -0700
Subject: [PATCH 0619/1063] [IPV6]: Fix build with fwmark disabled.

Based upon a patch by Brian Haley.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 649350bd92999..d83844d9499bf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -703,7 +703,9 @@ void ip6_route_input(struct sk_buff *skb)
 			.ip6_u = {
 				.daddr = iph->daddr,
 				.saddr = iph->saddr,
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
 				.fwmark = skb->nfmark,
+#endif
 				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
 			},
 		},
-- 
GitLab


From bbfb39cbf63829d1db607aa90cbdca557a3a131d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 25 Aug 2006 16:10:14 -0700
Subject: [PATCH 0620/1063] [IPV4]: Add support for fwmark masks in routing
 rules

Add a FRA_FWMASK attribute for fwmark masks. For compatibility a mask of
0xFFFFFFFF is used when a mark value != 0 is sent without a mask.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  3 ++-
 net/ipv4/fib_rules.c      | 21 +++++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 2987549d60444..4418c8d9d4799 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -34,12 +34,13 @@ enum
 	FRA_UNUSED3,
 	FRA_UNUSED4,
 	FRA_UNUSED5,
-	FRA_FWMARK,	/* netfilter mark (IPv4/IPv6) */
+	FRA_FWMARK,	/* netfilter mark */
 	FRA_FLOW,	/* flow/class id */
 	FRA_UNUSED6,
 	FRA_UNUSED7,
 	FRA_UNUSED8,
 	FRA_TABLE,	/* Extended table id */
+	FRA_FWMASK,	/* mask for netfilter mark */
 	__FRA_MAX
 };
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ce185ac6f2606..280f424ca9c9c 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -46,6 +46,7 @@ struct fib4_rule
 	u32			dstmask;
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	u32			fwmark;
+	u32			fwmask;
 #endif
 #ifdef CONFIG_NET_CLS_ROUTE
 	u32			tclassid;
@@ -160,7 +161,7 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 		return 0;
 
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	if (r->fwmark && (r->fwmark != fl->fl4_fwmark))
+	if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask)
 		return 0;
 #endif
 
@@ -183,6 +184,7 @@ static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
 	[FRA_SRC]	= { .type = NLA_U32 },
 	[FRA_DST]	= { .type = NLA_U32 },
 	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FWMASK]	= { .type = NLA_U32 },
 	[FRA_FLOW]	= { .type = NLA_U32 },
 	[FRA_TABLE]	= { .type = NLA_U32 },
 };
@@ -219,8 +221,17 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		rule4->dst = nla_get_u32(tb[FRA_DST]);
 
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	if (tb[FRA_FWMARK])
+	if (tb[FRA_FWMARK]) {
 		rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+		if (rule4->fwmark)
+			/* compatibility: if the mark value is non-zero all bits
+			 * are compared unless a mask is explicitly specified.
+			 */
+			rule4->fwmask = 0xFFFFFFFF;
+	}
+
+	if (tb[FRA_FWMASK])
+		rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]);
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -256,6 +267,9 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
 		return 0;
+
+	if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+		return 0;
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -285,6 +299,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	if (rule4->fwmark)
 		NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
+
+	if (rule4->fwmask || rule4->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask);
 #endif
 
 	if (rule4->dst_len)
-- 
GitLab


From 88e91f290307d22ae88302e3a24f0c36905e8a6c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 25 Aug 2006 16:11:08 -0700
Subject: [PATCH 0621/1063] [DECNET]: Add support for fwmark masks in routing
 rules

Add support for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used
when a mark value != 0 is sent without a mask.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Steven Whitehouse <steve@chygwyn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_rules.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 50e819edf8c72..63ad63dfd252c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -47,6 +47,7 @@ struct dn_fib_rule
 	u8			flags;
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
 	u32			fwmark;
+	u32			fwmask;
 #endif
 };
 
@@ -116,6 +117,7 @@ static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
 	[FRA_SRC]	= { .type = NLA_U16 },
 	[FRA_DST]	= { .type = NLA_U16 },
 	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FWMASK]	= { .type = NLA_U32 },
 	[FRA_TABLE]     = { .type = NLA_U32 },
 };
 
@@ -130,7 +132,7 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 		return 0;
 
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (r->fwmark && (r->fwmark != fl->fld_fwmark))
+	if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask)
 		return 0;
 #endif
 
@@ -168,8 +170,17 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		r->dst = nla_get_u16(tb[FRA_DST]);
 
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (tb[FRA_FWMARK])
+	if (tb[FRA_FWMARK]) {
 		r->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+		if (r->fwmark)
+			/* compatibility: if the mark value is non-zero all bits
+			 * are compared unless a mask is explicitly specified.
+			 */
+			r->fwmask = 0xFFFFFFFF;
+	}
+
+	if (tb[FRA_FWMASK])
+		r->fwmask = nla_get_u32(tb[FRA_FWMASK]);
 #endif
 
 	r->src_len = frh->src_len;
@@ -195,6 +206,9 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
 	if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK])))
 		return 0;
+
+	if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+		return 0;
 #endif
 
 	if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
@@ -237,6 +251,8 @@ static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
 	if (r->fwmark)
 		NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark);
+	if (r->fwmask || r->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask);
 #endif
 	if (r->dst_len)
 		NLA_PUT_U16(skb, FRA_DST, r->dst);
-- 
GitLab


From b4e9b520ca5d07a37ea59648e7f50f478e7487a3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 25 Aug 2006 16:11:42 -0700
Subject: [PATCH 0622/1063] [NET_SCHED]: Add mask support to fwmark classifier

Support masking the nfmark value before the search. The mask value is
global for all filters contained in one instance. It can only be set
when a new instance is created; all filters must specify the same mask.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_cls.h |  1 +
 net/sched/cls_fw.c      | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index bd2c5a2bbbf52..c3f01b3085a41 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -305,6 +305,7 @@ enum
 	TCA_FW_POLICE,
 	TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
 	TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
+	TCA_FW_MASK,
 	__TCA_FW_MAX
 };
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index e6973d9b686d0..e54acc6bcccdc 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -50,6 +50,7 @@
 struct fw_head
 {
 	struct fw_filter *ht[HTSIZE];
+	u32 mask;
 };
 
 struct fw_filter
@@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	struct fw_filter *f;
 	int r;
 #ifdef CONFIG_NETFILTER
-	u32 id = skb->nfmark;
+	u32 id = skb->nfmark & head->mask;
 #else
 	u32 id = 0;
 #endif
@@ -209,7 +210,9 @@ static int
 fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 	struct rtattr **tb, struct rtattr **tca, unsigned long base)
 {
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct tcf_exts e;
+	u32 mask;
 	int err;
 
 	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map);
@@ -232,6 +235,15 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 	}
 #endif /* CONFIG_NET_CLS_IND */
 
+	if (tb[TCA_FW_MASK-1]) {
+		if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+			goto errout;
+		mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+		if (mask != head->mask)
+			goto errout;
+	} else if (head->mask != 0xFFFFFFFF)
+		goto errout;
+
 	tcf_exts_change(tp, &f->exts, &e);
 
 	return 0;
@@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 		return -EINVAL;
 
 	if (head == NULL) {
+		u32 mask = 0xFFFFFFFF;
+		if (tb[TCA_FW_MASK-1]) {
+			if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+				return -EINVAL;
+			mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+		}
+
 		head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
 		if (head == NULL)
 			return -ENOBUFS;
+		head->mask = mask;
 
 		tcf_tree_lock(tp);
 		tp->root = head;
@@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter*)fh;
 	unsigned char	 *b = skb->tail;
 	struct rtattr *rta;
@@ -351,6 +372,8 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	if (strlen(f->indev))
 		RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
 #endif /* CONFIG_NET_CLS_IND */
+	if (head->mask != 0xFFFFFFFF)
+		RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
 
 	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
 		goto rtattr_failure;
-- 
GitLab


From 74975d40b16fd4bad24a2e2630dc7957d8cba013 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Aug 2006 17:10:33 -0700
Subject: [PATCH 0623/1063] [TCP] Congestion control (modulo lp, bic): use
 BUILD_BUG_ON

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c     | 2 +-
 net/ipv4/tcp_highspeed.c | 2 +-
 net/ipv4/tcp_htcp.c      | 2 +-
 net/ipv4/tcp_hybla.c     | 2 +-
 net/ipv4/tcp_vegas.c     | 2 +-
 net/ipv4/tcp_veno.c      | 2 +-
 net/ipv4/tcp_westwood.c  | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 2be27980ca785..a60ef38d75c68 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = {
 
 static int __init cubictcp_register(void)
 {
-	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 
 	/* Precompute a bunch of the scaling factors that are used per-packet
 	 * based on SRTT of 100ms
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index fa3e1aad660cf..c4fc811bf377f 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
 
 static int __init hstcp_register(void)
 {
-	BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_highspeed);
 }
 
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 6edfe5e4510ed..682e7d5b6f2f8 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = {
 
 static int __init htcp_register(void)
 {
-	BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
 	BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
 	return tcp_register_congestion_control(&htcp);
 }
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 7406e0c5fb8e4..59e691d26f64e 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = {
 
 static int __init hybla_register(void)
 {
-	BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_hybla);
 }
 
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 490360b5b4bf1..a3b7aa015a2fc 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = {
 
 static int __init tcp_vegas_register(void)
 {
-	BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
 	tcp_register_congestion_control(&tcp_vegas);
 	return 0;
 }
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 5b2fe6d2aba98..ce57bf302f6cc 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -212,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = {
 
 static int __init tcp_veno_register(void)
 {
-	BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
 	tcp_register_congestion_control(&tcp_veno);
 	return 0;
 }
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 5446312ffd2ad..4f42a86c77f37 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = {
 
 static int __init tcp_westwood_register(void)
 {
-	BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_westwood);
 }
 
-- 
GitLab


From 366e4adc0f9ef33f56c62f980a7d83775e64abd0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 26 Aug 2006 16:50:20 -0700
Subject: [PATCH 0624/1063] [IPV6]: Fix routing by fwmark

Fix mark comparison, also dump the mask to userspace when the mask is
zero, but the mark is not (in which case the mark is dumped, so the
mask is needed to make sense of it).

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ee4aa43ad9735..2fbc71d901874 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -129,7 +129,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 		return 0;
 
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
-	if ((r->fwmark ^ fl->fl6_fwmark) / r->fwmask)
+	if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask)
 		return 0;
 #endif
 
@@ -256,7 +256,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	if (rule6->fwmark)
 		NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark);
 
-	if (rule6->fwmask)
+	if (rule6->fwmask || rule6->fwmark)
 		NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask);
 #endif
 
-- 
GitLab


From ef047f5e1085d6393748d1ee27d6327905f098dc Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 1 Sep 2006 00:29:06 -0700
Subject: [PATCH 0625/1063] [NET]: Use BUILD_BUG_ON() for checking size of
 skb->cb.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c       | 5 +----
 net/ipv6/af_inet6.c      | 7 ++-----
 net/netlink/af_netlink.c | 5 +----
 net/unix/af_unix.c       | 5 +----
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f2e8927f45960..fdd89e37b9aac 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1254,10 +1254,7 @@ static int __init inet_init(void)
 	struct list_head *r;
 	int rc = -EINVAL;
 
-	if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
-		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
-		goto out;
-	}
+	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
 
 	rc = proto_register(&tcp_prot, 1);
 	if (rc)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fc9c8a99bea66..bf6e8aff19d4f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -761,6 +761,8 @@ static int __init inet6_init(void)
         struct list_head *r;
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
+
 #ifdef MODULE
 #if 0 /* FIXME --RR */
 	if (!mod_member_present(&__this_module, can_unload))
@@ -770,11 +772,6 @@ static int __init inet6_init(void)
 #endif
 #endif
 
-	if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
-		printk(KERN_CRIT "inet6_proto_init: size fault\n");
-		return -EINVAL;
-	}
-
 	err = proto_register(&tcpv6_prot, 1);
 	if (err)
 		goto out;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a80e4456e204b..d56e0d21f919d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1762,8 +1762,6 @@ static struct net_proto_family netlink_family_ops = {
 	.owner	= THIS_MODULE,	/* for consistency 8) */
 };
 
-extern void netlink_skb_parms_too_large(void);
-
 static int __init netlink_proto_init(void)
 {
 	struct sk_buff *dummy_skb;
@@ -1775,8 +1773,7 @@ static int __init netlink_proto_init(void)
 	if (err != 0)
 		goto out;
 
-	if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
-		netlink_skb_parms_too_large();
+	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
 
 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
 	if (!nl_table)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index de6ec519272e5..7c91c2024d494 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2060,10 +2060,7 @@ static int __init af_unix_init(void)
 	int rc = -1;
 	struct sk_buff *dummy_skb;
 
-	if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
-		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
-		goto out;
-	}
+	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
 
 	rc = proto_register(&unix_proto, 1);
         if (rc != 0) {
-- 
GitLab


From 2a0109a707d2b0ae48f124d3be0fdf1715c0107a Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:15:35 -0700
Subject: [PATCH 0626/1063] [DCCP]: Shift sysctls into feat.h

This shifts further sysctls into feat.h. No change in
functionality - shifting code only.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/feat.h   | 5 +++++
 net/dccp/sysctl.c | 8 +-------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index b44c45504fb6c..cee553d416cac 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -27,5 +27,10 @@ extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
 extern int  dccp_feat_init(struct dccp_minisock *dmsk);
 
 extern int  dccp_feat_default_sequence_window;
+extern int  dccp_feat_default_rx_ccid;
+extern int  dccp_feat_default_tx_ccid;
+extern int  dccp_feat_default_ack_ratio;
+extern int  dccp_feat_default_send_ack_vector;
+extern int  dccp_feat_default_send_ndp_count;
 
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index c1ba9451bc3dc..38bc157876f3b 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -11,18 +11,12 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
+#include "feat.h"
 
 #ifndef CONFIG_SYSCTL
 #error This file should not be compiled without CONFIG_SYSCTL defined
 #endif
 
-extern int dccp_feat_default_sequence_window;
-extern int dccp_feat_default_rx_ccid;
-extern int dccp_feat_default_tx_ccid;
-extern int dccp_feat_default_ack_ratio;
-extern int dccp_feat_default_send_ack_vector;
-extern int dccp_feat_default_send_ndp_count;
-
 static struct ctl_table dccp_default_table[] = {
 	{
 		.ctl_name	= NET_DCCP_DEFAULT_SEQ_WINDOW,
-- 
GitLab


From 97e5848dd39e7e76bd6077735ebb5473763ab9c5 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:16:45 -0700
Subject: [PATCH 0627/1063] [DCCP]: Introduce tx buffering

This adds transmit buffering to DCCP.

I have tested with CCID2/3 and with loss and rate limiting.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  2 +
 net/dccp/dccp.h      |  2 +-
 net/dccp/output.c    | 90 ++++++++++++++++++++++++++++++++------------
 net/dccp/proto.c     | 16 +++-----
 4 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 676333b9fad02..2d7671c92c0b6 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -438,6 +438,7 @@ struct dccp_ackvec;
  * @dccps_role - Role of this sock, one of %dccp_role
  * @dccps_ndp_count - number of Non Data Packets since last data packet
  * @dccps_hc_rx_ackvec - rx half connection ack vector
+ * @dccps_xmit_timer - timer for when CCID is not ready to send
  */
 struct dccp_sock {
 	/* inet_connection_sock has to be the first member of dccp_sock */
@@ -470,6 +471,7 @@ struct dccp_sock {
 	enum dccp_role			dccps_role:2;
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
+	struct timer_list		dccps_xmit_timer;
 };
  
 static inline struct dccp_sock *dccp_sk(const struct sock *sk)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a5c5475724c0b..0a21be437ed32 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -130,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk);
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
-extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
+extern void dccp_write_xmit(struct sock *sk, int block);
 extern void dccp_write_space(struct sock *sk);
 
 extern void dccp_init_xmit_timers(struct sock *sk);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 58669beee1329..7102e3aed4ca2 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -198,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
 	while (1) {
 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
-		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		if (sk->sk_err)
 			goto do_error;
 		if (!*timeo)
 			goto do_nonblock;
@@ -234,37 +234,72 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
 	goto out;
 }
 
-int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
+static void dccp_write_xmit_timer(unsigned long data) {
+	struct sock *sk = (struct sock *)data;
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk))
+		sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+	else
+		dccp_write_xmit(sk, 0);
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+void dccp_write_xmit(struct sock *sk, int block)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb;
+	long timeo = 30000; 	/* If a packet is taking longer than 2 secs
+				   we have other issues */
+
+	while ((skb = skb_peek(&sk->sk_write_queue))) {
+		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
 					 skb->len);
 
-	if (err > 0)
-		err = dccp_wait_for_ccid(sk, skb, timeo);
+		if (err > 0) {
+			if (!block) {
+				sk_reset_timer(sk, &dp->dccps_xmit_timer,
+						msecs_to_jiffies(err)+jiffies);
+				break;
+			} else
+				err = dccp_wait_for_ccid(sk, skb, &timeo);
+			if (err) {
+				printk(KERN_CRIT "%s:err at dccp_wait_for_ccid"
+						 " %d\n", __FUNCTION__, err);
+				dump_stack();
+			}
+		}
 
-	if (err == 0) {
-		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-		const int len = skb->len;
+		skb_dequeue(&sk->sk_write_queue);
+		if (err == 0) {
+			struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+			const int len = skb->len;
 
-		if (sk->sk_state == DCCP_PARTOPEN) {
-			/* See 8.1.5.  Handshake Completion */
-			inet_csk_schedule_ack(sk);
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+			if (sk->sk_state == DCCP_PARTOPEN) {
+				/* See 8.1.5.  Handshake Completion */
+				inet_csk_schedule_ack(sk);
+				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  inet_csk(sk)->icsk_rto,
 						  DCCP_RTO_MAX);
-			dcb->dccpd_type = DCCP_PKT_DATAACK;
-		} else if (dccp_ack_pending(sk))
-			dcb->dccpd_type = DCCP_PKT_DATAACK;
-		else
-			dcb->dccpd_type = DCCP_PKT_DATA;
-
-		err = dccp_transmit_skb(sk, skb);
-		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
-	} else
-		kfree_skb(skb);
-
-	return err;
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			} else if (dccp_ack_pending(sk))
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			else
+				dcb->dccpd_type = DCCP_PKT_DATA;
+
+			err = dccp_transmit_skb(sk, skb);
+			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+			if (err) {
+				printk(KERN_CRIT "%s:err from "
+					         "ccid_hc_tx_packet_sent %d\n",
+					         __FUNCTION__, err);
+				dump_stack();
+			}
+		} else
+			kfree(skb);
+	}
 }
 
 int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -426,6 +461,9 @@ static inline void dccp_connect_init(struct sock *sk)
 	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
 
 	icsk->icsk_retransmits = 0;
+	init_timer(&dp->dccps_xmit_timer);
+	dp->dccps_xmit_timer.data = (unsigned long)sk;
+	dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
 }
 
 int dccp_connect(struct sock *sk)
@@ -560,8 +598,10 @@ void dccp_send_close(struct sock *sk, const int active)
 					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
 
 	if (active) {
+		dccp_write_xmit(sk, 1);
 		dccp_skb_entail(sk, skb);
 		dccp_transmit_skb(sk, skb_clone(skb, prio));
+		/* FIXME do we need a retransmit timer here? */
 	} else
 		dccp_transmit_skb(sk, skb);
 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6f14bb5a28d41..962df0ea31aa7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -662,17 +662,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (rc != 0)
 		goto out_discard;
 
-	rc = dccp_write_xmit(sk, skb, &timeo);
-	/*
-	 * XXX we don't use sk_write_queue, so just discard the packet.
-	 *     Current plan however is to _use_ sk_write_queue with
-	 *     an algorith similar to tcp_sendmsg, where the main difference
-	 *     is that in DCCP we have to respect packet boundaries, so
-	 *     no coalescing of skbs.
-	 *
-	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
-	 *     generated callgraph 8) -acme
-	 */
+	skb_queue_tail(&sk->sk_write_queue, skb);
+	dccp_write_xmit(sk,0);
 out_release:
 	release_sock(sk);
 	return rc ? : len;
@@ -846,6 +837,7 @@ static int dccp_close_state(struct sock *sk)
 
 void dccp_close(struct sock *sk, long timeout)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
 	int state;
 
@@ -862,6 +854,8 @@ void dccp_close(struct sock *sk, long timeout)
 		goto adjudge_to_death;
 	}
 
+	sk_stop_timer(sk, &dp->dccps_xmit_timer);
+
 	/*
 	 * We need to flush the recv. buffs.  We do this only on the
 	 * descriptor close, not protocol-sourced closes, because the
-- 
GitLab


From ff5dfe736dd9f6c74b206aa77c0465dfd503bdb9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 26 Aug 2006 19:17:53 -0700
Subject: [PATCH 0628/1063] [NETLINK]: remove third bogus argument from
 NLA_PUT_FLAG

This patch removes the 'value' argument from NLA_PUT_FLAG which is
unused anyway. The documentation comment was already correct so it
doesn't need an update :)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 47044da167c50..bcb27e3a312e4 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -828,7 +828,7 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
 #define NLA_PUT_STRING(skb, attrtype, value) \
 	NLA_PUT(skb, attrtype, strlen(value) + 1, value)
 
-#define NLA_PUT_FLAG(skb, attrtype, value) \
+#define NLA_PUT_FLAG(skb, attrtype) \
 	NLA_PUT(skb, attrtype, 0, NULL)
 
 #define NLA_PUT_MSECS(skb, attrtype, jiffies) \
-- 
GitLab


From e5d679f33900c71d1a76ba07c5b04055abd34480 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Aug 2006 19:25:52 -0700
Subject: [PATCH 0629/1063] [NET]: Use SLAB_PANIC

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow.c        |  6 +-----
 net/core/neighbour.c   | 12 ++++--------
 net/core/skbuff.c      |  9 ++-------
 net/decnet/dn_route.c  | 11 +++--------
 net/ipv4/inetpeer.c    |  5 +----
 net/ipv4/ipmr.c        |  5 +----
 net/ipv4/route.c       | 10 +++-------
 net/ipv4/tcp.c         |  4 +---
 net/ipv6/ip6_fib.c     |  4 +---
 net/ipv6/route.c       | 10 +++-------
 net/xfrm/xfrm_input.c  |  4 +---
 net/xfrm/xfrm_policy.c |  4 +---
 12 files changed, 22 insertions(+), 62 deletions(-)

diff --git a/net/core/flow.c b/net/core/flow.c
index 645241165e6cf..f23e7e3865431 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -343,12 +343,8 @@ static int __init flow_cache_init(void)
 
 	flow_cachep = kmem_cache_create("flow_cache",
 					sizeof(struct flow_cache_entry),
-					0, SLAB_HWCACHE_ALIGN,
+					0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					NULL, NULL);
-
-	if (!flow_cachep)
-		panic("NET: failed to allocate flow cache slab\n");
-
 	flow_hash_shift = 10;
 	flow_lwm = 2 * flow_hash_size;
 	flow_hwm = 4 * flow_hash_size;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c0a27407f4452..a45bd2124d6b2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1339,14 +1339,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
 
 	if (!tbl->kmem_cachep)
-		tbl->kmem_cachep = kmem_cache_create(tbl->id,
-						     tbl->entry_size,
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-
-	if (!tbl->kmem_cachep)
-		panic("cannot create neighbour cache");
-
+		tbl->kmem_cachep =
+			kmem_cache_create(tbl->id, tbl->entry_size, 0,
+					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					  NULL, NULL);
 	tbl->stats = alloc_percpu(struct neigh_statistics);
 	if (!tbl->stats)
 		panic("cannot create neighbour cache statistics");
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8a476f1956e58..c448c7f6fde2a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2046,19 +2046,14 @@ void __init skb_init(void)
 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 					      sizeof(struct sk_buff),
 					      0,
-					      SLAB_HWCACHE_ALIGN,
+					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					      NULL, NULL);
-	if (!skbuff_head_cache)
-		panic("cannot create skbuff cache");
-
 	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
 						(2*sizeof(struct sk_buff)) +
 						sizeof(atomic_t),
 						0,
-						SLAB_HWCACHE_ALIGN,
+						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL, NULL);
-	if (!skbuff_fclone_cache)
-		panic("cannot create skbuff cache");
 }
 
 EXPORT_SYMBOL(___pskb_trim);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c5daf3557c1f1..dd0761e3d280c 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1781,14 +1781,9 @@ void __init dn_route_init(void)
 {
 	int i, goal, order;
 
-	dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache",
-						   sizeof(struct dn_route),
-						   0, SLAB_HWCACHE_ALIGN,
-						   NULL, NULL);
-
-	if (!dn_dst_ops.kmem_cachep)
-		panic("DECnet: Failed to allocate dn_dst_cache\n");
-
+	dn_dst_ops.kmem_cachep =
+		kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 	init_timer(&dn_route_timer);
 	dn_route_timer.function = dn_dst_check_expire;
 	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 03ff62ebcfeb5..a675602ef2953 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -126,12 +126,9 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN,
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 			NULL, NULL);
 
-	if (!peer_cachep)
-		panic("cannot create inet_peer_cache");
-
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
 	 */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 98f0aa0d42160..ba49588da2424 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1900,11 +1900,8 @@ void __init ip_mr_init(void)
 {
 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
 				       sizeof(struct mfc_cache),
-				       0, SLAB_HWCACHE_ALIGN,
+				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 				       NULL, NULL);
-	if (!mrt_cachep)
-		panic("cannot allocate ip_mrt_cache");
-
 	init_timer(&ipmr_expire_timer);
 	ipmr_expire_timer.function=ipmr_expire_process;
 	register_netdevice_notifier(&ip_mr_notifier);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a4d4cb85a16c2..20ffe8e88c0f0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3147,13 +3147,9 @@ int __init ip_rt_init(void)
 	}
 #endif
 
-	ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
-						     sizeof(struct rtable),
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-
-	if (!ipv4_dst_ops.kmem_cachep)
-		panic("IP: failed to allocate ip_dst_cache\n");
+	ipv4_dst_ops.kmem_cachep =
+		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	rt_hash_table = (struct rt_hash_bucket *)
 		alloc_large_system_hash("IP route cache",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e570db4d33c84..29e3d606db78d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2254,9 +2254,7 @@ void __init tcp_init(void)
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
-				  SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (!tcp_hashinfo.bind_bucket_cachep)
-		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	/* Size and allocate the main established and bind bucket
 	 * hash tables.
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index fbca60950b147..8fcae7a6510b9 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1472,10 +1472,8 @@ void __init fib6_init(void)
 {
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!fib6_node_kmem)
-		panic("cannot create fib6_nodes cache");
 
 	fib6_tables_init();
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d83844d9499bf..ba1b3d11865e4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2419,13 +2419,9 @@ void __init ip6_route_init(void)
 {
 	struct proc_dir_entry *p;
 
-	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
-						     sizeof(struct rt6_info),
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-	if (!ip6_dst_ops.kmem_cachep)
-		panic("cannot create ip6_dst_cache");
-
+	ip6_dst_ops.kmem_cachep =
+		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 	fib6_init();
 #ifdef 	CONFIG_PROC_FS
 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 891a6090cc099..dfc90bb1cf1f5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -82,8 +82,6 @@ void __init xfrm_input_init(void)
 {
 	secpath_cachep = kmem_cache_create("secpath_cache",
 					   sizeof(struct sec_path),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!secpath_cachep)
-		panic("XFRM: failed to allocate secpath_cache\n");
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1cf3209cdf4b3..7db1c48537f0d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1985,10 +1985,8 @@ static void __init xfrm_policy_init(void)
 
 	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
 					   sizeof(struct xfrm_dst),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!xfrm_dst_cache)
-		panic("XFRM: failed to allocate xfrm_dst_cache\n");
 
 	hmask = 8 - 1;
 	sz = (hmask+1) * sizeof(struct hlist_head);
-- 
GitLab


From 6a28ec8cd0c6993a4ac0d52f4347f7ed077b5cac Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sat, 26 Aug 2006 19:48:49 -0700
Subject: [PATCH 0630/1063] [NETFILTER]: Fix nf_conntrack_ftp.c build.

Noticed by Adrian Bunk.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_ftp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 9dccb40398890..0c17a5bd112bb 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -21,6 +21,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/ctype.h>
+#include <linux/inet.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
 
@@ -114,7 +115,8 @@ static struct ftp_search {
 static int
 get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
 {
-	int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), dst, term, &end);
+	const char *end;
+	int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end);
 	if (ret > 0)
 		return (int)(end - src);
 	return 0;
-- 
GitLab


From 25030a7f9eeab2dcefff036469e0e2b4f956198f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sat, 26 Aug 2006 20:06:05 -0700
Subject: [PATCH 0631/1063] [UDP]: Unify UDPv4 and UDPv6 ->get_port()

This patch creates one common function which is called by
udp_v4_get_port() and udp_v6_get_port(). As a result,
  * duplicated code is removed
  * udp_port_rover and local port lookup can now be removed from udp.h
  * further savings follow since the same function will be used by UDP-Litev4
    and UDP-Litev6

In contrast to the patch sent in response to Yoshifuji's comments
(fixed by this variant), the code below also removes the
EXPORT_SYMBOL(udp_port_rover), since udp_port_rover can now remain
local to net/ipv4/udp.c.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 18 +--------
 net/ipv4/udp.c    | 96 +++++++++++++++++++++++++++++------------------
 net/ipv6/udp.c    | 76 +------------------------------------
 3 files changed, 64 insertions(+), 126 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 766fba1369ce5..c490a0f662acf 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -30,25 +30,9 @@
 
 #define UDP_HTABLE_SIZE		128
 
-/* udp.c: This needs to be shared by v4 and v6 because the lookup
- *        and hashing code needs to work with different AF's yet
- *        the port space is shared.
- */
 extern struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 extern rwlock_t udp_hash_lock;
 
-extern int udp_port_rover;
-
-static inline int udp_lport_inuse(u16 num)
-{
-	struct sock *sk;
-	struct hlist_node *node;
-
-	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
-		if (inet_sk(sk)->num == num)
-			return 1;
-	return 0;
-}
 
 /* Note: this must match 'valbool' in sock_setsockopt */
 #define UDP_CSUM_NOXMIT		1
@@ -63,6 +47,8 @@ extern struct proto udp_prot;
 
 struct sk_buff;
 
+extern int	udp_get_port(struct sock *sk, unsigned short snum,
+			     int (*saddr_cmp)(struct sock *, struct sock *));
 extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 514c1e9ae8103..7552b50bcd842 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -118,14 +118,34 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
-/* Shared by v4/v6 udp. */
+/* Shared by v4/v6 udp_get_port */
 int udp_port_rover;
 
-static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_lport_inuse(u16 num)
 {
+	struct sock *sk;
 	struct hlist_node *node;
+
+	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+		if (inet_sk(sk)->num == num)
+			return 1;
+	return 0;
+}
+
+/**
+ *  udp_get_port  -  common port lookup for IPv4 and IPv6
+ *
+ *  @sk:          socket struct in question
+ *  @snum:        port number to look up
+ *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
+ */
+int udp_get_port(struct sock *sk, unsigned short snum,
+		 int (*saddr_cmp)(struct sock *sk1, struct sock *sk2))
+{
+	struct hlist_node *node;
+	struct hlist_head *head;
 	struct sock *sk2;
-	struct inet_sock *inet = inet_sk(sk);
+	int    error = 1;
 
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
@@ -137,11 +157,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 		best_size_so_far = 32767;
 		best = result = udp_port_rover;
 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-			struct hlist_head *list;
 			int size;
 
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-			if (hlist_empty(list)) {
+			head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			if (hlist_empty(head)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
 						((result - sysctl_local_port_range[0]) &
@@ -149,12 +168,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 				goto gotit;
 			}
 			size = 0;
-			sk_for_each(sk2, node, list)
-				if (++size >= best_size_so_far)
-					goto next;
-			best_size_so_far = size;
-			best = result;
-		next:;
+			sk_for_each(sk2, node, head)
+				if (++size < best_size_so_far) {
+					best_size_so_far = size;
+					best = result;
+				}
 		}
 		result = best;
 		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
@@ -170,38 +188,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 gotit:
 		udp_port_rover = snum = result;
 	} else {
-		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-			struct inet_sock *inet2 = inet_sk(sk2);
-
-			if (inet2->num == snum &&
-			    sk2 != sk &&
-			    !ipv6_only_sock(sk2) &&
-			    (!sk2->sk_bound_dev_if ||
-			     !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!inet2->rcv_saddr ||
-			     !inet->rcv_saddr ||
-			     inet2->rcv_saddr == inet->rcv_saddr) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse))
+		head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+
+		sk_for_each(sk2, node, head)
+			if (inet_sk(sk2)->num == snum                        &&
+			    sk2 != sk                                        &&
+			    (!sk2->sk_reuse        || !sk->sk_reuse)         &&
+			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+			    (*saddr_cmp)(sk, sk2)                              )
 				goto fail;
-		}
 	}
-	inet->num = snum;
+	inet_sk(sk)->num = snum;
 	if (sk_unhashed(sk)) {
-		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
-
-		sk_add_node(sk, h);
+		head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+		sk_add_node(sk, head);
 		sock_prot_inc_use(sk->sk_prot);
 	}
-	write_unlock_bh(&udp_hash_lock);
-	return 0;
-
+	error = 0;
 fail:
 	write_unlock_bh(&udp_hash_lock);
-	return 1;
+	return error;
+}
+
+static inline int  ipv4_rcv_saddr_equal(struct sock *sk1, struct sock *sk2)
+{
+	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
+
+	return 	( !ipv6_only_sock(sk2)  &&
+		  (!inet1->rcv_saddr || !inet2->rcv_saddr ||
+		   inet1->rcv_saddr == inet2->rcv_saddr      ));
+}
+
+static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+	return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
 }
 
+
 static void udp_v4_hash(struct sock *sk)
 {
 	BUG();
@@ -1596,7 +1620,7 @@ EXPORT_SYMBOL(udp_disconnect);
 EXPORT_SYMBOL(udp_hash);
 EXPORT_SYMBOL(udp_hash_lock);
 EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_port_rover);
+EXPORT_SYMBOL(udp_get_port);
 EXPORT_SYMBOL(udp_prot);
 EXPORT_SYMBOL(udp_sendmsg);
 EXPORT_SYMBOL(udp_poll);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b9cc55ccb0004..9662561701d1c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -61,81 +61,9 @@
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
 
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- */
-static int udp_v6_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	struct sock *sk2;
-	struct hlist_node *node;
-
-	write_lock_bh(&udp_hash_lock);
-	if (snum == 0) {
-		int best_size_so_far, best, result, i;
-
-		if (udp_port_rover > sysctl_local_port_range[1] ||
-		    udp_port_rover < sysctl_local_port_range[0])
-			udp_port_rover = sysctl_local_port_range[0];
-		best_size_so_far = 32767;
-		best = result = udp_port_rover;
-		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-			int size;
-			struct hlist_head *list;
-
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-			if (hlist_empty(list)) {
-				if (result > sysctl_local_port_range[1])
-					result = sysctl_local_port_range[0] +
-						((result - sysctl_local_port_range[0]) &
-						 (UDP_HTABLE_SIZE - 1));
-				goto gotit;
-			}
-			size = 0;
-			sk_for_each(sk2, node, list)
-				if (++size >= best_size_so_far)
-					goto next;
-			best_size_so_far = size;
-			best = result;
-		next:;
-		}
-		result = best;
-		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
-			if (result > sysctl_local_port_range[1])
-				result = sysctl_local_port_range[0]
-					+ ((result - sysctl_local_port_range[0]) &
-					   (UDP_HTABLE_SIZE - 1));
-			if (!udp_lport_inuse(result))
-				break;
-		}
-		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
-			goto fail;
-gotit:
-		udp_port_rover = snum = result;
-	} else {
-		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-			if (inet_sk(sk2)->num == snum &&
-			    sk2 != sk &&
-			    (!sk2->sk_bound_dev_if ||
-			     !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse) &&
-			    ipv6_rcv_saddr_equal(sk, sk2))
-				goto fail;
-		}
-	}
-
-	inet_sk(sk)->num = snum;
-	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
-		sock_prot_inc_use(sk->sk_prot);
-	}
-	write_unlock_bh(&udp_hash_lock);
-	return 0;
-
-fail:
-	write_unlock_bh(&udp_hash_lock);
-	return 1;
+	return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
 static void udp_v6_hash(struct sock *sk)
-- 
GitLab


From bed53ea7fef37820b7c92ad74feff1b817c6aae3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sat, 26 Aug 2006 20:06:49 -0700
Subject: [PATCH 0632/1063] [UDP]: Mark udp_port_rover static.

It is not referenced outside of net/ipv4/udp.c any longer.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7552b50bcd842..aa1823050b00c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -118,8 +118,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
-/* Shared by v4/v6 udp_get_port */
-int udp_port_rover;
+static int udp_port_rover;
 
 static inline int udp_lport_inuse(u16 num)
 {
-- 
GitLab


From e3b4eadbea77ecb3c3a74d1bc81b392f454c7f2e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sat, 26 Aug 2006 20:10:15 -0700
Subject: [PATCH 0633/1063] [UDP]: saddr_cmp function should take const socket
 pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This also kills a warning while building ipv6:

net/ipv6/udp.c: In function ‘udp_v6_get_port’:
net/ipv6/udp.c:66: warning: passing argument 3 of ‘udp_get_port’ from incompatible pointer type

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 2 +-
 net/ipv4/udp.c    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index c490a0f662acf..db0c05f675469 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -48,7 +48,7 @@ extern struct proto udp_prot;
 struct sk_buff;
 
 extern int	udp_get_port(struct sock *sk, unsigned short snum,
-			     int (*saddr_cmp)(struct sock *, struct sock *));
+			     int (*saddr_cmp)(const struct sock *, const struct sock *));
 extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index aa1823050b00c..77e265d7bb8f8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -139,7 +139,7 @@ static inline int udp_lport_inuse(u16 num)
  *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
  */
 int udp_get_port(struct sock *sk, unsigned short snum,
-		 int (*saddr_cmp)(struct sock *sk1, struct sock *sk2))
+		 int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2))
 {
 	struct hlist_node *node;
 	struct hlist_head *head;
@@ -210,7 +210,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
 	return error;
 }
 
-static inline int  ipv4_rcv_saddr_equal(struct sock *sk1, struct sock *sk2)
+static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 {
 	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
 
-- 
GitLab


From a5531a5d852008be40811496029012f4ad3093d1 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 26 Aug 2006 20:11:47 -0700
Subject: [PATCH 0634/1063] [NETLINK]: Improve string attribute validation

Introduces a new attribute type NLA_NUL_STRING to support NUL
terminated strings. Attributes of this kind are required to carry
a terminating NUL within the maximum length specified in the policy.

The `old' NLA_STRING which is not required to be NUL terminated
is extended to provide means to specify a maximum length of the
string.

Aims at easing the pain of using nla_strlcpy() on temporary
buffers.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 15 +++++++++----
 net/netlink/attr.c    | 49 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index bcb27e3a312e4..11dc2e7f679ab 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -167,6 +167,7 @@ enum {
 	NLA_FLAG,
 	NLA_MSECS,
 	NLA_NESTED,
+	NLA_NUL_STRING,
 	__NLA_TYPE_MAX,
 };
 
@@ -175,21 +176,27 @@ enum {
 /**
  * struct nla_policy - attribute validation policy
  * @type: Type of attribute or NLA_UNSPEC
- * @minlen: Minimal length of payload required to be available
+ * @len: Type specific length of payload
  *
  * Policies are defined as arrays of this struct, the array must be
  * accessible by attribute type up to the highest identifier to be expected.
  *
+ * Meaning of `len' field:
+ *    NLA_STRING           Maximum length of string
+ *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
+ *    NLA_FLAG             Unused
+ *    All other            Exact length of attribute payload
+ *
  * Example:
  * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
- *	[ATTR_BAR] = { .type = NLA_STRING },
- *	[ATTR_BAZ] = { .minlen = sizeof(struct mystruct) },
+ *	[ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ },
+ *	[ATTR_BAZ] = { .len = sizeof(struct mystruct) },
  * };
  */
 struct nla_policy {
 	u16		type;
-	u16		minlen;
+	u16		len;
 };
 
 /**
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 136e529e5780c..004139557e095 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -20,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
 	[NLA_U16]	= sizeof(u16),
 	[NLA_U32]	= sizeof(u32),
 	[NLA_U64]	= sizeof(u64),
-	[NLA_STRING]	= 1,
 	[NLA_NESTED]	= NLA_HDRLEN,
 };
 
@@ -28,7 +27,7 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 			struct nla_policy *policy)
 {
 	struct nla_policy *pt;
-	int minlen = 0;
+	int minlen = 0, attrlen = nla_len(nla);
 
 	if (nla->nla_type <= 0 || nla->nla_type > maxtype)
 		return 0;
@@ -37,16 +36,46 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 
 	BUG_ON(pt->type > NLA_TYPE_MAX);
 
-	if (pt->minlen)
-		minlen = pt->minlen;
-	else if (pt->type != NLA_UNSPEC)
-		minlen = nla_attr_minlen[pt->type];
+	switch (pt->type) {
+	case NLA_FLAG:
+		if (attrlen > 0)
+			return -ERANGE;
+		break;
 
-	if (pt->type == NLA_FLAG && nla_len(nla) > 0)
-		return -ERANGE;
+	case NLA_NUL_STRING:
+		if (pt->len)
+			minlen = min_t(int, attrlen, pt->len + 1);
+		else
+			minlen = attrlen;
 
-	if (nla_len(nla) < minlen)
-		return -ERANGE;
+		if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL)
+			return -EINVAL;
+		/* fall through */
+
+	case NLA_STRING:
+		if (attrlen < 1)
+			return -ERANGE;
+
+		if (pt->len) {
+			char *buf = nla_data(nla);
+
+			if (buf[attrlen - 1] == '\0')
+				attrlen--;
+
+			if (attrlen > pt->len)
+				return -ERANGE;
+		}
+		break;
+
+	default:
+		if (pt->len)
+			minlen = pt->len;
+		else if (pt->type != NLA_UNSPEC)
+			minlen = nla_attr_minlen[pt->type];
+
+		if (attrlen < minlen)
+			return -ERANGE;
+	}
 
 	return 0;
 }
-- 
GitLab


From 5176f91ea83f1a59eba4dba88634a4729d51d1ac Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 26 Aug 2006 20:13:18 -0700
Subject: [PATCH 0635/1063] [NETLINK]: Make use of NLA_STRING/NLA_NUL_STRING
 attribute validation

Converts existing NLA_STRING attributes to use the new
validation features, saving a couple of temporary buffers.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c    |  8 +-------
 net/core/rtnetlink.c    |  9 ++++-----
 net/decnet/dn_rules.c   |  2 +-
 net/ipv4/devinet.c      |  2 +-
 net/ipv4/fib_frontend.c |  2 +-
 net/ipv4/fib_rules.c    |  2 +-
 net/ipv6/fib6_rules.c   |  6 +++---
 net/ipv6/route.c        |  2 +-
 net/netlink/genetlink.c | 10 ++++------
 9 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 7b2e9bb1a605d..a99d87d82b7f1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -161,9 +161,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (err < 0)
 		goto errout;
 
-	if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ)
-		goto errout;
-
 	rule = kzalloc(ops->rule_size, GFP_KERNEL);
 	if (rule == NULL) {
 		err = -ENOMEM;
@@ -177,10 +174,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		struct net_device *dev;
 
 		rule->ifindex = -1;
-		if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME],
-				IFNAMSIZ) >= IFNAMSIZ)
-			goto errout_free;
-
+		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
 		dev = __dev_get_by_name(rule->ifname);
 		if (dev)
 			rule->ifindex = dev->ifindex;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8f225499e32e1..0ebcf8488e994 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -371,8 +371,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 }
 
 static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
-	[IFLA_IFNAME]		= { .type = NLA_STRING },
-	[IFLA_MAP]		= { .minlen = sizeof(struct rtnl_link_ifmap) },
+	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
+	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
 	[IFLA_MTU]		= { .type = NLA_U32 },
 	[IFLA_TXQLEN]		= { .type = NLA_U32 },
 	[IFLA_WEIGHT]		= { .type = NLA_U32 },
@@ -392,9 +392,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (err < 0)
 		goto errout;
 
-	if (tb[IFLA_IFNAME] &&
-	    nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ) >= IFNAMSIZ)
-		return -EINVAL;
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 
 	err = -EINVAL;
 	ifm = nlmsg_data(nlh);
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 63ad63dfd252c..3e0c882c90bfa 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -112,7 +112,7 @@ static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
 }
 
 static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
-	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 	[FRA_PRIORITY]	= { .type = NLA_U32 },
 	[FRA_SRC]	= { .type = NLA_U16 },
 	[FRA_DST]	= { .type = NLA_U16 },
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0487677729cf1..8e8d1f17d77a7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -85,7 +85,7 @@ static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
-	[IFA_LABEL]     	= { .type = NLA_STRING },
+	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d0abeab16e663..cfb527c060e48 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -462,7 +462,7 @@ struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
 	[RTA_PRIORITY]		= { .type = NLA_U32 },
 	[RTA_PREFSRC]		= { .type = NLA_U32 },
 	[RTA_METRICS]		= { .type = NLA_NESTED },
-	[RTA_MULTIPATH]		= { .minlen = sizeof(struct rtnexthop) },
+	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
 	[RTA_FLOW]		= { .type = NLA_U32 },
 	[RTA_MP_ALGO]		= { .type = NLA_U32 },
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 280f424ca9c9c..52b2adae4f223 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -179,7 +179,7 @@ static struct fib_table *fib_empty_table(void)
 }
 
 static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
-	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 	[FRA_PRIORITY]	= { .type = NLA_U32 },
 	[FRA_SRC]	= { .type = NLA_U32 },
 	[FRA_DST]	= { .type = NLA_U32 },
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2fbc71d901874..34f5bfaddfc29 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -137,10 +137,10 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 }
 
 static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
-	[FRA_IFNAME]	= { .type = NLA_STRING },
+	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 	[FRA_PRIORITY]	= { .type = NLA_U32 },
-	[FRA_SRC]	= { .minlen = sizeof(struct in6_addr) },
-	[FRA_DST]	= { .minlen = sizeof(struct in6_addr) },
+	[FRA_SRC]	= { .len = sizeof(struct in6_addr) },
+	[FRA_DST]	= { .len = sizeof(struct in6_addr) },
 	[FRA_FWMARK]	= { .type = NLA_U32 },
 	[FRA_FWMASK]	= { .type = NLA_U32 },
 	[FRA_TABLE]	= { .type = NLA_U32 },
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ba1b3d11865e4..75f4bb9611cec 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1865,7 +1865,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 }
 
 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
-	[RTA_GATEWAY]           = { .minlen = sizeof(struct in6_addr) },
+	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 	[RTA_OIF]               = { .type = NLA_U32 },
 	[RTA_IIF]		= { .type = NLA_U32 },
 	[RTA_PRIORITY]          = { .type = NLA_U32 },
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d32599116c567..3ac942cdb677e 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -455,7 +455,8 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
 
 static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
 	[CTRL_ATTR_FAMILY_ID]	= { .type = NLA_U16 },
-	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_STRING },
+	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_NUL_STRING,
+				    .len = GENL_NAMSIZ - 1 },
 };
 
 static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
@@ -470,12 +471,9 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
-		char name[GENL_NAMSIZ];
-
-		if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
-				GENL_NAMSIZ) >= GENL_NAMSIZ)
-			goto errout;
+		char *name;
 
+		name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]);
 		res = genl_family_find_byname(name);
 	}
 
-- 
GitLab


From 33cc48966827165e49de1cb8ff4fb57c127d4be0 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Aug 2006 13:19:30 -0700
Subject: [PATCH 0636/1063] [IPV6] ROUTE: Fix dst reference counting in
 ip6_pol_route_lookup().

In ip6_pol_route_lookup(), when we finish backtracking at the
top-level root entry, we need to hold it.

Bug noticed by Mitsuru Chinen <CHINEN@jp.ibm.com>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 75f4bb9611cec..d6b4b4f48d18c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -510,8 +510,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
 	rt = fn->leaf;
 	rt = rt6_device_match(rt, fl->oif, flags);
 	BACKTRACK(&fl->fl6_src);
-	dst_hold(&rt->u.dst);
 out:
+	dst_hold(&rt->u.dst);
 	read_unlock_bh(&table->tb6_lock);
 
 	rt->u.dst.lastuse = jiffies;
-- 
GitLab


From 0719bdf1b5e7eb0d9c3c73ebbd9c9d5d382bb9e1 Mon Sep 17 00:00:00 2001
From: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Date: Mon, 28 Aug 2006 17:50:37 -0700
Subject: [PATCH 0637/1063] [NETFILTER]: xt_CONNMARK.c build fix

net/netfilter/xt_CONNMARK.c: In function 'target':
net/netfilter/xt_CONNMARK.c:59: warning: implicit declaration of
function 'nf_conntrack_event_cache'

The warning is due to the following .config:
CONFIG_IP_NF_CONNTRACK=m
CONFIG_IP_NF_CONNTRACK_MARK=y
# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
CONFIG_IP_NF_CONNTRACK_NETLINK=m

This change was introduced by:
http://www.kernel.org/git/?p=linux/kernel/git/davem/net-2.6.19.git;a=commit;h=76e4b41009b8a2e9dd246135cf43c7fe39553aa5

Proposed solution (based on the define in
include/net/netfilter/nf_conntrack_compat.h):

Signed-off-by: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_CONNMARK.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 0e4249ddc17bb..6ccb45ee08802 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -53,7 +53,7 @@ target(struct sk_buff **pskb,
 			newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
 			if (newmark != *ctmark) {
 				*ctmark = newmark;
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 				ip_conntrack_event_cache(IPCT_MARK, *pskb);
 #else
 				nf_conntrack_event_cache(IPCT_MARK, *pskb);
@@ -65,7 +65,7 @@ target(struct sk_buff **pskb,
 				  ((*pskb)->nfmark & markinfo->mask);
 			if (*ctmark != newmark) {
 				*ctmark = newmark;
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
 				ip_conntrack_event_cache(IPCT_MARK, *pskb);
 #else
 				nf_conntrack_event_cache(IPCT_MARK, *pskb);
-- 
GitLab


From def42ff4dd6f54ebcf78192579a8ff1f81d8e2e8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 28 Aug 2006 23:57:56 -0700
Subject: [PATCH 0638/1063] [IPV4]: Make struct in_addr::s_addr __be32

There will be a relatively small increase in sparse endian warnings, but
this patch (and the sin_port one) is a first step towards making the
networking code endian clean.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 94f557fa46369..9a9d5dd32e737 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -52,7 +52,7 @@ enum {
 
 /* Internet address. */
 struct in_addr {
-	__u32	s_addr;
+	__be32	s_addr;
 };
 
 #define IP_TOS		1
-- 
GitLab


From cd360007a0eb8cbf17c006cca42aa884d33f96be Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 28 Aug 2006 23:58:32 -0700
Subject: [PATCH 0639/1063] [IPV4]: Make struct sockaddr_in::sin_port __be16

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 9a9d5dd32e737..bcaca8399aed6 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -177,7 +177,7 @@ struct in_pktinfo
 #define __SOCK_SIZE__	16		/* sizeof(struct sockaddr)	*/
 struct sockaddr_in {
   sa_family_t		sin_family;	/* Address family		*/
-  unsigned short int	sin_port;	/* Port number			*/
+  __be16		sin_port;	/* Port number			*/
   struct in_addr	sin_addr;	/* Internet address		*/
 
   /* Pad to size of `struct sockaddr'. */
-- 
GitLab


From 07317621d004e8e6967f2dac8562825267e56135 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 29 Aug 2006 17:48:17 -0700
Subject: [PATCH 0640/1063] [NETFILTER] bridge: code rearrangement for clarity

Cleanup and rearrangement for better style and clarity:
	Split the function nf_bridge_maybe_copy_header into two pieces
	Move copy portion out of line.
	Use Ethernet header size macros.
	Use header file to handle CONFIG_BRIDGE_NETFILTER differences

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge.h | 26 +++++++-------------------
 net/bridge/br_forward.c          |  5 +----
 net/bridge/br_netfilter.c        | 27 +++++++++++++++++++++++++--
 3 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 427c67ff89e98..274fe4b331555 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -47,26 +47,12 @@ enum nf_br_hook_priorities {
 
 
 /* Only used in br_forward.c */
-static inline
-int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+extern int nf_bridge_copy_header(struct sk_buff *skb);
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 {
-	int err;
-
-	if (skb->nf_bridge) {
-		if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
-			err = skb_cow(skb, 18);
-			if (err)
-				return err;
-			memcpy(skb->data - 18, skb->nf_bridge->data, 18);
-			skb_push(skb, 4);
-		} else {
-			err = skb_cow(skb, 16);
-			if (err)
-				return err;
-			memcpy(skb->data - 16, skb->nf_bridge->data, 16);
-		}
-	}
-	return 0;
+	if (skb->nf_bridge)
+		return nf_bridge_copy_header(skb);
+  	return 0;
 }
 
 /* This is called by the IP fragmenting code and it ensures there is
@@ -90,6 +76,8 @@ struct bridge_skb_cb {
 };
 
 extern int brnf_deferred_hooks;
+#else
+#define nf_bridge_maybe_copy_header(skb)	(0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
 #endif /* __KERNEL__ */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 864fbbc7b24d0..191b861e5e53c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -38,13 +38,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 		kfree_skb(skb);
 	else {
-#ifdef CONFIG_BRIDGE_NETFILTER
 		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
 		if (nf_bridge_maybe_copy_header(skb))
 			kfree_skb(skb);
-		else
-#endif
-		{
+		else {
 			skb_push(skb, ETH_HLEN);
 
 			dev_queue_xmit(skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 05b3de8882436..b498efcfe451b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -127,14 +127,37 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 
 static inline void nf_bridge_save_header(struct sk_buff *skb)
 {
-        int header_size = 16;
+        int header_size = ETH_HLEN;
 
 	if (skb->protocol == htons(ETH_P_8021Q))
-		header_size = 18;
+		header_size += VLAN_HLEN;
 
 	memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
 }
 
+/*
+ * When forwarding bridge frames, we save a copy of the original
+ * header before processing.
+ */
+int nf_bridge_copy_header(struct sk_buff *skb)
+{
+	int err;
+        int header_size = ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		header_size += VLAN_HLEN;
+
+	err = skb_cow(skb, header_size);
+	if (err)
+		return err;
+
+	memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		__skb_push(skb, VLAN_HLEN);
+	return 0;
+}
+
 /* PF_BRIDGE/PRE_ROUTING *********************************************/
 /* Undo the changes made for ip6tables PREROUTING and continue the
  * bridge PRE_ROUTING hook. */
-- 
GitLab


From 9bcfcaf5e9cc887eb39236e43bdbe4b4b2572229 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 29 Aug 2006 17:48:57 -0700
Subject: [PATCH 0641/1063] [NETFILTER] bridge: simplify nf_bridge_pad

Do some simple optimization on the nf_bridge_pad() function
and don't use magic constants. Eliminate a double call and
the #ifdef'd code for CONFIG_BRIDGE_NETFILTER.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge.h | 16 +++++-----------
 net/ipv4/ip_output.c             | 15 +++++++--------
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 274fe4b331555..9a4dd11af86ec 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -5,9 +5,8 @@
  */
 
 #include <linux/netfilter.h>
-#if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER)
 #include <linux/if_ether.h>
-#endif
+#include <linux/if_vlan.h>
 
 /* Bridge Hooks */
 /* After promisc drops, checksum checks. */
@@ -57,16 +56,10 @@ static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 
 /* This is called by the IP fragmenting code and it ensures there is
  * enough room for the encapsulating header (if there is one). */
-static inline
-int nf_bridge_pad(struct sk_buff *skb)
+static inline int nf_bridge_pad(const struct sk_buff *skb)
 {
-	if (skb->protocol == __constant_htons(ETH_P_IP))
-		return 0;
-	if (skb->nf_bridge) {
-		if (skb->protocol == __constant_htons(ETH_P_8021Q))
-			return 4;
-	}
-	return 0;
+ 	return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q))
+		? VLAN_HLEN : 0;
 }
 
 struct bridge_skb_cb {
@@ -78,6 +71,7 @@ struct bridge_skb_cb {
 extern int brnf_deferred_hooks;
 #else
 #define nf_bridge_maybe_copy_header(skb)	(0)
+#define nf_bridge_pad(skb)			(0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
 #endif /* __KERNEL__ */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 81b2795a4c205..97aee76fb7463 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -426,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	int ptr;
 	struct net_device *dev;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs;
+	unsigned int mtu, hlen, left, len, ll_rs, pad;
 	int offset;
 	__be16 not_last_frag;
 	struct rtable *rt = (struct rtable*)skb->dst;
@@ -556,14 +556,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = raw + hlen;		/* Where to start from */
 
-#ifdef CONFIG_BRIDGE_NETFILTER
 	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
-	 * we need to make room for the encapsulating header */
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
-	mtu -= nf_bridge_pad(skb);
-#else
-	ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
-#endif
+	 * we need to make room for the encapsulating header
+	 */
+	pad = nf_bridge_pad(skb);
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+	mtu -= pad;
+
 	/*
 	 *	Fragment the datagram.
 	 */
-- 
GitLab


From 8394e9b2faf539f82470b36c86f0485cab5278bd Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 29 Aug 2006 17:49:31 -0700
Subject: [PATCH 0642/1063] [NETFILTER] bridge: debug message fixes

If CONFIG_NETFILTER_DEBUG is enabled, it shouldn't change the
actions of the filtering. The message about skb->dst being NULL
is commonly triggered by dhclient, so it is useless. Make sure all
messages end in newline.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index b498efcfe451b..cf80dd0e896d5 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -718,16 +718,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
 	else
 		pf = PF_INET6;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Sometimes we get packets with NULL ->dst here (for example,
-	 * running a dhcp client daemon triggers this). This should now
-	 * be fixed, but let's keep the check around. */
-	if (skb->dst == NULL) {
-		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
-		return NF_ACCEPT;
-	}
-#endif
-
 	nf_bridge = skb->nf_bridge;
 	nf_bridge->physoutdev = skb->dev;
 	realindev = nf_bridge->physindev;
@@ -809,7 +799,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 	 * keep the check just to be sure... */
 	if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
 		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
-		       "bad mac.raw pointer.");
+		       "bad mac.raw pointer.\n");
 		goto print_error;
 	}
 #endif
@@ -827,7 +817,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	if (skb->dst == NULL) {
-		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
+		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
 		goto print_error;
 	}
 #endif
@@ -864,6 +854,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 	}
 	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
 	       skb->data);
+	dump_stack();
 	return NF_ACCEPT;
 #endif
 }
-- 
GitLab


From fc747e82b40ea50a62eb2aef55bedd4465607cb0 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Tue, 29 Aug 2006 17:50:19 -0700
Subject: [PATCH 0643/1063] [DCCP]: Tidyup CCID3 list handling

As Arnaldo Carvalho de Melo points out, I should be using list_entry in case
the structure changes in future. The current code functions, but it relies on
the position of the list node within the structure and requires a type cast.

While doing this I noticed that I had one more variable than I needed, so
this removes that as well.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 090bc39e8199d..195aa95662281 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -900,7 +900,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
 static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	struct dccp_li_hist_entry *next, *head;
+	struct dccp_li_hist_entry *head;
 	u64 seq_temp;
 
 	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
@@ -908,15 +908,15 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
 			return;
 
-		next = (struct dccp_li_hist_entry *)
-		   hcrx->ccid3hcrx_li_hist.next;
-		next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+		   struct dccp_li_hist_entry, dccplih_node);
+		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
 	} else {
 		struct dccp_li_hist_entry *entry;
 		struct list_head *tail;
 
-		head = (struct dccp_li_hist_entry *)
-		   hcrx->ccid3hcrx_li_hist.next;
+		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+		   struct dccp_li_hist_entry, dccplih_node);
 		/* FIXME win count check removed as was wrong */
 		/* should make this check with receive history */
 		/* and compare there as per section 10.2 of RFC4342 */
-- 
GitLab


From 99f59ed073d3c1b890690064ab285a201dea2e35 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Tue, 29 Aug 2006 17:53:48 -0700
Subject: [PATCH 0644/1063] [NetLabel]: Correctly initialize the NetLabel
 fields.

Fix a problem where the NetLabel specific fields of the sk_security_struct
structure were not being initialized early enough in some cases.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/hooks.c                    |  6 +++
 security/selinux/include/selinux_netlabel.h | 18 +++++++++
 security/selinux/ss/services.c              | 45 ++++++++++++++++++++-
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 180b26b97d2d6..5a66c4c09f7aa 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -281,6 +281,8 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority)
 	ssec->sid = SECINITSID_UNLABELED;
 	sk->sk_security = ssec;
 
+	selinux_netlbl_sk_security_init(ssec, family);
+
 	return 0;
 }
 
@@ -3585,6 +3587,8 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
 
 	newssec->sid = ssec->sid;
 	newssec->peer_sid = ssec->peer_sid;
+
+	selinux_netlbl_sk_clone_security(ssec, newssec);
 }
 
 static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
@@ -3648,6 +3652,8 @@ static void selinux_inet_csk_clone(struct sock *newsk,
 	   new socket in sync, but we don't have the isec available yet.
 	   So we will wait until sock_graft to do it, by which
 	   time it will have been created and available. */
+
+	selinux_netlbl_sk_security_init(newsksec, req->rsk_ops->family);
 }
 
 static void selinux_req_classify_flow(const struct request_sock *req,
diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h
index 88c463eef1e1b..d885d880540e6 100644
--- a/security/selinux/include/selinux_netlabel.h
+++ b/security/selinux/include/selinux_netlabel.h
@@ -39,6 +39,10 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 				struct avc_audit_data *ad);
 u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock);
 u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb);
+void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec,
+				     int family);
+void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec,
+				      struct sk_security_struct *newssec);
 
 int __selinux_netlbl_inode_permission(struct inode *inode, int mask);
 /**
@@ -115,6 +119,20 @@ static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb)
 	return SECSID_NULL;
 }
 
+static inline void selinux_netlbl_sk_security_init(
+	                                       struct sk_security_struct *ssec,
+					       int family)
+{
+	return;
+}
+
+static inline void selinux_netlbl_sk_clone_security(
+	                                   struct sk_security_struct *ssec,
+					   struct sk_security_struct *newssec)
+{
+	return;
+}
+
 static inline int selinux_netlbl_inode_permission(struct inode *inode,
 						  int mask)
 {
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 910afa1ffc31c..835b485b2afd1 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2422,6 +2422,45 @@ static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid)
 	return rc;
 }
 
+/**
+ * selinux_netlbl_sk_security_init - Setup the NetLabel fields
+ * @ssec: the sk_security_struct
+ * @family: the socket family
+ *
+ * Description:
+ * Called when a new sk_security_struct is allocated to initialize the NetLabel
+ * fields.
+ *
+ */
+void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec,
+				     int family)
+{
+        if (family == PF_INET)
+		ssec->nlbl_state = NLBL_REQUIRE;
+	else
+		ssec->nlbl_state = NLBL_UNSET;
+}
+
+/**
+ * selinux_netlbl_sk_clone_security - Copy the NetLabel fields
+ * @ssec: the original sk_security_struct
+ * @newssec: the cloned sk_security_struct
+ *
+ * Description:
+ * Clone the NetLabel specific sk_security_struct fields from @ssec to
+ * @newssec.
+ *
+ */
+void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec,
+				      struct sk_security_struct *newssec)
+{
+	newssec->sclass = ssec->sclass;
+	if (ssec->nlbl_state != NLBL_UNSET)
+		newssec->nlbl_state = NLBL_REQUIRE;
+	else
+		newssec->nlbl_state = NLBL_UNSET;
+}
+
 /**
  * selinux_netlbl_socket_post_create - Label a socket using NetLabel
  * @sock: the socket to label
@@ -2440,10 +2479,11 @@ int selinux_netlbl_socket_post_create(struct socket *sock,
 	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
 	struct sk_security_struct *sksec = sock->sk->sk_security;
 
+	sksec->sclass = isec->sclass;
+
 	if (sock_family != PF_INET)
 		return 0;
 
-	sksec->sclass = isec->sclass;
 	sksec->nlbl_state = NLBL_REQUIRE;
 	return selinux_netlbl_socket_setsid(sock, sid);
 }
@@ -2463,12 +2503,13 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
 	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
 	struct sk_security_struct *sksec = sk->sk_security;
 
+	sksec->sclass = isec->sclass;
+
 	if (sk->sk_family != PF_INET)
 		return;
 
 	sksec->nlbl_state = NLBL_REQUIRE;
 	sksec->peer_sid = sksec->sid;
-	sksec->sclass = isec->sclass;
 
 	/* Try to set the NetLabel on the socket to save time later, if we fail
 	 * here we will pick up the pieces in later calls to
-- 
GitLab


From 1b7f775209bbee6b993587bae69acb9fc12ceb17 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Tue, 29 Aug 2006 17:54:17 -0700
Subject: [PATCH 0645/1063] [NetLabel]: remove unused function prototypes

Removed some older function prototypes for functions that no longer exist.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cipso_ipv4.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index c7175e7258044..5aed72ab652b4 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -200,15 +200,9 @@ static inline int cipso_v4_cache_add(const struct sk_buff *skb,
 
 #ifdef CONFIG_NETLABEL
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
-int cipso_v4_socket_setopt(struct socket *sock,
-			   unsigned char *opt,
-			   u32 opt_len);
 int cipso_v4_socket_setattr(const struct socket *sock,
 			    const struct cipso_v4_doi *doi_def,
 			    const struct netlbl_lsm_secattr *secattr);
-int cipso_v4_socket_getopt(const struct socket *sock,
-			   unsigned char **opt,
-			   u32 *opt_len);
 int cipso_v4_socket_getattr(const struct socket *sock,
 			    struct netlbl_lsm_secattr *secattr);
 int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
-- 
GitLab


From c1b14c0a46232246f61d3157bac1201e1e102227 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Tue, 29 Aug 2006 17:54:41 -0700
Subject: [PATCH 0646/1063] [NetLabel]: Comment corrections.

Fix some incorrect comments.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/ss/services.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 835b485b2afd1..4f7642c7337e6 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2617,7 +2617,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 }
 
 /**
- * selinux_netlbl_socket_peersid - Return the peer SID of a connected socket
+ * selinux_netlbl_socket_getpeersec_stream - Return the connected peer's SID
  * @sock: the socket
  *
  * Description:
-- 
GitLab


From 7b3bbb926f4b3dd3a007dcf8dfa00203f52cb58d Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Tue, 29 Aug 2006 17:55:11 -0700
Subject: [PATCH 0647/1063] [NetLabel]: Cleanup ebitmap_import()

Rewrite ebitmap_import() so it is a bit cleaner and easier to read.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/ss/ebitmap.c | 36 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 4b915eb60c45d..cfed1d30fa6ad 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -145,29 +145,28 @@ int ebitmap_import(const unsigned char *src,
 		   struct ebitmap *dst)
 {
 	size_t src_off = 0;
+	size_t node_limit;
 	struct ebitmap_node *node_new;
 	struct ebitmap_node *node_last = NULL;
-	size_t iter;
-	size_t iter_bit;
-	size_t iter_limit;
+	u32 i_byte;
+	u32 i_bit;
 	unsigned char src_byte;
 
-	do {
-		iter_limit = src_len - src_off;
-		if (iter_limit >= sizeof(MAPTYPE)) {
+	while (src_off < src_len) {
+		if (src_len - src_off >= sizeof(MAPTYPE)) {
 			if (*(MAPTYPE *)&src[src_off] == 0) {
 				src_off += sizeof(MAPTYPE);
 				continue;
 			}
-			iter_limit = sizeof(MAPTYPE);
+			node_limit = sizeof(MAPTYPE);
 		} else {
-			iter = src_off;
-			src_byte = 0;
-			do {
-				src_byte |= src[iter++];
-			} while (iter < src_len && src_byte == 0);
+			for (src_byte = 0, i_byte = src_off;
+			     i_byte < src_len && src_byte == 0;
+			     i_byte++)
+				src_byte |= src[i_byte];
 			if (src_byte == 0)
 				break;
+			node_limit = src_len - src_off;
 		}
 
 		node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC);
@@ -176,24 +175,21 @@ int ebitmap_import(const unsigned char *src,
 			return -ENOMEM;
 		}
 		node_new->startbit = src_off * 8;
-		iter = 0;
-		do {
+		for (i_byte = 0; i_byte < node_limit; i_byte++) {
 			src_byte = src[src_off++];
-			iter_bit = iter++ * 8;
-			while (src_byte != 0) {
+			for (i_bit = i_byte * 8; src_byte != 0; i_bit++) {
 				if (src_byte & 0x80)
-					node_new->map |= MAPBIT << iter_bit;
-				iter_bit++;
+					node_new->map |= MAPBIT << i_bit;
 				src_byte <<= 1;
 			}
-		} while (iter < iter_limit);
+		}
 
 		if (node_last != NULL)
 			node_last->next = node_new;
 		else
 			dst->node = node_new;
 		node_last = node_new;
-	} while (src_off < src_len);
+	}
 
 	if (likely(node_last != NULL))
 		dst->highbit = node_last->startbit + MAPSIZE;
-- 
GitLab


From e448e931309e703f51d71a557973c620ff12fbda Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Tue, 29 Aug 2006 17:55:38 -0700
Subject: [PATCH 0648/1063] [NetLabel]: uninline
 selinux_netlbl_inode_permission()

Uninline the selinux_netlbl_inode_permission() at the request of
Andrew Morton.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/include/selinux_netlabel.h | 35 +--------------------
 security/selinux/ss/services.c              | 33 +++++++++++++------
 2 files changed, 25 insertions(+), 43 deletions(-)

diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h
index d885d880540e6..d69ec650cdbeb 100644
--- a/security/selinux/include/selinux_netlabel.h
+++ b/security/selinux/include/selinux_netlabel.h
@@ -43,40 +43,7 @@ void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec,
 				     int family);
 void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec,
 				      struct sk_security_struct *newssec);
-
-int __selinux_netlbl_inode_permission(struct inode *inode, int mask);
-/**
- * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled
- * @inode: the file descriptor's inode
- * @mask: the permission mask
- *
- * Description:
- * Looks at a file's inode and if it is marked as a socket protected by
- * NetLabel then verify that the socket has been labeled, if not try to label
- * the socket now with the inode's SID.  Returns zero on success, negative
- * values on failure.
- *
- */
-static inline int selinux_netlbl_inode_permission(struct inode *inode,
-						  int mask)
-{
-	int rc = 0;
-	struct inode_security_struct *isec;
-	struct sk_security_struct *sksec;
-
-	if (!S_ISSOCK(inode->i_mode))
-		return 0;
-
-	isec = inode->i_security;
-	sksec = SOCKET_I(inode)->sk->sk_security;
-	down(&isec->sem);
-	if (unlikely(sksec->nlbl_state == NLBL_REQUIRE &&
-		     (mask & (MAY_WRITE | MAY_APPEND))))
-		rc = __selinux_netlbl_inode_permission(inode, mask);
-	up(&isec->sem);
-
-	return rc;
-}
+int selinux_netlbl_inode_permission(struct inode *inode, int mask);
 #else
 static inline void selinux_netlbl_cache_invalidate(void)
 {
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 4f7642c7337e6..27ee28ccf2669 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2544,24 +2544,39 @@ u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid)
 }
 
 /**
- * __selinux_netlbl_inode_permission - Label a socket using NetLabel
+ * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled
  * @inode: the file descriptor's inode
  * @mask: the permission mask
  *
  * Description:
- * Try to label a socket with the inode's SID using NetLabel.  Returns zero on
- * success, negative values on failure.
+ * Looks at a file's inode and if it is marked as a socket protected by
+ * NetLabel then verify that the socket has been labeled, if not try to label
+ * the socket now with the inode's SID.  Returns zero on success, negative
+ * values on failure.
  *
  */
-int __selinux_netlbl_inode_permission(struct inode *inode, int mask)
+int selinux_netlbl_inode_permission(struct inode *inode, int mask)
 {
 	int rc;
-	struct socket *sock = SOCKET_I(inode);
-	struct sk_security_struct *sksec = sock->sk->sk_security;
+	struct inode_security_struct *isec;
+	struct sk_security_struct *sksec;
+	struct socket *sock;
 
-	lock_sock(sock->sk);
-	rc = selinux_netlbl_socket_setsid(sock, sksec->sid);
-	release_sock(sock->sk);
+	if (!S_ISSOCK(inode->i_mode))
+		return 0;
+
+	sock = SOCKET_I(inode);
+	isec = inode->i_security;
+	sksec = sock->sk->sk_security;
+	down(&isec->sem);
+	if (unlikely(sksec->nlbl_state == NLBL_REQUIRE &&
+		     (mask & (MAY_WRITE | MAY_APPEND)))) {
+		lock_sock(sock->sk);
+		rc = selinux_netlbl_socket_setsid(sock, sksec->sid);
+		release_sock(sock->sk);
+	} else
+		rc = 0;
+	up(&isec->sem);
 
 	return rc;
 }
-- 
GitLab


From 7a0e1d602288370801c353221c6a938eab925053 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Tue, 29 Aug 2006 17:56:04 -0700
Subject: [PATCH 0649/1063] [NetLabel]: add some missing #includes to various
 header files

Add some missing include files to the NetLabel related header files.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cipso_ipv4.h                    | 2 ++
 include/net/netlabel.h                      | 1 +
 net/netlabel/netlabel_domainhash.h          | 4 ++++
 net/netlabel/netlabel_user.h                | 5 +++--
 security/selinux/include/selinux_netlabel.h | 9 +++++++++
 5 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 5aed72ab652b4..59406e0dc5b28 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -37,6 +37,8 @@
 #include <linux/types.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
 #include <net/netlabel.h>
 
 /* known doi values */
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 7cae730832c7f..fc2b72fc7e077 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -31,6 +31,7 @@
 #define _NETLABEL_H
 
 #include <linux/types.h>
+#include <linux/net.h>
 #include <linux/skbuff.h>
 #include <net/netlink.h>
 
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 9217863ce0d3a..99a2287de2460 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -32,6 +32,10 @@
 #ifndef _NETLABEL_DOMAINHASH_H
 #define _NETLABEL_DOMAINHASH_H
 
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+
 /* Domain hash table size */
 /* XXX - currently this number is an uneducated guess */
 #define NETLBL_DOMHSH_BITSIZE       7
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index ccf237b3a1282..385a6c7488c60 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -31,11 +31,12 @@
 #ifndef _NETLABEL_USER_H
 #define _NETLABEL_USER_H
 
+#include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/capability.h>
-#include <linux/genetlink.h>
-#include <net/netlabel.h>
+#include <net/netlink.h>
 #include <net/genetlink.h>
+#include <net/netlabel.h>
 
 /* NetLabel NETLINK helper functions */
 
diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h
index d69ec650cdbeb..ecab4bddaaf4a 100644
--- a/security/selinux/include/selinux_netlabel.h
+++ b/security/selinux/include/selinux_netlabel.h
@@ -27,6 +27,15 @@
 #ifndef _SELINUX_NETLABEL_H_
 #define _SELINUX_NETLABEL_H_
 
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#include "avc.h"
+#include "objsec.h"
+
 #ifdef CONFIG_NETLABEL
 void selinux_netlbl_cache_invalidate(void);
 int selinux_netlbl_socket_post_create(struct socket *sock,
-- 
GitLab


From 28a7b327b8cc8ea35662d360d3d11d60195debc9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 30 Aug 2006 15:03:07 -0700
Subject: [PATCH 0650/1063] [PKT_SCHED] act_simple.c: make struct
 simp_hash_info static

This patch makes the needlessly global struct simp_hash_info static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_simple.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 8c1ab8ad8fa6d..901571a677072 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -28,7 +28,7 @@ static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
 static u32 simp_idx_gen;
 static DEFINE_RWLOCK(simp_lock);
 
-struct tcf_hashinfo simp_hash_info = {
+static struct tcf_hashinfo simp_hash_info = {
 	.htab	=	tcf_simp_ht,
 	.hmask	=	SIMP_TAB_MASK,
 	.lock	=	&simp_lock,
-- 
GitLab


From 7a42c2175703f54a3640f25dc078c8190a4f904e Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Thu, 31 Aug 2006 15:03:02 -0700
Subject: [PATCH 0651/1063] [NET]: Change somaxconn sysctl to __read_mostly

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index d6f27ed9ba6c4..1bc4167e0da8d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1337,7 +1337,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
  *	ready for listening.
  */
 
-int sysctl_somaxconn = SOMAXCONN;
+int sysctl_somaxconn __read_mostly = SOMAXCONN;
 
 asmlinkage long sys_listen(int fd, int backlog)
 {
-- 
GitLab


From 18adaf067cf013fc2690d3830eba99ff800795b4 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Thu, 31 Aug 2006 15:03:36 -0700
Subject: [PATCH 0652/1063] [AF_UNIX]: Change max_dgram_qlen sysctl to
 __read_mostly

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7c91c2024d494..b43a27828df5d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
-int sysctl_unix_max_dgram_qlen = 10;
+int sysctl_unix_max_dgram_qlen __read_mostly = 10;
 
 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 DEFINE_SPINLOCK(unix_table_lock);
-- 
GitLab


From 3015d5d4e5b15eddea272a697e83391100581932 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 31 Aug 2006 15:04:30 -0700
Subject: [PATCH 0653/1063] [RTNETLINK]: Fix typo causing wrong skb to be freed

Fix a typo introduced by myself which leads to freeing the skb
containing the netlink message when it should free the newly
allocated skb for the reply.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0ebcf8488e994..63b882ac288ad 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -596,7 +596,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
 			       NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
 	if (err <= 0) {
-		kfree_skb(skb);
+		kfree_skb(nskb);
 		goto errout;
 	}
 
-- 
GitLab


From ff9b5e0f08cb650d113eef0c654f931c0a7ae730 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 31 Aug 2006 15:11:02 -0700
Subject: [PATCH 0654/1063] [TCP]: Fix rcv mss estimate for LRO

By passing a Linux-generated TSO packet straight back into Linux, Xen
becomes our first LRO user :) Unfortunately, there is at least one spot
in our stack that needs to be changed to cope with this.

The receive MSS estimate is computed from the raw packet size.  This is
broken if the packet is GSO/LRO.  Fortunately the real MSS can be found
in gso_size so we simply need to use that if it is non-zero.

Real LRO NICs should of course set the gso_size field in future.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index caf3c41dcc8c7..511b738f118a9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 	/* skb->len may jitter because of SACKs, even if peer
 	 * sends good full-sized frames.
 	 */
-	len = skb->len;
+	len = skb_shinfo(skb)->gso_size ?: skb->len;
 	if (len >= icsk->icsk_ack.rcv_mss) {
 		icsk->icsk_ack.rcv_mss = len;
 	} else {
-- 
GitLab


From a9917c06652165fe4eeb9ab7a5d1e0674e90e508 Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Thu, 31 Aug 2006 15:14:32 -0700
Subject: [PATCH 0655/1063] [XFRM] STATE: Fix flushing with hash mask.

This is a minor fix to transformation state flushing for net-2.6.19:
the flush loop stopped before index xfrm_state_hmask, so the last hash
bucket was never visited. Please apply it.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 4341795eb2441..9f63edd393465 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -384,7 +384,7 @@ void xfrm_state_flush(u8 proto)
 	int i;
 
 	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < xfrm_state_hmask; i++) {
+	for (i = 0; i <= xfrm_state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 restart:
-- 
GitLab


From dc435e6dac1439340eaeceef84022c4e4749796d Mon Sep 17 00:00:00 2001
From: Masahide NAKAMURA <nakam@linux-ipv6.org>
Date: Thu, 31 Aug 2006 15:18:49 -0700
Subject: [PATCH 0656/1063] [IPV6] MIP6: Fix to update IP6CB when cloned skbuff
 is received at HAO.

Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 084f78c3479b6..88c96b10684c9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -233,9 +233,14 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 
 	if (skb_cloned(skb)) {
 		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+		struct inet6_skb_parm *opt2;
+
 		if (skb2 == NULL)
 			goto discard;
 
+		opt2 = IP6CB(skb2);
+		memcpy(opt2, opt, sizeof(*opt2));
+
 		kfree_skb(skb);
 
 		/* update all variable using below by copied skbuff */
@@ -296,6 +301,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
 		skb = *skbp;
 		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+		opt = IP6CB(skb);
 #ifdef CONFIG_IPV6_MIP6
 		opt->nhoff = dstbuf;
 #else
@@ -690,6 +696,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
 		skb = *skbp;
 		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
 	}
-- 
GitLab


From fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 Mon Sep 17 00:00:00 2001
From: Dmitry Mishin <dim@openvz.org>
Date: Thu, 31 Aug 2006 15:28:39 -0700
Subject: [PATCH 0657/1063] [NET]: Fix sk->sk_filter field access

Function sk_filter() is called from tcp_v{4,6}_rcv() functions with arg
needlock = 0, while socket is not locked at that moment. In order to avoid
this and similar issues in the future, use rcu for sk->sk_filter field read
protection.

Signed-off-by: Dmitry Mishin <dim@openvz.org>
Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
---
 include/linux/filter.h | 13 +++++++------
 include/net/sock.h     | 34 ++++++++++++++++-----------------
 net/core/filter.c      |  8 ++++----
 net/core/sock.c        | 22 +++++++++------------
 net/dccp/ipv6.c        |  2 +-
 net/decnet/dn_nsp_in.c |  2 +-
 net/ipv4/tcp_ipv4.c    |  2 +-
 net/ipv6/tcp_ipv6.c    |  4 ++--
 net/packet/af_packet.c | 43 ++++++++++++++++++------------------------
 net/sctp/input.c       |  2 +-
 10 files changed, 61 insertions(+), 71 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c6cb8f095088a..91b2e3b9251eb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -25,10 +25,10 @@
  
 struct sock_filter	/* Filter block */
 {
-        __u16	code;   /* Actual filter code */
-        __u8	jt;	/* Jump true */
-        __u8	jf;	/* Jump false */
-        __u32	k;      /* Generic multiuse field */
+	__u16	code;   /* Actual filter code */
+	__u8	jt;	/* Jump true */
+	__u8	jf;	/* Jump false */
+	__u32	k;      /* Generic multiuse field */
 };
 
 struct sock_fprog	/* Required for SO_ATTACH_FILTER. */
@@ -41,8 +41,9 @@ struct sock_fprog	/* Required for SO_ATTACH_FILTER. */
 struct sk_filter
 {
 	atomic_t		refcnt;
-        unsigned int         	len;	/* Number of filter blocks */
-        struct sock_filter     	insns[0];
+	unsigned int         	len;	/* Number of filter blocks */
+	struct rcu_head		rcu;
+	struct sock_filter     	insns[0];
 };
 
 static inline unsigned int sk_filter_len(struct sk_filter *fp)
diff --git a/include/net/sock.h b/include/net/sock.h
index 337ebec84c700..edd4d73ce7f5c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -862,30 +862,24 @@ extern void sock_init_data(struct socket *sock, struct sock *sk);
  *
  */
 
-static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
 	int err;
+	struct sk_filter *filter;
 	
 	err = security_sock_rcv_skb(sk, skb);
 	if (err)
 		return err;
 	
-	if (sk->sk_filter) {
-		struct sk_filter *filter;
-		
-		if (needlock)
-			bh_lock_sock(sk);
-		
-		filter = sk->sk_filter;
-		if (filter) {
-			unsigned int pkt_len = sk_run_filter(skb, filter->insns,
-							     filter->len);
-			err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
-		}
-
-		if (needlock)
-			bh_unlock_sock(sk);
+	rcu_read_lock_bh();
+	filter = sk->sk_filter;
+	if (filter) {
+		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+				filter->len);
+		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
+ 	rcu_read_unlock_bh();
+
 	return err;
 }
 
@@ -897,6 +891,12 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
  *	Remove a filter from a socket and release its resources.
  */
  
+static inline void sk_filter_rcu_free(struct rcu_head *rcu)
+{
+	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+	kfree(fp);
+}
+
 static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
 {
 	unsigned int size = sk_filter_len(fp);
@@ -904,7 +904,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
 	atomic_sub(size, &sk->sk_omem_alloc);
 
 	if (atomic_dec_and_test(&fp->refcnt))
-		kfree(fp);
+		call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
 }
 
 static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
diff --git a/net/core/filter.c b/net/core/filter.c
index 5b4486a60cf6b..6732782a5a400 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (!err) {
 		struct sk_filter *old_fp;
 
-		spin_lock_bh(&sk->sk_lock.slock);
-		old_fp = sk->sk_filter;
-		sk->sk_filter = fp;
-		spin_unlock_bh(&sk->sk_lock.slock);
+		rcu_read_lock_bh();
+		old_fp = rcu_dereference(sk->sk_filter);
+		rcu_assign_pointer(sk->sk_filter, fp);
+		rcu_read_unlock_bh();
 		fp = old_fp;
 	}
 
diff --git a/net/core/sock.c b/net/core/sock.c
index cfaf09039b023..b77e155cbe6c0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		goto out;
 	}
 
-	/* It would be deadlock, if sock_queue_rcv_skb is used
-	   with socket lock! We assume that users of this
-	   function are lock free.
-	*/
-	err = sk_filter(sk, skb, 1);
+	err = sk_filter(sk, skb);
 	if (err)
 		goto out;
 
@@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc = NET_RX_SUCCESS;
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
@@ -606,15 +602,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			break;
 
 		case SO_DETACH_FILTER:
-			spin_lock_bh(&sk->sk_lock.slock);
-			filter = sk->sk_filter;
+			rcu_read_lock_bh();
+			filter = rcu_dereference(sk->sk_filter);
                         if (filter) {
-				sk->sk_filter = NULL;
-				spin_unlock_bh(&sk->sk_lock.slock);
+				rcu_assign_pointer(sk->sk_filter, NULL);
 				sk_filter_release(sk, filter);
+				rcu_read_unlock_bh();
 				break;
 			}
-			spin_unlock_bh(&sk->sk_lock.slock);
+			rcu_read_unlock_bh();
 			ret = -ENONET;
 			break;
 
@@ -884,10 +880,10 @@ void sk_free(struct sock *sk)
 	if (sk->sk_destruct)
 		sk->sk_destruct(sk);
 
-	filter = sk->sk_filter;
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		sk_filter_release(sk, filter);
-		sk->sk_filter = NULL;
+		rcu_assign_pointer(sk->sk_filter, NULL);
 	}
 
 	sock_disable_timestamp(sk);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index f9c5e12d70388..7a47399cf31fd 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -970,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_IP))
 		return dccp_v4_do_rcv(sk, skb);
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard;
 
 	/*
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 86f7f3b28e700..72ecc6e62ec4a 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
         	goto out;
         }
 
-	err = sk_filter(sk, skb, 0);
+	err = sk_filter(sk, skb);
 	if (err)
 		goto out;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 23b46e36b1471..39b1798560824 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1104,7 +1104,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 		goto discard_and_relse;
 	nf_reset(skb);
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b18918f30111..2546fc9f0a78b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1075,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard;
 
 	/*
@@ -1232,7 +1232,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 300215bdbf466..f4ccb90e67392 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -427,21 +427,24 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 }
 #endif
 
-static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
+static inline int run_filter(struct sk_buff *skb, struct sock *sk,
+							unsigned *snaplen)
 {
 	struct sk_filter *filter;
+	int err = 0;
 
-	bh_lock_sock(sk);
-	filter = sk->sk_filter;
-	/*
-	 * Our caller already checked that filter != NULL but we need to
-	 * verify that under bh_lock_sock() to be safe
-	 */
-	if (likely(filter != NULL))
-		res = sk_run_filter(skb, filter->insns, filter->len);
-	bh_unlock_sock(sk);
+	rcu_read_lock_bh();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter != NULL) {
+		err = sk_run_filter(skb, filter->insns, filter->len);
+		if (!err)
+			err = -EPERM;
+		else if (*snaplen > err)
+			*snaplen = err;
+	}
+	rcu_read_unlock_bh();
 
-	return res;
+	return err;
 }
 
 /*
@@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 
 	snaplen = skb->len;
 
-	if (sk->sk_filter) {
-		unsigned res = run_filter(skb, sk, snaplen);
-		if (res == 0)
-			goto drop_n_restore;
-		if (snaplen > res)
-			snaplen = res;
-	}
+	if (run_filter(skb, sk, &snaplen) < 0)
+		goto drop_n_restore;
 
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 	    (unsigned)sk->sk_rcvbuf)
@@ -593,13 +591,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 
 	snaplen = skb->len;
 
-	if (sk->sk_filter) {
-		unsigned res = run_filter(skb, sk, snaplen);
-		if (res == 0)
-			goto drop_n_restore;
-		if (snaplen > res)
-			snaplen = res;
-	}
+	if (run_filter(skb, sk, &snaplen) < 0)
+		goto drop_n_restore;
 
 	if (sk->sk_type == SOCK_DGRAM) {
 		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 8a34d95602cef..03f65de75d882 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb)
 		goto discard_release;
 	nf_reset(skb);
 
-	if (sk_filter(sk, skb, 1))
+	if (sk_filter(sk, skb))
                 goto discard_release;
 
 	/* Create an SCTP packet structure. */
-- 
GitLab


From eb878e84575fbce21d2edb079eada78bfa27023d Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Thu, 31 Aug 2006 17:42:59 -0700
Subject: [PATCH 0658/1063] [IPSEC]: output mode to take an xfrm state as input
 param

Expose IPSEC modes output path to take an xfrm state as input param.
This makes it consistent with the input mode processing (which already
takes the xfrm state as a param).

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h              | 2 +-
 net/ipv4/xfrm4_mode_transport.c | 4 +---
 net/ipv4/xfrm4_mode_tunnel.c    | 3 +--
 net/ipv4/xfrm4_output.c         | 2 +-
 net/ipv6/xfrm6_mode_ro.c        | 3 +--
 net/ipv6/xfrm6_mode_transport.c | 3 +--
 net/ipv6/xfrm6_mode_tunnel.c    | 3 +--
 net/ipv6/xfrm6_output.c         | 2 +-
 8 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0acabf2a0a8f2..4d6dc627df9b0 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -285,7 +285,7 @@ extern void xfrm_put_type(struct xfrm_type *type);
 
 struct xfrm_mode {
 	int (*input)(struct xfrm_state *x, struct sk_buff *skb);
-	int (*output)(struct sk_buff *skb);
+	int (*output)(struct xfrm_state *x,struct sk_buff *skb);
 
 	struct module *owner;
 	unsigned int encap;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a9e6b3dd19c9b..92676b7e40347 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -21,9 +21,8 @@
  * On exit, skb->h will be set to the start of the payload to be processed
  * by x->type->output and skb->nh will be set to the top IP header.
  */
-static int xfrm4_transport_output(struct sk_buff *skb)
+static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_state *x;
 	struct iphdr *iph;
 	int ihl;
 
@@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb)
 	ihl = iph->ihl * 4;
 	skb->h.raw += ihl;
 
-	x = skb->dst->xfrm;
 	skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
 	return 0;
 }
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 13cafbe56ce36..e23c21d31a531 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
  * On exit, skb->h will be set to the start of the payload to be processed
  * by x->type->output and skb->nh will be set to the top IP header.
  */
-static int xfrm4_tunnel_output(struct sk_buff *skb)
+static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	struct iphdr *iph, *top_iph;
 	int flags;
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 5fd115f0c5478..04403fb01a589 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
 		if (err)
 			goto error;
 
-		err = x->mode->output(skb);
+		err = x->mode->output(x, skb);
 		if (err)
 			goto error;
 
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index c11c335312f9c..6031c16d46ca8 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -43,9 +43,8 @@
  * its absence, that of the top IP header.  The value of skb->data will always
  * point to the top IP header.
  */
-static int xfrm6_ro_output(struct sk_buff *skb)
+static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_state *x = skb->dst->xfrm;
 	struct ipv6hdr *iph;
 	u8 *prevhdr;
 	int hdr_len;
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index a5dce216024d5..3a4b39b12bad0 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -25,9 +25,8 @@
  * its absence, that of the top IP header.  The value of skb->data will always
  * point to the top IP header.
  */
-static int xfrm6_transport_output(struct sk_buff *skb)
+static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_state *x = skb->dst->xfrm;
 	struct ipv6hdr *iph;
 	u8 *prevhdr;
 	int hdr_len;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 8af79be2edcad..5e7d8a7d6414c 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -37,10 +37,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
  * its absence, that of the top IP header.  The value of skb->data will always
  * point to the top IP header.
  */
-static int xfrm6_tunnel_output(struct sk_buff *skb)
+static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	struct ipv6hdr *iph, *top_iph;
 	int dsfield;
 
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index db58104e710b4..c260ea104c524 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -65,7 +65,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
 		if (err)
 			goto error;
 
-		err = x->mode->output(skb);
+		err = x->mode->output(x, skb);
 		if (err)
 			goto error;
 
-- 
GitLab


From d1d9facfd1b326e0df587c96f0ee55de2ae9f946 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Fri, 1 Sep 2006 00:32:12 -0700
Subject: [PATCH 0659/1063] [XFRM]: remove xerr_idxp from __xfrm_policy_check()

It seems that during the MIPv6 respin, some code which was originally
conditionally compiled around CONFIG_XFRM_ADVANCED was accidentally left
in after the config option was removed.

This patch removes an extraneous pointer (xerr_idxp) which is no
longer needed.

Signed-off-by: James Morris <jmorris@namei.org>
Acked-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7db1c48537f0d..537854fe47ca5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1514,8 +1514,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp
 {
 	for (; k < sp->len; k++) {
 		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
-			if (idxp)
-				*idxp = k;
+			*idxp = k;
 			return 1;
 		}
 	}
@@ -1534,7 +1533,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	struct flowi fl;
 	u8 fl_dir = policy_to_flow_dir(dir);
 	int xerr_idx = -1;
-	int *xerr_idxp = &xerr_idx;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
@@ -1560,7 +1558,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 					xfrm_policy_lookup);
 
 	if (!pol) {
-		if (skb->sp && secpath_has_nontransport(skb->sp, 0, xerr_idxp)) {
+		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
 			xfrm_secpath_reject(xerr_idx, skb, &fl);
 			return 0;
 		}
@@ -1619,13 +1617,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
 			k = xfrm_policy_ok(tpp[i], sp, k, family);
 			if (k < 0) {
-				if (k < -1 && xerr_idxp)
-					*xerr_idxp = -(2+k);
+				if (k < -1)
+					/* "-2 - errored_index" returned */
+					xerr_idx = -(2+k);
 				goto reject;
 			}
 		}
 
-		if (secpath_has_nontransport(sp, k, xerr_idxp))
+		if (secpath_has_nontransport(sp, k, &xerr_idx))
 			goto reject;
 
 		xfrm_pols_put(pols, npols);
-- 
GitLab


From 78e5b8916e7db119850f57ce8548fbb9767078fc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 13 Sep 2006 20:35:36 -0700
Subject: [PATCH 0660/1063] [RTNETLINK]: Fix netdevice name corruption

When changing a device by ifindex without including an IFLA_IFNAME
attribute, the ifname variable contains random garbage and is used
to change the device name.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 63b882ac288ad..d8e25e08cb7e7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -394,6 +394,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		ifname[0] = '\0';
 
 	err = -EINVAL;
 	ifm = nlmsg_data(nlh);
-- 
GitLab


From eb328111efde7bca782f340fe805756039ec6a0c Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:01:59 -0700
Subject: [PATCH 0661/1063] [GENL]: Provide more information to userspace about
 registered genl families

Additionally exports the following information when providing
the list of registered generic netlink families:
  - protocol version
  - header size
  - maximum number of attributes
  - list of available operations including
      - id
      - flags
      - availability of policy and doit/dumpit function

libnl HEAD provides a utility to read this new information:

	0x0010 nlctrl version 1
	    hdrsize 0 maxattr 6
	      op GETFAMILY (0x03) [POLICY,DOIT,DUMPIT]
	0x0011 NLBL_MGMT version 1
	    hdrsize 0 maxattr 0
	      op unknown (0x02) [DOIT]
	      op unknown (0x03) [DOIT]
	      ....

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genetlink.h | 18 ++++++++++++++++++
 include/net/genetlink.h   |  2 --
 net/netlink/genetlink.c   | 40 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 84f12a41dc01d..9049dc65ae51b 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -16,6 +16,8 @@ struct genlmsghdr {
 
 #define GENL_HDRLEN	NLMSG_ALIGN(sizeof(struct genlmsghdr))
 
+#define GENL_ADMIN_PERM		0x01
+
 /*
  * List of reserved static generic netlink identifiers:
  */
@@ -43,9 +45,25 @@ enum {
 	CTRL_ATTR_UNSPEC,
 	CTRL_ATTR_FAMILY_ID,
 	CTRL_ATTR_FAMILY_NAME,
+	CTRL_ATTR_VERSION,
+	CTRL_ATTR_HDRSIZE,
+	CTRL_ATTR_MAXATTR,
+	CTRL_ATTR_OPS,
 	__CTRL_ATTR_MAX,
 };
 
 #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
 
+enum {
+	CTRL_ATTR_OP_UNSPEC,
+	CTRL_ATTR_OP_ID,
+	CTRL_ATTR_OP_FLAGS,
+	CTRL_ATTR_OP_POLICY,
+	CTRL_ATTR_OP_DOIT,
+	CTRL_ATTR_OP_DUMPIT,
+	__CTRL_ATTR_OP_MAX,
+};
+
+#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1)
+
 #endif	/* __LINUX_GENERIC_NETLINK_H */
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 97d6d3aba9d2c..4a38d85e4e25d 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -27,8 +27,6 @@ struct genl_family
 	struct list_head	family_list;	/* private */
 };
 
-#define GENL_ADMIN_PERM		0x01
-
 /**
  * struct genl_info - receiving information
  * @snd_seq: sending sequence number
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 3ac942cdb677e..49bc2db7982b6 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -387,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len)
 static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
 			  u32 flags, struct sk_buff *skb, u8 cmd)
 {
+	struct nlattr *nla_ops;
+	struct genl_ops *ops;
 	void *hdr;
+	int idx = 1;
 
 	hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
 			  family->version);
@@ -396,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
 
 	NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name);
 	NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id);
+	NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version);
+	NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize);
+	NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr);
+
+	nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
+	if (nla_ops == NULL)
+		goto nla_put_failure;
+
+	list_for_each_entry(ops, &family->ops_list, ops_list) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start(skb, idx++);
+		if (nest == NULL)
+			goto nla_put_failure;
+
+		NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd);
+		NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags);
+
+		if (ops->policy)
+			NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY);
+
+		if (ops->doit)
+			NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT);
+
+		if (ops->dumpit)
+			NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT);
+
+		nla_nest_end(skb, nest);
+	}
+
+	nla_nest_end(skb, nla_ops);
 
 	return genlmsg_end(skb, hdr);
 
@@ -411,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
 	int chains_to_skip = cb->args[0];
 	int fams_to_skip = cb->args[1];
 
+	if (chains_to_skip != 0)
+		genl_lock();
+
 	for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
 		if (i < chains_to_skip)
 			continue;
@@ -428,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 errout:
+	if (chains_to_skip != 0)
+		genl_unlock();
+
 	cb->args[0] = i;
 	cb->args[1] = n;
 
-- 
GitLab


From 9c1ea148ad8bb06538b43908891afedebeaf361b Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Mon, 18 Sep 2006 00:03:41 -0700
Subject: [PATCH 0662/1063] [BRIDGE]: Change sysctl tunables to __read_mostly

Change some bridge sysctl tunables to __read_mostly.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index cf80dd0e896d5..ac181be13d83e 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -53,10 +53,10 @@
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
-static int brnf_call_iptables = 1;
-static int brnf_call_ip6tables = 1;
-static int brnf_call_arptables = 1;
-static int brnf_filter_vlan_tagged = 1;
+static int brnf_call_iptables __read_mostly = 1;
+static int brnf_call_ip6tables __read_mostly = 1;
+static int brnf_call_arptables __read_mostly = 1;
+static int brnf_filter_vlan_tagged __read_mostly = 1;
 #else
 #define brnf_filter_vlan_tagged 1
 #endif
-- 
GitLab


From 4cbf1cae9f08c76ed92700090a69a5b1f1f6a982 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Mon, 18 Sep 2006 00:04:22 -0700
Subject: [PATCH 0663/1063] [SCTP]: Change globals to __read_mostly

Change sctp globals to __read_mostly.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5692ef5485d35..d9dd4c47bc293 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -61,7 +61,7 @@
 #include <net/inet_ecn.h>
 
 /* Global data structures. */
-struct sctp_globals sctp_globals;
+struct sctp_globals sctp_globals __read_mostly;
 struct proc_dir_entry	*proc_net_sctp;
 DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
 
-- 
GitLab


From 94aec08ea426903a3fb3cafd4d8b900cd50df702 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Mon, 18 Sep 2006 00:05:22 -0700
Subject: [PATCH 0664/1063] [NETFILTER]: Change tunables to __read_mostly

Change some netfilter tunables to __read_mostly.  Also fix some
incorrect file reference comments while in there.

(this will be my last __read_mostly patch unless someone points out
something else that needs it)

Signed-off-by: Brian Haley <brian.haley@hp.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_core.c        |  6 ++---
 .../netfilter/ip_conntrack_proto_generic.c    |  2 +-
 net/ipv4/netfilter/ip_conntrack_proto_icmp.c  |  2 +-
 net/ipv4/netfilter/ip_conntrack_proto_sctp.c  | 14 +++++------
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c   | 24 +++++++++----------
 net/ipv4/netfilter/ip_conntrack_proto_udp.c   |  4 ++--
 net/ipv4/netfilter/ip_conntrack_standalone.c  |  4 ++--
 net/ipv4/netfilter/ip_queue.c                 |  2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c  |  2 +-
 net/ipv6/netfilter/ip6_queue.c                |  2 +-
 .../netfilter/nf_conntrack_l3proto_ipv6.c     |  2 +-
 .../netfilter/nf_conntrack_proto_icmpv6.c     |  2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c       |  6 ++---
 net/netfilter/nf_conntrack_core.c             |  6 ++---
 net/netfilter/nf_conntrack_proto_generic.c    |  2 +-
 net/netfilter/nf_conntrack_proto_sctp.c       | 14 +++++------
 net/netfilter/nf_conntrack_proto_tcp.c        | 24 +++++++++----------
 net/netfilter/nf_conntrack_proto_udp.c        |  4 ++--
 net/netfilter/nf_conntrack_standalone.c       |  2 +-
 19 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index aa459177c3f8e..5da25ad503090 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -66,13 +66,13 @@ void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size = 0;
-int ip_conntrack_max;
+unsigned int ip_conntrack_htable_size __read_mostly = 0;
+int ip_conntrack_max __read_mostly;
 struct list_head *ip_conntrack_hash;
 static kmem_cache_t *ip_conntrack_cachep __read_mostly;
 static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid;
+unsigned int ip_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
 static int ip_conntrack_vmalloc;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index f891308b5e4c1..36f2b5e5d80ad 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned int ip_ct_generic_timeout = 600*HZ;
+unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 23f1c504586db..09c40ebe3345f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -21,7 +21,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned int ip_ct_icmp_timeout = 30*HZ;
+unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 2d3612cd5f18c..b908a4842e186 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned int ip_ct_sctp_timeout_closed            =  10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait       =   3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed     =   3 SECS;
-static unsigned int ip_ct_sctp_timeout_established       =   5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_closed __read_mostly           = 10 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly      =  3 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly    =  3 SECS;
+static unsigned int ip_ct_sctp_timeout_established __read_mostly      =  5 DAYS;
+static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly    = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly    = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
 
 static const unsigned int * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 9de81ff645d50..75a7237eb8c15 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock);
 /* "Be conservative in what you do, 
     be liberal in what you accept from others." 
     If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal = 0;
+int ip_ct_tcp_be_liberal __read_mostly = 0;
 
 /* When connection is picked up from the middle, how many packets are required
    to pass in each direction when we assume we are in sync - if any side uses
    window scaling, we lost the game. 
    If it is set to zero, we disable picking up already established 
    connections. */
-int ip_ct_tcp_loose = 3;
+int ip_ct_tcp_loose __read_mostly = 3;
 
 /* Max number of the retransmitted packets without receiving an (acceptable) 
    ACK from the destination. If this number is reached, a shorter timer 
    will be started. */
-int ip_ct_tcp_max_retrans = 3;
+int ip_ct_tcp_max_retrans __read_mostly = 3;
 
   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
      closely.  They're more complex. --RR */
@@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned int ip_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned int ip_ct_tcp_timeout_established =   5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned int ip_ct_tcp_timeout_close =        10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
+unsigned int ip_ct_tcp_timeout_established __read_mostly =   5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
+unsigned int ip_ct_tcp_timeout_close __read_mostly =        10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds 
    to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
  
 static const unsigned int * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index e58e52f145536..d0e8a16970ec3 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -18,8 +18,8 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned int ip_ct_udp_timeout = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream = 180*HZ;
+unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7a9fa04a467a2..3f5d495b853b8 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -534,7 +534,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
 
 /* Sysctl support */
 
-int ip_conntrack_checksum = 1;
+int ip_conntrack_checksum __read_mostly = 1;
 
 #ifdef CONFIG_SYSCTL
 
@@ -563,7 +563,7 @@ extern unsigned int ip_ct_udp_timeout_stream;
 /* From ip_conntrack_proto_icmp.c */
 extern unsigned int ip_ct_icmp_timeout;
 
-/* From ip_conntrack_proto_icmp.c */
+/* From ip_conntrack_proto_generic.c */
 extern unsigned int ip_ct_generic_timeout;
 
 /* Log invalid packets of a given protocol */
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 276a964ee6cfc..80060cbe4a07d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -53,7 +53,7 @@ struct ipq_queue_entry {
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
 static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
 static int peer_pid;
 static unsigned int copy_range;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 663a73ee3f2fc..790f00d500c3c 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -25,7 +25,7 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-unsigned long nf_ct_icmp_timeout = 30*HZ;
+unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index c01c126224e2a..d322e8395794c 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -57,7 +57,7 @@ struct ipq_queue_entry {
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
 static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
 static int peer_pid;
 static unsigned int copy_range;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c2ab38ff46af5..e5e53fff9e38f 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -335,7 +335,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = {
 /* From nf_conntrack_proto_icmpv6.c */
 extern unsigned int nf_ct_icmpv6_timeout;
 
-/* From nf_conntrack_frag6.c */
+/* From nf_conntrack_reasm.c */
 extern unsigned int nf_ct_frag6_timeout;
 extern unsigned int nf_ct_frag6_low_thresh;
 extern unsigned int nf_ct_frag6_high_thresh;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index ef18a7b7014b1..34d447208ffdf 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -33,7 +33,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 
-unsigned long nf_ct_icmpv6_timeout = 30*HZ;
+unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 7a4e4c2e31972..bf93c1ea6be9b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -54,9 +54,9 @@
 #define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
 #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
 
-unsigned int nf_ct_frag6_high_thresh = 256*1024;
-unsigned int nf_ct_frag6_low_thresh = 192*1024;
-unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
+unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
+unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
+unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
 
 struct nf_ct_frag6_skb_cb
 {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8f2261965a68b..3b64dbee66203 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -77,12 +77,12 @@ LIST_HEAD(nf_conntrack_expect_list);
 struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
 struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
 static LIST_HEAD(helpers);
-unsigned int nf_conntrack_htable_size = 0;
-int nf_conntrack_max;
+unsigned int nf_conntrack_htable_size __read_mostly = 0;
+int nf_conntrack_max __read_mostly;
 struct list_head *nf_conntrack_hash;
 static kmem_cache_t *nf_conntrack_expect_cachep;
 struct nf_conn nf_conntrack_untracked;
-unsigned int nf_ct_log_invalid;
+unsigned int nf_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
 static int nf_conntrack_vmalloc;
 
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 46bc27e2756d7..26408bb0955bf 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned int nf_ct_generic_timeout = 600*HZ;
+unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 9bd8a7877fd54..af568777372be 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -64,13 +64,13 @@ static const char *sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned int nf_ct_sctp_timeout_closed            =  10 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_wait       =   3 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_echoed     =   3 SECS;
-static unsigned int nf_ct_sctp_timeout_established       =   5 DAYS;
-static unsigned int nf_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_closed __read_mostly          =  10 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly     =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly   =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_established __read_mostly     =   5 DAYS;
+static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly   = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly   = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
 
 static unsigned int * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 308d2abd7ee58..9fc0ee61f92a4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -57,19 +57,19 @@ static DEFINE_RWLOCK(tcp_lock);
 /* "Be conservative in what you do, 
     be liberal in what you accept from others." 
     If it's non-zero, we mark only out of window RST segments as INVALID. */
-int nf_ct_tcp_be_liberal = 0;
+int nf_ct_tcp_be_liberal __read_mostly = 0;
 
 /* When connection is picked up from the middle, how many packets are required
    to pass in each direction when we assume we are in sync - if any side uses
    window scaling, we lost the game. 
    If it is set to zero, we disable picking up already established 
    connections. */
-int nf_ct_tcp_loose = 3;
+int nf_ct_tcp_loose __read_mostly = 3;
 
 /* Max number of the retransmitted packets without receiving an (acceptable) 
    ACK from the destination. If this number is reached, a shorter timer 
    will be started. */
-int nf_ct_tcp_max_retrans = 3;
+int nf_ct_tcp_max_retrans __read_mostly = 3;
 
   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
      closely.  They're more complex. --RR */
@@ -92,19 +92,19 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned int nf_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned int nf_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned int nf_ct_tcp_timeout_established =   5 DAYS;
-unsigned int nf_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned int nf_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned int nf_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned int nf_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned int nf_ct_tcp_timeout_close =        10 SECS;
+unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
+unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
+unsigned int nf_ct_tcp_timeout_established __read_mostly =   5 DAYS;
+unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
+unsigned int nf_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
+unsigned int nf_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
+unsigned int nf_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
+unsigned int nf_ct_tcp_timeout_close __read_mostly =        10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds 
    to ~13-30min depending on RTO. */
-unsigned int nf_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
  
 static unsigned int * tcp_timeouts[]
 = { NULL,                              /* TCP_CONNTRACK_NONE */
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index d36e03139e8b5..d28981cf9af50 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -27,8 +27,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned int nf_ct_udp_timeout = 30*HZ;
-unsigned int nf_ct_udp_timeout_stream = 180*HZ;
+unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 4ef8366999623..9a1de0ca475b7 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -428,7 +428,7 @@ static struct file_operations ct_cpu_seq_fops = {
 
 /* Sysctl support */
 
-int nf_conntrack_checksum = 1;
+int nf_conntrack_checksum __read_mostly = 1;
 
 #ifdef CONFIG_SYSCTL
 
-- 
GitLab


From 461d8837faac141f4676bf451b3339d0e48656d1 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:09:49 -0700
Subject: [PATCH 0665/1063] [IPV6] address: Convert address addition to new
 netlink api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 72 ++++++++++++++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fc9cff3426c4c..52ba96a64a1f6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2868,6 +2868,29 @@ static void addrconf_verify(unsigned long foo)
 	spin_unlock_bh(&addrconf_verify_lock);
 }
 
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+{
+	struct in6_addr *pfx = NULL;
+
+	if (addr)
+		pfx = nla_data(addr);
+
+	if (local) {
+		if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
+			pfx = NULL;
+		else
+			pfx = nla_data(local);
+	}
+
+	return pfx;
+}
+
+static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = {
+	[IFA_ADDRESS]		= { .len = sizeof(struct in6_addr) },
+	[IFA_LOCAL]		= { .len = sizeof(struct in6_addr) },
+	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
+};
+
 static int
 inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
@@ -2945,46 +2968,41 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx,
 static int
 inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr  **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx;
-	__u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME;
+	u32 valid_lft, preferred_lft;
+	int err;
 
-	pfx = NULL;
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
-	}
-	if (rta[IFA_LOCAL-1]) {
-		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) ||
-		    (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
-	}
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		return err;
+
+	ifm = nlmsg_data(nlh);
+	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
 	if (pfx == NULL)
 		return -EINVAL;
 
-	if (rta[IFA_CACHEINFO-1]) {
+	if (tb[IFA_CACHEINFO]) {
 		struct ifa_cacheinfo *ci;
-		if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci))
-			return -EINVAL;
-		ci = RTA_DATA(rta[IFA_CACHEINFO-1]);
+
+		ci = nla_data(tb[IFA_CACHEINFO]);
 		valid_lft = ci->ifa_valid;
-		prefered_lft = ci->ifa_prefered;
+		preferred_lft = ci->ifa_prefered;
+	} else {
+		preferred_lft = INFINITY_LIFE_TIME;
+		valid_lft = INFINITY_LIFE_TIME;
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		int ret;
-		ret = inet6_addr_modify(ifm->ifa_index, pfx,
-					prefered_lft, valid_lft);
-		if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE))
-			return ret;
+		err = inet6_addr_modify(ifm->ifa_index, pfx,
+					preferred_lft, valid_lft);
+		if (err == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE))
+			return err;
 	}
 
 	return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
-			      prefered_lft, valid_lft);
-
+			      preferred_lft, valid_lft);
 }
 
 /* Maximum length of ifa_cacheinfo attributes */
-- 
GitLab


From b933f7166ba376967f88a598ff04256f6d1b0b21 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:10:19 -0700
Subject: [PATCH 0666/1063] [IPV6] address: Convert address deletion to new
 netlink api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 52ba96a64a1f6..61627036eb2bf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2894,22 +2894,17 @@ static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = {
 static int
 inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx;
+	int err;
 
-	pfx = NULL;
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
-	}
-	if (rta[IFA_LOCAL-1]) {
-		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) ||
-		    (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
-	}
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		return err;
+
+	ifm = nlmsg_data(nlh);
+	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
 	if (pfx == NULL)
 		return -EINVAL;
 
-- 
GitLab


From 1b29fc2c8bf42d8fc5310f3770d7fd7ddf4386c0 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:10:50 -0700
Subject: [PATCH 0667/1063] [IPV6] address: Convert address lookup to new
 netlink api

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 52 +++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 28 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 61627036eb2bf..b2c38b3edb390 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3234,58 +3234,54 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	return inet6_dump_addr(skb, cb, type);
 }
 
-static int inet6_rtm_getaddr(struct sk_buff *in_skb,
-		struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+			     void *arg)
 {
-	struct rtattr **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *addr = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_ifaddr *ifa;
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
+	int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE;
 	int err;
 
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr))
-			return -EINVAL;
-		addr = RTA_DATA(rta[IFA_ADDRESS-1]);
-	}
-	if (rta[IFA_LOCAL-1]) {
-		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) ||
-		    (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr))))
-			return -EINVAL;
-		addr = RTA_DATA(rta[IFA_LOCAL-1]);
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		goto errout;
+
+	addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+	if (addr == NULL) {
+		err = -EINVAL;
+		goto errout;
 	}
-	if (addr == NULL)
-		return -EINVAL;
 
+	ifm = nlmsg_data(nlh);
 	if (ifm->ifa_index)
 		dev = __dev_get_by_index(ifm->ifa_index);
 
-	if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL)
-		return -EADDRNOTAVAIL;
+	if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+		err = -EADDRNOTAVAIL;
+		goto errout;
+	}
 
-	if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) {
+	if ((skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL)) == NULL) {
 		err = -ENOBUFS;
-		goto out;
+		goto errout_ifa;
 	}
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
 				nlh->nlmsg_seq, RTM_NEWADDR, 0);
 	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
+		kfree_skb(skb);
+		goto errout_ifa;
 	}
 
 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-out:
+errout_ifa:
 	in6_ifa_put(ifa);
+errout:
 	return err;
-out_free:
-	kfree_skb(skb);
-	goto out;
 }
 
 static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
-- 
GitLab


From 85486af00b620ebe26fe0fa72172c115667a2fba Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:11:24 -0700
Subject: [PATCH 0668/1063] [IPV6] address: Add put_cacheinfo() to dump struct
 cacheinfo

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 72 ++++++++++++++++++++++++---------------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b2c38b3edb390..d546f0e74530c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3000,6 +3000,21 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 			      preferred_lft, valid_lft);
 }
 
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+			 unsigned long tstamp, u32 preferred, u32 valid)
+{
+	struct ifa_cacheinfo ci;
+
+	ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
+			+ TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
+			+ TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.ifa_prefered = preferred;
+	ci.ifa_valid = valid;
+
+	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
 /* Maximum length of ifa_cacheinfo attributes */
 #define INET6_IFADDR_RTA_SPACE \
 		RTA_SPACE(16) /* IFA_ADDRESS */ + \
@@ -3010,8 +3025,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
 	unsigned char	 *b = skb->tail;
+	u32 preferred, valid;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
@@ -3028,23 +3043,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 	ifm->ifa_index = ifa->idev->dev->ifindex;
 	RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
 	if (!(ifa->flags&IFA_F_PERMANENT)) {
-		ci.ifa_prefered = ifa->prefered_lft;
-		ci.ifa_valid = ifa->valid_lft;
-		if (ci.ifa_prefered != INFINITY_LIFE_TIME) {
+		preferred = ifa->prefered_lft;
+		valid = ifa->valid_lft;
+		if (preferred != INFINITY_LIFE_TIME) {
 			long tval = (jiffies - ifa->tstamp)/HZ;
-			ci.ifa_prefered -= tval;
-			if (ci.ifa_valid != INFINITY_LIFE_TIME)
-				ci.ifa_valid -= tval;
+			preferred -= tval;
+			if (valid != INFINITY_LIFE_TIME)
+				valid -= tval;
 		}
 	} else {
-		ci.ifa_prefered = INFINITY_LIFE_TIME;
-		ci.ifa_valid = INFINITY_LIFE_TIME;
+		preferred = INFINITY_LIFE_TIME;
+		valid = INFINITY_LIFE_TIME;
 	}
-	ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+
+	if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+		goto rtattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	return skb->len;
 
@@ -3059,7 +3073,6 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
 	unsigned char	 *b = skb->tail;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
@@ -3072,15 +3085,11 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 		ifm->ifa_scope = RT_SCOPE_SITE;
 	ifm->ifa_index = ifmca->idev->dev->ifindex;
 	RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
-	ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.ifa_prefered = INFINITY_LIFE_TIME;
-	ci.ifa_valid = INFINITY_LIFE_TIME;
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+
+	if (put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
+		goto rtattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	return skb->len;
 
@@ -3095,7 +3104,6 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
 	unsigned char	 *b = skb->tail;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
@@ -3108,15 +3116,11 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 		ifm->ifa_scope = RT_SCOPE_SITE;
 	ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
 	RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
-	ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.ifa_prefered = INFINITY_LIFE_TIME;
-	ci.ifa_valid = INFINITY_LIFE_TIME;
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+
+	if (put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
+		goto rtattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	return skb->len;
 
-- 
GitLab


From 101bb229691c438bce4d7f13006494dd4de6910a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:11:52 -0700
Subject: [PATCH 0669/1063] [IPV6] address: Add put_ifaddrmsg() and rt_scope()

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 77 +++++++++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 34 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d546f0e74530c..ca7ecf2f3e82b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3000,6 +3000,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 			      preferred_lft, valid_lft);
 }
 
+static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
+			  u8 scope, int ifindex)
+{
+	struct ifaddrmsg *ifm;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifa_family = AF_INET6;
+	ifm->ifa_prefixlen = prefixlen;
+	ifm->ifa_flags = flags;
+	ifm->ifa_scope = scope;
+	ifm->ifa_index = ifindex;
+}
+
 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
 			 unsigned long tstamp, u32 preferred, u32 valid)
 {
@@ -3015,6 +3028,18 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
 }
 
+static inline int rt_scope(int ifa_scope)
+{
+	if (ifa_scope & IFA_HOST)
+		return RT_SCOPE_HOST;
+	else if (ifa_scope & IFA_LINK)
+		return RT_SCOPE_LINK;
+	else if (ifa_scope & IFA_SITE)
+		return RT_SCOPE_SITE;
+	else
+		return RT_SCOPE_UNIVERSE;
+}
+
 /* Maximum length of ifa_cacheinfo attributes */
 #define INET6_IFADDR_RTA_SPACE \
 		RTA_SPACE(16) /* IFA_ADDRESS */ + \
@@ -3023,24 +3048,14 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 	u32 preferred, valid;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;
-	ifm->ifa_prefixlen = ifa->prefix_len;
-	ifm->ifa_flags = ifa->flags;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ifa->scope&IFA_HOST)
-		ifm->ifa_scope = RT_SCOPE_HOST;
-	else if (ifa->scope&IFA_LINK)
-		ifm->ifa_scope = RT_SCOPE_LINK;
-	else if (ifa->scope&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifa->idev->dev->ifindex;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
+		      ifa->idev->dev->ifindex);
+
 	RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
 	if (!(ifa->flags&IFA_F_PERMANENT)) {
 		preferred = ifa->prefered_lft;
@@ -3071,19 +3086,16 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 				u32 pid, u32 seq, int event, u16 flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
+	u8 scope = RT_SCOPE_UNIVERSE;
+	int ifindex = ifmca->idev->dev->ifindex;
+
+	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+		scope = RT_SCOPE_SITE;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;	
-	ifm->ifa_prefixlen = 128;
-	ifm->ifa_flags = IFA_F_PERMANENT;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifmca->idev->dev->ifindex;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
 
 	if (put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
@@ -3102,19 +3114,16 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 				u32 pid, u32 seq, int event, unsigned int flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
+	u8 scope = RT_SCOPE_UNIVERSE;
+	int ifindex = ifaca->aca_idev->dev->ifindex;
+
+	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
+		scope = RT_SCOPE_SITE;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;	
-	ifm->ifa_prefixlen = 128;
-	ifm->ifa_flags = IFA_F_PERMANENT;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
 
 	if (put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
-- 
GitLab


From 0ab6803bc90a8ee5c860ef09334b30007d1746be Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:12:35 -0700
Subject: [PATCH 0670/1063] [IPV6] address: Convert address dumping to new
 netlink api

Replaces INET6_IFADDR_RTA_SPACE with a new function calculating
the total required message size for all address messages.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 79 +++++++++++++++++++--------------------------
 1 file changed, 33 insertions(+), 46 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ca7ecf2f3e82b..75a69bac82a8c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3040,23 +3040,27 @@ static inline int rt_scope(int ifa_scope)
 		return RT_SCOPE_UNIVERSE;
 }
 
-/* Maximum length of ifa_cacheinfo attributes */
-#define INET6_IFADDR_RTA_SPACE \
-		RTA_SPACE(16) /* IFA_ADDRESS */ + \
-		RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */
+static inline int inet6_ifaddr_msgsize(void)
+{
+	return nlmsg_total_size(sizeof(struct ifaddrmsg) +
+				nla_total_size(16) +
+				nla_total_size(sizeof(struct ifa_cacheinfo)) +
+				128);
+}
 
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 	u32 preferred, valid;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
 	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
 		      ifa->idev->dev->ifindex);
 
-	RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
 	if (!(ifa->flags&IFA_F_PERMANENT)) {
 		preferred = ifa->prefered_lft;
 		valid = ifa->valid_lft;
@@ -3071,72 +3075,57 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 		valid = INFINITY_LIFE_TIME;
 	}
 
-	if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
-		goto rtattr_failure;
+	if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
+	    put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+		return nlmsg_cancel(skb, nlh);
 
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
 }
 
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 				u32 pid, u32 seq, int event, u16 flags)
 {
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 	u8 scope = RT_SCOPE_UNIVERSE;
 	int ifindex = ifmca->idev->dev->ifindex;
 
 	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-	RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	if (put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+	if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
+	    put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
 			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
-		goto rtattr_failure;
+		return nlmsg_cancel(skb, nlh);
 
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
 }
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 				u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 	u8 scope = RT_SCOPE_UNIVERSE;
 	int ifindex = ifaca->aca_idev->dev->ifindex;
 
 	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-	RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	if (put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+	if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
+	    put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
 			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
-		goto rtattr_failure;
+		return nlmsg_cancel(skb, nlh);
 
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
 }
 
 enum addr_type_t
@@ -3256,7 +3245,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 	struct net_device *dev = NULL;
 	struct inet6_ifaddr *ifa;
 	struct sk_buff *skb;
-	int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE;
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3278,7 +3266,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 		goto errout;
 	}
 
-	if ((skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL)) == NULL) {
+	if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
 		err = -ENOBUFS;
 		goto errout_ifa;
 	}
@@ -3300,10 +3288,9 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
 
-- 
GitLab


From 680a27a23af45307095ae432dd0bc859e1fbe219 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:13:07 -0700
Subject: [PATCH 0671/1063] [IPV6] address: Allow address changes while device
 is administrative down

Same behaviour as IPv4, using IFF_UP is a no-no anyway.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 75a69bac82a8c..bb18b9c3a5cba 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1886,9 +1886,6 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
 	if ((dev = __dev_get_by_index(ifindex)) == NULL)
 		return -ENODEV;
 	
-	if (!(dev->flags&IFF_UP))
-		return -ENETDOWN;
-
 	if ((idev = addrconf_add_dev(dev)) == NULL)
 		return -ENOBUFS;
 
@@ -2922,9 +2919,6 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx,
 	if ((dev = __dev_get_by_index(ifindex)) == NULL)
 		return -ENODEV;
 
-	if (!(dev->flags&IFF_UP))
-		return -ENETDOWN;
-
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
-- 
GitLab


From 7198f8cec12ccec6d6f2e18c08ecc8c66c8aaf93 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 18 Sep 2006 00:13:46 -0700
Subject: [PATCH 0672/1063] [IPV6] address: Support NLM_F_EXCL when adding
 addresses

iproute2 doesn't provide the NLM_F_CREATE flag when adding addresses,
it is assumed to be implied. The existing code issues a check on
said flag when the modify operation fails (likely due to ENOENT)
before continuing to create it, this leads to a hard to predict
result, therefore the NLM_F_CREATE check is removed.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bb18b9c3a5cba..1e5a296d0a82d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2908,24 +2908,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
 }
 
-static int
-inet6_addr_modify(int ifindex, struct in6_addr *pfx,
-		  __u32 prefered_lft, __u32 valid_lft)
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 prefered_lft,
+			     u32 valid_lft)
 {
-	struct inet6_ifaddr *ifp = NULL;
-	struct net_device *dev;
 	int ifa_flags = 0;
 
-	if ((dev = __dev_get_by_index(ifindex)) == NULL)
-		return -ENODEV;
-
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
-	ifp = ipv6_get_ifaddr(pfx, dev, 1);
-	if (ifp == NULL)
-		return -ENOENT;
-
 	if (valid_lft == INFINITY_LIFE_TIME)
 		ifa_flags = IFA_F_PERMANENT;
 	else if (valid_lft >= 0x7FFFFFFF/HZ)
@@ -2947,7 +2937,6 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx,
 	spin_unlock_bh(&ifp->lock);
 	if (!(ifp->flags&IFA_F_TENTATIVE))
 		ipv6_ifa_notify(0, ifp);
-	in6_ifa_put(ifp);
 
 	addrconf_verify(0);
 
@@ -2960,6 +2949,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx;
+	struct inet6_ifaddr *ifa;
+	struct net_device *dev;
 	u32 valid_lft, preferred_lft;
 	int err;
 
@@ -2983,15 +2974,29 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		valid_lft = INFINITY_LIFE_TIME;
 	}
 
-	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		err = inet6_addr_modify(ifm->ifa_index, pfx,
-					preferred_lft, valid_lft);
-		if (err == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE))
-			return err;
+	dev =  __dev_get_by_index(ifm->ifa_index);
+	if (dev == NULL)
+		return -ENODEV;
+
+	ifa = ipv6_get_ifaddr(pfx, dev, 1);
+	if (ifa == NULL) {
+		/*
+		 * It would be best to check for !NLM_F_CREATE here but
+		 * userspace alreay relies on not having to provide this.
+		 */
+		return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
+				      preferred_lft, valid_lft);
 	}
 
-	return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
-			      preferred_lft, valid_lft);
+	if (nlh->nlmsg_flags & NLM_F_EXCL ||
+	    !(nlh->nlmsg_flags & NLM_F_REPLACE))
+		err = -EEXIST;
+	else
+		err = inet6_addr_modify(ifa, preferred_lft, valid_lft);
+
+	in6_ifa_put(ifa);
+
+	return err;
 }
 
 static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
-- 
GitLab


From 161643660129dd7d98f0b12418c0a2710ffa7db6 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 18 Sep 2006 00:40:38 -0700
Subject: [PATCH 0673/1063] [SCTP]: Cleanups

This patch contains the following cleanups:
- make the following needlessly global function static:
  - socket.c: sctp_apply_peer_addr_params()
- add proper prototypes for the several global functions in
  include/net/sctp/sctp.h

Note that this fixes wrong prototypes for the following functions:
- sctp_snmp_proc_exit()
- sctp_eps_proc_exit()
- sctp_assocs_proc_exit()

The latter was spotted by the GNU C compiler and reported
by David Woodhouse.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 13 +++++++++++++
 net/sctp/ipv6.c         |  1 -
 net/sctp/protocol.c     |  7 -------
 net/sctp/socket.c       | 14 +++++++-------
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index e274fd479990b..ee68a31240765 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -128,6 +128,8 @@ extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
 				     int flags);
 extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
 extern int sctp_register_pf(struct sctp_pf *, sa_family_t);
+int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
+                        void *ptr);
 
 /*
  * sctp/socket.c
@@ -177,6 +179,17 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
 void sctp_backlog_migrate(struct sctp_association *assoc,
 			  struct sock *oldsk, struct sock *newsk);
 
+/*
+ * sctp/proc.c
+ */
+int sctp_snmp_proc_init(void);
+void sctp_snmp_proc_exit(void);
+int sctp_eps_proc_init(void);
+void sctp_eps_proc_exit(void);
+int sctp_assocs_proc_init(void);
+void sctp_assocs_proc_exit(void);
+
+
 /*
  *  Section:  Macros, externs, and inlines
  */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 99c0cefc04e07..fd87e3ceb56e6 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -78,7 +78,6 @@
 
 #include <asm/uaccess.h>
 
-extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *);
 static struct notifier_block sctp_inet6addr_notifier = {
 	.notifier_call = sctp_inetaddr_event,
 };
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d9dd4c47bc293..fac7674438a4c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -82,13 +82,6 @@ static struct sctp_af *sctp_af_v6_specific;
 kmem_cache_t *sctp_chunk_cachep __read_mostly;
 kmem_cache_t *sctp_bucket_cachep __read_mostly;
 
-extern int sctp_snmp_proc_init(void);
-extern int sctp_snmp_proc_exit(void);
-extern int sctp_eps_proc_init(void);
-extern int sctp_eps_proc_exit(void);
-extern int sctp_assocs_proc_init(void);
-extern int sctp_assocs_proc_exit(void);
-
 /* Return the address of the control sock. */
 struct sock *sctp_get_ctl_sock(void)
 {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7c1dbb1d10df9..79c3e072cf282 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2081,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
  */
-int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
-				struct sctp_transport   *trans,
-				struct sctp_association *asoc,
-				struct sctp_sock        *sp,
-				int                      hb_change,
-				int                      pmtud_change,
-				int                      sackdelay_change)
+static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
+				       struct sctp_transport   *trans,
+				       struct sctp_association *asoc,
+				       struct sctp_sock        *sp,
+				       int                      hb_change,
+				       int                      pmtud_change,
+				       int                      sackdelay_change)
 {
 	int error;
 
-- 
GitLab


From 4eb327b517cf85f6cb7dcd5691e7b748cbe8c343 Mon Sep 17 00:00:00 2001
From: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Date: Tue, 19 Sep 2006 10:24:19 -0700
Subject: [PATCH 0674/1063] [SELINUX]: Fix bug in security_sid_mls_copy

The following fixes a bug where random mem is being tampered with in the
non-mls case; encountered by Joshua Brindle on a gentoo box.

Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/services.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 27ee28ccf2669..7eb69a602d8fb 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1841,7 +1841,7 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid)
 	u32 len;
 	int rc = 0;
 
-	if (!ss_initialized) {
+	if (!ss_initialized || !selinux_mls_enabled) {
 		*new_sid = sid;
 		goto out;
 	}
-- 
GitLab


From 1ef9696c909060ccdae3ade245ca88692b49285b Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Tue, 19 Sep 2006 12:52:50 -0700
Subject: [PATCH 0675/1063] [TCP]: Send ACKs each 2nd received segment.

It does not affect either mss-sized connections (obviously) or
connections controlled by Nagle (because there is only one small
segment in flight).

The idea is to record the fact that a small segment arrives on a
connection, where one small segment has already been received and
still not-ACKed. In this case ACK is forced after tcp_recvmsg() drains
receive buffer.

In other words, it is a "soft" each-2nd-segment ACK, which is enough
to preserve ACK clock even when ABC is enabled.

Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 3 ++-
 net/ipv4/tcp.c                     | 7 +++++--
 net/ipv4/tcp_input.c               | 2 ++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 9bf73fe50948e..de4e83b6da4be 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -147,7 +147,8 @@ extern struct sock *inet_csk_clone(struct sock *sk,
 enum inet_csk_ack_state_t {
 	ICSK_ACK_SCHED	= 1,
 	ICSK_ACK_TIMER  = 2,
-	ICSK_ACK_PUSHED = 4
+	ICSK_ACK_PUSHED = 4,
+	ICSK_ACK_PUSHED2 = 8
 };
 
 extern void inet_csk_init_xmit_timers(struct sock *sk,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 29e3d606db78d..66e9a729f6dfa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		     * receive buffer and there was a small segment
 		     * in queue.
 		     */
-		    (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
-		     !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+		    (copied > 0 &&
+		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
+		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+		       !icsk->icsk_ack.pingpong)) &&
+		      !atomic_read(&sk->sk_rmem_alloc)))
 			time_to_ack = 1;
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 511b738f118a9..b3def0df14fb8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 				return;
 			}
 		}
+		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
+			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
 		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
 	}
 }
-- 
GitLab


From a1e59abf824969554b90facd44a4ab16e265afa4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 19 Sep 2006 12:57:34 -0700
Subject: [PATCH 0676/1063] [XFRM]: Fix wildcard as tunnel source

Hashing SAs by source address breaks templates with wildcards as tunnel
source since the source address used for hashing/lookup is still 0/0.
Move source address lookup to xfrm_tmpl_resolve_one() so we can use the
real address in the lookup.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 13 +++++++++++++
 net/ipv4/xfrm4_policy.c | 20 ++++++++++++++++++++
 net/ipv4/xfrm4_state.c  | 15 ---------------
 net/ipv6/xfrm6_policy.c | 21 +++++++++++++++++++++
 net/ipv6/xfrm6_state.c  | 16 ----------------
 net/xfrm/xfrm_policy.c  | 21 +++++++++++++++++++++
 6 files changed, 75 insertions(+), 31 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4d6dc627df9b0..11e0b1d6bd473 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -222,6 +222,7 @@ struct xfrm_policy_afinfo {
 	struct dst_ops		*dst_ops;
 	void			(*garbage_collect)(void);
 	int			(*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl);
+	int			(*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr);
 	struct dst_entry	*(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
 	int			(*bundle_create)(struct xfrm_policy *policy, 
 						 struct xfrm_state **xfrm, 
@@ -630,6 +631,18 @@ secpath_reset(struct sk_buff *skb)
 #endif
 }
 
+static inline int
+xfrm_addr_any(xfrm_address_t *addr, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return addr->a4 == 0;
+	case AF_INET6:
+		return ipv6_addr_any((struct in6_addr *)&addr->a6);
+	}
+	return 0;
+}
+
 static inline int
 __xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x)
 {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 479598566f1de..eabcd27b17673 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
 	return __ip_route_output_key((struct rtable**)dst, fl);
 }
 
+static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+	struct rtable *rt;
+	struct flowi fl_tunnel = {
+		.nl_u = {
+			.ip4_u = {
+				.daddr = daddr->a4,
+			},
+		},
+	};
+
+	if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
+		saddr->a4 = rt->rt_src;
+		dst_release(&rt->u.dst);
+		return 0;
+	}
+	return -EHOSTUNREACH;
+}
+
 static struct dst_entry *
 __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 {
@@ -298,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 	.family = 		AF_INET,
 	.dst_ops =		&xfrm4_dst_ops,
 	.dst_lookup =		xfrm4_dst_lookup,
+	.get_saddr =		xfrm4_get_saddr,
 	.find_bundle = 		__xfrm4_find_bundle,
 	.bundle_create =	__xfrm4_bundle_create,
 	.decode_session =	_decode_session4,
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 6a2a4ab42772a..fe2034494d085 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,21 +42,6 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.saddr = tmpl->saddr;
 	if (x->props.saddr.a4 == 0)
 		x->props.saddr.a4 = saddr->a4;
-	if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) {
-		struct rtable *rt;
-	        struct flowi fl_tunnel = {
-        	        .nl_u = {
-        			.ip4_u = {
-					.daddr = x->id.daddr.a4,
-				}
-			}
-		};
-		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
-		                     &fl_tunnel, AF_INET)) {
-			x->props.saddr.a4 = rt->rt_src;
-			dst_release(&rt->u.dst);
-		}
-	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 9391c4c94febe..6a252e2134d11 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -34,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
 	return err;
 }
 
+static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+	struct rt6_info *rt;
+	struct flowi fl_tunnel = {
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *(struct in6_addr *)&daddr->a6,
+			},
+		},
+	};
+
+	if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
+		ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6,
+			       (struct in6_addr *)&saddr->a6);
+		dst_release(&rt->u.dst);
+		return 0;
+	}
+	return -EHOSTUNREACH;
+}
+
 static struct dst_entry *
 __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 {
@@ -362,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.family =		AF_INET6,
 	.dst_ops =		&xfrm6_dst_ops,
 	.dst_lookup =		xfrm6_dst_lookup,
+	.get_saddr = 		xfrm6_get_saddr,
 	.find_bundle =		__xfrm6_find_bundle,
 	.bundle_create =	__xfrm6_bundle_create,
 	.decode_session =	_decode_session6,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index d88cd92c864e6..711bfafb2472d 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -42,22 +42,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
 	if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
 		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
-	if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) {
-		struct rt6_info *rt;
-		struct flowi fl_tunnel = {
-			.nl_u = {
-				.ip6_u = {
-					.daddr = *(struct in6_addr *)daddr,
-				}
-			}
-		};
-		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
-		                     &fl_tunnel, AF_INET6)) {
-			ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr,
-			               (struct in6_addr *)&x->props.saddr);
-			dst_release(&rt->u.dst);
-		}
-	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET6;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 537854fe47ca5..b6e2e79d72612 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1107,6 +1107,20 @@ int __xfrm_sk_clone_policy(struct sock *sk)
 	return 0;
 }
 
+static int
+xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
+	       unsigned short family)
+{
+	int err;
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	err = afinfo->get_saddr(local, remote);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
 /* Resolve list of templates for the flow, given policy. */
 
 static int
@@ -1118,6 +1132,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
 	int i, error;
 	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
 	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+	xfrm_address_t tmp;
 
 	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
 		struct xfrm_state *x;
@@ -1128,6 +1143,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
 		if (tmpl->mode == XFRM_MODE_TUNNEL) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
+			if (xfrm_addr_any(local, family)) {
+				error = xfrm_get_saddr(&tmp, remote, family);
+				if (error)
+					goto fail;
+				local = &tmp;
+			}
 		}
 
 		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
-- 
GitLab


From 23d06e3b986677ec57007a24891fa9deb09ac973 Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:04:54 -0700
Subject: [PATCH 0677/1063] [DCCP] ACKVEC: fix ackvector length calculation

Fix ackvector length calculation upon receiving an "ack-of-ack".  This
patch prevents the ackvector from growing too large, which would cause
it not to be inserted into packets.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 8c211c58893b2..8dab723cc7048 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -353,11 +353,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
 {
 	struct dccp_ackvec_record *next;
 
-	av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1;
-	if (av->dccpav_buf_tail == 0)
-		av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
-
-	av->dccpav_vec_len -= avr->dccpavr_sent_len;
+	/* sort out vector length */
+	if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr)
+		av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head;
+	else
+		av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1
+				     - av->dccpav_buf_head
+				     + avr->dccpavr_ack_ptr;
 
 	/* free records */
 	list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
-- 
GitLab


From 8e27e4650cb7e73aa4dd97d860539e7605ac7e39 Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:05:35 -0700
Subject: [PATCH 0678/1063] [DCCP] ackvec: Fix how
 DCCP_ACKVEC_STATE_NOT_RECEIVED is used

Fix the way state is masked out.  DCCP_ACKVEC_STATE_NOT_RECEIVED is
defined as it appears in the packet, therefore bit shifting is not
required.  This fix allows CCID2 to correctly detect losses.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c      | 3 +--
 net/dccp/ccids/ccid2.c | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 8dab723cc7048..bc5ff12124180 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -436,8 +436,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 		break;
 found:
 		if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
-			const u8 state = (*vector &
-					  DCCP_ACKVEC_STATE_MASK) >> 6;
+			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
 			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
 #ifdef CONFIG_IP_DCCP_DEBUG
 				struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e9615627dcd65..b1d90c07535ee 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -582,8 +582,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = (*vector &
-						  DCCP_ACKVEC_STATE_MASK) >> 6;
+				const u8 state = *vector &
+						 DCCP_ACKVEC_STATE_MASK;
 
 				/* new packet received or marked */
 				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
-- 
GitLab


From 4a0a50fb43912b4a593d2416c507a198fe607a6d Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:06:16 -0700
Subject: [PATCH 0679/1063] [DCCP] ackvec: Remove unused variables

Get rid of unused variables in ackvector state.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c | 5 ++---
 net/dccp/ackvec.h | 4 +---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index bc5ff12124180..4d176d33983fe 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -142,14 +142,13 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
 
 	if (av != NULL) {
-		av->dccpav_buf_head	=
-			av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
+		av->dccpav_buf_head	= DCCP_MAX_ACKVEC_LEN - 1;
 		av->dccpav_buf_ackno	= DCCP_MAX_SEQNO + 1;
 		av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
 		av->dccpav_ack_ptr	= 0;
 		av->dccpav_time.tv_sec	= 0;
 		av->dccpav_time.tv_usec	= 0;
-		av->dccpav_sent_len	= av->dccpav_vec_len = 0;
+		av->dccpav_vec_len	= 0;
 		INIT_LIST_HEAD(&av->dccpav_records);
 	}
 
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 0adf4b56c34cf..2424effac7f69 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -54,9 +54,7 @@ struct dccp_ackvec {
 	struct list_head dccpav_records;
 	struct timeval	dccpav_time;
 	u8		dccpav_buf_head;
-	u8		dccpav_buf_tail;
 	u8		dccpav_ack_ptr;
-	u8		dccpav_sent_len;
 	u8		dccpav_vec_len;
 	u8		dccpav_buf_nonce;
 	u8		dccpav_ack_nonce;
@@ -107,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
 
 static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
 {
-	return av->dccpav_sent_len != av->dccpav_vec_len;
+	return av->dccpav_vec_len;
 }
 #else /* CONFIG_IP_DCCP_ACKVEC */
 static inline int dccp_ackvec_init(void)
-- 
GitLab


From 29651cda97b0a9e4ac0fbeb5ea731a9909f0f128 Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:06:46 -0700
Subject: [PATCH 0680/1063] [DCCP] CCID2: Fix jiffie wrap issues

Jiffies are now handled correctly (I hope) in CCID2.  If they wrap, no
problem.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index b1d90c07535ee..54a6b7ef3b7b3 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -27,7 +27,6 @@
  *
  * BUGS:
  * - sequence number wrapping
- * - jiffies wrapping
  */
 
 #include "../ccid.h"
@@ -71,7 +70,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 
 			/* packets are sent sequentially */
 			BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
-			BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent);
+			BUG_ON(time_before(seqp->ccid2s_sent,
+					   prev->ccid2s_sent));
 			BUG_ON(len > ccid2_seq_len);
 
 			seqp = prev;
@@ -418,8 +418,8 @@ static inline void ccid2_new_ack(struct sock *sk,
 
 	/* update RTO */
 	if (hctx->ccid2hctx_srtt == -1 ||
-	    (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) {
-		unsigned long r = jiffies - seqp->ccid2s_sent;
+	    time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
+		unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
 		int s;
 
 		/* first measurement */
-- 
GitLab


From d458c25ce24ce00ea547e9976e293e7835416253 Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:07:20 -0700
Subject: [PATCH 0681/1063] [DCCP] CCID2: Initialize ssthresh to infinity

Initialize the slow-start threshold to infinity.  This way, upon connection
initiation, slow-start will be exited only upon a packet loss.  This patch will
allow connections to quickly gain speed.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 7 +++++--
 net/dccp/ccids/ccid2.h | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 54a6b7ef3b7b3..699a56674659b 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -678,9 +678,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	int seqcount = ccid2_seq_len;
 	int i;
 
-	/* XXX init variables with proper values */
 	hctx->ccid2hctx_cwnd	  = 1;
-	hctx->ccid2hctx_ssthresh  = 10;
+	/* Initialize ssthresh to infinity.  This means that we will exit the
+	 * initial slow-start after the first packet loss.  This is what we
+	 * want.
+	 */
+	hctx->ccid2hctx_ssthresh  = ~0;
 	hctx->ccid2hctx_numdupack = 3;
 
 	/* XXX init ~ to window size... */
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 451a87464fa5d..b4cc6c0bf020f 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -50,7 +50,7 @@ struct ccid2_hc_tx_sock {
 	int			ccid2hctx_cwnd;
 	int			ccid2hctx_ssacks;
 	int			ccid2hctx_acks;
-	int			ccid2hctx_ssthresh;
+	unsigned int		ccid2hctx_ssthresh;
 	int			ccid2hctx_pipe;
 	int			ccid2hctx_numdupack;
 	struct ccid2_seq	*ccid2hctx_seqbuf;
-- 
GitLab


From 69263bcfb5016bc3bdd099607a4232cba06f8491 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 22 Sep 2006 14:28:11 -0700
Subject: [PATCH 0682/1063] [ATM]: proper prototypes in net/atm/mpc.h (and
 reduce ifdef clutter)

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/mpc.c | 11 -----------
 net/atm/mpc.h |  8 ++++++++
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 00704661e83f6..b87c2a88bdce4 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -98,11 +98,6 @@ static struct notifier_block mpoa_notifier = {
 	0
 };
 
-#ifdef CONFIG_PROC_FS
-extern int mpc_proc_init(void);
-extern void mpc_proc_clean(void);
-#endif
-
 struct mpoa_client *mpcs = NULL; /* FIXME */
 static struct atm_mpoa_qos *qos_head = NULL;
 static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
@@ -1439,12 +1434,8 @@ static __init int atm_mpoa_init(void)
 {
 	register_atm_ioctl(&atm_ioctl_ops);
 
-#ifdef CONFIG_PROC_FS
 	if (mpc_proc_init() != 0)
 		printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n");
-	else
-		printk(KERN_INFO "mpoa: /proc/mpoa initialized\n");
-#endif
 
 	printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n");
 
@@ -1457,9 +1448,7 @@ static void __exit atm_mpoa_cleanup(void)
 	struct atm_mpoa_qos *qos, *nextqos;
 	struct lec_priv *priv;
 
-#ifdef CONFIG_PROC_FS
 	mpc_proc_clean();
-#endif
 
 	del_timer(&mpc_timer);
 	unregister_netdevice_notifier(&mpoa_notifier);
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 863ddf6079e15..3c7981a229e80 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -50,4 +50,12 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos);
 struct seq_file;
 void atm_mpoa_disp_qos(struct seq_file *m);
 
+#ifdef CONFIG_PROC_FS
+int mpc_proc_init(void);
+void mpc_proc_clean(void);
+#else
+#define mpc_proc_init() (0)
+#define mpc_proc_clean() do { } while(0)
+#endif
+
 #endif /* _MPC_H_ */
-- 
GitLab


From 446dec30c7f305ed1bb0092b0a8d9367d842a33f Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:10:11 -0700
Subject: [PATCH 0683/1063] [DCCP] CCID2: Tell DCCP to quickly check whether
 cwnd is available

If not enough cwnd is available, tell the sender to check again as
soon as possible.  This will increase CPU utilization (polling
frequently for cwnd) but will improve network performance.  That is,
the sender will need to wait less before detecting the increase of
cwnd.  A better architecture would be for the CCID to call-back (or
dequeue) from DCCP when it is able to transmit traffic -- not the
other way around as it currently occurs.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 699a56674659b..e0acd1ba4e887 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -122,7 +122,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk,
 		}
 	}
 
-	return 100; /* XXX */
+	return 1; /* XXX CCID should dequeue when ready instead of polling */
 }
 
 static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
-- 
GitLab


From 8d424f6ca2d02026dadff409770639d720375afb Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:12:44 -0700
Subject: [PATCH 0684/1063] [DCCP] CCID2: Add Kconfig option for CCID2 debug

Allow the user to choose whether or not to enable CCID2 debugging via
Kconfig.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/Kconfig | 8 ++++++++
 net/dccp/ccids/ccid2.c | 7 +++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index ca00191628f79..32752f7504476 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -30,6 +30,14 @@ config IP_DCCP_CCID2
 
 	  If in doubt, say M.
 
+config IP_DCCP_CCID2_DEBUG
+	  bool "CCID2 debug"
+	  depends on IP_DCCP_CCID2
+	  ---help---
+	    Enable CCID2 debug messages.
+
+	    If in doubt, say N.
+
 config IP_DCCP_CCID3
 	tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
 	depends on IP_DCCP
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e0acd1ba4e887..dbcda7e868b76 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -35,8 +35,7 @@
 
 static int ccid2_debug;
 
-#undef CCID2_DEBUG
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 #define ccid2_pr_debug(format, a...) \
         do { if (ccid2_debug) \
                 printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
@@ -47,7 +46,7 @@ static int ccid2_debug;
 
 static const int ccid2_seq_len = 128;
 
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 {
 	int len = 0;
@@ -295,7 +294,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
 	if (!timer_pending(&hctx->ccid2hctx_rtotimer))
 		ccid2_start_rto_timer(sk);
 
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
 	ccid2_pr_debug("Sent: seq=%llu\n", seq);
 	do {
-- 
GitLab


From 07978aabd52ce67f59971872c80f76d6e3ca18ae Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:13:37 -0700
Subject: [PATCH 0685/1063] [DCCP] CCID2: Allocate seq records on demand

Allocate more sequence state on demand.  Each time a packet is sent
out by CCID2, a record of it needs to be kept.  This list of records
grows proportionally to cwnd.  Previously, the length of this list was
hardcoded and therefore the cwnd could only grow to this value (of
128).  Now, records are allocated on demand as necessary---cwnd may
grow as it wishes.  The exceptional case of when memory is not
available is not handled gracefully.  Perhaps, cwnd should be capped
at that point.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 96 ++++++++++++++++++++++++++++--------------
 net/dccp/ccids/ccid2.h |  6 ++-
 2 files changed, 70 insertions(+), 32 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index dbcda7e868b76..93a30ae8d07a6 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -44,8 +44,6 @@ static int ccid2_debug;
 #define ccid2_pr_debug(format, a...)
 #endif
 
-static const int ccid2_seq_len = 128;
-
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 {
@@ -71,7 +69,6 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 			BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
 			BUG_ON(time_before(seqp->ccid2s_sent,
 					   prev->ccid2s_sent));
-			BUG_ON(len > ccid2_seq_len);
 
 			seqp = prev;
 		}
@@ -83,16 +80,57 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 	do {
 		seqp = seqp->ccid2s_prev;
 		len++;
-		BUG_ON(len > ccid2_seq_len);
 	} while (seqp != hctx->ccid2hctx_seqh);
 
-	BUG_ON(len != ccid2_seq_len);
 	ccid2_pr_debug("total len=%d\n", len);
+	BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
 }
 #else
 #define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
 #endif
 
+static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
+				 gfp_t gfp)
+{
+	struct ccid2_seq *seqp;
+	int i;
+
+	/* check if we have space to preserve the pointer to the buffer */
+	if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
+					sizeof(struct ccid2_seq*)))
+		return -ENOMEM;
+
+	/* allocate buffer and initialize linked list */
+	seqp = kmalloc(sizeof(*seqp) * num, gfp);
+	if (seqp == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < (num - 1); i++) {
+		seqp[i].ccid2s_next = &seqp[i + 1];
+		seqp[i + 1].ccid2s_prev = &seqp[i];
+	}
+	seqp[num - 1].ccid2s_next = seqp;
+	seqp->ccid2s_prev = &seqp[num - 1];
+
+	/* This is the first allocation.  Initiate the head and tail.  */
+	if (hctx->ccid2hctx_seqbufc == 0)
+		hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
+	else {
+		/* link the existing list with the one we just created */
+		hctx->ccid2hctx_seqh->ccid2s_next = seqp;
+		seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
+
+		hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1];
+		seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt;
+	}
+
+	/* store the original pointer to the buffer so we can free it */
+	hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
+	hctx->ccid2hctx_seqbufc++;
+
+	return 0;
+}
+
 static int ccid2_hc_tx_send_packet(struct sock *sk,
 				   struct sk_buff *skb, int len)
 {
@@ -231,6 +269,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	struct ccid2_seq *next;
 	u64 seq;
 
 	ccid2_hc_tx_check_sanity(hctx);
@@ -250,15 +289,23 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
 	hctx->ccid2hctx_seqh->ccid2s_seq   = seq;
 	hctx->ccid2hctx_seqh->ccid2s_acked = 0;
 	hctx->ccid2hctx_seqh->ccid2s_sent  = jiffies;
-	hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next;
 
-	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
-		       hctx->ccid2hctx_pipe);
+	next = hctx->ccid2hctx_seqh->ccid2s_next;
+	/* check if we need to alloc more space */
+	if (next == hctx->ccid2hctx_seqt) {
+		int rc;
 
-	if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) {
-		/* XXX allocate more space */
-		WARN_ON(1);
+		ccid2_pr_debug("allocating more space in history\n");
+		rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL);
+		BUG_ON(rc); /* XXX what do we do? */
+
+		next = hctx->ccid2hctx_seqh->ccid2s_next;
+		BUG_ON(next == hctx->ccid2hctx_seqt);
 	}
+	hctx->ccid2hctx_seqh = next;
+
+	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
+		       hctx->ccid2hctx_pipe);
 
 	hctx->ccid2hctx_sent++;
 
@@ -674,8 +721,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
         struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
-	int seqcount = ccid2_seq_len;
-	int i;
 
 	hctx->ccid2hctx_cwnd	  = 1;
 	/* Initialize ssthresh to infinity.  This means that we will exit the
@@ -684,26 +729,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	 */
 	hctx->ccid2hctx_ssthresh  = ~0;
 	hctx->ccid2hctx_numdupack = 3;
+	hctx->ccid2hctx_seqbufc   = 0;
 
 	/* XXX init ~ to window size... */
-	hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) *
-					 seqcount, gfp_any());
-	if (hctx->ccid2hctx_seqbuf == NULL)
+	if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0)
 		return -ENOMEM;
 
-	for (i = 0; i < (seqcount - 1); i++) {
-		hctx->ccid2hctx_seqbuf[i].ccid2s_next =
-					&hctx->ccid2hctx_seqbuf[i + 1];
-		hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev =
-					&hctx->ccid2hctx_seqbuf[i];
-	}
-	hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next =
-					hctx->ccid2hctx_seqbuf;
-	hctx->ccid2hctx_seqbuf->ccid2s_prev =
-					&hctx->ccid2hctx_seqbuf[seqcount - 1];
-
-	hctx->ccid2hctx_seqh	 = hctx->ccid2hctx_seqbuf;
-	hctx->ccid2hctx_seqt	 = hctx->ccid2hctx_seqh;
 	hctx->ccid2hctx_sent	 = 0;
 	hctx->ccid2hctx_rto	 = 3 * HZ;
 	hctx->ccid2hctx_srtt	 = -1;
@@ -722,10 +753,13 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 static void ccid2_hc_tx_exit(struct sock *sk)
 {
         struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	int i;
 
 	ccid2_hc_tx_kill_rto_timer(sk);
-	kfree(hctx->ccid2hctx_seqbuf);
-	hctx->ccid2hctx_seqbuf = NULL;
+
+	for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
+		kfree(hctx->ccid2hctx_seqbuf[i]);
+	hctx->ccid2hctx_seqbufc = 0;
 }
 
 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index b4cc6c0bf020f..2a02ce04ba85d 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -35,6 +35,9 @@ struct ccid2_seq {
 	struct ccid2_seq	*ccid2s_next;
 };
 
+#define CCID2_SEQBUF_LEN 256
+#define CCID2_SEQBUF_MAX 128
+
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
  *
  * @ccid2hctx_ssacks - ACKs recv in slow start
@@ -53,7 +56,8 @@ struct ccid2_hc_tx_sock {
 	unsigned int		ccid2hctx_ssthresh;
 	int			ccid2hctx_pipe;
 	int			ccid2hctx_numdupack;
-	struct ccid2_seq	*ccid2hctx_seqbuf;
+	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
+	int			ccid2hctx_seqbufc;
 	struct ccid2_seq	*ccid2hctx_seqh;
 	struct ccid2_seq	*ccid2hctx_seqt;
 	long			ccid2hctx_rto;
-- 
GitLab


From 374bcf32c86e1b56eab832bbb6b21e636707eab6 Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:14:43 -0700
Subject: [PATCH 0686/1063] [DCCP] CCID2: Halve cwnd once upon multiple losses
 in a single RTT

When multiple losses occur in one RTT, the window should be halved
only once [a single "congestion event"].  This is now implemented,
although not perfectly.  Slightly changed the interface for changing
the cwnd: pass hctx instead of dp.  This is required in order to allow
for change_cwnd to be called from _init().

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 49 ++++++++++++++++++++++++++----------------
 net/dccp/ccids/ccid2.h |  1 +
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 93a30ae8d07a6..b88da035865f3 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -187,10 +187,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_change_cwnd(struct sock *sk, int val)
+static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
 {
-	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-
 	if (val == 0)
 		val = 1;
 
@@ -234,7 +232,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
 	if (hctx->ccid2hctx_ssthresh < 2)
 		hctx->ccid2hctx_ssthresh = 2;
-	ccid2_change_cwnd(sk, 1);
+	ccid2_change_cwnd(hctx, 1);
 
 	/* clear state about stuff we sent */
 	hctx->ccid2hctx_seqt	= hctx->ccid2hctx_seqh;
@@ -444,7 +442,7 @@ static inline void ccid2_new_ack(struct sock *sk,
 			/* increase every 2 acks */
 			hctx->ccid2hctx_ssacks++;
 			if (hctx->ccid2hctx_ssacks == 2) {
-				ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+				ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
 				hctx->ccid2hctx_ssacks = 0;
 				*maxincr = *maxincr - 1;
 			}
@@ -457,7 +455,7 @@ static inline void ccid2_new_ack(struct sock *sk,
 		hctx->ccid2hctx_acks++;
 
 		if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
-			ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+			ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
 			hctx->ccid2hctx_acks = 0;
 		}
 	}
@@ -532,6 +530,22 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
 		ccid2_hc_tx_kill_rto_timer(sk);
 }
 
+static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
+				   struct ccid2_seq *seqp)
+{
+	if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
+		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
+		return;
+	}
+
+	hctx->ccid2hctx_last_cong = jiffies;
+
+	ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1);
+	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
+	if (hctx->ccid2hctx_ssthresh < 2)
+		hctx->ccid2hctx_ssthresh = 2;
+}
+
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -542,7 +556,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	unsigned char veclen;
 	int offset = 0;
 	int done = 0;
-	int loss = 0;
 	unsigned int maxincr = 0;
 
 	ccid2_hc_tx_check_sanity(hctx);
@@ -636,7 +649,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				    !seqp->ccid2s_acked) {
 				    	if (state ==
 					    DCCP_ACKVEC_STATE_ECN_MARKED) {
-						loss = 1;
+					    	ccid2_congestion_event(hctx,
+								       seqp);
 					} else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
@@ -688,7 +702,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		/* check for lost packets */
 		while (1) {
 			if (!seqp->ccid2s_acked) {
-				loss = 1;
+				ccid2_pr_debug("Packet lost: %llu\n",
+					       seqp->ccid2s_seq);
+				/* XXX need to traverse from tail -> head in
+				 * order to detect multiple congestion events in
+				 * one ack vector.
+				 */
+				ccid2_congestion_event(hctx, seqp);
 				ccid2_hc_tx_dec_pipe(sk);
 			}
 			if (seqp == hctx->ccid2hctx_seqt)
@@ -707,14 +727,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
 	}
 
-	if (loss) {
-		/* XXX do bit shifts guarantee a 0 as the new bit? */
-		ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1);
-		hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
-		if (hctx->ccid2hctx_ssthresh < 2)
-			hctx->ccid2hctx_ssthresh = 2;
-	}
-
 	ccid2_hc_tx_check_sanity(hctx);
 }
 
@@ -722,7 +734,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
         struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
 
-	hctx->ccid2hctx_cwnd	  = 1;
+	ccid2_change_cwnd(hctx, 1);
 	/* Initialize ssthresh to infinity.  This means that we will exit the
 	 * initial slow-start after the first packet loss.  This is what we
 	 * want.
@@ -741,6 +753,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	hctx->ccid2hctx_rttvar	 = -1;
 	hctx->ccid2hctx_lastrtt  = 0;
 	hctx->ccid2hctx_rpdupack = -1;
+	hctx->ccid2hctx_last_cong = jiffies;
 
 	hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
 	hctx->ccid2hctx_rtotimer.data	  = (unsigned long)sk;
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 2a02ce04ba85d..5b2ef4acb300d 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -71,6 +71,7 @@ struct ccid2_hc_tx_sock {
 	u64			ccid2hctx_rpseq;
 	int			ccid2hctx_rpdupack;
 	int			ccid2hctx_sendwait;
+	unsigned long		ccid2hctx_last_cong;
 };
 
 struct ccid2_hc_rx_sock {
-- 
GitLab


From 593f16aa627d61da447c76ee5a159450174627f6 Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Tue, 19 Sep 2006 13:15:33 -0700
Subject: [PATCH 0687/1063] [DCCP] CCID2: Add helper functions for changing
 important CCID2 state

Introduce methods which manipulate interesting congestion control
state such as pipe and rtt estimate.  This is useful for people
wishing to monitor the variables of CCID and instrument the code
[perhaps using Kprobes].  Personally, I am a fan of
encapsulation---that justifies this change =D.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index b88da035865f3..457dd3db7f410 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -199,6 +199,17 @@ static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
 	hctx->ccid2hctx_cwnd = val;
 }
 
+static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
+{
+	ccid2_pr_debug("change SRTT to %ld\n", val);
+	hctx->ccid2hctx_srtt = val;
+}
+
+static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
+{
+	hctx->ccid2hctx_pipe = val;
+}
+
 static void ccid2_start_rto_timer(struct sock *sk);
 
 static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -228,7 +239,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	ccid2_start_rto_timer(sk);
 
 	/* adjust pipe, cwnd etc */
-	hctx->ccid2hctx_pipe = 0;
+	ccid2_change_pipe(hctx, 0);
 	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
 	if (hctx->ccid2hctx_ssthresh < 2)
 		hctx->ccid2hctx_ssthresh = 2;
@@ -274,7 +285,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
 
 	BUG_ON(!hctx->ccid2hctx_sendwait);
 	hctx->ccid2hctx_sendwait = 0;
-	hctx->ccid2hctx_pipe++;
+	ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
 	BUG_ON(hctx->ccid2hctx_pipe < 0);
 
 	/* There is an issue.  What if another packet is sent between
@@ -470,11 +481,13 @@ static inline void ccid2_new_ack(struct sock *sk,
 		if (hctx->ccid2hctx_srtt == -1) {
 			ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
 			       	       r, jiffies, seqp->ccid2s_seq);
-			hctx->ccid2hctx_srtt = r;
+			ccid2_change_srtt(hctx, r);
 			hctx->ccid2hctx_rttvar = r >> 1;
 		} else {
 			/* RTTVAR */
 			long tmp = hctx->ccid2hctx_srtt - r;
+			long srtt;
+
 			if (tmp < 0)
 				tmp *= -1;
 
@@ -484,10 +497,12 @@ static inline void ccid2_new_ack(struct sock *sk,
 			hctx->ccid2hctx_rttvar += tmp;
 
 			/* SRTT */
-			hctx->ccid2hctx_srtt *= 7;
-			hctx->ccid2hctx_srtt >>= 3;
+			srtt = hctx->ccid2hctx_srtt;
+			srtt *= 7;
+			srtt >>= 3;
 			tmp = r >> 3;
-			hctx->ccid2hctx_srtt += tmp;
+			srtt += tmp;
+			ccid2_change_srtt(hctx, srtt);
 		}
 		s = hctx->ccid2hctx_rttvar << 2;
 		/* clock granularity is 1 when based on jiffies */
@@ -523,7 +538,7 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	hctx->ccid2hctx_pipe--;
+	ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1);
 	BUG_ON(hctx->ccid2hctx_pipe < 0);
 
 	if (hctx->ccid2hctx_pipe == 0)
@@ -749,7 +764,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 
 	hctx->ccid2hctx_sent	 = 0;
 	hctx->ccid2hctx_rto	 = 3 * HZ;
-	hctx->ccid2hctx_srtt	 = -1;
+	ccid2_change_srtt(hctx, -1);
 	hctx->ccid2hctx_rttvar	 = -1;
 	hctx->ccid2hctx_lastrtt  = 0;
 	hctx->ccid2hctx_rpdupack = -1;
-- 
GitLab


From c55e2f4997a104d66b59bdf1aa8ab125d09ae00a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Sep 2006 13:23:19 -0700
Subject: [PATCH 0688/1063] [IPV4]: ipip and ip_gre encapsulation bugs

Handling of ipip and ip_gre ICMP error relaying is b0rken; it accesses
8bit field + 3 reserved octets as host-endian 32bit, does comparison,
subtraction and stuffs the result back.  That breaks on big-endian.

Fixed, made endian-clean.

[ Note that this affected code is permanently commented out with
  an ifdef, so this error couldn't actually cause problems for
  anyone. -DaveM ]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 23 +++++++++++++----------
 net/ipv4/ipip.c   | 22 ++++++++++++----------
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e66f6ff2e1989..f5fba051df3da 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -393,7 +393,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 	int code = skb->h.icmph->code;
 	int rel_type = 0;
 	int rel_code = 0;
-	int rel_info = 0;
+	__be32 rel_info = 0;
+	__u32 n = 0;
 	u16 flags;
 	int grehlen = (iph->ihl<<2) + 4;
 	struct sk_buff *skb2;
@@ -422,14 +423,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 	default:
 		return;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		if (n < (iph->ihl<<2))
 			return;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMP_PARAMETERPROB;
-		rel_info = skb->h.icmph->un.gateway - grehlen;
+		n -= grehlen;
+		rel_info = htonl(n << 24);
 		break;
 
 	case ICMP_DEST_UNREACH:
@@ -440,13 +443,14 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 			return;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
-			if (rel_info < grehlen+68)
+			n = ntohs(skb->h.icmph->un.frag.mtu);
+			if (n < grehlen+68)
 				return;
-			rel_info -= grehlen;
+			n -= grehlen;
 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-			if (rel_info > ntohs(eiph->tot_len))
+			if (n > ntohs(eiph->tot_len))
 				return;
+			rel_info = htonl(n);
 			break;
 		default:
 			/* All others are translated to HOST_UNREACH.
@@ -508,12 +512,11 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > dst_mtu(skb2->dst)) {
+		if (n > dst_mtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return;
 		}
-		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
-		rel_info = htonl(rel_info);
+		skb2->dst->ops->update_pmtu(skb2->dst, n);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 76ab50b0d6ef3..0c45565292284 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -341,7 +341,8 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 	int code = skb->h.icmph->code;
 	int rel_type = 0;
 	int rel_code = 0;
-	int rel_info = 0;
+	__be32 rel_info = 0;
+	__u32 n = 0;
 	struct sk_buff *skb2;
 	struct flowi fl;
 	struct rtable *rt;
@@ -354,14 +355,15 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 	default:
 		return 0;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < hlen)
+		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		if (n < hlen)
 			return 0;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMP_PARAMETERPROB;
-		rel_info = skb->h.icmph->un.gateway - hlen;
+		rel_info = htonl((n - hlen) << 24);
 		break;
 
 	case ICMP_DEST_UNREACH:
@@ -372,13 +374,14 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 			return 0;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
-			if (rel_info < hlen+68)
+			n = ntohs(skb->h.icmph->un.frag.mtu);
+			if (n < hlen+68)
 				return 0;
-			rel_info -= hlen;
+			n -= hlen;
 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-			if (rel_info > ntohs(eiph->tot_len))
+			if (n > ntohs(eiph->tot_len))
 				return 0;
+			rel_info = htonl(n);
 			break;
 		default:
 			/* All others are translated to HOST_UNREACH.
@@ -440,12 +443,11 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > dst_mtu(skb2->dst)) {
+		if (n > dst_mtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return 0;
 		}
-		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
-		rel_info = htonl(rel_info);
+		skb2->dst->ops->update_pmtu(skb2->dst, n);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
-- 
GitLab


From 1bf38a36b6a0e810dafae048fdbb999e587f0f2f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:57:09 -0700
Subject: [PATCH 0689/1063] [NETFILTER]: remove unused include file

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_logging.h | 33 -------------------------------
 1 file changed, 33 deletions(-)
 delete mode 100644 include/linux/netfilter_logging.h

diff --git a/include/linux/netfilter_logging.h b/include/linux/netfilter_logging.h
deleted file mode 100644
index 562bb6aad4e17..0000000000000
--- a/include/linux/netfilter_logging.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Internal logging interface, which relies on the real 
-   LOG target modules */
-#ifndef __LINUX_NETFILTER_LOGGING_H
-#define __LINUX_NETFILTER_LOGGING_H
-
-#ifdef __KERNEL__
-#include <asm/atomic.h>
-
-struct nf_logging_t {
-	void (*nf_log_packet)(struct sk_buff **pskb,
-			      unsigned int hooknum,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      const char *prefix);
-	void (*nf_log)(char *pfh, size_t len,
-		       const char *prefix);
-};
-
-extern void nf_log_register(int pf, const struct nf_logging_t *logging);
-extern void nf_log_unregister(int pf, const struct nf_logging_t *logging);
-
-extern void nf_log_packet(int pf,
-			  struct sk_buff **pskb,
-			  unsigned int hooknum,
-			  const struct net_device *in,
-			  const struct net_device *out,
-			  const char *fmt, ...);
-extern void nf_log(int pf,
-		   char *pfh, size_t len,
-		   const char *fmt, ...);
-#endif /*__KERNEL__*/
-
-#endif /*__LINUX_NETFILTER_LOGGING_H*/
-- 
GitLab


From df0933dcb027e156cb5253570ad694b81bd52b69 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:57:53 -0700
Subject: [PATCH 0690/1063] [NETFILTER]: kill listhelp.h

Kill listhelp.h and use the list.h functions instead.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h           |   4 -
 include/linux/netfilter_ipv4/listhelp.h      | 123 ------------
 net/bridge/netfilter/ebtables.c              |  76 +++++---
 net/ipv4/netfilter/arp_tables.c              |   2 -
 net/ipv4/netfilter/ip_conntrack_core.c       | 189 +++++++++----------
 net/ipv4/netfilter/ip_conntrack_proto_gre.c  |  24 ++-
 net/ipv4/netfilter/ip_conntrack_standalone.c |   1 -
 net/ipv4/netfilter/ip_nat_core.c             |   4 -
 net/ipv4/netfilter/ip_nat_helper.c           |   4 -
 net/ipv4/netfilter/ip_nat_rule.c             |   4 -
 net/ipv4/netfilter/ip_nat_standalone.c       |   4 -
 net/ipv6/netfilter/ip6_tables.c              |   3 -
 net/netfilter/nf_conntrack_core.c            | 185 +++++++++---------
 net/netfilter/nf_conntrack_standalone.c      |   1 -
 net/netfilter/x_tables.c                     |  17 +-
 15 files changed, 237 insertions(+), 404 deletions(-)
 delete mode 100644 include/linux/netfilter_ipv4/listhelp.h

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 03d1027fb0e88..c832295dbf619 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -138,10 +138,6 @@ struct xt_counters_info
 
 #include <linux/netdevice.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
-
 #ifdef CONFIG_COMPAT
 #define COMPAT_TO_USER		1
 #define COMPAT_FROM_USER	-1
diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h
deleted file mode 100644
index 5d92cf044d919..0000000000000
--- a/include/linux/netfilter_ipv4/listhelp.h
+++ /dev/null
@@ -1,123 +0,0 @@
-#ifndef _LISTHELP_H
-#define _LISTHELP_H
-#include <linux/list.h>
-
-/* Header to do more comprehensive job than linux/list.h; assume list
-   is first entry in structure. */
-
-/* Return pointer to first true entry, if any, or NULL.  A macro
-   required to allow inlining of cmpfn. */
-#define LIST_FIND(head, cmpfn, type, args...)		\
-({							\
-	const struct list_head *__i, *__j = NULL;	\
-							\
-	ASSERT_READ_LOCK(head);				\
-	list_for_each(__i, (head))			\
-		if (cmpfn((const type)__i , ## args)) {	\
-			__j = __i;			\
-			break;				\
-		}					\
-	(type)__j;					\
-})
-
-#define LIST_FIND_W(head, cmpfn, type, args...)		\
-({							\
-	const struct list_head *__i, *__j = NULL;	\
-							\
-	ASSERT_WRITE_LOCK(head);			\
-	list_for_each(__i, (head))			\
-		if (cmpfn((type)__i , ## args)) {	\
-			__j = __i;			\
-			break;				\
-		}					\
-	(type)__j;					\
-})
-
-/* Just like LIST_FIND but we search backwards */
-#define LIST_FIND_B(head, cmpfn, type, args...)		\
-({							\
-	const struct list_head *__i, *__j = NULL;	\
-							\
-	ASSERT_READ_LOCK(head);				\
-	list_for_each_prev(__i, (head))			\
-		if (cmpfn((const type)__i , ## args)) {	\
-			__j = __i;			\
-			break;				\
-		}					\
-	(type)__j;					\
-})
-
-static inline int
-__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; }
-
-/* Is this entry in the list? */
-static inline int
-list_inlist(struct list_head *head, const void *entry)
-{
-	return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL;
-}
-
-/* Delete from list. */
-#ifdef CONFIG_NETFILTER_DEBUG
-#define LIST_DELETE(head, oldentry)					\
-do {									\
-	ASSERT_WRITE_LOCK(head);					\
-	if (!list_inlist(head, oldentry))				\
-		printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n",	\
-		       __FILE__, __LINE__, #oldentry, oldentry, #head);	\
-        else list_del((struct list_head *)oldentry);			\
-} while(0)
-#else
-#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry)
-#endif
-
-/* Append. */
-static inline void
-list_append(struct list_head *head, void *new)
-{
-	ASSERT_WRITE_LOCK(head);
-	list_add((new), (head)->prev);
-}
-
-/* Prepend. */
-static inline void
-list_prepend(struct list_head *head, void *new)
-{
-	ASSERT_WRITE_LOCK(head);
-	list_add(new, head);
-}
-
-/* Insert according to ordering function; insert before first true. */
-#define LIST_INSERT(head, new, cmpfn)				\
-do {								\
-	struct list_head *__i;					\
-	ASSERT_WRITE_LOCK(head);				\
-	list_for_each(__i, (head))				\
-		if ((new), (typeof (new))__i)			\
-			break;					\
-	list_add((struct list_head *)(new), __i->prev);		\
-} while(0)
-
-/* If the field after the list_head is a nul-terminated string, you
-   can use these functions. */
-static inline int __list_cmp_name(const void *i, const char *name)
-{
-	return strcmp(name, i+sizeof(struct list_head)) == 0;
-}
-
-/* Returns false if same name already in list, otherwise does insert. */
-static inline int
-list_named_insert(struct list_head *head, void *new)
-{
-	if (LIST_FIND(head, __list_cmp_name, void *,
-		      new + sizeof(struct list_head)))
-		return 0;
-	list_prepend(head, new);
-	return 1;
-}
-
-/* Find this named element in the list. */
-#define list_named_find(head, name)			\
-LIST_FIND(head, __list_cmp_name, void *, name)
-
-#endif /*_LISTHELP_H*/
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index d06a5075b5f69..3df55b2bd91d7 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -24,6 +24,7 @@
 #include <linux/vmalloc.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
@@ -31,12 +32,6 @@
 /* needed for logical [in,out]-dev filtering */
 #include "../br_private.h"
 
-/* list_named_find */
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
-#include <linux/mutex.h>
-
 #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
                                          "report to author: "format, ## args)
 /* #define BUGPRINT(format, args...) */
@@ -278,18 +273,22 @@ static inline void *
 find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
    struct mutex *mutex)
 {
-	void *ret;
+	struct {
+		struct list_head list;
+		char name[EBT_FUNCTION_MAXNAMELEN];
+	} *e;
 
 	*error = mutex_lock_interruptible(mutex);
 	if (*error != 0)
 		return NULL;
 
-	ret = list_named_find(head, name);
-	if (!ret) {
-		*error = -ENOENT;
-		mutex_unlock(mutex);
+	list_for_each_entry(e, head, list) {
+		if (strcmp(e->name, name) == 0)
+			return e;
 	}
-	return ret;
+	*error = -ENOENT;
+	mutex_unlock(mutex);
+	return NULL;
 }
 
 #ifndef CONFIG_KMOD
@@ -1043,15 +1042,19 @@ static int do_replace(void __user *user, unsigned int len)
 
 int ebt_register_target(struct ebt_target *target)
 {
+	struct ebt_target *t;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_targets, target)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(t, &ebt_targets, list) {
+		if (strcmp(t->name, target->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&target->list, &ebt_targets);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1060,21 +1063,25 @@ int ebt_register_target(struct ebt_target *target)
 void ebt_unregister_target(struct ebt_target *target)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_targets, target);
+	list_del(&target->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_match(struct ebt_match *match)
 {
+	struct ebt_match *m;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_matches, match)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(m, &ebt_matches, list) {
+		if (strcmp(m->name, match->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&match->list, &ebt_matches);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1083,21 +1090,25 @@ int ebt_register_match(struct ebt_match *match)
 void ebt_unregister_match(struct ebt_match *match)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_matches, match);
+	list_del(&match->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_watcher(struct ebt_watcher *watcher)
 {
+	struct ebt_watcher *w;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_watchers, watcher)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(w, &ebt_watchers, list) {
+		if (strcmp(w->name, watcher->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&watcher->list, &ebt_watchers);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1106,13 +1117,14 @@ int ebt_register_watcher(struct ebt_watcher *watcher)
 void ebt_unregister_watcher(struct ebt_watcher *watcher)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_watchers, watcher);
+	list_del(&watcher->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_table(struct ebt_table *table)
 {
 	struct ebt_table_info *newinfo;
+	struct ebt_table *t;
 	int ret, i, countersize;
 
 	if (!table || !table->table ||!table->table->entries ||
@@ -1158,10 +1170,12 @@ int ebt_register_table(struct ebt_table *table)
 	if (ret != 0)
 		goto free_chainstack;
 
-	if (list_named_find(&ebt_tables, table->name)) {
-		ret = -EEXIST;
-		BUGPRINT("Table name already exists\n");
-		goto free_unlock;
+	list_for_each_entry(t, &ebt_tables, list) {
+		if (strcmp(t->name, table->name) == 0) {
+			ret = -EEXIST;
+			BUGPRINT("Table name already exists\n");
+			goto free_unlock;
+		}
 	}
 
 	/* Hold a reference count if the chains aren't empty */
@@ -1169,7 +1183,7 @@ int ebt_register_table(struct ebt_table *table)
 		ret = -ENOENT;
 		goto free_unlock;
 	}
-	list_prepend(&ebt_tables, table);
+	list_add(&table->list, &ebt_tables);
 	mutex_unlock(&ebt_mutex);
 	return 0;
 free_unlock:
@@ -1195,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table)
 		return;
 	}
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_tables, table);
+	list_del(&table->list);
 	mutex_unlock(&ebt_mutex);
 	vfree(table->private->entries);
 	if (table->private->chainstack) {
@@ -1465,7 +1479,7 @@ static int __init ebtables_init(void)
 	int ret;
 
 	mutex_lock(&ebt_mutex);
-	list_named_insert(&ebt_targets, &ebt_standard_target);
+	list_add(&ebt_standard_target.list, &ebt_targets);
 	mutex_unlock(&ebt_mutex);
 	if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0)
 		return ret;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 4f10b06413a1c..aaeaa9ce0f28d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -56,8 +56,6 @@ do {								\
 #define ARP_NF_ASSERT(x)
 #endif
 
-#include <linux/netfilter_ipv4/listhelp.h>
-
 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 				      char *hdr_addr, int len)
 {
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 5da25ad503090..2568d480e9a92 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -47,7 +47,6 @@
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #define IP_CONNTRACK_VERSION	"2.4"
 
@@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct)
 static void
 clean_from_lists(struct ip_conntrack *ct)
 {
-	unsigned int ho, hr;
-	
 	DEBUGP("clean_from_lists(%p)\n", ct);
 	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
-
-	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
 
 	/* Destroy all pending expectations */
 	ip_ct_remove_expectations(ct);
@@ -367,16 +361,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	ip_conntrack_put(ct);
 }
 
-static inline int
-conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
-		    const struct ip_conntrack_tuple *tuple,
-		    const struct ip_conntrack *ignored_conntrack)
-{
-	ASSERT_READ_LOCK(&ip_conntrack_lock);
-	return tuplehash_to_ctrack(i) != ignored_conntrack
-		&& ip_ct_tuple_equal(tuple, &i->tuple);
-}
-
 struct ip_conntrack_tuple_hash *
 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
 		    const struct ip_conntrack *ignored_conntrack)
@@ -386,7 +370,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
 
 	ASSERT_READ_LOCK(&ip_conntrack_lock);
 	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
-		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+		if (tuplehash_to_ctrack(h) != ignored_conntrack &&
+		    ip_ct_tuple_equal(tuple, &h->tuple)) {
 			CONNTRACK_STAT_INC(found);
 			return h;
 		}
@@ -417,10 +402,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
 					unsigned int repl_hash) 
 {
 	ct->id = ++ip_conntrack_next_id;
-	list_prepend(&ip_conntrack_hash[hash],
-		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&ip_conntrack_hash[repl_hash],
-		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+		 &ip_conntrack_hash[hash]);
+	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+		 &ip_conntrack_hash[repl_hash]);
 }
 
 void ip_conntrack_hash_insert(struct ip_conntrack *ct)
@@ -440,6 +425,7 @@ int
 __ip_conntrack_confirm(struct sk_buff **pskb)
 {
 	unsigned int hash, repl_hash;
+	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
 
@@ -470,43 +456,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
 	/* See if there's one in the list already, including reverse:
            NAT could have grabbed it without realizing, since we're
            not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&ip_conntrack_hash[hash],
-		       conntrack_tuple_cmp,
-		       struct ip_conntrack_tuple_hash *,
-		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
-			  conntrack_tuple_cmp,
-			  struct ip_conntrack_tuple_hash *,
-			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-		/* Remove from unconfirmed list */
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
+		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple))
+			goto out;
+	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
+		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple))
+			goto out;
 
-		__ip_conntrack_hash_insert(ct, hash, repl_hash);
-		/* Timer relative to confirmation time, not original
-		   setting time, otherwise we'd get timer wrap in
-		   weird delay cases. */
-		ct->timeout.expires += jiffies;
-		add_timer(&ct->timeout);
-		atomic_inc(&ct->ct_general.use);
-		set_bit(IPS_CONFIRMED_BIT, &ct->status);
-		CONNTRACK_STAT_INC(insert);
-		write_unlock_bh(&ip_conntrack_lock);
-		if (ct->helper)
-			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+	/* Remove from unconfirmed list */
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+	__ip_conntrack_hash_insert(ct, hash, repl_hash);
+	/* Timer relative to confirmation time, not original
+	   setting time, otherwise we'd get timer wrap in
+	   weird delay cases. */
+	ct->timeout.expires += jiffies;
+	add_timer(&ct->timeout);
+	atomic_inc(&ct->ct_general.use);
+	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	CONNTRACK_STAT_INC(insert);
+	write_unlock_bh(&ip_conntrack_lock);
+	if (ct->helper)
+		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
 #ifdef CONFIG_IP_NF_NAT_NEEDED
-		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-			ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
 #endif
-		ip_conntrack_event_cache(master_ct(ct) ?
-					 IPCT_RELATED : IPCT_NEW, *pskb);
+	ip_conntrack_event_cache(master_ct(ct) ?
+				 IPCT_RELATED : IPCT_NEW, *pskb);
 
-		return NF_ACCEPT;
-	}
+	return NF_ACCEPT;
 
+out:
 	CONNTRACK_STAT_INC(insert_failed);
 	write_unlock_bh(&ip_conntrack_lock);
-
 	return NF_DROP;
 }
 
@@ -527,23 +513,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
-{
-	return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
-}
-
 static int early_drop(struct list_head *chain)
 {
 	/* Traverse backwards: gives us oldest, which is roughly LRU */
 	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct = NULL;
+	struct ip_conntrack *ct = NULL, *tmp;
 	int dropped = 0;
 
 	read_lock_bh(&ip_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
-	if (h) {
-		ct = tuplehash_to_ctrack(h);
-		atomic_inc(&ct->ct_general.use);
+	list_for_each_entry_reverse(h, chain, list) {
+		tmp = tuplehash_to_ctrack(h);
+		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+			ct = tmp;
+			atomic_inc(&ct->ct_general.use);
+			break;
+		}
 	}
 	read_unlock_bh(&ip_conntrack_lock);
 
@@ -559,18 +543,16 @@ static int early_drop(struct list_head *chain)
 	return dropped;
 }
 
-static inline int helper_cmp(const struct ip_conntrack_helper *i,
-			     const struct ip_conntrack_tuple *rtuple)
-{
-	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
 static struct ip_conntrack_helper *
 __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
-			 struct ip_conntrack_helper *,
-			 tuple);
+	struct ip_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+			return h;
+	}
+	return NULL;
 }
 
 struct ip_conntrack_helper *
@@ -1062,7 +1044,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
 {
 	BUG_ON(me->timeout == 0);
 	write_lock_bh(&ip_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_add(&me->list, &helpers);
 	write_unlock_bh(&ip_conntrack_lock);
 
 	return 0;
@@ -1081,24 +1063,24 @@ __ip_conntrack_helper_find_byname(const char *name)
 	return NULL;
 }
 
-static inline int unhelp(struct ip_conntrack_tuple_hash *i,
-			 const struct ip_conntrack_helper *me)
+static inline void unhelp(struct ip_conntrack_tuple_hash *i,
+			  const struct ip_conntrack_helper *me)
 {
 	if (tuplehash_to_ctrack(i)->helper == me) {
  		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
 		tuplehash_to_ctrack(i)->helper = NULL;
 	}
-	return 0;
 }
 
 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 {
 	unsigned int i;
+	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack_expect *exp, *tmp;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&ip_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	list_del(&me->list);
 
 	/* Get rid of expectations */
 	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
@@ -1108,10 +1090,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 		}
 	}
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
-	for (i = 0; i < ip_conntrack_htable_size; i++)
-		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
-			    struct ip_conntrack_tuple_hash *, me);
+	list_for_each_entry(h, &unconfirmed, list)
+		unhelp(h, me);
+	for (i = 0; i < ip_conntrack_htable_size; i++) {
+		list_for_each_entry(h, &ip_conntrack_hash[i], list)
+			unhelp(h, me);
+	}
 	write_unlock_bh(&ip_conntrack_lock);
 
 	/* Someone could be still looking at the helper in a bh. */
@@ -1237,46 +1221,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 	nf_conntrack_get(nskb->nfct);
 }
 
-static inline int
-do_iter(const struct ip_conntrack_tuple_hash *i,
-	int (*iter)(struct ip_conntrack *i, void *data),
-	void *data)
-{
-	return iter(tuplehash_to_ctrack(i), data);
-}
-
 /* Bring out ya dead! */
-static struct ip_conntrack_tuple_hash *
+static struct ip_conntrack *
 get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
 		void *data, unsigned int *bucket)
 {
-	struct ip_conntrack_tuple_hash *h = NULL;
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct;
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
-				struct ip_conntrack_tuple_hash *, iter, data);
-		if (h)
-			break;
+		list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
+			ct = tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				goto found;
+		}
+	}
+	list_for_each_entry(h, &unconfirmed, list) {
+		ct = tuplehash_to_ctrack(h);
+		if (iter(ct, data))
+			goto found;
 	}
-	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
-				struct ip_conntrack_tuple_hash *, iter, data);
-	if (h)
-		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
 	write_unlock_bh(&ip_conntrack_lock);
+	return NULL;
 
-	return h;
+found:
+	atomic_inc(&ct->ct_general.use);
+	write_unlock_bh(&ip_conntrack_lock);
+	return ct;
 }
 
 void
 ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
 {
-	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct;
 	unsigned int bucket = 0;
 
-	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
-		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 4ee016c427b4e..92c6d8b178c97 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock);
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
 
-#include <linux/netfilter_ipv4/listhelp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -82,10 +81,12 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
 	__be16 key = 0;
 
 	read_lock_bh(&ip_ct_gre_lock);
-	km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
-			struct ip_ct_gre_keymap *, t);
-	if (km)
-		key = km->tuple.src.u.gre.key;
+	list_for_each_entry(km, &gre_keymap_list, list) {
+		if (gre_key_cmpfn(km, t)) {
+			key = km->tuple.src.u.gre.key;
+			break;
+		}
+	}
 	read_unlock_bh(&ip_ct_gre_lock);
 	
 	DEBUGP("lookup src key 0x%x up key for ", key);
@@ -99,7 +100,7 @@ int
 ip_ct_gre_keymap_add(struct ip_conntrack *ct,
 		     struct ip_conntrack_tuple *t, int reply)
 {
-	struct ip_ct_gre_keymap **exist_km, *km, *old;
+	struct ip_ct_gre_keymap **exist_km, *km;
 
 	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
 		DEBUGP("refusing to add GRE keymap to non-pptp session\n");
@@ -113,13 +114,10 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
 
 	if (*exist_km) {
 		/* check whether it's a retransmission */
-		old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
-				struct ip_ct_gre_keymap *, t);
-		if (old == *exist_km) {
-			DEBUGP("retransmission\n");
-			return 0;
+		list_for_each_entry(km, &gre_keymap_list, list) {
+			if (gre_key_cmpfn(km, t) && km == *exist_km)
+				return 0;
 		}
-
 		DEBUGP("trying to override keymap_%s for ct %p\n", 
 			reply? "reply":"orig", ct);
 		return -EEXIST;
@@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
 	DUMP_TUPLE_GRE(&km->tuple);
 
 	write_lock_bh(&ip_ct_gre_lock);
-	list_append(&gre_keymap_list, km);
+	list_add_tail(&km->list, &gre_keymap_list);
 	write_unlock_bh(&ip_ct_gre_lock);
 
 	return 0;
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 3f5d495b853b8..02135756562e2 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -35,7 +35,6 @@
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 4c540d03d48ee..71f3e09cbc84b 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -22,9 +22,6 @@
 #include <linux/udp.h>
 #include <linux/jhash.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -33,7 +30,6 @@
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 021c3daae3edb..7f6a75984f6c7 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -27,16 +27,12 @@
 #include <net/tcp.h>
 #include <net/udp.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index e59f5a8ecb6bd..7b703839aa58c 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -19,14 +19,10 @@
 #include <net/route.h>
 #include <linux/bitops.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index f3b778355432a..9c577db62047a 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -30,9 +30,6 @@
 #include <net/checksum.h>
 #include <linux/spinlock.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -40,7 +37,6 @@
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d1c315364ee77..73d477ce216b3 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -70,9 +70,6 @@ do {								\
 #define IP_NF_ASSERT(x)
 #endif
 
-
-#include <linux/netfilter_ipv4/listhelp.h>
-
 #if 0
 /* All the better to debug you with... */
 #define static
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3b64dbee66203..927137b8b3b5d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -57,7 +57,6 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
@@ -539,15 +538,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
 static void
 clean_from_lists(struct nf_conn *ct)
 {
-	unsigned int ho, hr;
-	
 	DEBUGP("clean_from_lists(%p)\n", ct);
 	ASSERT_WRITE_LOCK(&nf_conntrack_lock);
-
-	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
 
 	/* Destroy all pending expectations */
 	nf_ct_remove_expectations(ct);
@@ -617,16 +611,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	nf_ct_put(ct);
 }
 
-static inline int
-conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
-		    const struct nf_conntrack_tuple *tuple,
-		    const struct nf_conn *ignored_conntrack)
-{
-	ASSERT_READ_LOCK(&nf_conntrack_lock);
-	return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
-		&& nf_ct_tuple_equal(tuple, &i->tuple);
-}
-
 struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_conn *ignored_conntrack)
@@ -636,7 +620,8 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 
 	ASSERT_READ_LOCK(&nf_conntrack_lock);
 	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
-		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
+		    nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(found);
 			return h;
 		}
@@ -667,10 +652,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int repl_hash) 
 {
 	ct->id = ++nf_conntrack_next_id;
-	list_prepend(&nf_conntrack_hash[hash],
-		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&nf_conntrack_hash[repl_hash],
-		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+		 &nf_conntrack_hash[hash]);
+	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+		 &nf_conntrack_hash[repl_hash]);
 }
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -690,7 +675,9 @@ int
 __nf_conntrack_confirm(struct sk_buff **pskb)
 {
 	unsigned int hash, repl_hash;
+	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
+	struct nf_conn_help *help;
 	enum ip_conntrack_info ctinfo;
 
 	ct = nf_ct_get(*pskb, &ctinfo);
@@ -720,41 +707,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&nf_conntrack_hash[hash],
-		       conntrack_tuple_cmp,
-		       struct nf_conntrack_tuple_hash *,
-		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&nf_conntrack_hash[repl_hash],
-			  conntrack_tuple_cmp,
-			  struct nf_conntrack_tuple_hash *,
-			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-		struct nf_conn_help *help;
-		/* Remove from unconfirmed list */
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_for_each_entry(h, &nf_conntrack_hash[hash], list)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple))
+			goto out;
+	list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple))
+			goto out;
 
-		__nf_conntrack_hash_insert(ct, hash, repl_hash);
-		/* Timer relative to confirmation time, not original
-		   setting time, otherwise we'd get timer wrap in
-		   weird delay cases. */
-		ct->timeout.expires += jiffies;
-		add_timer(&ct->timeout);
-		atomic_inc(&ct->ct_general.use);
-		set_bit(IPS_CONFIRMED_BIT, &ct->status);
-		NF_CT_STAT_INC(insert);
-		write_unlock_bh(&nf_conntrack_lock);
-		help = nfct_help(ct);
-		if (help && help->helper)
-			nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+	/* Remove from unconfirmed list */
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+	__nf_conntrack_hash_insert(ct, hash, repl_hash);
+	/* Timer relative to confirmation time, not original
+	   setting time, otherwise we'd get timer wrap in
+	   weird delay cases. */
+	ct->timeout.expires += jiffies;
+	add_timer(&ct->timeout);
+	atomic_inc(&ct->ct_general.use);
+	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	NF_CT_STAT_INC(insert);
+	write_unlock_bh(&nf_conntrack_lock);
+	help = nfct_help(ct);
+	if (help && help->helper)
+		nf_conntrack_event_cache(IPCT_HELPER, *pskb);
 #ifdef CONFIG_NF_NAT_NEEDED
-		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+		nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
 #endif
-		nf_conntrack_event_cache(master_ct(ct) ?
-					 IPCT_RELATED : IPCT_NEW, *pskb);
-		return NF_ACCEPT;
-	}
+	nf_conntrack_event_cache(master_ct(ct) ?
+				 IPCT_RELATED : IPCT_NEW, *pskb);
+	return NF_ACCEPT;
 
+out:
 	NF_CT_STAT_INC(insert_failed);
 	write_unlock_bh(&nf_conntrack_lock);
 	return NF_DROP;
@@ -777,24 +764,21 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
-{
-	return !(test_bit(IPS_ASSURED_BIT,
-			  &nf_ct_tuplehash_to_ctrack(i)->status));
-}
-
 static int early_drop(struct list_head *chain)
 {
 	/* Traverse backwards: gives us oldest, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
-	struct nf_conn *ct = NULL;
+	struct nf_conn *ct = NULL, *tmp;
 	int dropped = 0;
 
 	read_lock_bh(&nf_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
-	if (h) {
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		atomic_inc(&ct->ct_general.use);
+	list_for_each_entry_reverse(h, chain, list) {
+		tmp = nf_ct_tuplehash_to_ctrack(h);
+		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+			ct = tmp;
+			atomic_inc(&ct->ct_general.use);
+			break;
+		}
 	}
 	read_unlock_bh(&nf_conntrack_lock);
 
@@ -810,18 +794,16 @@ static int early_drop(struct list_head *chain)
 	return dropped;
 }
 
-static inline int helper_cmp(const struct nf_conntrack_helper *i,
-			     const struct nf_conntrack_tuple *rtuple)
-{
-	return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
 static struct nf_conntrack_helper *
 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
-			 struct nf_conntrack_helper *,
-			 tuple);
+	struct nf_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+			return h;
+	}
+	return NULL;
 }
 
 struct nf_conntrack_helper *
@@ -1323,7 +1305,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 		return ret;
 	}
 	write_lock_bh(&nf_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_add(&me->list, &helpers);
 	write_unlock_bh(&nf_conntrack_lock);
 
 	return 0;
@@ -1342,8 +1324,8 @@ __nf_conntrack_helper_find_byname(const char *name)
 	return NULL;
 }
 
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
-			 const struct nf_conntrack_helper *me)
+static inline void unhelp(struct nf_conntrack_tuple_hash *i,
+			  const struct nf_conntrack_helper *me)
 {
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
 	struct nf_conn_help *help = nfct_help(ct);
@@ -1352,17 +1334,17 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 		nf_conntrack_event(IPCT_HELPER, ct);
 		help->helper = NULL;
 	}
-	return 0;
 }
 
 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 {
 	unsigned int i;
+	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp, *tmp;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&nf_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	list_del(&me->list);
 
 	/* Get rid of expectations */
 	list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
@@ -1374,10 +1356,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
-	for (i = 0; i < nf_conntrack_htable_size; i++)
-		LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
-			    struct nf_conntrack_tuple_hash *, me);
+	list_for_each_entry(h, &unconfirmed, list)
+		unhelp(h, me);
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
+		list_for_each_entry(h, &nf_conntrack_hash[i], list)
+			unhelp(h, me);
+	}
 	write_unlock_bh(&nf_conntrack_lock);
 
 	/* Someone could be still looking at the helper in a bh. */
@@ -1510,37 +1494,40 @@ do_iter(const struct nf_conntrack_tuple_hash *i,
 }
 
 /* Bring out ya dead! */
-static struct nf_conntrack_tuple_hash *
+static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 		void *data, unsigned int *bucket)
 {
-	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 
 	write_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
-				struct nf_conntrack_tuple_hash *, iter, data);
-		if (h)
-			break;
+		list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				goto found;
+		}
  	}
-	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
-				struct nf_conntrack_tuple_hash *, iter, data);
-	if (h)
-		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
+	list_for_each_entry(h, &unconfirmed, list) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		if (iter(ct, data))
+			goto found;
+	}
+	return NULL;
+found:
+	atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
 	write_unlock_bh(&nf_conntrack_lock);
-
-	return h;
+	return ct;
 }
 
 void
 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
 {
-	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 	unsigned int bucket = 0;
 
-	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
-		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9a1de0ca475b7..5954f67738105 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -37,7 +37,6 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8037ba63d5871..be7baf4f6846b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -81,7 +81,7 @@ xt_unregister_target(struct xt_target *target)
 	int af = target->family;
 
 	mutex_lock(&xt[af].mutex);
-	LIST_DELETE(&xt[af].target, target);
+	list_del(&target->list);
 	mutex_unlock(&xt[af].mutex);
 }
 EXPORT_SYMBOL(xt_unregister_target);
@@ -138,7 +138,7 @@ xt_unregister_match(struct xt_match *match)
 	int af =  match->family;
 
 	mutex_lock(&xt[af].mutex);
-	LIST_DELETE(&xt[af].match, match);
+	list_del(&match->list);
 	mutex_unlock(&xt[af].mutex);
 }
 EXPORT_SYMBOL(xt_unregister_match);
@@ -575,15 +575,18 @@ int xt_register_table(struct xt_table *table,
 {
 	int ret;
 	struct xt_table_info *private;
+	struct xt_table *t;
 
 	ret = mutex_lock_interruptible(&xt[table->af].mutex);
 	if (ret != 0)
 		return ret;
 
 	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&xt[table->af].tables, table->name)) {
-		ret = -EEXIST;
-		goto unlock;
+	list_for_each_entry(t, &xt[table->af].tables, list) {
+		if (strcmp(t->name, table->name) == 0) {
+			ret = -EEXIST;
+			goto unlock;
+		}
 	}
 
 	/* Simplifies replace_table code. */
@@ -598,7 +601,7 @@ int xt_register_table(struct xt_table *table,
 	/* save number of initial entries */
 	private->initial_entries = private->number;
 
-	list_prepend(&xt[table->af].tables, table);
+	list_add(&table->list, &xt[table->af].tables);
 
 	ret = 0;
  unlock:
@@ -613,7 +616,7 @@ void *xt_unregister_table(struct xt_table *table)
 
 	mutex_lock(&xt[table->af].mutex);
 	private = table->private;
-	LIST_DELETE(&xt[table->af].tables, table);
+	list_del(&table->list);
 	mutex_unlock(&xt[table->af].mutex);
 
 	return private;
-- 
GitLab


From 50b9f1d509eb998db73cd769c9511186474f566e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:58:17 -0700
Subject: [PATCH 0691/1063] [NETFILTER]: xt_conntrack: clean up overly long
 lines

Also fix some whitespace errors and use the NAT bits instead of deriving
the state manually.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_conntrack.c | 179 +++++++++++++++++++----------------
 1 file changed, 98 insertions(+), 81 deletions(-)

diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 39c57e9f75635..0ea501a2fda5b 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -45,7 +45,7 @@ match(const struct sk_buff *skb,
 
 	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
 
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
 
 	if (ct == &ip_conntrack_untracked)
 		statebit = XT_CONNTRACK_STATE_UNTRACKED;
@@ -54,63 +54,72 @@ match(const struct sk_buff *skb,
  	else
  		statebit = XT_CONNTRACK_STATE_INVALID;
  
-	if(sinfo->flags & XT_CONNTRACK_STATE) {
+	if (sinfo->flags & XT_CONNTRACK_STATE) {
 		if (ct) {
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
+			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_SNAT;
-
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
+			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_DNAT;
 		}
-
-		if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
+		if (FWINV((statebit & sinfo->statemask) == 0,
+			  XT_CONNTRACK_STATE))
 			return 0;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
-                	return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
+	if (ct == NULL) {
+		if (sinfo->flags & ~XT_CONNTRACK_STATE)
 			return 0;
+		return 1;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
-			return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_PROTO &&
+	    FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
+		  XT_CONNTRACK_PROTO))
+                return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
+		   sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
+		  XT_CONNTRACK_ORIGSRC))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
+		   sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
+		  XT_CONNTRACK_ORIGDST))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
+		   sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
+		  XT_CONNTRACK_REPLSRC))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
-		unsigned long expires;
+	if (sinfo->flags & XT_CONNTRACK_REPLDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
+		   sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
+		  XT_CONNTRACK_REPLDST))
+		return 0;
 
-		if(!ct)
-			return 0;
+	if (sinfo->flags & XT_CONNTRACK_STATUS &&
+	    FWINV((ct->status & sinfo->statusmask) == 0,
+		  XT_CONNTRACK_STATUS))
+		return 0;
 
-		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+	if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
+		unsigned long expires = timer_pending(&ct->timeout) ?
+					(ct->timeout.expires - jiffies)/HZ : 0;
 
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
+		if (FWINV(!(expires >= sinfo->expires_min &&
+			    expires <= sinfo->expires_max),
+			  XT_CONNTRACK_EXPIRES))
 			return 0;
 	}
-
 	return 1;
 }
 
@@ -141,63 +150,72 @@ match(const struct sk_buff *skb,
  	else
  		statebit = XT_CONNTRACK_STATE_INVALID;
  
-	if(sinfo->flags & XT_CONNTRACK_STATE) {
+	if (sinfo->flags & XT_CONNTRACK_STATE) {
 		if (ct) {
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
+			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_SNAT;
-
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
+			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_DNAT;
 		}
-
-		if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
+		if (FWINV((statebit & sinfo->statemask) == 0,
+			  XT_CONNTRACK_STATE))
 			return 0;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
-                	return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
+	if (ct == NULL) {
+		if (sinfo->flags & ~XT_CONNTRACK_STATE)
 			return 0;
+		return 1;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
-			return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_PROTO &&
+	    FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
+	    	  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
+		  XT_CONNTRACK_PROTO))
+                return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip &
+	    	   sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
+		  XT_CONNTRACK_ORIGSRC))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip &
+	    	   sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
+		  XT_CONNTRACK_ORIGDST))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip &
+	    	   sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
+		  XT_CONNTRACK_REPLSRC))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
-		unsigned long expires;
+	if (sinfo->flags & XT_CONNTRACK_REPLDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip &
+	    	   sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
+		  XT_CONNTRACK_REPLDST))
+		return 0;
 
-		if(!ct)
-			return 0;
+	if (sinfo->flags & XT_CONNTRACK_STATUS &&
+	    FWINV((ct->status & sinfo->statusmask) == 0,
+	    	  XT_CONNTRACK_STATUS))
+		return 0;
 
-		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
+		unsigned long expires = timer_pending(&ct->timeout) ?
+					(ct->timeout.expires - jiffies)/HZ : 0;
 
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
+		if (FWINV(!(expires >= sinfo->expires_min &&
+			    expires <= sinfo->expires_max),
+			  XT_CONNTRACK_EXPIRES))
 			return 0;
 	}
-
 	return 1;
 }
 
@@ -220,8 +238,7 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static void
-destroy(const struct xt_match *match, void *matchinfo)
+static void destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
-- 
GitLab


From 68e1f188de535865d4543bae92d168c007857e7b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:58:35 -0700
Subject: [PATCH 0692/1063] [NETFILTER]: ipt_TCPMSS: reformat

- fix whitespace error
- break lines at 80 characters
- reformat some expressions to be more readable

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_TCPMSS.c | 58 ++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index ac8a35eeea3f7..bfc8d9c7d020c 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -31,8 +31,10 @@ static inline unsigned int
 optlen(const u_int8_t *opt, unsigned int offset)
 {
 	/* Beware zero-length options: make finite progress */
-	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1;
-	else return opt[offset+1];
+	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+		return 1;
+	else
+		return opt[offset+1];
 }
 
 static unsigned int
@@ -55,7 +57,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 
 	iph = (*pskb)->nh.iph;
 	tcplen = (*pskb)->len - iph->ihl*4;
-
 	tcph = (void *)iph + iph->ihl*4;
 
 	/* Since it passed flags test in tcp match, we know it is is
@@ -71,37 +72,39 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 		return NF_DROP;
 	}
 
-	if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
-		if(!(*pskb)->dst) {
+	if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
+		if (!(*pskb)->dst) {
 			if (net_ratelimit())
-				printk(KERN_ERR
-			       		"ipt_tcpmss_target: no dst?! can't determine path-MTU\n");
+				printk(KERN_ERR "ipt_tcpmss_target: "
+				       "no dst?! can't determine path-MTU\n");
 			return NF_DROP; /* or IPT_CONTINUE ?? */
 		}
 
-		if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
+		if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
+					     sizeof(struct tcphdr)) {
 			if (net_ratelimit())
-				printk(KERN_ERR
-		       			"ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
+				printk(KERN_ERR "ipt_tcpmss_target: "
+				       "unknown or invalid path-MTU (%d)\n",
+				       dst_mtu((*pskb)->dst));
 			return NF_DROP; /* or IPT_CONTINUE ?? */
 		}
 
-		newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
+		newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) -
+						 sizeof(struct tcphdr);
 	} else
 		newmss = tcpmssinfo->mss;
 
  	opt = (u_int8_t *)tcph;
-	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){
-		if ((opt[i] == TCPOPT_MSS) &&
-		    ((tcph->doff*4 - i) >= TCPOLEN_MSS) &&
-		    (opt[i+1] == TCPOLEN_MSS)) {
+	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
+		if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
+		    opt[i+1] == TCPOLEN_MSS) {
 			u_int16_t oldmss;
 
 			oldmss = (opt[i+2] << 8) | opt[i+3];
 
-			if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
-				(oldmss <= newmss))
-					return IPT_CONTINUE;
+			if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+			    oldmss <= newmss)
+				return IPT_CONTINUE;
 
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = (newmss & 0x00ff);
@@ -113,7 +116,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 
 			DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
 			       "->%u.%u.%u.%u:%hu changed TCP MSS option"
-			       " (from %u to %u)\n", 
+			       " (from %u to %u)\n",
 			       NIPQUAD((*pskb)->nh.iph->saddr),
 			       ntohs(tcph->source),
 			       NIPQUAD((*pskb)->nh.iph->daddr),
@@ -193,9 +196,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
 {
 	const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
 
-	if (strcmp(m->u.kernel.match->name, "tcp") == 0
-	    && (tcpinfo->flg_cmp & TH_SYN)
-	    && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
+	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
+	    tcpinfo->flg_cmp & TH_SYN &&
+	    !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
 		return 1;
 
 	return 0;
@@ -212,11 +215,12 @@ ipt_tcpmss_checkentry(const char *tablename,
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
 	const struct ipt_entry *e = e_void;
 
-	if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && 
-			((hook_mask & ~((1 << NF_IP_FORWARD)
-			   	| (1 << NF_IP_LOCAL_OUT)
-			   	| (1 << NF_IP_POST_ROUTING))) != 0)) {
-		printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+	if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+	    (hook_mask & ~((1 << NF_IP_FORWARD) |
+			   (1 << NF_IP_LOCAL_OUT) |
+			   (1 << NF_IP_POST_ROUTING))) != 0) {
+		printk("TCPMSS: path-MTU clamping only supported in "
+		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return 0;
 	}
 
-- 
GitLab


From 2be344c4461d29b99113f62fa91c5ceab9997329 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:58:50 -0700
Subject: [PATCH 0693/1063] [NETFILTER]: ipt_TCPMSS: remove impossible
 condition

Every skb must have a dst_entry at this point.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_TCPMSS.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index bfc8d9c7d020c..b2d3c4f992d19 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -73,13 +73,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	}
 
 	if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
-		if (!(*pskb)->dst) {
-			if (net_ratelimit())
-				printk(KERN_ERR "ipt_tcpmss_target: "
-				       "no dst?! can't determine path-MTU\n");
-			return NF_DROP; /* or IPT_CONTINUE ?? */
-		}
-
 		if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
 					     sizeof(struct tcphdr)) {
 			if (net_ratelimit())
-- 
GitLab


From ecb70c95c45ece0935b076295388267f6d8db65c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:59:06 -0700
Subject: [PATCH 0694/1063] [NETFILTER]: ipt_TCPMSS: misc cleanup

- remove debugging cruft
- remove printk for reallocation failures
- remove unused addition

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_TCPMSS.c | 36 ++-------------------------------
 1 file changed, 2 insertions(+), 34 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index b2d3c4f992d19..4246c4321e5bb 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -21,12 +21,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables TCP MSS modification module");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static inline unsigned int
 optlen(const u_int8_t *opt, unsigned int offset)
 {
@@ -106,16 +100,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 							   htons(oldmss)^0xFFFF,
 							   htons(newmss),
 							   tcph->check, 0);
-
-			DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
-			       "->%u.%u.%u.%u:%hu changed TCP MSS option"
-			       " (from %u to %u)\n",
-			       NIPQUAD((*pskb)->nh.iph->saddr),
-			       ntohs(tcph->source),
-			       NIPQUAD((*pskb)->nh.iph->daddr),
-			       ntohs(tcph->dest),
-			       oldmss, newmss);
-			goto retmodified;
+			return IPT_CONTINUE;
 		}
 	}
 
@@ -127,13 +112,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 
 		newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
 					 TCPOLEN_MSS, GFP_ATOMIC);
-		if (!newskb) {
-			if (net_ratelimit())
-				printk(KERN_ERR "ipt_tcpmss_target:"
-				       " unable to allocate larger skb\n");
+		if (!newskb)
 			return NF_DROP;
-		}
-
 		kfree_skb(*pskb);
 		*pskb = newskb;
 		iph = (*pskb)->nh.iph;
@@ -149,8 +129,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 					   htons(tcplen) ^ 0xFFFF,
 				           htons(tcplen + TCPOLEN_MSS),
 					   tcph->check, 1);
-	tcplen += TCPOLEN_MSS;
-
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
@@ -170,16 +148,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
 				    newtotlen, iph->check);
 	iph->tot_len = newtotlen;
-
-	DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
-	       "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
-	       NIPQUAD((*pskb)->nh.iph->saddr),
-	       ntohs(tcph->source),
-	       NIPQUAD((*pskb)->nh.iph->daddr),
-	       ntohs(tcph->dest),
-	       newmss);
-
- retmodified:
 	return IPT_CONTINUE;
 }
 
-- 
GitLab


From 57dab5d0bfee21663ed20222b4cedeb0655ba1f3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:59:25 -0700
Subject: [PATCH 0695/1063] [NETFILTER]: xt_limit: don't reset state on
 unrelated rule updates

The limit match reinitializes its state whenever the ruleset changes,
which means it will forget about previously used credits.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_limit.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index b9c9ff3a06ea1..8bfcbdfa8783c 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -122,16 +122,16 @@ ipt_limit_checkentry(const char *tablename,
 		return 0;
 	}
 
-	/* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
-	   128. */
-	r->prev = jiffies;
-	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
-	r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
-	r->cost = user2credits(r->avg);
-
 	/* For SMP, we only want to use one set of counters. */
 	r->master = r;
-
+	if (r->cost == 0) {
+		/* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
+		   128. */
+		r->prev = jiffies;
+		r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
+		r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
+		r->cost = user2credits(r->avg);
+	}
 	return 1;
 }
 
-- 
GitLab


From 9123de2c043996050bacf77031cad845f5976f5d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 11:59:42 -0700
Subject: [PATCH 0696/1063] [NETFILTER]: ip6table_mangle: reroute when nfmark
 changes in NF_IP6_LOCAL_OUT

Now that IPv6 supports policy routing we need to reroute in NF_IP6_LOCAL_OUT
when the mark value changes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6.h       | 1 +
 include/net/ip6_route.h              | 2 --
 net/ipv6/netfilter/ip6table_mangle.c | 8 ++------
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 52a7b9e76428c..d97e268cdfe50 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -73,6 +73,7 @@ enum nf_ip6_hook_priorities {
 };
 
 #ifdef CONFIG_NETFILTER
+extern int ip6_route_me_harder(struct sk_buff *skb);
 extern unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 				    unsigned int dataoff, u_int8_t protocol);
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2979095700419..6ca6b71dfe0f0 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -57,8 +57,6 @@ extern void			ip6_route_input(struct sk_buff *skb);
 extern struct dst_entry *	ip6_route_output(struct sock *sk,
 						 struct flowi *fl);
 
-extern int			ip6_route_me_harder(struct sk_buff *skb);
-
 extern void			ip6_route_init(void);
 extern void			ip6_route_cleanup(void);
 
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 32db04fd83101..386ea260e7678 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -180,12 +180,8 @@ ip6t_local_hook(unsigned int hook,
 		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
 		    || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
 		    || (*pskb)->nfmark != nfmark
-		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) {
-
-		/* something which could affect routing has changed */
-
-		DEBUGP("ip6table_mangle: we'd need to re-route a packet\n");
-	}
+		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+		return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
 
 	return ret;
 }
-- 
GitLab


From 90d47db4a06f93f7339618b2a4f0cb032ef8d6d5 Mon Sep 17 00:00:00 2001
From: Dmitry Mishin <dim@openvz.org>
Date: Wed, 20 Sep 2006 12:00:21 -0700
Subject: [PATCH 0697/1063] [NETFILTER]: x_tables: small check_entry &
 module_refcount cleanup

Although standard_target has target->me == NULL, module_put() should be
called for it as for the other targets, because there was a matching
try_module_get() earlier.

Signed-off-by: Dmitry Mishin <dim@openvz.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 2 +-
 net/ipv4/netfilter/ip_tables.c  | 2 +-
 net/ipv6/netfilter/ip6_tables.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index aaeaa9ce0f28d..85f0d73ebfb4c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -485,7 +485,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
 	if (t->u.kernel.target == &arpt_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto out;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a0f36806998c5..38e1e4fba0db0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -573,7 +573,7 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	if (t->u.kernel.target == &ipt_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto cleanup_matches;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 73d477ce216b3..4ab368fa0b8f0 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -610,7 +610,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 	if (t->u.kernel.target == &ip6t_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto cleanup_matches;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-- 
GitLab


From 01f348484dd8509254d045e3ad49029716eca6a1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 Sep 2006 12:00:45 -0700
Subject: [PATCH 0698/1063] [NETFILTER]: ctnetlink: simplify the code to dump
 the conntrack table

Merge the bits to dump the conntrack table and the ones to dump and
zero counters in a single piece of code. This patch does not change
the default behaviour if accounting is not enabled.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 63 ++++-----------------
 net/netfilter/nf_conntrack_netlink.c      | 67 ++++-------------------
 2 files changed, 20 insertions(+), 110 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index a20b0e385f1b9..52eddea27e937 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -436,6 +436,11 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
+#ifdef CONFIG_NF_CT_ACCT
+			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+						IPCTNL_MSG_CT_GET_CTRZERO)
+				memset(&ct->counters, 0, sizeof(ct->counters));
+#endif
 		}
 		if (cb->args[1]) {
 			cb->args[1] = 0;
@@ -451,46 +456,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-#ifdef CONFIG_IP_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct ip_conntrack *ct = NULL;
-	struct ip_conntrack_tuple_hash *h;
-	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[1];
-
-	DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, 
-			cb->args[0], *id);
-
-	write_lock_bh(&ip_conntrack_lock);
-	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
-			h = (struct ip_conntrack_tuple_hash *) i;
-			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
-				continue;
-			ct = tuplehash_to_ctrack(h);
-			if (ct->id <= *id)
-				continue;
-			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
-		                        	cb->nlh->nlmsg_seq,
-						IPCTNL_MSG_CT_NEW,
-						1, ct) < 0)
-				goto out;
-			*id = ct->id;
-
-			memset(&ct->counters, 0, sizeof(ct->counters));
-		}
-	}
-out:	
-	write_unlock_bh(&ip_conntrack_lock);
-
-	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
-	return skb->len;
-}
-#endif
-
 static const size_t cta_min_ip[CTA_IP_MAX] = {
 	[CTA_IP_V4_SRC-1]	= sizeof(u_int32_t),
 	[CTA_IP_V4_DST-1]	= sizeof(u_int32_t),
@@ -775,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		if (msg->nfgen_family != AF_INET)
 			return -EAFNOSUPPORT;
 
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
-					IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_IP_NF_CT_ACCT
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table_w,
-						ctnetlink_done)) != 0)
-				return -EINVAL;
-#else
+#ifndef CONFIG_IP_NF_CT_ACCT
+		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		} else {
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-		      		                        ctnetlink_dump_table,
-		                                	ctnetlink_done)) != 0)
+		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+	      		                        ctnetlink_dump_table,
+	                                	ctnetlink_done)) != 0)
 			return -EINVAL;
-		}
 
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (rlen > skb->len)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 8cd85cfd9a02c..1721f7c78c77b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -455,6 +455,11 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
+#ifdef CONFIG_NF_CT_ACCT
+			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+						IPCTNL_MSG_CT_GET_CTRZERO)
+				memset(&ct->counters, 0, sizeof(ct->counters));
+#endif
 		}
 		if (cb->args[1]) {
 			cb->args[1] = 0;
@@ -470,50 +475,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-#ifdef CONFIG_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct nf_conn *ct = NULL;
-	struct nf_conntrack_tuple_hash *h;
-	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[1];
-	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
-	u_int8_t l3proto = nfmsg->nfgen_family;	
-
-	DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, 
-			cb->args[0], *id);
-
-	write_lock_bh(&nf_conntrack_lock);
-	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
-			h = (struct nf_conntrack_tuple_hash *) i;
-			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
-				continue;
-			ct = nf_ct_tuplehash_to_ctrack(h);
-			if (l3proto && L3PROTO(ct) != l3proto)
-				continue;
-			if (ct->id <= *id)
-				continue;
-			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
-		                        	cb->nlh->nlmsg_seq,
-						IPCTNL_MSG_CT_NEW,
-						1, ct) < 0)
-				goto out;
-			*id = ct->id;
-
-			memset(&ct->counters, 0, sizeof(ct->counters));
-		}
-	}
-out:	
-	write_unlock_bh(&nf_conntrack_lock);
-
-	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
-	return skb->len;
-}
-#endif
-
 static inline int
 ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple)
 {
@@ -788,22 +749,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		u32 rlen;
 
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
-					IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_NF_CT_ACCT
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table_w,
-						ctnetlink_done)) != 0)
-				return -EINVAL;
-#else
+#ifndef CONFIG_NF_CT_ACCT
+		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		} else {
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-		      		                        ctnetlink_dump_table,
-		                                	ctnetlink_done)) != 0)
+		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+						ctnetlink_dump_table,
+						ctnetlink_done)) != 0)
 			return -EINVAL;
-		}
 
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (rlen > skb->len)
-- 
GitLab


From 5251e2d2125407bbff0c39394a4011be9ed8b5d0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 Sep 2006 12:01:06 -0700
Subject: [PATCH 0699/1063] [NETFILTER]: conntrack: fix race condition in
 early_drop

On SMP systems the maximum number of conntracks can be exceeded
under heavy stress due to an existing race condition.

        CPU A                   CPU B
     atomic_read()               ...
     early_drop()                ...
        ...                  atomic_read()
   allocate conntrack      allocate conntrack
     atomic_inc()             atomic_inc()

This patch moves the counter increment before the early drop stage.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_core.c |  9 ++++++---
 net/netfilter/nf_conntrack_core.c      | 10 ++++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 2568d480e9a92..422a662194cc4 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -622,11 +622,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
 		ip_conntrack_hash_rnd_initted = 1;
 	}
 
+	/* We don't want any race condition at early drop stage */
+	atomic_inc(&ip_conntrack_count);
+
 	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
 		if (!early_drop(&ip_conntrack_hash[hash])) {
+			atomic_dec(&ip_conntrack_count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "ip_conntrack: table full, dropping"
@@ -638,6 +642,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
 	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
+		atomic_dec(&ip_conntrack_count);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -651,8 +656,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
 
-	atomic_inc(&ip_conntrack_count);
-
 	return conntrack;
 }
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 927137b8b3b5d..adeafa2cc339e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -848,11 +848,15 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 		nf_conntrack_hash_rnd_initted = 1;
 	}
 
+	/* We don't want any race condition at early drop stage */
+	atomic_inc(&nf_conntrack_count);
+
 	if (nf_conntrack_max
-	    && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
+	    && atomic_read(&nf_conntrack_count) > nf_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
 		if (!early_drop(&nf_conntrack_hash[hash])) {
+			atomic_dec(&nf_conntrack_count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "nf_conntrack: table full, dropping"
@@ -903,10 +907,12 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	init_timer(&conntrack->timeout);
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
+	read_unlock_bh(&nf_ct_cache_lock);
 
-	atomic_inc(&nf_conntrack_count);
+	return conntrack;
 out:
 	read_unlock_bh(&nf_ct_cache_lock);
+	atomic_dec(&nf_conntrack_count);
 	return conntrack;
 }
 
-- 
GitLab


From ca39df6cdfbe2ea210e31117f5d469576cfe9008 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:01:34 -0700
Subject: [PATCH 0700/1063] [NETFILTER]: ipt_TTL: fix checksum update bug

Fix regression introduced by the incremental checksum patches.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_TTL.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 214d9d9c428f9..96e79cc6d0f23 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -54,8 +54,8 @@ ipt_ttl_target(struct sk_buff **pskb,
 	}
 
 	if (new_ttl != iph->ttl) {
-		iph->check = nf_csum_update((iph->ttl << 8) ^ 0xFFFF,
-					    new_ttl << 8,
+		iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF,
+					    ntohs(new_ttl << 8),
 					    iph->check);
 		iph->ttl = new_ttl;
 	}
-- 
GitLab


From 7cf73936fe6bb9b027b75fd8fa3c634fe74843d3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:02:21 -0700
Subject: [PATCH 0701/1063] [NETFILTER]: ip6t_HL: remove write-only variable

Noticed by Alexey Dobriyan <adobriyan@gmail.com>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6t_HL.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index e54ea92d107b7..435750f664dd1 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -26,7 +26,6 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 {
 	struct ipv6hdr *ip6h;
 	const struct ip6t_HL_info *info = targinfo;
-	u_int16_t diffs[2];
 	int new_hl;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
@@ -53,11 +52,8 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 			break;
 	}
 
-	if (new_hl != ip6h->hop_limit) {
-		diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF;
+	if (new_hl != ip6h->hop_limit)
 		ip6h->hop_limit = new_hl;
-		diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8);
-	}
 
 	return IP6T_CONTINUE;
 }
-- 
GitLab


From 71cd83a8bde61612b277fd5bf91503ac1ad61e23 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 20 Sep 2006 12:02:44 -0700
Subject: [PATCH 0702/1063] [NETFILTER]: xt_policy: remove dups in .family

Fix the sparse "defined twice" warning caused by .family being
initialized twice in each xt_policy_match entry.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_policy.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index e9d81378d6532..46bde2b1e1e0f 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -171,7 +171,6 @@ static struct xt_match xt_policy_match[] = {
 		.checkentry 	= checkentry,
 		.match		= match,
 		.matchsize	= sizeof(struct xt_policy_info),
-		.family		= AF_INET,
 		.me		= THIS_MODULE,
 	},
 	{
@@ -180,7 +179,6 @@ static struct xt_match xt_policy_match[] = {
 		.checkentry	= checkentry,
 		.match		= match,
 		.matchsize	= sizeof(struct xt_policy_info),
-		.family		= AF_INET6,
 		.me		= THIS_MODULE,
 	},
 };
-- 
GitLab


From c1fe3ca5106d9568791433fa6c7f27e71ac69e1b Mon Sep 17 00:00:00 2001
From: George Hansper <georgeh@anstat.com.au>
Date: Wed, 20 Sep 2006 12:03:23 -0700
Subject: [PATCH 0703/1063] [NETFILTER]: TCP conntrack: improve dead connection
 detection

Don't count window updates as retransmissions.

Signed-off-by: George Hansper <georgeh@anstat.com.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_tcp.h  | 1 +
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 +++-
 net/netfilter/nf_conntrack_proto_tcp.c      | 4 +++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index b2feeffde3849..6b01ba2977270 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -49,6 +49,7 @@ struct ip_ct_tcp
 	u_int32_t	last_seq;	/* Last sequence number seen in dir */
 	u_int32_t	last_ack;	/* Last sequence number seen in opposite dir */
 	u_int32_t	last_end;	/* Last seq + len */
+	u_int16_t	last_win;	/* Last window advertisement seen in dir */
 };
 
 #endif /* __KERNEL__ */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 75a7237eb8c15..03ae9a04cb37c 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 			if (state->last_dir == dir
 			    && state->last_seq == seq
 			    && state->last_ack == ack
-			    && state->last_end == end)
+			    && state->last_end == end
+			    && state->last_win == win)
 				state->retrans++;
 			else {
 				state->last_dir = dir;
 				state->last_seq = seq;
 				state->last_ack = ack;
 				state->last_end = end;
+				state->last_win = win;
 				state->retrans = 0;
 			}
 		}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9fc0ee61f92a4..238bbb5b72ef5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -688,13 +688,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 			if (state->last_dir == dir
 			    && state->last_seq == seq
 			    && state->last_ack == ack
-			    && state->last_end == end)
+			    && state->last_end == end
+			    && state->last_win == win)
 				state->retrans++;
 			else {
 				state->last_dir = dir;
 				state->last_seq = seq;
 				state->last_ack = ack;
 				state->last_end = end;
+				state->last_win = win;
 				state->retrans = 0;
 			}
 		}
-- 
GitLab


From 1192e403e9ea2dc23bbbe2b4fe9bdbc47e8c6056 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 20 Sep 2006 12:03:46 -0700
Subject: [PATCH 0704/1063] [NETFILTER]: make some netfilter globals
 __read_mostly

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_core.c |  6 +++---
 net/ipv4/netfilter/ip_queue.c          |  8 ++++----
 net/ipv6/netfilter/ip6_queue.c         |  8 ++++----
 net/netfilter/nf_conntrack_core.c      | 10 +++++-----
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 422a662194cc4..2b6f24fc727e0 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -63,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0);
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
 static LIST_HEAD(helpers);
 unsigned int ip_conntrack_htable_size __read_mostly = 0;
 int ip_conntrack_max __read_mostly;
-struct list_head *ip_conntrack_hash;
+struct list_head *ip_conntrack_hash __read_mostly;
 static kmem_cache_t *ip_conntrack_cachep __read_mostly;
 static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
 unsigned int ip_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc;
+static int ip_conntrack_vmalloc __read_mostly;
 
 static unsigned int ip_conntrack_next_id;
 static unsigned int ip_conntrack_expect_next_id;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 80060cbe4a07d..7edad790478a3 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -52,15 +52,15 @@ struct ipq_queue_entry {
 
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
-static unsigned char copy_mode = IPQ_COPY_NONE;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
 static unsigned int queue_dropped = 0;
 static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index d322e8395794c..9510c24ca8d22 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -56,15 +56,15 @@ struct ipq_queue_entry {
 
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
-static unsigned char copy_mode = IPQ_COPY_NONE;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
 static unsigned int queue_dropped = 0;
 static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index adeafa2cc339e..093b3ddc513c9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -73,17 +73,17 @@ atomic_t nf_conntrack_count = ATOMIC_INIT(0);
 
 void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
 LIST_HEAD(nf_conntrack_expect_list);
-struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
-struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
+struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly;
+struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly;
 static LIST_HEAD(helpers);
 unsigned int nf_conntrack_htable_size __read_mostly = 0;
 int nf_conntrack_max __read_mostly;
-struct list_head *nf_conntrack_hash;
-static kmem_cache_t *nf_conntrack_expect_cachep;
+struct list_head *nf_conntrack_hash __read_mostly;
+static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
 struct nf_conn nf_conntrack_untracked;
 unsigned int nf_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
-static int nf_conntrack_vmalloc;
+static int nf_conntrack_vmalloc __read_mostly;
 
 static unsigned int nf_conntrack_next_id;
 static unsigned int nf_conntrack_expect_next_id;
-- 
GitLab


From bec71b162747708d4b45b0cd399b484f52f2901a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:04:08 -0700
Subject: [PATCH 0705/1063] [NETFILTER]: ip_tables: fix module refcount leaks
 in compat error paths

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 39 +++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 38e1e4fba0db0..3d5d4a4640c32 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1529,7 +1529,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
 			e->comefrom, &off, &j);
 	if (ret != 0)
-		goto out;
+		goto cleanup_matches;
 
 	t = ipt_get_target(e);
 	target = try_then_request_module(xt_find_target(AF_INET,
@@ -1539,7 +1539,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	if (IS_ERR(target) || !target) {
 		duprintf("check_entry: `%s' not found\n", t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
-		goto out;
+		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
 
@@ -1566,14 +1566,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 
 	(*i)++;
 	return 0;
+
 out:
+	module_put(t->u.kernel.target->me);
+cleanup_matches:
 	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
 }
 
 static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
 	void **dstptr, compat_uint_t *size, const char *name,
-	const struct ipt_ip *ip, unsigned int hookmask)
+	const struct ipt_ip *ip, unsigned int hookmask, int *i)
 {
 	struct ipt_entry_match *dm;
 	struct ipt_match *match;
@@ -1590,16 +1593,22 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
 			     name, hookmask, ip->proto,
 			     ip->invflags & IPT_INV_PROTO);
 	if (ret)
-		return ret;
+		goto err;
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ip, match, dm->data,
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err;
 	}
+	(*i)++;
 	return 0;
+
+err:
+	module_put(m->u.kernel.match->me);
+	return ret;
 }
 
 static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
@@ -1610,18 +1619,19 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	struct ipt_target *target;
 	struct ipt_entry *de;
 	unsigned int origsize;
-	int ret, h;
+	int ret, h, j;
 
 	ret = 0;
 	origsize = *size;
 	de = (struct ipt_entry *)*dstptr;
 	memcpy(de, e, sizeof(struct ipt_entry));
 
+	j = 0;
 	*dstptr += sizeof(struct compat_ipt_entry);
 	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
-			name, &de->ip, de->comefrom);
+			name, &de->ip, de->comefrom, &j);
 	if (ret)
-		goto out;
+		goto cleanup_matches;
 	de->target_offset = e->target_offset - (origsize - *size);
 	t = ipt_get_target(e);
 	target = t->u.kernel.target;
@@ -1644,21 +1654,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 			      name, e->comefrom, e->ip.proto,
 			      e->ip.invflags & IPT_INV_PROTO);
 	if (ret)
-		goto out;
+		goto err;
 
 	ret = -EINVAL;
 	if (t->u.kernel.target == &ipt_standard_target) {
 		if (!standard_check(t, *size))
-			goto out;
+			goto err;
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, de, target,
 						      t->data, de->comefrom)) {
 		duprintf("ip_tables: compat: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
-		goto out;
+		goto err;
 	}
 	ret = 0;
-out:
+	return ret;
+
+err:
+	module_put(t->u.kernel.target->me);
+cleanup_matches:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
 }
 
-- 
GitLab


From 79030ed07de673e8451a03aecb9ada9f4d75d491 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:05:08 -0700
Subject: [PATCH 0706/1063] [NETFILTER]: ip_tables: revision support for compat
 code

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3d5d4a4640c32..673581db986eb 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1994,6 +1994,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 	return ret;
 }
 
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
 static int
 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 {
@@ -2007,8 +2009,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		ret = compat_get_entries(user, len);
 		break;
 	default:
-		duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
-		ret = -EINVAL;
+		ret = do_ipt_get_ctl(sk, cmd, user, len);
 	}
 	return ret;
 }
-- 
GitLab


From 9fa492cdc160cd27ce1046cb36f47d3b2b1efa21 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:05:37 -0700
Subject: [PATCH 0707/1063] [NETFILTER]: x_tables: simplify compat API

Split the xt_compat_match/xt_compat_target into smaller type-safe functions
performing just one operation. Handle all alignment and size-related
conversions centrally in these functions instead of requiring each module to
implement a full-blown conversion function. Replace ->compat callback by
->compat_from_user and ->compat_to_user callbacks, responsible for
converting just a single private structure.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h |  29 +++--
 net/ipv4/netfilter/ip_tables.c     | 115 ++++-------------
 net/netfilter/x_tables.c           | 192 ++++++++++++++++-------------
 3 files changed, 151 insertions(+), 185 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c832295dbf619..739a98eebe2c2 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -138,12 +138,6 @@ struct xt_counters_info
 
 #include <linux/netdevice.h>
 
-#ifdef CONFIG_COMPAT
-#define COMPAT_TO_USER		1
-#define COMPAT_FROM_USER	-1
-#define COMPAT_CALC_SIZE	0
-#endif
-
 struct xt_match
 {
 	struct list_head list;
@@ -176,7 +170,8 @@ struct xt_match
 	void (*destroy)(const struct xt_match *match, void *matchinfo);
 
 	/* Called when userspace align differs from kernel space one */
-	int (*compat)(void *match, void **dstptr, int *size, int convert);
+	void (*compat_from_user)(void *dst, void *src);
+	int (*compat_to_user)(void __user *dst, void *src);
 
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
@@ -186,6 +181,7 @@ struct xt_match
 
 	char *table;
 	unsigned int matchsize;
+	unsigned int compatsize;
 	unsigned int hooks;
 	unsigned short proto;
 
@@ -224,13 +220,15 @@ struct xt_target
 	void (*destroy)(const struct xt_target *target, void *targinfo);
 
 	/* Called when userspace align differs from kernel space one */
-	int (*compat)(void *target, void **dstptr, int *size, int convert);
+	void (*compat_from_user)(void *dst, void *src);
+	int (*compat_to_user)(void __user *dst, void *src);
 
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
 	char *table;
 	unsigned int targetsize;
+	unsigned int compatsize;
 	unsigned int hooks;
 	unsigned short proto;
 
@@ -387,9 +385,18 @@ struct compat_xt_counters_info
 
 extern void xt_compat_lock(int af);
 extern void xt_compat_unlock(int af);
-extern int xt_compat_match(void *match, void **dstptr, int *size, int convert);
-extern int xt_compat_target(void *target, void **dstptr, int *size,
-		int convert);
+
+extern int xt_compat_match_offset(struct xt_match *match);
+extern void xt_compat_match_from_user(struct xt_entry_match *m,
+				      void **dstptr, int *size);
+extern int xt_compat_match_to_user(struct xt_entry_match *m,
+				   void * __user *dstptr, int *size);
+
+extern int xt_compat_target_offset(struct xt_target *target);
+extern void xt_compat_target_from_user(struct xt_entry_target *t,
+				       void **dstptr, int *size);
+extern int xt_compat_target_to_user(struct xt_entry_target *t,
+				    void * __user *dstptr, int *size);
 
 #endif /* CONFIG_COMPAT */
 #endif /* __KERNEL__ */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 673581db986eb..800067d69a9ac 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -942,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset)
 	return delta;
 }
 
-struct compat_ipt_standard_target
+static void compat_standard_from_user(void *dst, void *src)
 {
-	struct compat_xt_entry_target target;
-	compat_int_t verdict;
-};
-
-struct compat_ipt_standard
-{
-	struct compat_ipt_entry entry;
-	struct compat_ipt_standard_target target;
-};
+	int v = *(compat_int_t *)src;
 
-#define IPT_ST_LEN		XT_ALIGN(sizeof(struct ipt_standard_target))
-#define IPT_ST_COMPAT_LEN	COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target))
-#define IPT_ST_OFFSET		(IPT_ST_LEN - IPT_ST_COMPAT_LEN)
+	if (v > 0)
+		v += compat_calc_jump(v);
+	memcpy(dst, &v, sizeof(v));
+}
 
-static int compat_ipt_standard_fn(void *target,
-		void **dstptr, int *size, int convert)
+static int compat_standard_to_user(void __user *dst, void *src)
 {
-	struct compat_ipt_standard_target compat_st, *pcompat_st;
-	struct ipt_standard_target st, *pst;
-	int ret;
+	compat_int_t cv = *(int *)src;
 
-	ret = 0;
-	switch (convert) {
-		case COMPAT_TO_USER:
-			pst = target;
-			memcpy(&compat_st.target, &pst->target,
-				sizeof(compat_st.target));
-			compat_st.verdict = pst->verdict;
-			if (compat_st.verdict > 0)
-				compat_st.verdict -=
-					compat_calc_jump(compat_st.verdict);
-			compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
-			if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
-				ret = -EFAULT;
-			*size -= IPT_ST_OFFSET;
-			*dstptr += IPT_ST_COMPAT_LEN;
-			break;
-		case COMPAT_FROM_USER:
-			pcompat_st = target;
-			memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN);
-			st.verdict = pcompat_st->verdict;
-			if (st.verdict > 0)
-				st.verdict += compat_calc_jump(st.verdict);
-			st.target.u.user.target_size = IPT_ST_LEN;
-			memcpy(*dstptr, &st, IPT_ST_LEN);
-			*size += IPT_ST_OFFSET;
-			*dstptr += IPT_ST_LEN;
-			break;
-		case COMPAT_CALC_SIZE:
-			*size += IPT_ST_OFFSET;
-			break;
-		default:
-			ret = -ENOPROTOOPT;
-			break;
-	}
-	return ret;
+	if (cv > 0)
+		cv -= compat_calc_jump(cv);
+	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
 }
 
 static inline int
 compat_calc_match(struct ipt_entry_match *m, int * size)
 {
-	if (m->u.kernel.match->compat)
-		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
-	else
-		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+	*size += xt_compat_match_offset(m->u.kernel.match);
 	return 0;
 }
 
@@ -1023,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
 	entry_offset = (void *)e - base;
 	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
 	t = ipt_get_target(e);
-	if (t->u.kernel.target->compat)
-		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
-	else
-		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+	off += xt_compat_target_offset(t->u.kernel.target);
 	newinfo->size -= off;
 	ret = compat_add_offset(entry_offset, off);
 	if (ret)
@@ -1412,17 +1364,13 @@ struct compat_ipt_replace {
 };
 
 static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
-		void __user **dstptr, compat_uint_t *size)
+		void * __user *dstptr, compat_uint_t *size)
 {
-	if (m->u.kernel.match->compat)
-		return m->u.kernel.match->compat(m, dstptr, size,
-				COMPAT_TO_USER);
-	else
-		return xt_compat_match(m, dstptr, size, COMPAT_TO_USER);
+	return xt_compat_match_to_user(m, dstptr, size);
 }
 
 static int compat_copy_entry_to_user(struct ipt_entry *e,
-		void __user **dstptr, compat_uint_t *size)
+		void * __user *dstptr, compat_uint_t *size)
 {
 	struct ipt_entry_target __user *t;
 	struct compat_ipt_entry __user *ce;
@@ -1442,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
 	if (ret)
 		goto out;
 	t = ipt_get_target(e);
-	if (t->u.kernel.target->compat)
-		ret = t->u.kernel.target->compat(t, dstptr, size,
-				COMPAT_TO_USER);
-	else
-		ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER);
+	ret = xt_compat_target_to_user(t, dstptr, size);
 	if (ret)
 		goto out;
 	ret = -EFAULT;
@@ -1478,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m,
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
-
-	if (m->u.kernel.match->compat)
-		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
-	else
-		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+	*size += xt_compat_match_offset(match);
 
 	(*i)++;
 	return 0;
@@ -1543,10 +1483,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	}
 	t->u.kernel.target = target;
 
-	if (t->u.kernel.target->compat)
-		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
-	else
-		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+	off += xt_compat_target_offset(target);
 	*size += off;
 	ret = compat_add_offset(entry_offset, off);
 	if (ret)
@@ -1584,10 +1521,7 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
 
 	dm = (struct ipt_entry_match *)*dstptr;
 	match = m->u.kernel.match;
-	if (match->compat)
-		match->compat(m, dstptr, size, COMPAT_FROM_USER);
-	else
-		xt_compat_match(m, dstptr, size, COMPAT_FROM_USER);
+	xt_compat_match_from_user(m, dstptr, size);
 
 	ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
 			     name, hookmask, ip->proto,
@@ -1635,10 +1569,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	de->target_offset = e->target_offset - (origsize - *size);
 	t = ipt_get_target(e);
 	target = t->u.kernel.target;
-	if (target->compat)
-		target->compat(t, dstptr, size, COMPAT_FROM_USER);
-	else
-		xt_compat_target(t, dstptr, size, COMPAT_FROM_USER);
+	xt_compat_target_from_user(t, dstptr, size);
 
 	de->next_offset = e->next_offset - (origsize - *size);
 	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
@@ -2205,7 +2136,9 @@ static struct ipt_target ipt_standard_target = {
 	.targetsize	= sizeof(int),
 	.family		= AF_INET,
 #ifdef CONFIG_COMPAT
-	.compat		= &compat_ipt_standard_fn,
+	.compatsize	= sizeof(compat_int_t),
+	.compat_from_user = compat_standard_from_user,
+	.compat_to_user	= compat_standard_to_user,
 #endif
 };
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index be7baf4f6846b..58522fc65d333 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -333,52 +333,65 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_match(void *match, void **dstptr, int *size, int convert)
+int xt_compat_match_offset(struct xt_match *match)
 {
-	struct xt_match *m;
-	struct compat_xt_entry_match *pcompat_m;
-	struct xt_entry_match *pm;
-	u_int16_t msize;
-	int off, ret;
+	u_int16_t csize = match->compatsize ? : match->matchsize;
+	return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_offset);
 
-	ret = 0;
-	m = ((struct xt_entry_match *)match)->u.kernel.match;
-	off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize);
-	switch (convert) {
-		case COMPAT_TO_USER:
-			pm = (struct xt_entry_match *)match;
-			msize = pm->u.user.match_size;
-			if (copy_to_user(*dstptr, pm, msize)) {
-				ret = -EFAULT;
-				break;
-			}
-			msize -= off;
-			if (put_user(msize, (u_int16_t *)*dstptr))
-				ret = -EFAULT;
-			*size -= off;
-			*dstptr += msize;
-			break;
-		case COMPAT_FROM_USER:
-			pcompat_m = (struct compat_xt_entry_match *)match;
-			pm = (struct xt_entry_match *)*dstptr;
-			msize = pcompat_m->u.user.match_size;
-			memcpy(pm, pcompat_m, msize);
-			msize += off;
-			pm->u.user.match_size = msize;
-			*size += off;
-			*dstptr += msize;
-			break;
-		case COMPAT_CALC_SIZE:
-			*size += off;
-			break;
-		default:
-			ret = -ENOPROTOOPT;
-			break;
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+			       int *size)
+{
+	struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+	int pad, off = xt_compat_match_offset(match);
+	u_int16_t msize = cm->u.user.match_size;
+
+	m = *dstptr;
+	memcpy(m, cm, sizeof(*cm));
+	if (match->compat_from_user)
+		match->compat_from_user(m->data, cm->data);
+	else
+		memcpy(m->data, cm->data, msize - sizeof(*cm));
+	pad = XT_ALIGN(match->matchsize) - match->matchsize;
+	if (pad > 0)
+		memset(m->data + match->matchsize, 0, pad);
+
+	msize += off;
+	m->u.user.match_size = msize;
+
+	*size += off;
+	*dstptr += msize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
+
+int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
+			    int *size)
+{
+	struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match __user *cm = *dstptr;
+	int off = xt_compat_match_offset(match);
+	u_int16_t msize = m->u.user.match_size - off;
+
+	if (copy_to_user(cm, m, sizeof(*cm)) ||
+	    put_user(msize, &cm->u.user.match_size))
+	    	return -EFAULT;
+
+	if (match->compat_to_user) {
+		if (match->compat_to_user((void __user *)cm->data, m->data))
+			return -EFAULT;
+	} else {
+		if (copy_to_user(cm->data, m->data, msize - sizeof(*cm)))
+			return -EFAULT;
 	}
-	return ret;
+
+	*size -= off;
+	*dstptr += msize;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(xt_compat_match);
-#endif
+EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+#endif /* CONFIG_COMPAT */
 
 int xt_check_target(const struct xt_target *target, unsigned short family,
 		    unsigned int size, const char *table, unsigned int hook_mask,
@@ -410,51 +423,64 @@ int xt_check_target(const struct xt_target *target, unsigned short family,
 EXPORT_SYMBOL_GPL(xt_check_target);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_target(void *target, void **dstptr, int *size, int convert)
+int xt_compat_target_offset(struct xt_target *target)
 {
-	struct xt_target *t;
-	struct compat_xt_entry_target *pcompat;
-	struct xt_entry_target *pt;
-	u_int16_t tsize;
-	int off, ret;
+	u_int16_t csize = target->compatsize ? : target->targetsize;
+	return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_offset);
 
-	ret = 0;
-	t = ((struct xt_entry_target *)target)->u.kernel.target;
-	off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize);
-	switch (convert) {
-		case COMPAT_TO_USER:
-			pt = (struct xt_entry_target *)target;
-			tsize = pt->u.user.target_size;
-			if (copy_to_user(*dstptr, pt, tsize)) {
-				ret = -EFAULT;
-				break;
-			}
-			tsize -= off;
-			if (put_user(tsize, (u_int16_t *)*dstptr))
-				ret = -EFAULT;
-			*size -= off;
-			*dstptr += tsize;
-			break;
-		case COMPAT_FROM_USER:
-			pcompat = (struct compat_xt_entry_target *)target;
-			pt = (struct xt_entry_target *)*dstptr;
-			tsize = pcompat->u.user.target_size;
-			memcpy(pt, pcompat, tsize);
-			tsize += off;
-			pt->u.user.target_size = tsize;
-			*size += off;
-			*dstptr += tsize;
-			break;
-		case COMPAT_CALC_SIZE:
-			*size += off;
-			break;
-		default:
-			ret = -ENOPROTOOPT;
-			break;
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+			        int *size)
+{
+	struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+	int pad, off = xt_compat_target_offset(target);
+	u_int16_t tsize = ct->u.user.target_size;
+
+	t = *dstptr;
+	memcpy(t, ct, sizeof(*ct));
+	if (target->compat_from_user)
+		target->compat_from_user(t->data, ct->data);
+	else
+		memcpy(t->data, ct->data, tsize - sizeof(*ct));
+	pad = XT_ALIGN(target->targetsize) - target->targetsize;
+	if (pad > 0)
+		memset(t->data + target->targetsize, 0, pad);
+
+	tsize += off;
+	t->u.user.target_size = tsize;
+
+	*size += off;
+	*dstptr += tsize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
+
+int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
+			     int *size)
+{
+	struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target __user *ct = *dstptr;
+	int off = xt_compat_target_offset(target);
+	u_int16_t tsize = t->u.user.target_size - off;
+
+	if (copy_to_user(ct, t, sizeof(*ct)) ||
+	    put_user(tsize, &ct->u.user.target_size))
+	    	return -EFAULT;
+
+	if (target->compat_to_user) {
+		if (target->compat_to_user((void __user *)ct->data, t->data))
+			return -EFAULT;
+	} else {
+		if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct)))
+			return -EFAULT;
 	}
-	return ret;
+
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(xt_compat_target);
+EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
 #endif
 
 struct xt_table_info *xt_alloc_table_info(unsigned int size)
-- 
GitLab


From bc80b656657251fc936d2d93fc70d5566c1c7d29 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:05:54 -0700
Subject: [PATCH 0708/1063] [NETFILTER]: xt_mark: add compat conversion
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_mark.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index e8059cd172754..934dddfbcd23d 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -50,6 +50,37 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_info {
+	compat_ulong_t	mark, mask;
+	u_int8_t	invert;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_mark_info *cm = src;
+	struct xt_mark_info m = {
+		.mark	= cm->mark,
+		.mask	= cm->mask,
+		.invert	= cm->invert,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_mark_info *m = src;
+	struct compat_xt_mark_info cm = {
+		.mark	= m->mark,
+		.mask	= m->mask,
+		.invert	= m->invert,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_match xt_mark_match[] = {
 	{
 		.name		= "mark",
@@ -57,6 +88,11 @@ static struct xt_match xt_mark_match[] = {
 		.checkentry	= checkentry,
 		.match		= match,
 		.matchsize	= sizeof(struct xt_mark_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_info),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
 		.me		= THIS_MODULE,
 	},
 	{
-- 
GitLab


From be7263b7b72ed9d5d25958f2b71e77e889e4845a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:06:10 -0700
Subject: [PATCH 0709/1063] [NETFILTER]: xt_MARK: add compat conversion
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_MARK.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index 782f8d8c3edf3..c6e860a7114f7 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -108,6 +108,35 @@ checkentry_v1(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_target_info_v1 {
+	compat_ulong_t	mark;
+	u_int8_t	mode;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
+};
+
+static void compat_from_user_v1(void *dst, void *src)
+{
+	struct compat_xt_mark_target_info_v1 *cm = src;
+	struct xt_mark_target_info_v1 m = {
+		.mark	= cm->mark,
+		.mode	= cm->mode,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user_v1(void __user *dst, void *src)
+{
+	struct xt_mark_target_info_v1 *m = src;
+	struct compat_xt_mark_target_info_v1 cm = {
+		.mark	= m->mark,
+		.mode	= m->mode,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_target xt_mark_target[] = {
 	{
 		.name		= "MARK",
@@ -126,6 +155,11 @@ static struct xt_target xt_mark_target[] = {
 		.checkentry	= checkentry_v1,
 		.target		= target_v1,
 		.targetsize	= sizeof(struct xt_mark_target_info_v1),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
+		.compat_from_user = compat_from_user_v1,
+		.compat_to_user	= compat_to_user_v1,
+#endif
 		.table		= "mangle",
 		.me		= THIS_MODULE,
 	},
-- 
GitLab


From f1eda05386ade8dad4e8e9b48ecbd9432b6739d9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:06:25 -0700
Subject: [PATCH 0710/1063] [NETFILTER]: xt_connmark: add compat conversion
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_connmark.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index c9104d05a19cf..92a5726ef237e 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -81,6 +81,37 @@ destroy(const struct xt_match *match, void *matchinfo)
 #endif
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_info {
+	compat_ulong_t	mark, mask;
+	u_int8_t	invert;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_connmark_info *cm = src;
+	struct xt_connmark_info m = {
+		.mark	= cm->mark,
+		.mask	= cm->mask,
+		.invert	= cm->invert,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_connmark_info *m = src;
+	struct compat_xt_connmark_info cm = {
+		.mark	= m->mark,
+		.mask	= m->mask,
+		.invert	= m->invert,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_match xt_connmark_match[] = {
 	{
 		.name		= "connmark",
@@ -89,6 +120,11 @@ static struct xt_match xt_connmark_match[] = {
 		.match		= match,
 		.destroy	= destroy,
 		.matchsize	= sizeof(struct xt_connmark_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_connmark_info),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
 		.me		= THIS_MODULE
 	},
 	{
-- 
GitLab


From 7ce975b9da93b46dbf6ba70a1b4751bec211d079 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:06:40 -0700
Subject: [PATCH 0711/1063] [NETFILTER]: xt_CONNMARK: add compat conversion
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_CONNMARK.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 6ccb45ee08802..c01524f817f04 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -108,6 +108,37 @@ checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_target_info {
+	compat_ulong_t	mark, mask;
+	u_int8_t	mode;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_connmark_target_info *cm = src;
+	struct xt_connmark_target_info m = {
+		.mark	= cm->mark,
+		.mask	= cm->mask,
+		.mode	= cm->mode,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_connmark_target_info *m = src;
+	struct compat_xt_connmark_target_info cm = {
+		.mark	= m->mark,
+		.mask	= m->mask,
+		.mode	= m->mode,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_target xt_connmark_target[] = {
 	{
 		.name		= "CONNMARK",
@@ -115,6 +146,11 @@ static struct xt_target xt_connmark_target[] = {
 		.checkentry	= checkentry,
 		.target		= target,
 		.targetsize	= sizeof(struct xt_connmark_target_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
 		.me		= THIS_MODULE
 	},
 	{
-- 
GitLab


From 02c63cf777c331121bfb6e9c1440a9835ad2f2a8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:07:06 -0700
Subject: [PATCH 0712/1063] [NETFILTER]: xt_limit: add compat conversion
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_limit.c | 49 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 8bfcbdfa8783c..fda7b7dec27d2 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -135,6 +135,50 @@ ipt_limit_checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_xt_rateinfo {
+	u_int32_t avg;
+	u_int32_t burst;
+
+	compat_ulong_t prev;
+	u_int32_t credit;
+	u_int32_t credit_cap, cost;
+
+	u_int32_t master;
+};
+
+/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
+ * master pointer, which does not need to be preserved. */
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_rateinfo *cm = src;
+	struct xt_rateinfo m = {
+		.avg		= cm->avg,
+		.burst		= cm->burst,
+		.prev		= cm->prev | (unsigned long)cm->master << 32,
+		.credit		= cm->credit,
+		.credit_cap	= cm->credit_cap,
+		.cost		= cm->cost,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_rateinfo *m = src;
+	struct compat_xt_rateinfo cm = {
+		.avg		= m->avg,
+		.burst		= m->burst,
+		.prev		= m->prev,
+		.credit		= m->credit,
+		.credit_cap	= m->credit_cap,
+		.cost		= m->cost,
+		.master		= m->prev >> 32,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_match xt_limit_match[] = {
 	{
 		.name		= "limit",
@@ -142,6 +186,11 @@ static struct xt_match xt_limit_match[] = {
 		.checkentry	= ipt_limit_checkentry,
 		.match		= ipt_limit_match,
 		.matchsize	= sizeof(struct xt_rateinfo),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_rateinfo),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
 		.me		= THIS_MODULE,
 	},
 	{
-- 
GitLab


From 127f15dd659b20e722561ff8c86dc058e1a72323 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:07:23 -0700
Subject: [PATCH 0713/1063] [NETFILTER]: ipt_hashlimit: add compat conversion
 functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_hashlimit.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index b5b74b07370cc..4f73a61aa3dd4 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -535,10 +535,39 @@ hashlimit_destroy(const struct xt_match *match, void *matchinfo)
 	htable_put(r->hinfo);
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_hashlimit_info {
+	char name[IFNAMSIZ];
+	struct hashlimit_cfg cfg;
+	compat_uptr_t hinfo;
+	compat_uptr_t master;
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+	int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+	memcpy(dst, src, off);
+	memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off);
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+	return copy_to_user(dst, src, off) ? -EFAULT : 0;
+}
+#endif
+
 static struct ipt_match ipt_hashlimit = {
 	.name		= "hashlimit",
 	.match		= hashlimit_match,
 	.matchsize	= sizeof(struct ipt_hashlimit_info),
+#ifdef CONFIG_COMPAT
+	.compatsize	= sizeof(struct compat_ipt_hashlimit_info),
+	.compat_from_user = compat_from_user,
+	.compat_to_user	= compat_to_user,
+#endif
 	.checkentry	= hashlimit_checkentry,
 	.destroy	= hashlimit_destroy,
 	.me		= THIS_MODULE
-- 
GitLab


From edd5a329cf69c112882e03c8ab55e985062a5d2a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:07:39 -0700
Subject: [PATCH 0714/1063] [NETFILTER]: PPTP conntrack: fix whitespace errors

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../linux/netfilter_ipv4/ip_conntrack_pptp.h  | 26 +++---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 76 +++++++--------
 net/ipv4/netfilter/ip_conntrack_proto_gre.c   | 28 +++---
 net/ipv4/netfilter/ip_nat_helper_pptp.c       | 92 +++++++++----------
 net/ipv4/netfilter/ip_nat_proto_gre.c         | 20 ++--
 5 files changed, 121 insertions(+), 121 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
index 816144c75de0e..88f66d3c87655 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
@@ -285,19 +285,19 @@ struct PptpSetLinkInfo {
 };
 
 union pptp_ctrl_union {
-		struct PptpStartSessionRequest	sreq;
-		struct PptpStartSessionReply	srep;
-		struct PptpStopSessionRequest	streq;
-		struct PptpStopSessionReply	strep;
-                struct PptpOutCallRequest       ocreq;
-                struct PptpOutCallReply         ocack;
-                struct PptpInCallRequest        icreq;
-                struct PptpInCallReply          icack;
-                struct PptpInCallConnected      iccon;
-		struct PptpClearCallRequest	clrreq;
-                struct PptpCallDisconnectNotify disc;
-                struct PptpWanErrorNotify       wanerr;
-                struct PptpSetLinkInfo          setlink;
+	struct PptpStartSessionRequest	sreq;
+	struct PptpStartSessionReply	srep;
+	struct PptpStopSessionRequest	streq;
+	struct PptpStopSessionReply	strep;
+	struct PptpOutCallRequest	ocreq;
+	struct PptpOutCallReply		ocack;
+	struct PptpInCallRequest	icreq;
+	struct PptpInCallReply		icack;
+	struct PptpInCallConnected	iccon;
+	struct PptpClearCallRequest	clrreq;
+	struct PptpCallDisconnectNotify disc;
+	struct PptpWanErrorNotify	wanerr;
+	struct PptpSetLinkInfo		setlink;
 };
 
 extern int
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index b020a33e65e94..6c94dd5d476cd 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -20,11 +20,11 @@
  * 	 - We can only support one single call within each session
  *
  * TODO:
- *	 - testing of incoming PPTP calls 
+ *	 - testing of incoming PPTP calls
  *
- * Changes: 
+ * Changes:
  * 	2002-02-05 - Version 1.3
- * 	  - Call ip_conntrack_unexpect_related() from 
+ * 	  - Call ip_conntrack_unexpect_related() from
  * 	    pptp_destroy_siblings() to destroy expectations in case
  * 	    CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
  * 	    (Philip Craig <philipc@snapgear.com>)
@@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct,
 		invert_tuplepr(&inv_t, &exp->tuple);
 		DEBUGP("trying to unexpect other dir: ");
 		DUMP_TUPLE(&inv_t);
-	
+
 		exp_other = ip_conntrack_expect_find(&inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
@@ -194,7 +194,7 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
 {
 	struct ip_conntrack_tuple t;
 
-	/* Since ct->sibling_list has literally rusted away in 2.6.11, 
+	/* Since ct->sibling_list has literally rusted away in 2.6.11,
 	 * we now need another way to find out about our sibling
 	 * contrack and expects... -HW */
 
@@ -264,7 +264,7 @@ exp_gre(struct ip_conntrack *master,
 	exp_orig->mask.dst.u.gre.key = htons(0xffff);
 	exp_orig->mask.dst.ip = 0xffffffff;
 	exp_orig->mask.dst.protonum = 0xff;
-		
+
 	exp_orig->master = master;
 	exp_orig->expectfn = pptp_expectfn;
 	exp_orig->flags = 0;
@@ -322,7 +322,7 @@ exp_gre(struct ip_conntrack *master,
 	goto out_put_both;
 }
 
-static inline int 
+static inline int
 pptp_inbound_pkt(struct sk_buff **pskb,
 		 struct tcphdr *tcph,
 		 unsigned int nexthdr_off,
@@ -336,7 +336,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
 	__be16 *cid, *pcid;
-	u_int32_t seq;	
+	u_int32_t seq;
 
 	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
 	if (!ctlh) {
@@ -373,7 +373,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		}
 		if (pptpReq->srep.resultCode == PPTP_START_OK)
 			info->sstate = PPTP_SESSION_CONFIRMED;
-		else 
+		else
 			info->sstate = PPTP_SESSION_ERROR;
 		break;
 
@@ -420,22 +420,22 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		pcid = &pptpReq->ocack.peersCallID;
 
 		info->pac_call_id = ntohs(*cid);
-		
+
 		if (htons(info->pns_call_id) != *pcid) {
 			DEBUGP("%s for unknown callid %u\n",
 				pptp_msg_name[msg], ntohs(*pcid));
 			break;
 		}
 
-		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], 
+		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
 			ntohs(*cid), ntohs(*pcid));
-		
+
 		info->cstate = PPTP_CALL_OUT_CONF;
 
 		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
 				       + sizeof(struct PptpControlHeader)
 				       + ((void *)pcid - (void *)pptpReq);
-			
+
 		if (exp_gre(ct, seq, *cid, *pcid) != 0)
 			printk("ip_conntrack_pptp: error during exp_gre\n");
 		break;
@@ -479,7 +479,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		cid = &info->pac_call_id;
 
 		if (info->pns_call_id != ntohs(*pcid)) {
-			DEBUGP("%s for unknown CallID %u\n", 
+			DEBUGP("%s for unknown CallID %u\n",
 				pptp_msg_name[msg], ntohs(*pcid));
 			break;
 		}
@@ -491,7 +491,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
 				       + sizeof(struct PptpControlHeader)
 				       + ((void *)pcid - (void *)pptpReq);
-			
+
 		if (exp_gre(ct, seq, *cid, *pcid) != 0)
 			printk("ip_conntrack_pptp: error during exp_gre\n");
 
@@ -554,7 +554,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		return NF_ACCEPT;
 	nexthdr_off += sizeof(_ctlh);
 	datalen -= sizeof(_ctlh);
-	
+
 	reqlen = datalen;
 	if (reqlen > sizeof(*pptpReq))
 		reqlen = sizeof(*pptpReq);
@@ -606,7 +606,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		/* client answers incoming call */
 		if (info->cstate != PPTP_CALL_IN_REQ
 		    && info->cstate != PPTP_CALL_IN_REP) {
-			DEBUGP("%s without incall_req\n", 
+			DEBUGP("%s without incall_req\n",
 				pptp_msg_name[msg]);
 			break;
 		}
@@ -616,7 +616,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		}
 		pcid = &pptpReq->icack.peersCallID;
 		if (info->pac_call_id != ntohs(*pcid)) {
-			DEBUGP("%s for unknown call %u\n", 
+			DEBUGP("%s for unknown call %u\n",
 				pptp_msg_name[msg], ntohs(*pcid));
 			break;
 		}
@@ -644,12 +644,12 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		/* I don't have to explain these ;) */
 		break;
 	default:
-		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? 
+		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
 			pptp_msg_name[msg]:pptp_msg_name[0], msg);
 		/* unknown: no need to create GRE masq table entry */
 		break;
 	}
-	
+
 	if (ip_nat_pptp_hook_outbound)
 		return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
 						 pptpReq);
@@ -659,7 +659,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 
 
 /* track caller id inside control connection, call expect_related */
-static int 
+static int
 conntrack_pptp_help(struct sk_buff **pskb,
 		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
 
@@ -676,12 +676,12 @@ conntrack_pptp_help(struct sk_buff **pskb,
 	int ret;
 
 	/* don't do any tracking before tcp handshake complete */
-	if (ctinfo != IP_CT_ESTABLISHED 
+	if (ctinfo != IP_CT_ESTABLISHED
 	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
 		DEBUGP("ctinfo = %u, skipping\n", ctinfo);
 		return NF_ACCEPT;
 	}
-	
+
 	nexthdr_off = (*pskb)->nh.iph->ihl*4;
 	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
 	BUG_ON(!tcph);
@@ -735,28 +735,28 @@ conntrack_pptp_help(struct sk_buff **pskb,
 }
 
 /* control protocol helper */
-static struct ip_conntrack_helper pptp = { 
+static struct ip_conntrack_helper pptp = {
 	.list = { NULL, NULL },
-	.name = "pptp", 
+	.name = "pptp",
 	.me = THIS_MODULE,
 	.max_expected = 2,
 	.timeout = 5 * 60,
-	.tuple = { .src = { .ip = 0, 
-		 	    .u = { .tcp = { .port =  
-				    __constant_htons(PPTP_CONTROL_PORT) } } 
-			  }, 
-		   .dst = { .ip = 0, 
+	.tuple = { .src = { .ip = 0,
+		 	    .u = { .tcp = { .port =
+				    __constant_htons(PPTP_CONTROL_PORT) } }
+			  },
+		   .dst = { .ip = 0,
 			    .u = { .all = 0 },
 			    .protonum = IPPROTO_TCP
-			  } 
+			  }
 		 },
-	.mask = { .src = { .ip = 0, 
-			   .u = { .tcp = { .port = __constant_htons(0xffff) } } 
-			 }, 
-		  .dst = { .ip = 0, 
+	.mask = { .src = { .ip = 0,
+			   .u = { .tcp = { .port = __constant_htons(0xffff) } }
+			 },
+		  .dst = { .ip = 0,
 			   .u = { .all = 0 },
-			   .protonum = 0xff 
-		 	 } 
+			   .protonum = 0xff
+		 	 }
 		},
 	.help = conntrack_pptp_help
 };
@@ -768,7 +768,7 @@ extern int __init ip_ct_proto_gre_init(void);
 static int __init ip_conntrack_helper_pptp_init(void)
 {
 	int retcode;
- 
+
 	retcode = ip_ct_proto_gre_init();
 	if (retcode < 0)
 		return retcode;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 92c6d8b178c97..5fe026f467d38 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -1,15 +1,15 @@
 /*
- * ip_conntrack_proto_gre.c - Version 3.0 
+ * ip_conntrack_proto_gre.c - Version 3.0
  *
  * Connection tracking protocol helper module for GRE.
  *
  * GRE is a generic encapsulation protocol, which is generally not very
  * suited for NAT, as it has no protocol-specific part as port numbers.
  *
- * It has an optional key field, which may help us distinguishing two 
+ * It has an optional key field, which may help us distinguishing two
  * connections between the same two hosts.
  *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
  *
  * PPTP is built on top of a modified version of GRE, and has a mandatory
  * field called "CallID", which serves us for the same purpose as the key
@@ -61,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
 #define DEBUGP(x, args...)
 #define DUMP_TUPLE_GRE(x)
 #endif
-				
+
 /* GRE KEYMAP HANDLING FUNCTIONS */
 static LIST_HEAD(gre_keymap_list);
 
@@ -88,7 +88,7 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
 		}
 	}
 	read_unlock_bh(&ip_ct_gre_lock);
-	
+
 	DEBUGP("lookup src key 0x%x up key for ", key);
 	DUMP_TUPLE_GRE(t);
 
@@ -107,7 +107,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
 		return -1;
 	}
 
-	if (!reply) 
+	if (!reply)
 		exist_km = &ct->help.ct_pptp_info.keymap_orig;
 	else
 		exist_km = &ct->help.ct_pptp_info.keymap_reply;
@@ -118,7 +118,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
 			if (gre_key_cmpfn(km, t) && km == *exist_km)
 				return 0;
 		}
-		DEBUGP("trying to override keymap_%s for ct %p\n", 
+		DEBUGP("trying to override keymap_%s for ct %p\n",
 			reply? "reply":"orig", ct);
 		return -EEXIST;
 	}
@@ -152,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
 
 	write_lock_bh(&ip_ct_gre_lock);
 	if (ct->help.ct_pptp_info.keymap_orig) {
-		DEBUGP("removing %p from list\n", 
+		DEBUGP("removing %p from list\n",
 			ct->help.ct_pptp_info.keymap_orig);
 		list_del(&ct->help.ct_pptp_info.keymap_orig->list);
 		kfree(ct->help.ct_pptp_info.keymap_orig);
@@ -220,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb,
 static int gre_print_tuple(struct seq_file *s,
 			   const struct ip_conntrack_tuple *tuple)
 {
-	return seq_printf(s, "srckey=0x%x dstkey=0x%x ", 
+	return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
 			  ntohs(tuple->src.u.gre.key),
 			  ntohs(tuple->dst.u.gre.key));
 }
@@ -250,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct,
 	} else
 		ip_ct_refresh_acct(ct, conntrackinfo, skb,
 				   ct->proto.gre.timeout);
-	
+
 	return NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
 static int gre_new(struct ip_conntrack *ct,
 		   const struct sk_buff *skb)
-{ 
+{
 	DEBUGP(": ");
 	DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
@@ -283,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct)
 }
 
 /* protocol helper struct */
-static struct ip_conntrack_protocol gre = { 
+static struct ip_conntrack_protocol gre = {
 	.proto		 = IPPROTO_GRE,
-	.name		 = "gre", 
+	.name		 = "gre",
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
 	.invert_tuple	 = gre_invert_tuple,
 	.print_tuple	 = gre_print_tuple,
@@ -323,7 +323,7 @@ void ip_ct_proto_gre_fini(void)
 	}
 	write_unlock_bh(&ip_ct_gre_lock);
 
-	ip_conntrack_protocol_unregister(&gre); 
+	ip_conntrack_protocol_unregister(&gre);
 }
 
 EXPORT_SYMBOL(ip_ct_gre_keymap_add);
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 1d149964dc38f..5dde1da1c3001 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -32,7 +32,7 @@
  *     2005-06-10 - Version 3.0
  *       - kernel >= 2.6.11 version,
  *	   funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 
+ *
  */
 
 #include <linux/module.h>
@@ -93,10 +93,10 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
 		DEBUGP("we are PAC->PNS\n");
 		/* build tuple for PNS->PAC */
 		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		t.src.u.gre.key = 
+		t.src.u.gre.key =
 			htons(master->nat.help.nat_pptp_info.pns_call_id);
 		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		t.dst.u.gre.key = 
+		t.dst.u.gre.key =
 			htons(master->nat.help.nat_pptp_info.pac_call_id);
 		t.dst.protonum = IPPROTO_GRE;
 	}
@@ -153,47 +153,47 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	unsigned int cid_off;
 
 	new_callid = htons(ct_pptp_info->pns_call_id);
-	
+
 	switch (msg = ntohs(ctlh->messageType)) {
-		case PPTP_OUT_CALL_REQUEST:
-			cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
-			/* FIXME: ideally we would want to reserve a call ID
-			 * here.  current netfilter NAT core is not able to do
-			 * this :( For now we use TCP source port. This breaks
-			 * multiple calls within one control session */
-
-			/* save original call ID in nat_info */
-			nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
-			/* don't use tcph->source since we are at a DSTmanip
-			 * hook (e.g. PREROUTING) and pkt is not mangled yet */
-			new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
-			/* save new call ID in ct info */
-			ct_pptp_info->pns_call_id = ntohs(new_callid);
-			break;
-		case PPTP_IN_CALL_REPLY:
-			cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
-			break;
-		case PPTP_CALL_CLEAR_REQUEST:
-			cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
-			break;
-		default:
-			DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
-			      (msg <= PPTP_MSG_MAX)? 
-			      pptp_msg_name[msg]:pptp_msg_name[0]);
-			/* fall through */
-
-		case PPTP_SET_LINK_INFO:
-			/* only need to NAT in case PAC is behind NAT box */
-		case PPTP_START_SESSION_REQUEST:
-		case PPTP_START_SESSION_REPLY:
-		case PPTP_STOP_SESSION_REQUEST:
-		case PPTP_STOP_SESSION_REPLY:
-		case PPTP_ECHO_REQUEST:
-		case PPTP_ECHO_REPLY:
-			/* no need to alter packet */
-			return NF_ACCEPT;
+	case PPTP_OUT_CALL_REQUEST:
+		cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+		/* FIXME: ideally we would want to reserve a call ID
+		 * here.  current netfilter NAT core is not able to do
+		 * this :( For now we use TCP source port. This breaks
+		 * multiple calls within one control session */
+
+		/* save original call ID in nat_info */
+		nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+		/* don't use tcph->source since we are at a DSTmanip
+		 * hook (e.g. PREROUTING) and pkt is not mangled yet */
+		new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+		/* save new call ID in ct info */
+		ct_pptp_info->pns_call_id = ntohs(new_callid);
+		break;
+	case PPTP_IN_CALL_REPLY:
+		cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
+		break;
+	case PPTP_CALL_CLEAR_REQUEST:
+		cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+		break;
+	default:
+		DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+		      (msg <= PPTP_MSG_MAX)?
+		      pptp_msg_name[msg]:pptp_msg_name[0]);
+		/* fall through */
+
+	case PPTP_SET_LINK_INFO:
+		/* only need to NAT in case PAC is behind NAT box */
+	case PPTP_START_SESSION_REQUEST:
+	case PPTP_START_SESSION_REPLY:
+	case PPTP_STOP_SESSION_REQUEST:
+	case PPTP_STOP_SESSION_REPLY:
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* no need to alter packet */
+		return NF_ACCEPT;
 	}
 
 	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
@@ -216,9 +216,9 @@ static int
 pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	     struct ip_conntrack_expect *expect_reply)
 {
-	struct ip_ct_pptp_master *ct_pptp_info = 
+	struct ip_ct_pptp_master *ct_pptp_info =
 				&expect_orig->master->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info = 
+	struct ip_nat_pptp *nat_pptp_info =
 				&expect_orig->master->nat.help.nat_pptp_info;
 
 	struct ip_conntrack *ct = expect_orig->master;
@@ -324,7 +324,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	default:
-		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? 
+		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
 			pptp_msg_name[msg]:pptp_msg_name[0]);
 		/* fall through */
 
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 70a65372225a2..a5226691f02ca 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -6,10 +6,10 @@
  * GRE is a generic encapsulation protocol, which is generally not very
  * suited for NAT, as it has no protocol-specific part as port numbers.
  *
- * It has an optional key field, which may help us distinguishing two 
+ * It has an optional key field, which may help us distinguishing two
  * connections between the same two hosts.
  *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
  *
  * PPTP is built on top of a modified version of GRE, and has a mandatory
  * field called "CallID", which serves us for the same purpose as the key
@@ -60,7 +60,7 @@ gre_in_range(const struct ip_conntrack_tuple *tuple,
 }
 
 /* generate unique tuple ... */
-static int 
+static int
 gre_unique_tuple(struct ip_conntrack_tuple *tuple,
 		 const struct ip_nat_range *range,
 		 enum ip_nat_manip_type maniptype,
@@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.gre.key) - min + 1;
 	}
 
-	DEBUGP("min = %u, range_size = %u\n", min, range_size); 
+	DEBUGP("min = %u, range_size = %u\n", min, range_size);
 
 	for (i = 0; i < range_size; i++, key++) {
 		*keyptr = htons(min + key % range_size);
@@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb,
 	greh = (void *)(*pskb)->data + hdroff;
 	pgreh = (struct gre_hdr_pptp *) greh;
 
-	/* we only have destination manip of a packet, since 'source key' 
+	/* we only have destination manip of a packet, since 'source key'
 	 * is not present in the packet itself */
 	if (maniptype == IP_NAT_MANIP_DST) {
 		/* key manipulation is always dest */
@@ -129,7 +129,7 @@ gre_manip_pkt(struct sk_buff **pskb,
 			}
 			if (greh->csum) {
 				/* FIXME: Never tested this code... */
-				*(gre_csum(greh)) = 
+				*(gre_csum(greh)) =
 					nf_proto_csum_update(*pskb,
 							~*(gre_key(greh)),
 							tuple->dst.u.gre.key,
@@ -138,7 +138,7 @@ gre_manip_pkt(struct sk_buff **pskb,
 			*(gre_key(greh)) = tuple->dst.u.gre.key;
 			break;
 		case GRE_VERSION_PPTP:
-			DEBUGP("call_id -> 0x%04x\n", 
+			DEBUGP("call_id -> 0x%04x\n",
 				ntohs(tuple->dst.u.gre.key));
 			pgreh->call_id = tuple->dst.u.gre.key;
 			break;
@@ -152,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb,
 }
 
 /* nat helper struct */
-static struct ip_nat_protocol gre = { 
-	.name		= "GRE", 
+static struct ip_nat_protocol gre = {
+	.name		= "GRE",
 	.protonum	= IPPROTO_GRE,
 	.manip_pkt	= gre_manip_pkt,
 	.in_range	= gre_in_range,
@@ -164,7 +164,7 @@ static struct ip_nat_protocol gre = {
 	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
 #endif
 };
-				  
+
 int __init ip_nat_proto_gre_init(void)
 {
 	return ip_nat_protocol_register(&gre);
-- 
GitLab


From 955b944293dd4c931ec866ebe19a6b2463b8f9a0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:08:03 -0700
Subject: [PATCH 0715/1063] [NETFILTER]: PPTP conntrack: get rid of unnecessary
 byte order conversions

The conntrack structure stores the call ID in host byte order for no
reason. Keep it in network byte order instead and get rid of the
back-and-forth conversions.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../linux/netfilter_ipv4/ip_conntrack_pptp.h  |  8 ++--
 .../netfilter_ipv4/ip_conntrack_proto_gre.h   | 22 +++++-----
 include/linux/netfilter_ipv4/ip_nat_pptp.h    |  4 +-
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 22 +++++-----
 net/ipv4/netfilter/ip_nat_helper_pptp.c       | 42 +++++++++----------
 net/ipv4/netfilter/ip_nat_proto_gre.c         |  2 +-
 6 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
index 88f66d3c87655..0d35623f9453f 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
@@ -31,8 +31,8 @@ struct ip_ct_pptp_master {
 	/* everything below is going to be per-expectation in newnat,
 	 * since there could be more than one call within one session */
 	enum pptp_ctrlcall_state cstate;	/* call state */
-	u_int16_t pac_call_id;			/* call id of PAC, host byte order */
-	u_int16_t pns_call_id;			/* call id of PNS, host byte order */
+	__be16 pac_call_id;			/* call id of PAC, host byte order */
+	__be16 pns_call_id;			/* call id of PNS, host byte order */
 
 	/* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack
 	 * and therefore imposes a fixed limit on the number of maps */
@@ -42,8 +42,8 @@ struct ip_ct_pptp_master {
 /* conntrack_expect private member */
 struct ip_ct_pptp_expect {
 	enum pptp_ctrlcall_state cstate; 	/* call state */
-	u_int16_t pac_call_id;			/* call id of PAC */
-	u_int16_t pns_call_id;			/* call id of PNS */
+	__be16 pac_call_id;			/* call id of PAC */
+	__be16 pns_call_id;			/* call id of PNS */
 };
 
 
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
index 8d090ef82f5ff..1d853aa873ebe 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
@@ -49,18 +49,18 @@ struct gre_hdr {
 #else
 #error "Adjust your <asm/byteorder.h> defines"
 #endif
-	__u16	protocol;
+	__be16	protocol;
 };
 
 /* modified GRE header for PPTP */
 struct gre_hdr_pptp {
-	__u8  flags;		/* bitfield */
-	__u8  version;		/* should be GRE_VERSION_PPTP */
-	__u16 protocol;		/* should be GRE_PROTOCOL_PPTP */
-	__u16 payload_len;	/* size of ppp payload, not inc. gre header */
-	__u16 call_id;		/* peer's call_id for this session */
-	__u32 seq;		/* sequence number.  Present if S==1 */
-	__u32 ack;		/* seq number of highest packet recieved by */
+	__u8   flags;		/* bitfield */
+	__u8   version;		/* should be GRE_VERSION_PPTP */
+	__be16 protocol;	/* should be GRE_PROTOCOL_PPTP */
+	__be16 payload_len;	/* size of ppp payload, not inc. gre header */
+	__be16 call_id;		/* peer's call_id for this session */
+	__be32 seq;		/* sequence number.  Present if S==1 */
+	__be32 ack;		/* seq number of highest packet recieved by */
 				/*  sender in this session */
 };
 
@@ -92,13 +92,13 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct);
 
 
 /* get pointer to gre key, if present */
-static inline u_int32_t *gre_key(struct gre_hdr *greh)
+static inline __be32 *gre_key(struct gre_hdr *greh)
 {
 	if (!greh->key)
 		return NULL;
 	if (greh->csum || greh->routing)
-		return (u_int32_t *) (greh+sizeof(*greh)+4);
-	return (u_int32_t *) (greh+sizeof(*greh));
+		return (__be32 *) (greh+sizeof(*greh)+4);
+	return (__be32 *) (greh+sizeof(*greh));
 }
 
 /* get pointer ot gre csum, if present */
diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h
index eaf66c2e8f937..36668bf0f373e 100644
--- a/include/linux/netfilter_ipv4/ip_nat_pptp.h
+++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h
@@ -4,8 +4,8 @@
 
 /* conntrack private data */
 struct ip_nat_pptp {
-	u_int16_t pns_call_id;		/* NAT'ed PNS call id */
-	u_int16_t pac_call_id;		/* NAT'ed PAC call id */
+	__be16 pns_call_id;		/* NAT'ed PNS call id */
+	__be16 pac_call_id;		/* NAT'ed PAC call id */
 };
 
 #endif /* _NAT_PPTP_H */
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 6c94dd5d476cd..57637ca2b82c7 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -201,8 +201,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
 	/* try original (pns->pac) tuple */
 	memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
 	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
-	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+	t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
+	t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
 
 	if (!destroy_sibling_or_exp(&t))
 		DEBUGP("failed to timeout original pns->pac ct/exp\n");
@@ -210,8 +210,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
 	/* try reply (pac->pns) tuple */
 	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
 	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
-	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+	t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
+	t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
 
 	if (!destroy_sibling_or_exp(&t))
 		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
@@ -419,9 +419,9 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		cid = &pptpReq->ocack.callID;
 		pcid = &pptpReq->ocack.peersCallID;
 
-		info->pac_call_id = ntohs(*cid);
+		info->pac_call_id = *cid;
 
-		if (htons(info->pns_call_id) != *pcid) {
+		if (info->pns_call_id != *pcid) {
 			DEBUGP("%s for unknown callid %u\n",
 				pptp_msg_name[msg], ntohs(*pcid));
 			break;
@@ -454,7 +454,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		pcid = &pptpReq->icack.peersCallID;
 		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
 		info->cstate = PPTP_CALL_IN_REQ;
-		info->pac_call_id = ntohs(*pcid);
+		info->pac_call_id = *pcid;
 		break;
 
 	case PPTP_IN_CALL_CONNECT:
@@ -478,7 +478,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		pcid = &pptpReq->iccon.peersCallID;
 		cid = &info->pac_call_id;
 
-		if (info->pns_call_id != ntohs(*pcid)) {
+		if (info->pns_call_id != *pcid) {
 			DEBUGP("%s for unknown CallID %u\n",
 				pptp_msg_name[msg], ntohs(*pcid));
 			break;
@@ -595,7 +595,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		/* track PNS call id */
 		cid = &pptpReq->ocreq.callID;
 		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
-		info->pns_call_id = ntohs(*cid);
+		info->pns_call_id = *cid;
 		break;
 	case PPTP_IN_CALL_REPLY:
 		if (reqlen < sizeof(_pptpReq.icack)) {
@@ -615,7 +615,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 			break;
 		}
 		pcid = &pptpReq->icack.peersCallID;
-		if (info->pac_call_id != ntohs(*pcid)) {
+		if (info->pac_call_id != *pcid) {
 			DEBUGP("%s for unknown call %u\n",
 				pptp_msg_name[msg], ntohs(*pcid));
 			break;
@@ -623,7 +623,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
 		/* part two of the three-way handshake */
 		info->cstate = PPTP_CALL_IN_REP;
-		info->pns_call_id = ntohs(pptpReq->icack.callID);
+		info->pns_call_id = pptpReq->icack.callID;
 		break;
 
 	case PPTP_CALL_CLEAR_REQUEST:
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 5dde1da1c3001..6e8bd6b3431f0 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
 		DEBUGP("we are PNS->PAC\n");
 		/* therefore, build tuple for PAC->PNS */
 		t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
-		t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+		t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
 		t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
-		t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+		t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
 		t.dst.protonum = IPPROTO_GRE;
 	} else {
 		DEBUGP("we are PAC->PNS\n");
 		/* build tuple for PNS->PAC */
 		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		t.src.u.gre.key =
-			htons(master->nat.help.nat_pptp_info.pns_call_id);
+		t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
 		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		t.dst.u.gre.key =
-			htons(master->nat.help.nat_pptp_info.pac_call_id);
+		t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
 		t.dst.protonum = IPPROTO_GRE;
 	}
 
@@ -149,10 +147,11 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 {
 	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_callid;
+	u_int16_t msg;
+	__be16 new_callid;
 	unsigned int cid_off;
 
-	new_callid = htons(ct_pptp_info->pns_call_id);
+	new_callid = ct_pptp_info->pns_call_id;
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REQUEST:
@@ -170,7 +169,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
 
 		/* save new call ID in ct info */
-		ct_pptp_info->pns_call_id = ntohs(new_callid);
+		ct_pptp_info->pns_call_id = new_callid;
 		break;
 	case PPTP_IN_CALL_REPLY:
 		cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
@@ -235,14 +234,14 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 
 	/* alter expectation for PNS->PAC direction */
 	invert_tuplepr(&inv_t, &expect_orig->tuple);
-	expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
-	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
-	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
+	expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
+	expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
 	expect_orig->dir = IP_CT_DIR_ORIGINAL;
 	inv_t.src.ip = reply_t->src.ip;
 	inv_t.dst.ip = reply_t->dst.ip;
-	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
-	inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	inv_t.src.u.gre.key = nat_pptp_info->pac_call_id;
+	inv_t.dst.u.gre.key = ct_pptp_info->pns_call_id;
 
 	if (!ip_conntrack_expect_related(expect_orig)) {
 		DEBUGP("successfully registered expect\n");
@@ -253,14 +252,14 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 
 	/* alter expectation for PAC->PNS direction */
 	invert_tuplepr(&inv_t, &expect_reply->tuple);
-	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
-	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
-	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
+	expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
+	expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
 	expect_reply->dir = IP_CT_DIR_REPLY;
 	inv_t.src.ip = orig_t->src.ip;
 	inv_t.dst.ip = orig_t->dst.ip;
-	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
-	inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	inv_t.src.u.gre.key = nat_pptp_info->pns_call_id;
+	inv_t.dst.u.gre.key = ct_pptp_info->pac_call_id;
 
 	if (!ip_conntrack_expect_related(expect_reply)) {
 		DEBUGP("successfully registered expect\n");
@@ -297,10 +296,11 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		 union pptp_ctrl_union *pptpReq)
 {
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_cid = 0, new_pcid;
+	u_int16_t msg, new_cid = 0;
+	__be16 new_pcid;
 	unsigned int pcid_off, cid_off = 0;
 
-	new_pcid = htons(nat_pptp_info->pns_call_id);
+	new_pcid = nat_pptp_info->pns_call_id;
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REPLY:
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index a5226691f02ca..bf91f9312b3c6 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -67,7 +67,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple,
 		 const struct ip_conntrack *conntrack)
 {
 	static u_int16_t key;
-	u_int16_t *keyptr;
+	__be16 *keyptr;
 	unsigned int min, i, range_size;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
-- 
GitLab


From a1ad1deed5bf6fa06f2213b7f1a794de4cf791a6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:08:23 -0700
Subject: [PATCH 0716/1063] [NETFILTER]: PPTP conntrack: remove dead code

The call ID in reply packets is never changed, so remove the code that would rewrite it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_nat_helper_pptp.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 6e8bd6b3431f0..0f5e753b481de 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -296,16 +296,15 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		 union pptp_ctrl_union *pptpReq)
 {
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_cid = 0;
+	u_int16_t msg;
 	__be16 new_pcid;
-	unsigned int pcid_off, cid_off = 0;
+	unsigned int pcid_off;
 
 	new_pcid = nat_pptp_info->pns_call_id;
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REPLY:
 		pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
-		cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
 		break;
 	case PPTP_IN_CALL_CONNECT:
 		pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
@@ -351,17 +350,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 				     sizeof(new_pcid), (char *)&new_pcid,
 				     sizeof(new_pcid)) == 0)
 		return NF_DROP;
-
-	if (new_cid) {
-		DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-			ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid));
-		if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-		                             cid_off + sizeof(struct pptp_pkt_hdr) +
-					     sizeof(struct PptpControlHeader),
-					     sizeof(new_cid), (char *)&new_cid,
-					     sizeof(new_cid)) == 0)
-			return NF_DROP;
-	}
 	return NF_ACCEPT;
 }
 
-- 
GitLab


From 5256f663a0228af9bf69ba74ad9f0928f35713f7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:08:41 -0700
Subject: [PATCH 0717/1063] [NETFILTER]: PPTP conntrack: remove more dead code

The calculated sequence numbers are not used for anything.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 57637ca2b82c7..0510ee50dc654 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -220,7 +220,6 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
 /* expect GRE connections (PNS->PAC and PAC->PNS direction) */
 static inline int
 exp_gre(struct ip_conntrack *master,
-	u_int32_t seq,
 	__be16 callid,
 	__be16 peer_callid)
 {
@@ -336,7 +335,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
 	__be16 *cid, *pcid;
-	u_int32_t seq;
 
 	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
 	if (!ctlh) {
@@ -432,12 +430,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 		info->cstate = PPTP_CALL_OUT_CONF;
 
-		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
-				       + sizeof(struct PptpControlHeader)
-				       + ((void *)pcid - (void *)pptpReq);
-
-		if (exp_gre(ct, seq, *cid, *pcid) != 0)
-			printk("ip_conntrack_pptp: error during exp_gre\n");
+		exp_gre(ct, *cid, *pcid);
 		break;
 
 	case PPTP_IN_CALL_REQUEST:
@@ -488,13 +481,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		info->cstate = PPTP_CALL_IN_CONF;
 
 		/* we expect a GRE connection from PAC to PNS */
-		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
-				       + sizeof(struct PptpControlHeader)
-				       + ((void *)pcid - (void *)pptpReq);
-
-		if (exp_gre(ct, seq, *cid, *pcid) != 0)
-			printk("ip_conntrack_pptp: error during exp_gre\n");
-
+		exp_gre(ct, *cid, *pcid);
 		break;
 
 	case PPTP_CALL_DISCONNECT_NOTIFY:
-- 
GitLab


From 6013c0a13e335674a783215e182c367406294392 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:08:56 -0700
Subject: [PATCH 0718/1063] [NETFILTER]: PPTP conntrack: fix header definitions

Fix a few header definitions to match RFC2637. Most importantly the
PptpOutCallRequest header included an invalid padding field and a
size check was disabled because of this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 9 +++++----
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c    | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
index 0d35623f9453f..620bf06fabc20 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
@@ -107,8 +107,7 @@ struct PptpControlHeader {
 
 struct PptpStartSessionRequest {
 	__be16	protocolVersion;
-	__u8	reserved1;
-	__u8	reserved2;
+	__u16	reserved1;
 	__be32	framingCapability;
 	__be32	bearerCapability;
 	__be16	maxChannels;
@@ -143,6 +142,8 @@ struct PptpStartSessionReply {
 
 struct PptpStopSessionRequest {
 	__u8	reason;
+	__u8	reserved1;
+	__u16	reserved2;
 };
 
 /* PptpStopSessionResultCode */
@@ -152,6 +153,7 @@ struct PptpStopSessionRequest {
 struct PptpStopSessionReply {
 	__u8	resultCode;
 	__u8	generalErrorCode;
+	__u16	reserved1;
 };
 
 struct PptpEchoRequest {
@@ -188,9 +190,8 @@ struct PptpOutCallRequest {
 	__be32	framingType;
 	__be16	packetWindow;
 	__be16	packetProcDelay;
-	__u16	reserved1;
 	__be16	phoneNumberLength;
-	__u16	reserved2;
+	__u16	reserved1;
 	__u8	phoneNumber[64];
 	__u8	subAddress[64];
 };
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 0510ee50dc654..1a8da9015d8c5 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -569,7 +569,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	case PPTP_OUT_CALL_REQUEST:
 		if (reqlen < sizeof(_pptpReq.ocreq)) {
 			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			/* FIXME: break; */
+			break;
 		}
 
 		/* client initiating connection to server */
-- 
GitLab


From 857c06da2ba2e00b81677c2f6740048d87da0207 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:09:19 -0700
Subject: [PATCH 0719/1063] [NETFILTER]: PPTP conntrack: remove unnecessary
 cid/pcid header pointers

Just the values are needed, not the memory locations.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 56 +++++++++----------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 1a8da9015d8c5..5f7af6ef3881f 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -334,7 +334,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	union pptp_ctrl_union _pptpReq, *pptpReq;
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
-	__be16 *cid, *pcid;
+	__be16 cid, pcid;
 
 	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
 	if (!ctlh) {
@@ -414,23 +414,23 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 			break;
 		}
 
-		cid = &pptpReq->ocack.callID;
-		pcid = &pptpReq->ocack.peersCallID;
+		cid = pptpReq->ocack.callID;
+		pcid = pptpReq->ocack.peersCallID;
 
-		info->pac_call_id = *cid;
+		info->pac_call_id = cid;
 
-		if (info->pns_call_id != *pcid) {
+		if (info->pns_call_id != pcid) {
 			DEBUGP("%s for unknown callid %u\n",
-				pptp_msg_name[msg], ntohs(*pcid));
+				pptp_msg_name[msg], ntohs(pcid));
 			break;
 		}
 
 		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
-			ntohs(*cid), ntohs(*pcid));
+			ntohs(cid), ntohs(pcid));
 
 		info->cstate = PPTP_CALL_OUT_CONF;
 
-		exp_gre(ct, *cid, *pcid);
+		exp_gre(ct, cid, pcid);
 		break;
 
 	case PPTP_IN_CALL_REQUEST:
@@ -444,10 +444,10 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
 			break;
 		}
-		pcid = &pptpReq->icack.peersCallID;
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		pcid = pptpReq->icack.peersCallID;
+		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		info->cstate = PPTP_CALL_IN_REQ;
-		info->pac_call_id = *pcid;
+		info->pac_call_id = pcid;
 		break;
 
 	case PPTP_IN_CALL_CONNECT:
@@ -468,20 +468,20 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 			break;
 		}
 
-		pcid = &pptpReq->iccon.peersCallID;
-		cid = &info->pac_call_id;
+		pcid = pptpReq->iccon.peersCallID;
+		cid = info->pac_call_id;
 
-		if (info->pns_call_id != *pcid) {
+		if (info->pns_call_id != pcid) {
 			DEBUGP("%s for unknown CallID %u\n",
-				pptp_msg_name[msg], ntohs(*pcid));
+				pptp_msg_name[msg], ntohs(pcid));
 			break;
 		}
 
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		info->cstate = PPTP_CALL_IN_CONF;
 
 		/* we expect a GRE connection from PAC to PNS */
-		exp_gre(ct, *cid, *pcid);
+		exp_gre(ct, cid, pcid);
 		break;
 
 	case PPTP_CALL_DISCONNECT_NOTIFY:
@@ -491,8 +491,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		}
 
 		/* server confirms disconnect */
-		cid = &pptpReq->disc.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+		cid = pptpReq->disc.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
 		info->cstate = PPTP_CALL_NONE;
 
 		/* untrack this call id, unexpect GRE packets */
@@ -534,7 +534,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	union pptp_ctrl_union _pptpReq, *pptpReq;
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
-	__be16 *cid, *pcid;
+	__be16 cid, pcid;
 
 	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
 	if (!ctlh)
@@ -580,9 +580,9 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		}
 		info->cstate = PPTP_CALL_OUT_REQ;
 		/* track PNS call id */
-		cid = &pptpReq->ocreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
-		info->pns_call_id = *cid;
+		cid = pptpReq->ocreq.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		info->pns_call_id = cid;
 		break;
 	case PPTP_IN_CALL_REPLY:
 		if (reqlen < sizeof(_pptpReq.icack)) {
@@ -601,16 +601,16 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 			info->cstate = PPTP_CALL_NONE;
 			break;
 		}
-		pcid = &pptpReq->icack.peersCallID;
-		if (info->pac_call_id != *pcid) {
+		pcid = pptpReq->icack.peersCallID;
+		if (info->pac_call_id != pcid) {
 			DEBUGP("%s for unknown call %u\n",
-				pptp_msg_name[msg], ntohs(*pcid));
+				pptp_msg_name[msg], ntohs(pcid));
 			break;
 		}
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		/* part two of the three-way handshake */
 		info->cstate = PPTP_CALL_IN_REP;
-		info->pns_call_id = pptpReq->icack.callID;
+		info->pns_call_id = pcid;
 		break;
 
 	case PPTP_CALL_CLEAR_REQUEST:
-- 
GitLab


From cf9f81523ef3e95d9f222c896d266e4562999150 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:09:34 -0700
Subject: [PATCH 0720/1063] [NETFILTER]: PPTP conntrack: simplify expectation
 handling

Remove duplicated expectation handling in the NAT helper and simplify
the remains in the conntrack helper.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../linux/netfilter_ipv4/ip_conntrack_pptp.h  |  2 +-
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 92 +++++++------------
 net/ipv4/netfilter/ip_nat_helper_pptp.c       | 58 +-----------
 3 files changed, 35 insertions(+), 117 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
index 620bf06fabc20..2644b1faddd6b 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
@@ -315,7 +315,7 @@ extern int
 			  struct PptpControlHeader *ctlh,
 			  union pptp_ctrl_union *pptpReq);
 
-extern int
+extern void
 (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig,
 			    struct ip_conntrack_expect *exp_reply);
 
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 5f7af6ef3881f..57eac6e3871a8 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -80,7 +80,7 @@ int
 			  struct PptpControlHeader *ctlh,
 			  union pptp_ctrl_union *pptpReq);
 
-int
+void
 (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
 			    struct ip_conntrack_expect *expect_reply);
 
@@ -219,93 +219,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
 
 /* expect GRE connections (PNS->PAC and PAC->PNS direction) */
 static inline int
-exp_gre(struct ip_conntrack *master,
+exp_gre(struct ip_conntrack *ct,
 	__be16 callid,
 	__be16 peer_callid)
 {
-	struct ip_conntrack_tuple inv_tuple;
-	struct ip_conntrack_tuple exp_tuples[] = {
-		/* tuple in original direction, PNS->PAC */
-		{ .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
-			   .u = { .gre = { .key = peer_callid } }
-			 },
-		  .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
-			   .u = { .gre = { .key = callid } },
-			   .protonum = IPPROTO_GRE
-			 },
-		 },
-		/* tuple in reply direction, PAC->PNS */
-		{ .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
-			   .u = { .gre = { .key = callid } }
-			 },
-		  .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
-			   .u = { .gre = { .key = peer_callid } },
-			   .protonum = IPPROTO_GRE
-			 },
-		 }
-	};
 	struct ip_conntrack_expect *exp_orig, *exp_reply;
 	int ret = 1;
 
-	exp_orig = ip_conntrack_expect_alloc(master);
+	exp_orig = ip_conntrack_expect_alloc(ct);
 	if (exp_orig == NULL)
 		goto out;
 
-	exp_reply = ip_conntrack_expect_alloc(master);
+	exp_reply = ip_conntrack_expect_alloc(ct);
 	if (exp_reply == NULL)
 		goto out_put_orig;
 
-	memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
+	/* original direction, PNS->PAC */
+	exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+	exp_orig->tuple.src.u.gre.key = peer_callid;
+	exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+	exp_orig->tuple.dst.u.gre.key = callid;
+	exp_orig->tuple.dst.protonum = IPPROTO_GRE;
 
 	exp_orig->mask.src.ip = 0xffffffff;
 	exp_orig->mask.src.u.all = 0;
-	exp_orig->mask.dst.u.all = 0;
 	exp_orig->mask.dst.u.gre.key = htons(0xffff);
 	exp_orig->mask.dst.ip = 0xffffffff;
 	exp_orig->mask.dst.protonum = 0xff;
 
-	exp_orig->master = master;
+	exp_orig->master = ct;
 	exp_orig->expectfn = pptp_expectfn;
 	exp_orig->flags = 0;
 
 	/* both expectations are identical apart from tuple */
 	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
-	memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
-
-	if (ip_nat_pptp_hook_exp_gre)
-		ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
-	else {
 
-		DEBUGP("calling expect_related PNS->PAC");
-		DUMP_TUPLE(&exp_orig->tuple);
+	/* reply direction, PAC->PNS */
+	exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+	exp_reply->tuple.src.u.gre.key = callid;
+	exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+	exp_reply->tuple.dst.u.gre.key = peer_callid;
+	exp_reply->tuple.dst.protonum = IPPROTO_GRE;
 
-		if (ip_conntrack_expect_related(exp_orig) != 0) {
-			DEBUGP("cannot expect_related()\n");
-			goto out_put_both;
-		}
-
-		DEBUGP("calling expect_related PAC->PNS");
-		DUMP_TUPLE(&exp_reply->tuple);
-
-		if (ip_conntrack_expect_related(exp_reply) != 0) {
-			DEBUGP("cannot expect_related()\n");
-			goto out_unexpect_orig;
-		}
-
-		/* Add GRE keymap entries */
-		if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
-			DEBUGP("cannot keymap_add() exp\n");
-			goto out_unexpect_both;
-		}
-
-		invert_tuplepr(&inv_tuple, &exp_reply->tuple);
-		if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
-			ip_ct_gre_keymap_destroy(master);
-			DEBUGP("cannot keymap_add() exp_inv\n");
-			goto out_unexpect_both;
-		}
-		ret = 0;
+	if (ip_nat_pptp_hook_exp_gre)
+		ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+	if (ip_conntrack_expect_related(exp_orig) != 0)
+		goto out_put_both;
+	if (ip_conntrack_expect_related(exp_reply) != 0)
+		goto out_unexpect_orig;
+
+	/* Add GRE keymap entries */
+	if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
+		goto out_unexpect_both;
+	if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
+		ip_ct_gre_keymap_destroy(ct);
+		goto out_unexpect_both;
 	}
+	ret = 0;
 
 out_put_both:
 	ip_conntrack_expect_put(exp_reply);
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 0f5e753b481de..84f6bd09fcd4d 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -211,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	return NF_ACCEPT;
 }
 
-static int
+static void
 pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	     struct ip_conntrack_expect *expect_reply)
 {
-	struct ip_ct_pptp_master *ct_pptp_info =
-				&expect_orig->master->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info =
-				&expect_orig->master->nat.help.nat_pptp_info;
-
 	struct ip_conntrack *ct = expect_orig->master;
-
-	struct ip_conntrack_tuple inv_t;
-	struct ip_conntrack_tuple *orig_t, *reply_t;
+	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
 
 	/* save original PAC call ID in nat_info */
 	nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
 
-	/* alter expectation */
-	orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
 	/* alter expectation for PNS->PAC direction */
-	invert_tuplepr(&inv_t, &expect_orig->tuple);
 	expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
 	expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
 	expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
 	expect_orig->dir = IP_CT_DIR_ORIGINAL;
-	inv_t.src.ip = reply_t->src.ip;
-	inv_t.dst.ip = reply_t->dst.ip;
-	inv_t.src.u.gre.key = nat_pptp_info->pac_call_id;
-	inv_t.dst.u.gre.key = ct_pptp_info->pns_call_id;
-
-	if (!ip_conntrack_expect_related(expect_orig)) {
-		DEBUGP("successfully registered expect\n");
-	} else {
-		DEBUGP("can't expect_related(expect_orig)\n");
-		return 1;
-	}
 
 	/* alter expectation for PAC->PNS direction */
-	invert_tuplepr(&inv_t, &expect_reply->tuple);
 	expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
 	expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
 	expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
 	expect_reply->dir = IP_CT_DIR_REPLY;
-	inv_t.src.ip = orig_t->src.ip;
-	inv_t.dst.ip = orig_t->dst.ip;
-	inv_t.src.u.gre.key = nat_pptp_info->pns_call_id;
-	inv_t.dst.u.gre.key = ct_pptp_info->pac_call_id;
-
-	if (!ip_conntrack_expect_related(expect_reply)) {
-		DEBUGP("successfully registered expect\n");
-	} else {
-		DEBUGP("can't expect_related(expect_reply)\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		return 1;
-	}
-
-	if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
-		DEBUGP("can't register original keymap\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		ip_conntrack_unexpect_related(expect_reply);
-		return 1;
-	}
-
-	if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
-		DEBUGP("can't register reply keymap\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		ip_conntrack_unexpect_related(expect_reply);
-		ip_ct_gre_keymap_destroy(ct);
-		return 1;
-	}
-
-	return 0;
 }
 
 /* inbound packets == from PAC to PNS */
-- 
GitLab


From a1073406a124c1d3b33a0f06bfb8078a9ddd1985 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:09:51 -0700
Subject: [PATCH 0721/1063] [NETFILTER]: PPTP conntrack: consolidate header
 size checks

Also make sure not to pass undersized messages to the NAT helper.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 65 +++++++------------
 1 file changed, 22 insertions(+), 43 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 57eac6e3871a8..3b5464fa4217a 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -291,6 +291,22 @@ exp_gre(struct ip_conntrack *ct,
 	goto out_put_both;
 }
 
+static const unsigned int pptp_msg_size[] = {
+	[PPTP_START_SESSION_REQUEST]  = sizeof(struct PptpStartSessionRequest),
+	[PPTP_START_SESSION_REPLY]    = sizeof(struct PptpStartSessionReply),
+	[PPTP_STOP_SESSION_REQUEST]   = sizeof(struct PptpStopSessionRequest),
+	[PPTP_STOP_SESSION_REPLY]     = sizeof(struct PptpStopSessionReply),
+	[PPTP_OUT_CALL_REQUEST]       = sizeof(struct PptpOutCallRequest),
+	[PPTP_OUT_CALL_REPLY]	      = sizeof(struct PptpOutCallReply),
+	[PPTP_IN_CALL_REQUEST]	      = sizeof(struct PptpInCallRequest),
+	[PPTP_IN_CALL_REPLY]	      = sizeof(struct PptpInCallReply),
+	[PPTP_IN_CALL_CONNECT]	      = sizeof(struct PptpInCallConnected),
+	[PPTP_CALL_CLEAR_REQUEST]     = sizeof(struct PptpClearCallRequest),
+	[PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+	[PPTP_WAN_ERROR_NOTIFY]	      = sizeof(struct PptpWanErrorNotify),
+	[PPTP_SET_LINK_INFO]	      = sizeof(struct PptpSetLinkInfo),
+};
+
 static inline int
 pptp_inbound_pkt(struct sk_buff **pskb,
 		 struct tcphdr *tcph,
@@ -326,13 +342,11 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
 
+	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+		return NF_ACCEPT;
+
 	switch (msg) {
 	case PPTP_START_SESSION_REPLY:
-		if (reqlen < sizeof(_pptpReq.srep)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server confirms new control session */
 		if (info->sstate < PPTP_SESSION_REQUESTED) {
 			DEBUGP("%s without START_SESS_REQUEST\n",
@@ -346,11 +360,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	case PPTP_STOP_SESSION_REPLY:
-		if (reqlen < sizeof(_pptpReq.strep)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server confirms end of control session */
 		if (info->sstate > PPTP_SESSION_STOPREQ) {
 			DEBUGP("%s without STOP_SESS_REQUEST\n",
@@ -364,11 +373,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	case PPTP_OUT_CALL_REPLY:
-		if (reqlen < sizeof(_pptpReq.ocack)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server accepted call, we now expect GRE frames */
 		if (info->sstate != PPTP_SESSION_CONFIRMED) {
 			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
@@ -404,11 +408,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	case PPTP_IN_CALL_REQUEST:
-		if (reqlen < sizeof(_pptpReq.icack)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server tells us about incoming call request */
 		if (info->sstate != PPTP_SESSION_CONFIRMED) {
 			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
@@ -421,11 +420,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	case PPTP_IN_CALL_CONNECT:
-		if (reqlen < sizeof(_pptpReq.iccon)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server tells us about incoming call established */
 		if (info->sstate != PPTP_SESSION_CONFIRMED) {
 			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
@@ -455,11 +449,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	case PPTP_CALL_DISCONNECT_NOTIFY:
-		if (reqlen < sizeof(_pptpReq.disc)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server confirms disconnect */
 		cid = pptpReq->disc.callID;
 		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
@@ -470,8 +459,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	case PPTP_WAN_ERROR_NOTIFY:
-		break;
-
 	case PPTP_ECHO_REQUEST:
 	case PPTP_ECHO_REPLY:
 		/* I don't have to explain these ;) */
@@ -522,6 +509,9 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
 
+	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+		return NF_ACCEPT;
+
 	switch (msg) {
 	case PPTP_START_SESSION_REQUEST:
 		/* client requests for new control session */
@@ -537,11 +527,6 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		break;
 
 	case PPTP_OUT_CALL_REQUEST:
-		if (reqlen < sizeof(_pptpReq.ocreq)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* client initiating connection to server */
 		if (info->sstate != PPTP_SESSION_CONFIRMED) {
 			DEBUGP("%s but no session\n",
@@ -555,11 +540,6 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		info->pns_call_id = cid;
 		break;
 	case PPTP_IN_CALL_REPLY:
-		if (reqlen < sizeof(_pptpReq.icack)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* client answers incoming call */
 		if (info->cstate != PPTP_CALL_IN_REQ
 		    && info->cstate != PPTP_CALL_IN_REP) {
@@ -595,7 +575,6 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		info->cstate = PPTP_CALL_CLEAR_REQ;
 		break;
 	case PPTP_SET_LINK_INFO:
-		break;
 	case PPTP_ECHO_REQUEST:
 	case PPTP_ECHO_REPLY:
 		/* I don't have to explain these ;) */
-- 
GitLab


From 4c651756d502e72a68b0bc6fb20bb18c68785227 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:10:06 -0700
Subject: [PATCH 0722/1063] [NETFILTER]: PPTP conntrack: consolidate header
 parsing

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 119 +++++++-----------
 1 file changed, 47 insertions(+), 72 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 3b5464fa4217a..9a98a6ce1901a 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -291,60 +291,21 @@ exp_gre(struct ip_conntrack *ct,
 	goto out_put_both;
 }
 
-static const unsigned int pptp_msg_size[] = {
-	[PPTP_START_SESSION_REQUEST]  = sizeof(struct PptpStartSessionRequest),
-	[PPTP_START_SESSION_REPLY]    = sizeof(struct PptpStartSessionReply),
-	[PPTP_STOP_SESSION_REQUEST]   = sizeof(struct PptpStopSessionRequest),
-	[PPTP_STOP_SESSION_REPLY]     = sizeof(struct PptpStopSessionReply),
-	[PPTP_OUT_CALL_REQUEST]       = sizeof(struct PptpOutCallRequest),
-	[PPTP_OUT_CALL_REPLY]	      = sizeof(struct PptpOutCallReply),
-	[PPTP_IN_CALL_REQUEST]	      = sizeof(struct PptpInCallRequest),
-	[PPTP_IN_CALL_REPLY]	      = sizeof(struct PptpInCallReply),
-	[PPTP_IN_CALL_CONNECT]	      = sizeof(struct PptpInCallConnected),
-	[PPTP_CALL_CLEAR_REQUEST]     = sizeof(struct PptpClearCallRequest),
-	[PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
-	[PPTP_WAN_ERROR_NOTIFY]	      = sizeof(struct PptpWanErrorNotify),
-	[PPTP_SET_LINK_INFO]	      = sizeof(struct PptpSetLinkInfo),
-};
-
 static inline int
 pptp_inbound_pkt(struct sk_buff **pskb,
-		 struct tcphdr *tcph,
-		 unsigned int nexthdr_off,
-		 unsigned int datalen,
+		 struct PptpControlHeader *ctlh,
+		 union pptp_ctrl_union *pptpReq,
+		 unsigned int reqlen,
 		 struct ip_conntrack *ct,
 		 enum ip_conntrack_info ctinfo)
 {
-	struct PptpControlHeader _ctlh, *ctlh;
-	unsigned int reqlen;
-	union pptp_ctrl_union _pptpReq, *pptpReq;
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
 	__be16 cid, pcid;
 
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
-	if (!ctlh) {
-		DEBUGP("error during skb_header_pointer\n");
-		return NF_ACCEPT;
-	}
-	nexthdr_off += sizeof(_ctlh);
-	datalen -= sizeof(_ctlh);
-
-	reqlen = datalen;
-	if (reqlen > sizeof(*pptpReq))
-		reqlen = sizeof(*pptpReq);
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
-	if (!pptpReq) {
-		DEBUGP("error during skb_header_pointer\n");
-		return NF_ACCEPT;
-	}
-
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
 
-	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
-		return NF_ACCEPT;
-
 	switch (msg) {
 	case PPTP_START_SESSION_REPLY:
 		/* server confirms new control session */
@@ -480,38 +441,19 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 static inline int
 pptp_outbound_pkt(struct sk_buff **pskb,
-		  struct tcphdr *tcph,
-		  unsigned int nexthdr_off,
-		  unsigned int datalen,
+		  struct PptpControlHeader *ctlh,
+		  union pptp_ctrl_union *pptpReq,
+		  unsigned int reqlen,
 		  struct ip_conntrack *ct,
 		  enum ip_conntrack_info ctinfo)
 {
-	struct PptpControlHeader _ctlh, *ctlh;
-	unsigned int reqlen;
-	union pptp_ctrl_union _pptpReq, *pptpReq;
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
 	__be16 cid, pcid;
 
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
-	if (!ctlh)
-		return NF_ACCEPT;
-	nexthdr_off += sizeof(_ctlh);
-	datalen -= sizeof(_ctlh);
-
-	reqlen = datalen;
-	if (reqlen > sizeof(*pptpReq))
-		reqlen = sizeof(*pptpReq);
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
-	if (!pptpReq)
-		return NF_ACCEPT;
-
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
 
-	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
-		return NF_ACCEPT;
-
 	switch (msg) {
 	case PPTP_START_SESSION_REQUEST:
 		/* client requests for new control session */
@@ -593,6 +535,21 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	return NF_ACCEPT;
 }
 
+static const unsigned int pptp_msg_size[] = {
+	[PPTP_START_SESSION_REQUEST]  = sizeof(struct PptpStartSessionRequest),
+	[PPTP_START_SESSION_REPLY]    = sizeof(struct PptpStartSessionReply),
+	[PPTP_STOP_SESSION_REQUEST]   = sizeof(struct PptpStopSessionRequest),
+	[PPTP_STOP_SESSION_REPLY]     = sizeof(struct PptpStopSessionReply),
+	[PPTP_OUT_CALL_REQUEST]       = sizeof(struct PptpOutCallRequest),
+	[PPTP_OUT_CALL_REPLY]	      = sizeof(struct PptpOutCallReply),
+	[PPTP_IN_CALL_REQUEST]	      = sizeof(struct PptpInCallRequest),
+	[PPTP_IN_CALL_REPLY]	      = sizeof(struct PptpInCallReply),
+	[PPTP_IN_CALL_CONNECT]	      = sizeof(struct PptpInCallConnected),
+	[PPTP_CALL_CLEAR_REQUEST]     = sizeof(struct PptpClearCallRequest),
+	[PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+	[PPTP_WAN_ERROR_NOTIFY]	      = sizeof(struct PptpWanErrorNotify),
+	[PPTP_SET_LINK_INFO]	      = sizeof(struct PptpSetLinkInfo),
+};
 
 /* track caller id inside control connection, call expect_related */
 static int
@@ -600,16 +557,17 @@ conntrack_pptp_help(struct sk_buff **pskb,
 		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
 
 {
-	struct pptp_pkt_hdr _pptph, *pptph;
-	struct tcphdr _tcph, *tcph;
-	u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
-	u_int32_t datalen;
 	int dir = CTINFO2DIR(ctinfo);
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	unsigned int nexthdr_off;
-
+	struct tcphdr _tcph, *tcph;
+	struct pptp_pkt_hdr _pptph, *pptph;
+	struct PptpControlHeader _ctlh, *ctlh;
+	union pptp_ctrl_union _pptpReq, *pptpReq;
+	unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+	unsigned int datalen, reqlen, nexthdr_off;
 	int oldsstate, oldcstate;
 	int ret;
+	u_int16_t msg;
 
 	/* don't do any tracking before tcp handshake complete */
 	if (ctinfo != IP_CT_ESTABLISHED
@@ -648,6 +606,23 @@ conntrack_pptp_help(struct sk_buff **pskb,
 		return NF_ACCEPT;
 	}
 
+	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	if (!ctlh)
+		return NF_ACCEPT;
+	nexthdr_off += sizeof(_ctlh);
+	datalen -= sizeof(_ctlh);
+
+	reqlen = datalen;
+	msg = ntohs(ctlh->messageType);
+	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+		return NF_ACCEPT;
+	if (reqlen > sizeof(*pptpReq))
+		reqlen = sizeof(*pptpReq);
+
+	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	if (!pptpReq)
+		return NF_ACCEPT;
+
 	oldsstate = info->sstate;
 	oldcstate = info->cstate;
 
@@ -657,11 +632,11 @@ conntrack_pptp_help(struct sk_buff **pskb,
 	 * established from PNS->PAC.  However, RFC makes no guarantee */
 	if (dir == IP_CT_DIR_ORIGINAL)
 		/* client -> server (PNS -> PAC) */
-		ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+		ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
 					ctinfo);
 	else
 		/* server -> client (PAC -> PNS) */
-		ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
 				       ctinfo);
 	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
 		oldsstate, info->sstate, oldcstate, info->cstate);
-- 
GitLab


From 87a0117afdfe64473a6c802501bc15aee145ebb8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:10:21 -0700
Subject: [PATCH 0723/1063] [NETFILTER]: PPTP conntrack: clean up debugging
 cruft

Also make sure not to hand packets received in an invalid state to the
NAT helper since it will mangle the packet with invalid data.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 128 +++++++-----------
 1 file changed, 51 insertions(+), 77 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 9a98a6ce1901a..7b6d5aaca4da0 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -301,7 +301,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 {
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
-	__be16 cid, pcid;
+	__be16 cid = 0, pcid = 0;
 
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
@@ -309,11 +309,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	switch (msg) {
 	case PPTP_START_SESSION_REPLY:
 		/* server confirms new control session */
-		if (info->sstate < PPTP_SESSION_REQUESTED) {
-			DEBUGP("%s without START_SESS_REQUEST\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate < PPTP_SESSION_REQUESTED)
+			goto invalid;
 		if (pptpReq->srep.resultCode == PPTP_START_OK)
 			info->sstate = PPTP_SESSION_CONFIRMED;
 		else
@@ -322,11 +319,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 	case PPTP_STOP_SESSION_REPLY:
 		/* server confirms end of control session */
-		if (info->sstate > PPTP_SESSION_STOPREQ) {
-			DEBUGP("%s without STOP_SESS_REQUEST\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate > PPTP_SESSION_STOPREQ)
+			goto invalid;
 		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
 			info->sstate = PPTP_SESSION_NONE;
 		else
@@ -335,15 +329,12 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 	case PPTP_OUT_CALL_REPLY:
 		/* server accepted call, we now expect GRE frames */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
 		if (info->cstate != PPTP_CALL_OUT_REQ &&
-		    info->cstate != PPTP_CALL_OUT_CONF) {
-			DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
-			break;
-		}
+		    info->cstate != PPTP_CALL_OUT_CONF)
+			goto invalid;
+
 		if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
 			info->cstate = PPTP_CALL_NONE;
 			break;
@@ -354,11 +345,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 		info->pac_call_id = cid;
 
-		if (info->pns_call_id != pcid) {
-			DEBUGP("%s for unknown callid %u\n",
-				pptp_msg_name[msg], ntohs(pcid));
-			break;
-		}
+		if (info->pns_call_id != pcid)
+			goto invalid;
 
 		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
 			ntohs(cid), ntohs(pcid));
@@ -370,10 +358,9 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 	case PPTP_IN_CALL_REQUEST:
 		/* server tells us about incoming call request */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+
 		pcid = pptpReq->icack.peersCallID;
 		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		info->cstate = PPTP_CALL_IN_REQ;
@@ -382,25 +369,17 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 	case PPTP_IN_CALL_CONNECT:
 		/* server tells us about incoming call established */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
-			break;
-		}
-		if (info->cstate != PPTP_CALL_IN_REP
-		    && info->cstate != PPTP_CALL_IN_CONF) {
-			DEBUGP("%s but never sent IN_CALL_REPLY\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+		if (info->cstate != PPTP_CALL_IN_REP &&
+		    info->cstate != PPTP_CALL_IN_CONF)
+			goto invalid;
 
 		pcid = pptpReq->iccon.peersCallID;
 		cid = info->pac_call_id;
 
-		if (info->pns_call_id != pcid) {
-			DEBUGP("%s for unknown CallID %u\n",
-				pptp_msg_name[msg], ntohs(pcid));
-			break;
-		}
+		if (info->pns_call_id != pcid)
+			goto invalid;
 
 		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		info->cstate = PPTP_CALL_IN_CONF;
@@ -425,18 +404,21 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		/* I don't have to explain these ;) */
 		break;
 	default:
-		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
-			? pptp_msg_name[msg]:pptp_msg_name[0], msg);
-		break;
+		goto invalid;
 	}
 
-
 	if (ip_nat_pptp_hook_inbound)
 		return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
 						pptpReq);
-
 	return NF_ACCEPT;
 
+invalid:
+	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
+	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+	return NF_ACCEPT;
 }
 
 static inline int
@@ -449,7 +431,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 {
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
-	__be16 cid, pcid;
+	__be16 cid = 0, pcid = 0;
 
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
@@ -457,10 +439,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	switch (msg) {
 	case PPTP_START_SESSION_REQUEST:
 		/* client requests for new control session */
-		if (info->sstate != PPTP_SESSION_NONE) {
-			DEBUGP("%s but we already have one",
-				pptp_msg_name[msg]);
-		}
+		if (info->sstate != PPTP_SESSION_NONE)
+			goto invalid;
 		info->sstate = PPTP_SESSION_REQUESTED;
 		break;
 	case PPTP_STOP_SESSION_REQUEST:
@@ -470,11 +450,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 
 	case PPTP_OUT_CALL_REQUEST:
 		/* client initiating connection to server */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
 		info->cstate = PPTP_CALL_OUT_REQ;
 		/* track PNS call id */
 		cid = pptpReq->ocreq.callID;
@@ -483,22 +460,17 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		break;
 	case PPTP_IN_CALL_REPLY:
 		/* client answers incoming call */
-		if (info->cstate != PPTP_CALL_IN_REQ
-		    && info->cstate != PPTP_CALL_IN_REP) {
-			DEBUGP("%s without incall_req\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->cstate != PPTP_CALL_IN_REQ &&
+		    info->cstate != PPTP_CALL_IN_REP)
+			goto invalid;
+
 		if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
 			info->cstate = PPTP_CALL_NONE;
 			break;
 		}
 		pcid = pptpReq->icack.peersCallID;
-		if (info->pac_call_id != pcid) {
-			DEBUGP("%s for unknown call %u\n",
-				pptp_msg_name[msg], ntohs(pcid));
-			break;
-		}
+		if (info->pac_call_id != pcid)
+			goto invalid;
 		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		/* part two of the three-way handshake */
 		info->cstate = PPTP_CALL_IN_REP;
@@ -507,10 +479,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 
 	case PPTP_CALL_CLEAR_REQUEST:
 		/* client requests hangup of call */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("CLEAR_CALL but no session\n");
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
 		/* FUTURE: iterate over all calls and check if
 		 * call ID is valid.  We don't do this without newnat,
 		 * because we only know about last call */
@@ -522,16 +492,20 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		/* I don't have to explain these ;) */
 		break;
 	default:
-		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
-			pptp_msg_name[msg]:pptp_msg_name[0], msg);
-		/* unknown: no need to create GRE masq table entry */
-		break;
+		goto invalid;
 	}
 
 	if (ip_nat_pptp_hook_outbound)
 		return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
 						 pptpReq);
+	return NF_ACCEPT;
 
+invalid:
+	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
+	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
 	return NF_ACCEPT;
 }
 
-- 
GitLab


From 750a58423309b56751076329e9edf61b93213e0f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:10:37 -0700
Subject: [PATCH 0724/1063] [NETFILTER]: PPTP conntrack: check call ID before
 changing state

For rejected calls the state is set to PPTP_CALL_NONE even for non-matching
call ids.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 32 ++++++++-----------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 7b6d5aaca4da0..5cb6b61cd171a 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -335,25 +335,19 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		    info->cstate != PPTP_CALL_OUT_CONF)
 			goto invalid;
 
-		if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
-			info->cstate = PPTP_CALL_NONE;
-			break;
-		}
-
 		cid = pptpReq->ocack.callID;
 		pcid = pptpReq->ocack.peersCallID;
-
-		info->pac_call_id = cid;
-
 		if (info->pns_call_id != pcid)
 			goto invalid;
-
 		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
 			ntohs(cid), ntohs(pcid));
 
-		info->cstate = PPTP_CALL_OUT_CONF;
-
-		exp_gre(ct, cid, pcid);
+		if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
+			info->cstate = PPTP_CALL_OUT_CONF;
+			info->pac_call_id = cid;
+			exp_gre(ct, cid, pcid);
+		} else
+			info->cstate = PPTP_CALL_NONE;
 		break;
 
 	case PPTP_IN_CALL_REQUEST:
@@ -464,17 +458,17 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		    info->cstate != PPTP_CALL_IN_REP)
 			goto invalid;
 
-		if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
-			info->cstate = PPTP_CALL_NONE;
-			break;
-		}
 		pcid = pptpReq->icack.peersCallID;
 		if (info->pac_call_id != pcid)
 			goto invalid;
 		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid));
-		/* part two of the three-way handshake */
-		info->cstate = PPTP_CALL_IN_REP;
-		info->pns_call_id = pcid;
+
+		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
+			/* part two of the three-way handshake */
+			info->cstate = PPTP_CALL_IN_REP;
+			info->pns_call_id = pcid;
+		} else
+			info->cstate = PPTP_CALL_NONE;
 		break;
 
 	case PPTP_CALL_CLEAR_REQUEST:
-- 
GitLab


From 62fbe9c82b20197a4f9c54f7add5d368418ba277 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:10:52 -0700
Subject: [PATCH 0725/1063] [NETFILTER]: PPTP conntrack: fix PPTP_IN_CALL
 message types

Fix incorrectly used message types and call IDs:

- PPTP_IN_CALL_REQUEST (PAC->PNS) contains a PptpInCallRequest (icreq)
  message and the PAC call ID

- PPTP_IN_CALL_REPLY (PNS->PAC) contains a PptpInCallReply (icack)
  message and the PNS call ID

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 12 +++++++-----
 net/ipv4/netfilter/ip_nat_helper_pptp.c       |  2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 5cb6b61cd171a..b0225b65ca351 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -355,10 +355,10 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		if (info->sstate != PPTP_SESSION_CONFIRMED)
 			goto invalid;
 
-		pcid = pptpReq->icack.peersCallID;
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
+		cid = pptpReq->icreq.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
 		info->cstate = PPTP_CALL_IN_REQ;
-		info->pac_call_id = pcid;
+		info->pac_call_id = cid;
 		break;
 
 	case PPTP_IN_CALL_CONNECT:
@@ -458,15 +458,17 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		    info->cstate != PPTP_CALL_IN_REP)
 			goto invalid;
 
+		cid = pptpReq->icack.callID;
 		pcid = pptpReq->icack.peersCallID;
 		if (info->pac_call_id != pcid)
 			goto invalid;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid));
+		DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+		       ntohs(cid), ntohs(pcid));
 
 		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
 			/* part two of the three-way handshake */
 			info->cstate = PPTP_CALL_IN_REP;
-			info->pns_call_id = pcid;
+			info->pns_call_id = cid;
 		} else
 			info->cstate = PPTP_CALL_NONE;
 		break;
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 84f6bd09fcd4d..2ff5788071237 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -172,7 +172,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		ct_pptp_info->pns_call_id = new_callid;
 		break;
 	case PPTP_IN_CALL_REPLY:
-		cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
+		cid_off = offsetof(union pptp_ctrl_union, icack.callID);
 		break;
 	case PPTP_CALL_CLEAR_REQUEST:
 		cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
-- 
GitLab


From fd5e3befa405ea64d4db6b393b821644bf963c57 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:11:12 -0700
Subject: [PATCH 0726/1063] [NETFILTER]: PPTP conntrack: fix GRE keymap leak

When destroying the GRE expectations without having seen the GRE connection
the keymap entry is not freed, leading to a memory leak and, in case of
a following call within the same session, failure during expectation setup.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index b0225b65ca351..98267b0d2a476 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -194,6 +194,7 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
 {
 	struct ip_conntrack_tuple t;
 
+	ip_ct_gre_keymap_destroy(ct);
 	/* Since ct->sibling_list has literally rusted away in 2.6.11,
 	 * we now need another way to find out about our sibling
 	 * contrack and expects... -HW */
-- 
GitLab


From 4c5de695cf7f71c85ad8cfff509f6475b8bd4d27 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 20 Sep 2006 12:11:30 -0700
Subject: [PATCH 0727/1063] [NETFILTER]: PPTP conntrack: fix another GRE keymap
 leak

When the master PPTP connection times out while still having unfulfilled
expectations (and a GRE keymap entry) associated with it, the keymap entry
is not destroyed.

Add a destroy callback to struct ip_conntrack_helper and use it to destroy
PPTP siblings when the master is destroyed.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_conntrack_helper.h |  2 ++
 net/ipv4/netfilter/ip_conntrack_core.c             |  5 +++++
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c      | 12 ++----------
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
index 8d69279ccfe46..77fe868d36ff9 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
@@ -25,6 +25,8 @@ struct ip_conntrack_helper
 		    struct ip_conntrack *ct,
 		    enum ip_conntrack_info conntrackinfo);
 
+	void (*destroy)(struct ip_conntrack *ct);
+
 	int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct);
 };
 
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 2b6f24fc727e0..c432b31636091 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -307,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
 	struct ip_conntrack_protocol *proto;
+	struct ip_conntrack_helper *helper;
 
 	DEBUGP("destroy_conntrack(%p)\n", ct);
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -315,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	ip_conntrack_event(IPCT_DESTROY, ct);
 	set_bit(IPS_DYING_BIT, &ct->status);
 
+	helper = ct->helper;
+	if (helper && helper->destroy)
+		helper->destroy(ct);
+
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to ip_conntrack_lock!!! -HW */
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 98267b0d2a476..fb0aee6917214 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -553,15 +553,6 @@ conntrack_pptp_help(struct sk_buff **pskb,
 	nexthdr_off += tcph->doff * 4;
  	datalen = tcplen - tcph->doff * 4;
 
-	if (tcph->fin || tcph->rst) {
-		DEBUGP("RST/FIN received, timeouting GRE\n");
-		/* can't do this after real newnat */
-		info->cstate = PPTP_CALL_NONE;
-
-		/* untrack this call id, unexpect GRE packets */
-		pptp_destroy_siblings(ct);
-	}
-
 	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
 	if (!pptph) {
 		DEBUGP("no full PPTP header, can't track\n");
@@ -640,7 +631,8 @@ static struct ip_conntrack_helper pptp = {
 			   .protonum = 0xff
 		 	 }
 		},
-	.help = conntrack_pptp_help
+	.help = conntrack_pptp_help,
+	.destroy = pptp_destroy_siblings,
 };
 
 extern void ip_ct_proto_gre_fini(void);
-- 
GitLab


From e21e0b5f19ac7835a244c2016f7ed726f971b3e9 Mon Sep 17 00:00:00 2001
From: Ville Nuorvala <vnuorval@tcs.hut.fi>
Date: Fri, 22 Sep 2006 14:41:44 -0700
Subject: [PATCH 0728/1063] [IPV6] NDISC: Handle NDP messages to proxied
 addresses.

It is required to respond to NDP messages sent directly to the "target"
unicast address.  A proxying node (router) is required to handle such
messages.  To achieve this, check whether the packet in the forwarding
path is an NDP message.

With this patch, the proxy neighbor entries are always looked up in the
forwarding path.  We may want to optimize further.

Based on MIPL2 kernel patch.

Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ip6_output.c | 45 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c14ea1ecf3792..0f56e9e69a8f9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -308,6 +308,46 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 	return 0;
 }
 
+static int ip6_forward_proxy_check(struct sk_buff *skb)
+{
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	u8 nexthdr = hdr->nexthdr;
+	int offset;
+
+	if (ipv6_ext_hdr(nexthdr)) {
+		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
+		if (offset < 0)
+			return 0;
+	} else
+		offset = sizeof(struct ipv6hdr);
+
+	if (nexthdr == IPPROTO_ICMPV6) {
+		struct icmp6hdr *icmp6;
+
+		if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+			return 0;
+
+		icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+
+		switch (icmp6->icmp6_type) {
+		case NDISC_ROUTER_SOLICITATION:
+		case NDISC_ROUTER_ADVERTISEMENT:
+		case NDISC_NEIGHBOUR_SOLICITATION:
+		case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		case NDISC_REDIRECT:
+			/* For reaction involving unicast neighbor discovery
+			 * message destined to the proxied address, pass it to
+			 * input function.
+			 */
+			return 1;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
 	return dst_output(skb);
@@ -362,6 +402,11 @@ int ip6_forward(struct sk_buff *skb)
 		return -ETIMEDOUT;
 	}
 
+	if (pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
+		if (ip6_forward_proxy_check(skb))
+			return ip6_input(skb);
+	}
+
 	if (!xfrm6_route_forward(skb)) {
 		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
 		goto drop;
-- 
GitLab


From 74553b09dcd9194cbda737016f0b89f245145670 Mon Sep 17 00:00:00 2001
From: Ville Nuorvala <vnuorval@tcs.hut.fi>
Date: Fri, 22 Sep 2006 14:42:18 -0700
Subject: [PATCH 0729/1063] [IPV6]: Don't forward packets to proxied link-local
 address.

A proxying router can't forward traffic sent to a link-local address, so
signal the sender and discard the packet. This behavior is clarified by the
Mobile IPv6 specification (RFC 3775) but might be required for all proxying
routers.
Based on MIPL2 kernel patch.

Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ip6_output.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0f56e9e69a8f9..b2be749d22172 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -345,6 +345,16 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 		}
 	}
 
+	/*
+	 * The proxying router can't forward traffic sent to a link-local
+	 * address, so signal the sender and discard the packet. This
+	 * behavior is clarified by the MIPv6 specification.
+	 */
+	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
+		dst_link_failure(skb);
+		return -1;
+	}
+
 	return 0;
 }
 
@@ -403,8 +413,13 @@ int ip6_forward(struct sk_buff *skb)
 	}
 
 	if (pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
-		if (ip6_forward_proxy_check(skb))
+		int proxied = ip6_forward_proxy_check(skb);
+		if (proxied > 0)
 			return ip6_input(skb);
+		else if (proxied < 0) {
+			IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+			goto drop;
+		}
 	}
 
 	if (!xfrm6_route_forward(skb)) {
-- 
GitLab


From 5f3e6e9e19f50a6910aec2dbd479187aabba04b7 Mon Sep 17 00:00:00 2001
From: Ville Nuorvala <vnuorval@tcs.hut.fi>
Date: Fri, 22 Sep 2006 14:42:46 -0700
Subject: [PATCH 0730/1063] [IPV6] NDISC: Avoid updating neighbor cache for
 proxied address in receiving NA.

This prevents a proxying router from updating the neighbor cache entry for
a proxied address when it receives an NA, because either the proxied node
is off link or it has already sent an NA to the proxying router.

Based on MIPL2 kernel patch.

Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ndisc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ed01f9a330d6d..0e0d6ce690213 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -952,6 +952,15 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		if (neigh->nud_state & NUD_FAILED)
 			goto out;
 
+		/*
+		 * Don't update the neighbor cache entry on a proxy NA from
+		 * ourselves because either the proxied node is off link or it
+		 * has already sent a NA to us.
+		 */
+		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0))
+			goto out;
+
 		neigh_update(neigh, lladdr,
 			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
-- 
GitLab


From 62dd93181aaa1d5a501a9cebcb254f44b8a48af7 Mon Sep 17 00:00:00 2001
From: Ville Nuorvala <vnuorval@tcs.hut.fi>
Date: Fri, 22 Sep 2006 14:43:19 -0700
Subject: [PATCH 0731/1063] [IPV6] NDISC: Set per-entry is_router flag in Proxy
 NA.

Until now the router flag in an NA has been taken from the node-wide
forwarding configuration.  This is not appropriate for a proxy NA, where
the flag should be set according to each proxy entry's configuration.

This is used, for example, by a Mobile IPv6 home agent on a physical home
link when it acts as a proxy router for a mobile node that is not a router.

Based on MIPL2 kernel patch.

Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi>
Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/neighbour.h |  1 +
 net/core/neighbour.c    | 11 ++++++++---
 net/ipv6/ndisc.c        | 14 +++++++++++---
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index bd187daffdb9f..c8aacbd2e3331 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -126,6 +126,7 @@ struct pneigh_entry
 {
 	struct pneigh_entry	*next;
 	struct net_device		*dev;
+	u8			flags;
 	u8			key[0];
 };
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a45bd2124d6b2..b6c69e1463e87 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1544,9 +1544,14 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = 0;
-			if (pneigh_lookup(tbl, dst, dev, 1) == NULL)
-				err = -ENOBUFS;
+			struct pneigh_entry *pn;
+
+			err = -ENOBUFS;
+			pn = pneigh_lookup(tbl, dst, dev, 1);
+			if (pn) {
+				pn->flags = ndm->ndm_flags;
+				err = 0;
+			}
 			goto out_dev_put;
 		}
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0e0d6ce690213..ddf038636f01d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -736,8 +736,10 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev = NULL;
 	struct neighbour *neigh;
+	struct pneigh_entry *pneigh = NULL;
 	int dad = ipv6_addr_any(saddr);
 	int inc;
+	int is_router;
 
 	if (ipv6_addr_is_multicast(&msg->target)) {
 		ND_PRINTK2(KERN_WARNING 
@@ -822,7 +824,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 
 		if (ipv6_chk_acast_addr(dev, &msg->target) ||
 		    (idev->cnf.forwarding && 
-		     pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
+		     (pneigh = pneigh_lookup(&nd_tbl,
+					     &msg->target, dev, 0)) != NULL)) {
 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
 			    skb->pkt_type != PACKET_HOST &&
 			    inc != 0 &&
@@ -843,12 +846,17 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 			goto out;
 	}
 
+	if (pneigh)
+		is_router = pneigh->flags & NTF_ROUTER;
+	else
+		is_router = idev->cnf.forwarding;
+
 	if (dad) {
 		struct in6_addr maddr;
 
 		ipv6_addr_all_nodes(&maddr);
 		ndisc_send_na(dev, NULL, &maddr, &msg->target,
-			      idev->cnf.forwarding, 0, (ifp != NULL), 1);
+			      is_router, 0, (ifp != NULL), 1);
 		goto out;
 	}
 
@@ -869,7 +877,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 			     NEIGH_UPDATE_F_OVERRIDE);
 	if (neigh || !dev->hard_header) {
 		ndisc_send_na(dev, neigh, saddr, &msg->target,
-			      idev->cnf.forwarding, 
+			      is_router,
 			      1, (ifp != NULL && inc), inc);
 		if (neigh)
 			neigh_release(neigh);
-- 
GitLab


From fbea49e1e2404baa2d88ab47e2db89e49551b53b Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 22 Sep 2006 14:43:49 -0700
Subject: [PATCH 0732/1063] [IPV6] NDISC: Add proxy_ndp sysctl.

We do not always need proxy NDP functionality even when we
enable forwarding.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  3 +++
 include/linux/ipv6.h                   |  2 ++
 include/linux/sysctl.h                 |  1 +
 net/ipv6/addrconf.c                    | 11 +++++++++++
 net/ipv6/ip6_output.c                  |  4 +++-
 net/ipv6/ndisc.c                       |  8 +++++++-
 6 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 307cd4ec8edd8..935e298f674ad 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -765,6 +765,9 @@ conf/all/forwarding - BOOLEAN
 
 	This referred to as global forwarding.
 
+proxy_ndp - BOOLEAN
+	Do proxy ndp.
+
 conf/interface/*:
 	Change special settings per interface.
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1d6d3ccc9413c..caca57df0d7d5 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -176,6 +176,7 @@ struct ipv6_devconf {
 	__s32		accept_ra_rt_info_max_plen;
 #endif
 #endif
+	__s32		proxy_ndp;
 	void		*sysctl;
 };
 
@@ -203,6 +204,7 @@ enum {
 	DEVCONF_ACCEPT_RA_RTR_PREF,
 	DEVCONF_RTR_PROBE_INTERVAL,
 	DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN,
+	DEVCONF_PROXY_NDP,
 	DEVCONF_MAX
 };
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index af61d92354090..736ed917a4f8d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -556,6 +556,7 @@ enum {
 	NET_IPV6_ACCEPT_RA_RTR_PREF=20,
 	NET_IPV6_RTR_PROBE_INTERVAL=21,
 	NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22,
+	NET_IPV6_PROXY_NDP=23,
 	__NET_IPV6_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1e5a296d0a82d..825a291d5aa58 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -175,6 +175,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.proxy_ndp		= 0,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -205,6 +206,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.proxy_ndp		= 0,
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -3337,6 +3339,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
 #endif
+	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
 }
 
 /* Maximum length of ifinfomsg attributes */
@@ -3859,6 +3862,14 @@ static struct addrconf_sysctl_table
 		},
 #endif
 #endif
+		{
+			.ctl_name	=	NET_IPV6_PROXY_NDP,
+			.procname	=	"proxy_ndp",
+			.data		=	&ipv6_devconf.proxy_ndp,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
 		{
 			.ctl_name	=	0,	/* sentinel */
 		}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b2be749d22172..66716911962eb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -412,7 +412,9 @@ int ip6_forward(struct sk_buff *skb)
 		return -ETIMEDOUT;
 	}
 
-	if (pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
+	/* XXX: idev->cnf.proxy_ndp? */
+	if (ipv6_devconf.proxy_ndp &&
+	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
 		int proxied = ip6_forward_proxy_check(skb);
 		if (proxied > 0)
 			return ip6_input(skb);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ddf038636f01d..76517a5f65767 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -824,6 +824,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 
 		if (ipv6_chk_acast_addr(dev, &msg->target) ||
 		    (idev->cnf.forwarding && 
+		     (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
 		     (pneigh = pneigh_lookup(&nd_tbl,
 					     &msg->target, dev, 0)) != NULL)) {
 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
@@ -966,8 +967,13 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		 * has already sent a NA to us.
 		 */
 		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
-		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0))
+		    ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
+		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+			/* XXX: idev->cnf.prixy_ndp */
+			WARN_ON(skb->dst != NULL &&
+				((struct rt6_info *)skb->dst)->rt6i_idev);
 			goto out;
+		}
 
 		neigh_update(neigh, lladdr,
 			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
-- 
GitLab


From 8814c4b533817df825485ff32ce6ac406c3a54d1 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 22 Sep 2006 14:44:24 -0700
Subject: [PATCH 0733/1063] [IPV6] ADDRCONF: Convert addrconf_lock to RCU.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 10 ++++-----
 include/net/if_inet6.h |  1 +
 net/core/pktgen.c      |  4 ++--
 net/ipv6/addrconf.c    | 46 ++++++++++++++++++++++--------------------
 net/ipv6/anycast.c     |  4 ++--
 net/ipv6/ipv6_syms.c   |  1 -
 net/sctp/ipv6.c        |  6 +++---
 7 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 5fc8627435eb3..aa2ed8f0a9dd6 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -133,20 +133,18 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb);
 static inline struct inet6_dev *
 __in6_dev_get(struct net_device *dev)
 {
-	return (struct inet6_dev *)dev->ip6_ptr;
+	return rcu_dereference(dev->ip6_ptr);
 }
 
-extern rwlock_t addrconf_lock;
-
 static inline struct inet6_dev *
 in6_dev_get(struct net_device *dev)
 {
 	struct inet6_dev *idev = NULL;
-	read_lock(&addrconf_lock);
-	idev = dev->ip6_ptr;
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
 	if (idev)
 		atomic_inc(&idev->refcnt);
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return idev;
 }
 
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index e459e1a0ae4a9..34489c13c119c 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -189,6 +189,7 @@ struct inet6_dev
 	struct ipv6_devconf	cnf;
 	struct ipv6_devstat	stats;
 	unsigned long		tstamp; /* ipv6InterfaceTable update timestamp */
+	struct rcu_head		rcu;
 };
 
 extern struct ipv6_devconf ipv6_devconf;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6a7320b39ed0c..72145d4a26008 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1786,7 +1786,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 			 * use ipv6_get_lladdr if/when it's get exported
 			 */
 
-			read_lock(&addrconf_lock);
+			rcu_read_lock();
 			if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) {
 				struct inet6_ifaddr *ifp;
 
@@ -1805,7 +1805,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 				}
 				read_unlock_bh(&idev->lock);
 			}
-			read_unlock(&addrconf_lock);
+			rcu_read_unlock();
 			if (err)
 				printk("pktgen: ERROR: IPv6 link address not availble.\n");
 		}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 825a291d5aa58..c09ebb7bb98a3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -119,9 +119,6 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
 static struct inet6_ifaddr		*inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_RWLOCK(addrconf_hash_lock);
 
-/* Protects inet6 devices */
-DEFINE_RWLOCK(addrconf_lock);
-
 static void addrconf_verify(unsigned long);
 
 static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
@@ -318,6 +315,12 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 
 /* Nobody refers to this device, we may destroy it. */
 
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+	kfree(idev);
+}
+
 void in6_dev_finish_destroy(struct inet6_dev *idev)
 {
 	struct net_device *dev = idev->dev;
@@ -332,7 +335,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 		return;
 	}
 	snmp6_free_dev(idev);
-	kfree(idev);
+	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 
 static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
@@ -408,9 +411,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	if (netif_carrier_ok(dev))
 		ndev->if_flags |= IF_READY;
 
-	write_lock_bh(&addrconf_lock);
-	dev->ip6_ptr = ndev;
-	write_unlock_bh(&addrconf_lock);
+	/* protected by rtnl_lock */
+	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
 	ipv6_mc_init_dev(ndev);
 	ndev->tstamp = jiffies;
@@ -474,7 +476,7 @@ static void addrconf_forward_change(void)
 
 	read_lock(&dev_base_lock);
 	for (dev=dev_base; dev; dev=dev->next) {
-		read_lock(&addrconf_lock);
+		rcu_read_lock();
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
@@ -482,7 +484,7 @@ static void addrconf_forward_change(void)
 			if (changed)
 				dev_forward_change(idev);
 		}
-		read_unlock(&addrconf_lock);
+		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
 }
@@ -543,7 +545,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	int hash;
 	int err = 0;
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	if (idev->dead) {
 		err = -ENODEV;			/*XXX*/
 		goto out2;
@@ -612,7 +614,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	in6_ifa_hold(ifa);
 	write_unlock(&idev->lock);
 out2:
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 
 	if (likely(err == 0))
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
@@ -915,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 	memset(&hiscore, 0, sizeof(hiscore));
 
 	read_lock(&dev_base_lock);
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 
 	for (dev = dev_base; dev; dev=dev->next) {
 		struct inet6_dev *idev;
@@ -1127,7 +1129,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	read_unlock(&dev_base_lock);
 
 	if (!ifa_result)
@@ -1151,7 +1153,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 	struct inet6_dev *idev;
 	int err = -EADDRNOTAVAIL;
 
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 	if ((idev = __in6_dev_get(dev)) != NULL) {
 		struct inet6_ifaddr *ifp;
 
@@ -1165,7 +1167,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -1466,7 +1468,7 @@ static void ipv6_regen_rndid(unsigned long data)
 	struct inet6_dev *idev = (struct inet6_dev *) data;
 	unsigned long expires;
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	write_lock_bh(&idev->lock);
 
 	if (idev->dead)
@@ -1490,7 +1492,7 @@ static void ipv6_regen_rndid(unsigned long data)
 
 out:
 	write_unlock_bh(&idev->lock);
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 	in6_dev_put(idev);
 }
 
@@ -2342,10 +2344,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	           Do not dev_put!
 	 */
 	if (how == 1) {
-		write_lock_bh(&addrconf_lock);
-		dev->ip6_ptr = NULL;
 		idev->dead = 1;
-		write_unlock_bh(&addrconf_lock);
+
+		/* protected by rtnl_lock */
+		rcu_assign_pointer(dev->ip6_ptr, NULL);
 
 		/* Step 1.5: remove snmp6 entry */
 		snmp6_unregister_dev(idev);
@@ -3573,10 +3575,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	if (likely(ifp->idev->dead == 0))
 		__ipv6_ifa_notify(event, ifp);
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index b80fc502ca038..a9604764e015a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -56,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev)
 	int	onlink;
 
 	onlink = 0;
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
@@ -68,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev)
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return onlink;
 }
 
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index 7b7b90d9c3d06..0e8e0676a0336 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -14,7 +14,6 @@ EXPORT_SYMBOL(ndisc_mc_map);
 EXPORT_SYMBOL(register_inet6addr_notifier);
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(addrconf_lock);
 EXPORT_SYMBOL(ipv6_setsockopt);
 EXPORT_SYMBOL(ipv6_getsockopt);
 EXPORT_SYMBOL(inet6_register_protosw);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index fd87e3ceb56e6..249e5033c1a86 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -321,9 +321,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 	struct inet6_ifaddr *ifp;
 	struct sctp_sockaddr_entry *addr;
 
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 	if ((in6_dev = __in6_dev_get(dev)) == NULL) {
-		read_unlock(&addrconf_lock);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -342,7 +342,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 	}
 
 	read_unlock(&in6_dev->lock);
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 }
 
 /* Initialize a sockaddr_storage from in incoming skb. */
-- 
GitLab


From fc26d0abd5afd2b5268a7dbdbf8be1095ce5703e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 22 Sep 2006 14:44:53 -0700
Subject: [PATCH 0734/1063] [IPV6] NDISC: Fix is_router flag setting.

We did not send the appropriate IsRouter flag when the forwarding
setting was a positive even value.  Pass a normalized 1/0 value to
ndisc_send_na() instead.

Also, since the existing callers of ndisc_send_na() already pass 0/1
for override, we can drop the redundant "!!" normalization of override
in that function.

Bug hinted by Nicolas Dichtel <nicolas.dichtel@6wind.com>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 76517a5f65767..0304b5fe8d6aa 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -496,7 +496,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
         msg->icmph.icmp6_unused = 0;
         msg->icmph.icmp6_router    = router;
         msg->icmph.icmp6_solicited = solicited;
-        msg->icmph.icmp6_override  = !!override;
+        msg->icmph.icmp6_override  = override;
 
         /* Set the target address. */
 	ipv6_addr_copy(&msg->target, solicited_addr);
@@ -847,10 +847,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 			goto out;
 	}
 
-	if (pneigh)
-		is_router = pneigh->flags & NTF_ROUTER;
-	else
-		is_router = idev->cnf.forwarding;
+	is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding);
 
 	if (dad) {
 		struct in6_addr maddr;
-- 
GitLab


From 55ebaef1d5db9c1c76ba01a87fd986db5dee550d Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Fri, 22 Sep 2006 14:45:27 -0700
Subject: [PATCH 0735/1063] [IPV6] ADDRCONF: Allow non-DAD'able addresses.

IFA_F_NODAD flag, similar to IN6_IFF_NODAD in BSDs, is introduced
to skip DAD.

This flag should be set on Mobile IPv6 Home Address(es) on a Mobile
Node, because DAD would fail if it were performed: our Home Agent
protects our Home Address(es).

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_addr.h |  1 +
 net/ipv6/addrconf.c     | 31 ++++++++++++++++---------------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index e1590454db596..ca24b9de13fb9 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -38,6 +38,7 @@ enum
 #define IFA_F_SECONDARY		0x01
 #define IFA_F_TEMPORARY		IFA_F_SECONDARY
 
+#define	IFA_F_NODAD		0x02
 #define IFA_F_DEPRECATED	0x20
 #define IFA_F_TENTATIVE		0x40
 #define IFA_F_PERMANENT		0x80
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c09ebb7bb98a3..adb583a261515 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1873,12 +1873,11 @@ int addrconf_set_dstaddr(void __user *arg)
  *	Manual configuration of address on an interface
  */
 static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
-			  __u32 prefered_lft, __u32 valid_lft)
+			  __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
-	__u8 ifa_flags = 0;
 	int scope;
 
 	ASSERT_RTNL();
@@ -1971,7 +1970,7 @@ int addrconf_add_ifaddr(void __user *arg)
 
 	rtnl_lock();
 	err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
-			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+			     IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 	rtnl_unlock();
 	return err;
 }
@@ -2514,7 +2513,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	spin_lock_bh(&ifp->lock);
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-	    !(ifp->flags&IFA_F_TENTATIVE)) {
+	    !(ifp->flags&IFA_F_TENTATIVE) ||
+	    ifp->flags & IFA_F_NODAD) {
 		ifp->flags &= ~IFA_F_TENTATIVE;
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
@@ -2912,28 +2912,25 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
 }
 
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 prefered_lft,
-			     u32 valid_lft)
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
+			     u32 prefered_lft, u32 valid_lft)
 {
-	int ifa_flags = 0;
-
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
 	if (valid_lft == INFINITY_LIFE_TIME)
-		ifa_flags = IFA_F_PERMANENT;
+		ifa_flags |= IFA_F_PERMANENT;
 	else if (valid_lft >= 0x7FFFFFFF/HZ)
 		valid_lft = 0x7FFFFFFF/HZ;
 
 	if (prefered_lft == 0)
-		ifa_flags = IFA_F_DEPRECATED;
+		ifa_flags |= IFA_F_DEPRECATED;
 	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
 		 (prefered_lft != INFINITY_LIFE_TIME))
 		prefered_lft = 0x7FFFFFFF/HZ;
 
 	spin_lock_bh(&ifp->lock);
-	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags;
-
+	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD)) | ifa_flags;
 	ifp->tstamp = jiffies;
 	ifp->valid_lft = valid_lft;
 	ifp->prefered_lft = prefered_lft;
@@ -2955,7 +2952,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct in6_addr *pfx;
 	struct inet6_ifaddr *ifa;
 	struct net_device *dev;
-	u32 valid_lft, preferred_lft;
+	u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
+	u8 ifa_flags;
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -2982,6 +2980,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (dev == NULL)
 		return -ENODEV;
 
+	/* We ignore other flags so far. */
+	ifa_flags = ifm->ifa_flags & IFA_F_NODAD;
+
 	ifa = ipv6_get_ifaddr(pfx, dev, 1);
 	if (ifa == NULL) {
 		/*
@@ -2989,14 +2990,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		 * userspace alreay relies on not having to provide this.
 		 */
 		return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
-				      preferred_lft, valid_lft);
+				      ifa_flags, preferred_lft, valid_lft);
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_EXCL ||
 	    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 		err = -EEXIST;
 	else
-		err = inet6_addr_modify(ifa, preferred_lft, valid_lft);
+		err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
 
 	in6_ifa_put(ifa);
 
-- 
GitLab


From 3b9f9a1c3903b64c38505f9fed3bb11e48dbc931 Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Fri, 22 Sep 2006 14:45:56 -0700
Subject: [PATCH 0736/1063] [IPV6] ADDRCONF: Mobile IPv6 Home Address support.

IFA_F_HOMEADDRESS is introduced for Mobile IPv6 Home Addresses on
Mobile Node.

The IFA_F_HOMEADDRESS flag should be set for Mobile IPv6 Home
Addresses for 2 purposes. 1) We need to check this on receipt of
Type 2 Routing Header (RFC3775 Section 6.4), 2) We prefer Home
Address(es) in source address selection (RFC3484 Section 5 Rule 4).

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_addr.h |  1 +
 include/net/addrconf.h  |  6 +-----
 net/ipv6/addrconf.c     | 44 ++++++++++++++++++++++++++++++++++++++---
 3 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index ca24b9de13fb9..dbe8f6120a40b 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -39,6 +39,7 @@ enum
 #define IFA_F_TEMPORARY		IFA_F_SECONDARY
 
 #define	IFA_F_NODAD		0x02
+#define	IFA_F_HOMEADDRESS	0x10
 #define IFA_F_DEPRECATED	0x20
 #define IFA_F_TENTATIVE		0x40
 #define IFA_F_PERMANENT		0x80
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index aa2ed8f0a9dd6..44f1b673f916a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -61,12 +61,8 @@ extern int			addrconf_set_dstaddr(void __user *arg);
 extern int			ipv6_chk_addr(struct in6_addr *addr,
 					      struct net_device *dev,
 					      int strict);
-/* XXX: this is a placeholder till addrconf supports */
 #ifdef CONFIG_IPV6_MIP6
-static inline int ipv6_chk_home_addr(struct in6_addr *addr)
-{
-	return 0;
-}
+extern int			ipv6_chk_home_addr(struct in6_addr *addr);
 #endif
 extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
 						struct net_device *dev,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index adb583a261515..c18676352397c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1038,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 					continue;
 			}
 
-			/* Rule 4: Prefer home address -- not implemented yet */
+			/* Rule 4: Prefer home address */
+#ifdef CONFIG_IPV6_MIP6
+			if (hiscore.rule < 4) {
+				if (ifa_result->flags & IFA_F_HOMEADDRESS)
+					hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
+				hiscore.rule++;
+			}
+			if (ifa->flags & IFA_F_HOMEADDRESS) {
+				score.attrs |= IPV6_SADDR_SCORE_HOA;
+				if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
+					score.rule = 4;
+					goto record_it;
+				}
+			} else {
+				if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
+					continue;
+			}
+#else
 			if (hiscore.rule < 4)
 				hiscore.rule++;
+#endif
 
 			/* Rule 5: Prefer outgoing interface */
 			if (hiscore.rule < 5) {
@@ -2759,6 +2777,26 @@ void if6_proc_exit(void)
 }
 #endif	/* CONFIG_PROC_FS */
 
+#ifdef CONFIG_IPV6_MIP6
+/* Check if address is a home address configured on any interface. */
+int ipv6_chk_home_addr(struct in6_addr *addr)
+{
+	int ret = 0;
+	struct inet6_ifaddr * ifp;
+	u8 hash = ipv6_addr_hash(addr);
+	read_lock_bh(&addrconf_hash_lock);
+	for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
+		if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
+		    (ifp->flags & IFA_F_HOMEADDRESS)) {
+			ret = 1;
+			break;
+		}
+	}
+	read_unlock_bh(&addrconf_hash_lock);
+	return ret;
+}
+#endif
+
 /*
  *	Periodic address status verification
  */
@@ -2930,7 +2968,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 		prefered_lft = 0x7FFFFFFF/HZ;
 
 	spin_lock_bh(&ifp->lock);
-	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD)) | ifa_flags;
+	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
 	ifp->tstamp = jiffies;
 	ifp->valid_lft = valid_lft;
 	ifp->prefered_lft = prefered_lft;
@@ -2981,7 +3019,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		return -ENODEV;
 
 	/* We ignore other flags so far. */
-	ifa_flags = ifm->ifa_flags & IFA_F_NODAD;
+	ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
 
 	ifa = ipv6_get_ifaddr(pfx, dev, 1);
 	if (ifa == NULL) {
-- 
GitLab


From fab97220c9e409a98b1956ba677ddd2dd43b0b95 Mon Sep 17 00:00:00 2001
From: Heiko J Schick <schickhj.ibm.com>
Date: Fri, 22 Sep 2006 15:22:22 -0700
Subject: [PATCH 0737/1063] IB/ehca: Add driver for IBM eHCA InfiniBand
 adapters

Add a driver for IBM GX bus InfiniBand adapters, which are usable with
some pSeries/System p systems.

Signed-off-by: Heiko J Schick <schickhj.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 MAINTAINERS                                   |    8 +
 drivers/infiniband/Kconfig                    |    1 +
 drivers/infiniband/Makefile                   |    1 +
 drivers/infiniband/hw/ehca/Kconfig            |   16 +
 drivers/infiniband/hw/ehca/Makefile           |   16 +
 drivers/infiniband/hw/ehca/ehca_av.c          |  271 ++
 drivers/infiniband/hw/ehca/ehca_classes.h     |  346 +++
 .../infiniband/hw/ehca/ehca_classes_pSeries.h |  236 ++
 drivers/infiniband/hw/ehca/ehca_cq.c          |  427 ++++
 drivers/infiniband/hw/ehca/ehca_eq.c          |  185 ++
 drivers/infiniband/hw/ehca/ehca_hca.c         |  241 ++
 drivers/infiniband/hw/ehca/ehca_irq.c         |  762 ++++++
 drivers/infiniband/hw/ehca/ehca_irq.h         |   77 +
 drivers/infiniband/hw/ehca/ehca_iverbs.h      |  181 ++
 drivers/infiniband/hw/ehca/ehca_main.c        |  818 ++++++
 drivers/infiniband/hw/ehca/ehca_mcast.c       |  131 +
 drivers/infiniband/hw/ehca/ehca_mrmw.c        | 2261 +++++++++++++++++
 drivers/infiniband/hw/ehca/ehca_mrmw.h        |  140 +
 drivers/infiniband/hw/ehca/ehca_pd.c          |  114 +
 drivers/infiniband/hw/ehca/ehca_qes.h         |  259 ++
 drivers/infiniband/hw/ehca/ehca_qp.c          | 1506 +++++++++++
 drivers/infiniband/hw/ehca/ehca_reqs.c        |  653 +++++
 drivers/infiniband/hw/ehca/ehca_sqp.c         |  111 +
 drivers/infiniband/hw/ehca/ehca_tools.h       |  172 ++
 drivers/infiniband/hw/ehca/ehca_uverbs.c      |  392 +++
 drivers/infiniband/hw/ehca/hcp_if.c           |  874 +++++++
 drivers/infiniband/hw/ehca/hcp_if.h           |  261 ++
 drivers/infiniband/hw/ehca/hcp_phyp.c         |   80 +
 drivers/infiniband/hw/ehca/hcp_phyp.h         |   90 +
 drivers/infiniband/hw/ehca/hipz_fns.h         |   68 +
 drivers/infiniband/hw/ehca/hipz_fns_core.h    |  100 +
 drivers/infiniband/hw/ehca/hipz_hw.h          |  388 +++
 drivers/infiniband/hw/ehca/ipz_pt_fn.c        |  149 ++
 drivers/infiniband/hw/ehca/ipz_pt_fn.h        |  247 ++
 34 files changed, 11582 insertions(+)
 create mode 100644 drivers/infiniband/hw/ehca/Kconfig
 create mode 100644 drivers/infiniband/hw/ehca/Makefile
 create mode 100644 drivers/infiniband/hw/ehca/ehca_av.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_classes.h
 create mode 100644 drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
 create mode 100644 drivers/infiniband/hw/ehca/ehca_cq.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_eq.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_hca.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_irq.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_irq.h
 create mode 100644 drivers/infiniband/hw/ehca/ehca_iverbs.h
 create mode 100644 drivers/infiniband/hw/ehca/ehca_main.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_mcast.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_mrmw.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_mrmw.h
 create mode 100644 drivers/infiniband/hw/ehca/ehca_pd.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_qes.h
 create mode 100644 drivers/infiniband/hw/ehca/ehca_qp.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_reqs.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_sqp.c
 create mode 100644 drivers/infiniband/hw/ehca/ehca_tools.h
 create mode 100644 drivers/infiniband/hw/ehca/ehca_uverbs.c
 create mode 100644 drivers/infiniband/hw/ehca/hcp_if.c
 create mode 100644 drivers/infiniband/hw/ehca/hcp_if.h
 create mode 100644 drivers/infiniband/hw/ehca/hcp_phyp.c
 create mode 100644 drivers/infiniband/hw/ehca/hcp_phyp.h
 create mode 100644 drivers/infiniband/hw/ehca/hipz_fns.h
 create mode 100644 drivers/infiniband/hw/ehca/hipz_fns_core.h
 create mode 100644 drivers/infiniband/hw/ehca/hipz_hw.h
 create mode 100644 drivers/infiniband/hw/ehca/ipz_pt_fn.c
 create mode 100644 drivers/infiniband/hw/ehca/ipz_pt_fn.h

diff --git a/MAINTAINERS b/MAINTAINERS
index ed2a83cfad7c0..830bec779d479 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -991,6 +991,14 @@ EFS FILESYSTEM
 W:	http://aeschi.ch.eu.org/efs/
 S:	Orphan
 
+EHCA (IBM GX bus InfiniBand adapter) DRIVER:
+P:	Hoang-Nam Nguyen
+M:	hnguyen@de.ibm.com
+P:	Christoph Raisch
+M:	raisch@de.ibm.com
+L:	openib-general@openib.org
+S:	Supported
+
 EMU10K1 SOUND DRIVER
 P:	James Courtier-Dutton
 M:	James@superbug.demon.co.uk
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 69a53d476b5b7..fd2d528daa3aa 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -36,6 +36,7 @@ config INFINIBAND_ADDR_TRANS
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/ehca/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index c7ff58c1d0e5e..893bee0a50b57 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
 obj-$(CONFIG_IPATH_CORE)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
new file mode 100644
index 0000000000000..922389b643940
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -0,0 +1,16 @@
+config INFINIBAND_EHCA
+	tristate "eHCA support"
+	depends on IBMEBUS && INFINIBAND
+	---help---
+	This driver supports the IBM pSeries eHCA InfiniBand adapter.
+
+	To compile the driver as a module, choose M here. The module
+	will be called ib_ehca.
+
+config INFINIBAND_EHCA_SCALING
+	bool "Scaling support (EXPERIMENTAL)"
+	depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	eHCA scaling support schedules the CQ callbacks to different CPUs.
+
+	To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile
new file mode 100644
index 0000000000000..74d284e46a406
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Makefile
@@ -0,0 +1,16 @@
+#  Authors: Heiko J Schick <schickhj@de.ibm.com>
+#           Christoph Raisch <raisch@de.ibm.com>
+#           Joachim Fenkes <fenkes@de.ibm.com>
+#
+#  Copyright (c) 2005 IBM Corporation
+#
+#  All rights reserved.
+#
+#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
+
+obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
+
+ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
+		ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
+		ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
+
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
new file mode 100644
index 0000000000000..3bac197f90141
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -0,0 +1,271 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  address vector functions
+ *
+ *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *av_cache;
+
+/* Build an address vector from ah_attr; returns its ib_ah or an ERR_PTR(). */
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	int ret = -EINVAL;	/* used by all paths to create_ah_exit1 */
+	struct ehca_av *av;
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+
+	av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+	if (!av) {
+		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
+			 pd, ah_attr);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	av->av.sl = ah_attr->sl;
+	av->av.dlid = ah_attr->dlid;
+	av->av.slid_path_bits = ah_attr->src_path_bits;
+
+	if (ehca_static_rate < 0) {
+		int ah_mult, ehca_mult;
+
+		/* IB port numbers start at 1, sport[] is indexed from 0 */
+		if (ah_attr->port_num < 1 ||
+		    ah_attr->port_num > ARRAY_SIZE(shca->sport))
+			goto create_ah_exit1;
+		ah_mult = ib_rate_to_mult(ah_attr->static_rate);
+		ehca_mult = ib_rate_to_mult(
+			shca->sport[ah_attr->port_num - 1].rate);
+		if (ah_mult > 0 && ah_mult < ehca_mult)
+			av->av.ipd = (ehca_mult - 1) / ah_mult;
+		else
+			av->av.ipd = 0;
+	} else
+		av->av.ipd = ehca_static_rate;
+
+	av->av.lnh = ah_attr->ah_flags;
+	av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
+					    ah_attr->grh.traffic_class);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+					    ah_attr->grh.flow_label);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+					    ah_attr->grh.hop_limit);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		int rc;
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		memset(&port_attr, 0, sizeof(port_attr));
+		rc = ehca_query_port(pd->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ehca_err(pd->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_gid(pd->device, ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ehca_err(pd->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
+	}
+	/* for the time being we use a hard coded PMTU of 2048 Bytes */
+	av->av.pmtu = 4;
+	/* dgid comes in grh.word_3 */
+	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+
+	return &av->ib_ah;
+
+create_ah_exit1:
+	kmem_cache_free(av_cache, av);
+	return ERR_PTR(ret);
+}
+
+/* Rebuild the address vector of an existing AH from ah_attr. */
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	struct ehca_ud_av new_ehca_av;
+	u32 caller = current->tgid;
+
+	/* a PD with a user context is only served for the tgid in ownpid */
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != caller) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 caller, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	/* assemble the new vector locally; av is written only at the end */
+	memset(&new_ehca_av, 0, sizeof(new_ehca_av));
+	new_ehca_av.sl = ah_attr->sl;
+	new_ehca_av.dlid = ah_attr->dlid;
+	new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
+	new_ehca_av.ipd = ah_attr->static_rate;
+	new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
+	new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
+					 (ah_attr->ah_flags & IB_AH_GRH) > 0);
+	new_ehca_av.grh.word_0 =
+		EHCA_BMASK_SET(GRH_TCLASS_MASK, ah_attr->grh.traffic_class) |
+		EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, ah_attr->grh.flow_label) |
+		EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, ah_attr->grh.hop_limit) |
+		EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
+
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		int rc;
+
+		memset(&port_attr, 0, sizeof(port_attr));
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_port(ah->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ehca_err(ah->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x",
+				 rc, ah, ah_attr, ah_attr->port_num);
+			return -EINVAL;
+		}
+		rc = ehca_query_gid(ah->device, ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ehca_err(ah->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x "
+				 "sgid_index=%x",
+				 rc, ah, ah_attr, ah_attr->port_num,
+				 ah_attr->grh.sgid_index);
+			return -EINVAL;
+		}
+		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
+	}
+
+	/* dgid comes in grh.word_3 */
+	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+
+	av->av = new_ehca_av;
+
+	return 0;
+}
+
+/* Report the attributes held in the address vector of an AH. */
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	struct ehca_ud_av *ud_av = &av->av;
+	u32 caller = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != caller) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 caller, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	ah_attr->sl = ud_av->sl;
+	ah_attr->dlid = ud_av->dlid;
+	ah_attr->src_path_bits = ud_av->slid_path_bits;
+	ah_attr->static_rate = ud_av->ipd;
+	ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, ud_av->lnh);
+	/* the GRH fields are packed into word_0, the dgid starts at word_3 */
+	ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
+						    ud_av->grh.word_0);
+	ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
+						ud_av->grh.word_0);
+	ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
+						 ud_av->grh.word_0);
+	memcpy(&ah_attr->grh.dgid, &ud_av->grh.word_3,
+	       sizeof(ah_attr->grh.dgid));
+	return 0;
+}
+
+/* Release an AH and hand its address vector back to av_cache. */
+int ehca_destroy_ah(struct ib_ah *ah)
+{
+	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 caller = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != caller) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 caller, my_pd->ownpid);
+		return -EINVAL;
+	}
+	kmem_cache_free(av_cache, av);
+	return 0;
+}
+
+/*
+ * Create the slab cache that backs struct ehca_av allocations.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created.
+ */
+int ehca_init_av_cache(void)
+{
+	av_cache = kmem_cache_create("ehca_cache_av", sizeof(struct ehca_av),
+				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	return av_cache ? 0 : -ENOMEM;
+}
+
+void ehca_cleanup_av_cache(void)
+{
+	if (av_cache) /* NULL when ehca_init_av_cache() did not create it */
+		kmem_cache_destroy(av_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
new file mode 100644
index 0000000000000..1c722032319ca
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -0,0 +1,346 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Struct definition for eHCA internal structures
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_H__
+#define __EHCA_CLASSES_H__
+
+#include "ehca_classes.h"
+#include "ipz_pt_fn.h"
+
+struct ehca_module;
+struct ehca_qp;
+struct ehca_cq;
+struct ehca_eq;
+struct ehca_mr;
+struct ehca_mw;
+struct ehca_pd;
+struct ehca_av;
+
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ehca_irq.h"
+
+struct ehca_eq {
+	u32 length;
+	struct ipz_queue ipz_queue;
+	struct ipz_eq_handle ipz_eq_handle;
+	struct work_struct work;
+	struct h_galpas galpas;
+	int is_initialized;
+	struct ehca_pfeq pf;
+	spinlock_t spinlock;
+	struct tasklet_struct interrupt_task;
+	u32 ist;
+};
+
+struct ehca_sport {
+	struct ib_cq *ibcq_aqp1;
+	struct ib_qp *ibqp_aqp1;
+	enum ib_rate  rate; /* fed to ib_rate_to_mult() by ehca_create_ah() */
+	enum ib_port_state port_state;
+};
+
+struct ehca_shca {
+	struct ib_device ib_device;
+	struct ibmebus_dev *ibmebus_dev;
+	u8 num_ports;
+	int hw_level;
+	struct list_head shca_list;
+	struct ipz_adapter_handle ipz_hca_handle;
+	struct ehca_sport sport[2];
+	struct ehca_eq eq;
+	struct ehca_eq neq;
+	struct ehca_mr *maxmr;
+	struct ehca_pd *pd;
+	struct h_galpas galpas;
+};
+
+struct ehca_pd {
+	struct ib_pd ib_pd; /* container_of() anchor for struct ib_pd */
+	struct ipz_pd fw_pd;
+	u32 ownpid; /* checked against current->tgid by the AH verbs */
+};
+
+struct ehca_qp {
+	struct ib_qp ib_qp;
+	u32 qp_type;
+	struct ipz_queue ipz_squeue;
+	struct ipz_queue ipz_rqueue;
+	struct h_galpas galpas;
+	u32 qkey;
+	u32 real_qp_num;
+	u32 token;
+	spinlock_t spinlock_s;
+	spinlock_t spinlock_r;
+	u32 sq_max_inline_data_size;
+	struct ipz_qp_handle ipz_qp_handle;
+	struct ehca_pfqp pf;
+	struct ib_qp_init_attr init_attr;
+	u64 uspace_squeue;
+	u64 uspace_rqueue;
+	u64 uspace_fwh;
+	struct ehca_cq *send_cq;
+	struct ehca_cq *recv_cq;
+	unsigned int sqerr_purgeflag;
+	struct hlist_node list_entries;
+};
+
+/* must be power of 2 */
+#define QP_HASHTAB_LEN 8
+
+struct ehca_cq {
+	struct ib_cq ib_cq;
+	struct ipz_queue ipz_queue;
+	struct h_galpas galpas;
+	spinlock_t spinlock;
+	u32 cq_number;
+	u32 token;
+	u32 nr_of_entries;
+	struct ipz_cq_handle ipz_cq_handle;
+	struct ehca_pfcq pf;
+	spinlock_t cb_lock;
+	u64 uspace_queue;
+	u64 uspace_fwh;
+	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
+	struct list_head entry;
+	u32 nr_callbacks;
+	spinlock_t task_lock;
+	u32 ownpid;
+};
+
+enum ehca_mr_flag {
+	EHCA_MR_FLAG_FMR = 0x80000000,	 /* FMR, created with ehca_alloc_fmr */
+	EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
+};
+
+struct ehca_mr {
+	union {
+		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
+		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
+	} ib;
+	spinlock_t mrlock;
+
+	enum ehca_mr_flag flags;
+	u32 num_pages;		/* number of MR pages */
+	u32 num_4k;		/* number of 4k "page" portions to form MR */
+	int acl;		/* ACL (stored here for usage in reregister) */
+	u64 *start;		/* virtual start address (stored here for */
+	                        /* usage in reregister) */
+	u64 size;		/* size (stored here for usage in reregister) */
+	u32 fmr_page_size;	/* page size for FMR */
+	u32 fmr_max_pages;	/* max pages for FMR */
+	u32 fmr_max_maps;	/* max outstanding maps for FMR */
+	u32 fmr_map_cnt;	/* map counter for FMR */
+	/* fw specific data */
+	struct ipz_mrmw_handle ipz_mr_handle;	/* MR handle for h-calls */
+	struct h_galpas galpas;
+	/* data for userspace bridge */
+	u32 nr_of_pages;
+	void *pagearray;
+};
+
+struct ehca_mw {
+	struct ib_mw ib_mw;	/* gen2 mw, must always be first in ehca_mw */
+	spinlock_t mwlock;
+
+	u8 never_bound;		/* indication MW was never bound */
+	struct ipz_mrmw_handle ipz_mw_handle;	/* MW handle for h-calls */
+	struct h_galpas galpas;
+};
+
+enum ehca_mr_pgi_type {
+	EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
+				  * ehca_rereg_phys_mr,
+				  * ehca_reg_internal_maxmr */
+	EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
+	EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
+};
+
+struct ehca_mr_pginfo {
+	enum ehca_mr_pgi_type type;
+	u64 num_pages;
+	u64 page_cnt;
+	u64 num_4k;       /* number of 4k "page" portions */
+	u64 page_4k_cnt;  /* counter for 4k "page" portions */
+	u64 next_4k;      /* next 4k "page" portion in buffer/chunk/listelem */
+
+	/* type EHCA_MR_PGI_PHYS section */
+	int num_phys_buf;
+	struct ib_phys_buf *phys_buf_array;
+	u64 next_buf;
+
+	/* type EHCA_MR_PGI_USER section */
+	struct ib_umem *region;
+	struct ib_umem_chunk *next_chunk;
+	u64 next_nmap;
+
+	/* type EHCA_MR_PGI_FMR section */
+	u64 *page_list;
+	u64 next_listelem;
+	/* next_4k also used within EHCA_MR_PGI_FMR */
+};
+
+/* output parameters for MR/FMR hipz calls */
+struct ehca_mr_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 lkey;
+	u32 rkey;
+	u64 len;
+	u64 vaddr;
+	u32 acl;
+};
+
+/* output parameters for MW hipz calls */
+struct ehca_mw_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 rkey;
+};
+
+struct ehca_av {
+	struct ib_ah ib_ah; /* returned to the caller by ehca_create_ah() */
+	struct ehca_ud_av av; /* address vector: sl, dlid, ipd, lnh, pmtu, grh */
+};
+
+struct ehca_ucontext {
+	struct ib_ucontext ib_ucontext;
+};
+
+struct ehca_module *ehca_module_new(void);
+
+int ehca_module_delete(struct ehca_module *me);
+
+int ehca_eq_ctor(struct ehca_eq *eq);
+
+int ehca_eq_dtor(struct ehca_eq *eq);
+
+struct ehca_shca *ehca_shca_new(void);
+
+int ehca_shca_delete(struct ehca_shca *me);
+
+struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
+
+int ehca_init_pd_cache(void);
+void ehca_cleanup_pd_cache(void);
+int ehca_init_cq_cache(void);
+void ehca_cleanup_cq_cache(void);
+int ehca_init_qp_cache(void);
+void ehca_cleanup_qp_cache(void);
+int ehca_init_av_cache(void);
+void ehca_cleanup_av_cache(void);
+int ehca_init_mrmw_cache(void);
+void ehca_cleanup_mrmw_cache(void);
+
+extern spinlock_t ehca_qp_idr_lock;
+extern spinlock_t ehca_cq_idr_lock;
+extern struct idr ehca_qp_idr;
+extern struct idr ehca_cq_idr;
+
+extern int ehca_static_rate;
+extern int ehca_port_act_time;
+extern int ehca_use_hp_mr;
+
+struct ipzu_queue_resp {
+	u64 queue;        /* points to first queue entry */
+	u32 qe_size;      /* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length; /* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;
+	u32 dummy; /* padding for 8 byte alignment */
+};
+
+struct ehca_create_cq_resp {
+	u32 cq_number;
+	u32 token;
+	struct ipzu_queue_resp ipz_queue;
+	struct h_galpas galpas;
+};
+
+struct ehca_create_qp_resp {
+	u32 qp_num;
+	u32 token;
+	u32 qp_type;
+	u32 qkey;
+	/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
+	u32 real_qp_num;
+	u32 dummy; /* padding for 8 byte alignment */
+	struct ipzu_queue_resp ipz_squeue;
+	struct ipzu_queue_resp ipz_rqueue;
+	struct h_galpas galpas;
+};
+
+struct ehca_alloc_cq_parms {
+	u32 nr_cqe;
+	u32 act_nr_of_entries;
+	u32 act_pages;
+	struct ipz_eq_handle eq_handle;
+};
+
+struct ehca_alloc_qp_parms {
+	int servicetype;
+	int sigtype;
+	int daqp_ctrl;
+	int max_send_sge;
+	int max_recv_sge;
+	int ud_av_l_key_ctl;
+
+	u16 act_nr_send_wqes;
+	u16 act_nr_recv_wqes;
+	u8  act_nr_recv_sges;
+	u8  act_nr_send_sges;
+
+	u32 nr_rq_pages;
+	u32 nr_sq_pages;
+
+	struct ipz_eq_handle ipz_eq_handle;
+	struct ipz_pd pd;
+};
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
new file mode 100644
index 0000000000000..5665f213b81a2
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -0,0 +1,236 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  pSeries interface definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_PSERIES_H__
+#define __EHCA_CLASSES_PSERIES_H__
+
+#include "hcp_phyp.h"
+#include "ipz_pt_fn.h"
+
+
+struct ehca_pfqp {
+	struct ipz_qpt sqpt;
+	struct ipz_qpt rqpt;
+};
+
+struct ehca_pfcq {
+	struct ipz_qpt qpt;
+	u32 cqnr;
+};
+
+struct ehca_pfeq {
+	struct ipz_qpt qpt;
+	struct h_galpa galpa;
+	u32 eqnr;
+};
+
+struct ipz_adapter_handle {
+	u64 handle;
+};
+
+struct ipz_cq_handle {
+	u64 handle;
+};
+
+struct ipz_eq_handle {
+	u64 handle;
+};
+
+struct ipz_qp_handle {
+	u64 handle;
+};
+struct ipz_mrmw_handle {
+	u64 handle;
+};
+
+struct ipz_pd {
+	u32 value;
+};
+
+struct hcp_modify_qp_control_block {
+	u32 qkey;                      /* 00 */
+	u32 rdd;                       /* reliable datagram domain */
+	u32 send_psn;                  /* 02 */
+	u32 receive_psn;               /* 03 */
+	u32 prim_phys_port;            /* 04 */
+	u32 alt_phys_port;             /* 05 */
+	u32 prim_p_key_idx;            /* 06 */
+	u32 alt_p_key_idx;             /* 07 */
+	u32 rdma_atomic_ctrl;          /* 08 */
+	u32 qp_state;                  /* 09 */
+	u32 reserved_10;               /* 10 */
+	u32 rdma_nr_atomic_resp_res;   /* 11 */
+	u32 path_migration_state;      /* 12 */
+	u32 rdma_atomic_outst_dest_qp; /* 13 */
+	u32 dest_qp_nr;                /* 14 */
+	u32 min_rnr_nak_timer_field;   /* 15 */
+	u32 service_level;             /* 16 */
+	u32 send_grh_flag;             /* 17 */
+	u32 retry_count;               /* 18 */
+	u32 timeout;                   /* 19 */
+	u32 path_mtu;                  /* 20 */
+	u32 max_static_rate;           /* 21 */
+	u32 dlid;                      /* 22 */
+	u32 rnr_retry_count;           /* 23 */
+	u32 source_path_bits;          /* 24 */
+	u32 traffic_class;             /* 25 */
+	u32 hop_limit;                 /* 26 */
+	u32 source_gid_idx;            /* 27 */
+	u32 flow_label;                /* 28 */
+	u32 reserved_29;               /* 29 */
+	union {                        /* 30 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid;
+	u32 service_level_al;          /* 34 */
+	u32 send_grh_flag_al;          /* 35 */
+	u32 retry_count_al;            /* 36 */
+	u32 timeout_al;                /* 37 */
+	u32 max_static_rate_al;        /* 38 */
+	u32 dlid_al;                   /* 39 */
+	u32 rnr_retry_count_al;        /* 40 */
+	u32 source_path_bits_al;       /* 41 */
+	u32 traffic_class_al;          /* 42 */
+	u32 hop_limit_al;              /* 43 */
+	u32 source_gid_idx_al;         /* 44 */
+	u32 flow_label_al;             /* 45 */
+	u32 reserved_46;               /* 46 */
+	u32 reserved_47;               /* 47 */
+	union {                        /* 48 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid_al;
+	u32 max_nr_outst_send_wr;      /* 52 */
+	u32 max_nr_outst_recv_wr;      /* 53 */
+	u32 disable_ete_credit_check;  /* 54 */
+	u32 qp_number;                 /* 55 */
+	u64 send_queue_handle;         /* 56 */
+	u64 recv_queue_handle;         /* 58 */
+	u32 actual_nr_sges_in_sq_wqe;  /* 60 */
+	u32 actual_nr_sges_in_rq_wqe;  /* 61 */
+	u32 qp_enable;                 /* 62 */
+	u32 curr_srq_limit;            /* 63 */
+	u64 qp_aff_asyn_ev_log_reg;    /* 64 */
+	u64 shared_rq_hndl;            /* 66 */
+	u64 trigg_doorbell_qp_hndl;    /* 68 */
+	u32 reserved_70_127[58];       /* 70 */
+};
+
+#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM(0,0)
+#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM(2,2)
+#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM(3,3)
+#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM(4,4)
+#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM(5,5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM(6,6)
+#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM(7,7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM(8,8)
+#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM(9,9)
+#define MQPCB_QP_STATE                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11,11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12,12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13,13)
+#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14,14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15,15)
+#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16,16)
+#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17,17)
+#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18,18)
+#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19,19)
+#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20,20)
+#define MQPCB_PATH_MTU                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21,21)
+#define MQPCB_MAX_STATIC_RATE                   EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22,22)
+#define MQPCB_DLID                              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23,23)
+#define MQPCB_RNR_RETRY_COUNT                   EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24,24)
+#define MQPCB_SOURCE_PATH_BITS                  EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25,25)
+#define MQPCB_TRAFFIC_CLASS                     EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26,26)
+#define MQPCB_HOP_LIMIT                         EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27,27)
+#define MQPCB_SOURCE_GID_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28,28)
+#define MQPCB_FLOW_LABEL                        EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30,30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31,31)
+#define MQPCB_SERVICE_LEVEL_AL                  EHCA_BMASK_IBM(28,31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32,32)
+#define MQPCB_SEND_GRH_FLAG_AL                  EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33,33)
+#define MQPCB_RETRY_COUNT_AL                    EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34,34)
+#define MQPCB_TIMEOUT_AL                        EHCA_BMASK_IBM(27,31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35,35)
+#define MQPCB_MAX_STATIC_RATE_AL                EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36,36)
+#define MQPCB_DLID_AL                           EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37,37)
+#define MQPCB_RNR_RETRY_COUNT_AL                EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38,38)
+#define MQPCB_SOURCE_PATH_BITS_AL               EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39,39)
+#define MQPCB_TRAFFIC_CLASS_AL                  EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40,40)
+#define MQPCB_HOP_LIMIT_AL                      EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41,41)
+#define MQPCB_SOURCE_GID_IDX_AL                 EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42,42)
+#define MQPCB_FLOW_LABEL_AL                     EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44,44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45,45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46,46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47,47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK          EHCA_BMASK_IBM(31,31)
+#define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM(8,31)
+#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48,48)
+#define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_CURR_SQR_LIMIT               EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SQR_LIMIT                    EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50,50)
+#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51,51)
+
+#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
new file mode 100644
index 0000000000000..458fe19648a10
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -0,0 +1,427 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Completion queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "hcp_if.h"
+
+/*
+ * Slab cache from which ehca_create_cq() allocates struct ehca_cq objects.
+ * Created by ehca_init_cq_cache() and destroyed by ehca_cleanup_cq_cache().
+ */
+static struct kmem_cache *cq_cache;
+
+/*
+ * Attach a QP to a CQ by linking it into the CQ's QP hash table.
+ *
+ * The bucket is chosen as qp->real_qp_num & (QP_HASHTAB_LEN-1) and the
+ * insertion is performed under cq->spinlock with interrupts disabled.
+ *
+ * Always returns 0.
+ */
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
+{
+	unsigned long flags;
+	unsigned int real_qp_num = qp->real_qp_num;
+	struct hlist_head *bucket =
+		&cq->qp_hashtab[real_qp_num & (QP_HASHTAB_LEN-1)];
+
+	spin_lock_irqsave(&cq->spinlock, flags);
+	hlist_add_head(&qp->list_entries, bucket);
+	spin_unlock_irqrestore(&cq->spinlock, flags);
+
+	ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
+		 cq->cq_number, real_qp_num);
+
+	return 0;
+}
+
+/*
+ * Detach the QP with the given real QP number from a CQ.
+ *
+ * Walks the hash bucket selected by real_qp_num & (QP_HASHTAB_LEN-1) under
+ * cq->spinlock and unlinks the first entry whose real_qp_num matches.
+ *
+ * Returns 0 when an entry was removed, -EINVAL (after logging an error)
+ * when no matching QP is linked to this CQ.
+ */
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
+{
+	struct hlist_head *bucket =
+		&cq->qp_hashtab[real_qp_num & (QP_HASHTAB_LEN-1)];
+	struct hlist_node *node;
+	struct ehca_qp *entry;
+	unsigned long flags;
+	int rc = -EINVAL;
+
+	spin_lock_irqsave(&cq->spinlock, flags);
+	hlist_for_each(node, bucket) {
+		entry = hlist_entry(node, struct ehca_qp, list_entries);
+		if (entry->real_qp_num != real_qp_num)
+			continue;
+
+		/* Match: unlink it and stop, the node must not be reused. */
+		hlist_del(node);
+		ehca_dbg(cq->ib_cq.device,
+			 "removed qp from cq .cq_num=%x real_qp_num=%x",
+			 cq->cq_number, real_qp_num);
+		rc = 0;
+		break;
+	}
+	spin_unlock_irqrestore(&cq->spinlock, flags);
+
+	if (rc != 0)
+		ehca_err(cq->ib_cq.device,
+			 "qp not found cq_num=%x real_qp_num=%x",
+			 cq->cq_number, real_qp_num);
+
+	return rc;
+}
+
+/*
+ * Look up a QP attached to a CQ by its real QP number.
+ *
+ * Only the hash bucket real_qp_num & (QP_HASHTAB_LEN-1) is searched. This
+ * function takes no lock itself.
+ * NOTE(review): callers presumably hold cq->spinlock or otherwise exclude
+ * concurrent assign/unassign - confirm against the call sites.
+ *
+ * Returns the matching QP, or NULL when none is linked to this CQ.
+ */
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+{
+	struct hlist_head *bucket =
+		&cq->qp_hashtab[real_qp_num & (QP_HASHTAB_LEN-1)];
+	struct hlist_node *node;
+	struct ehca_qp *candidate;
+
+	hlist_for_each(node, bucket) {
+		candidate = hlist_entry(node, struct ehca_qp, list_entries);
+		if (candidate->real_qp_num == real_qp_num)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create a completion queue.
+ *
+ * @device:  IB device; must be the ib_device member of a struct ehca_shca
+ * @cqe:     number of CQ entries requested; additional_cqe spare entries
+ *           are requested on top of it
+ * @context: user context; when non-NULL the queue pages and the firmware
+ *           handle are additionally mapped and reported through @udata
+ * @udata:   receives a struct ehca_create_cq_resp (only used when
+ *           @context is non-NULL)
+ *
+ * Steps: allocate and zero the ehca_cq, reserve an idr token, allocate the
+ * CQ resource via hipz_h_alloc_resource_cq(), construct the queue pages and
+ * register each of them, initialise the QP hash table and, for user
+ * contexts, set up the mappings and copy the response to @udata.
+ *
+ * Returns the embedded ib_cq on success. On failure everything set up so
+ * far is released in reverse order and an ERR_PTR() value is returned.
+ */
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata)
+{
+	static const u32 additional_cqe = 20;
+	struct ib_cq *cq;
+	struct ehca_cq *my_cq;
+	struct ehca_shca *shca =
+		container_of(device, struct ehca_shca, ib_device);
+	struct ipz_adapter_handle adapter_handle;
+	struct ehca_alloc_cq_parms param; /* h_call's out parameters */
+	struct h_galpa gal;
+	void *vpage;
+	u32 counter;
+	u64 rpage, cqx_fec, h_ret;
+	int ipz_rc, ret, i;
+	unsigned long flags;
+
+	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
+		return ERR_PTR(-EINVAL);
+
+	my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+	if (!my_cq) {
+		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
+			 device);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(my_cq, 0, sizeof(struct ehca_cq));
+	memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
+
+	spin_lock_init(&my_cq->spinlock);
+	spin_lock_init(&my_cq->cb_lock);
+	spin_lock_init(&my_cq->task_lock);
+	/* Remember the creator; destroy/resize compare against this tgid. */
+	my_cq->ownpid = current->tgid;
+
+	cq = &my_cq->ib_cq;
+
+	adapter_handle = shca->ipz_hca_handle;
+	param.eq_handle = shca->eq.ipz_eq_handle;
+
+	/* Reserve an idr token; retry while idr_get_new() asks for -EAGAIN. */
+	do {
+		if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
+			cq = ERR_PTR(-ENOMEM);
+			ehca_err(device, "Can't reserve idr nr. device=%p",
+				 device);
+			goto create_cq_exit1;
+		}
+
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	} while (ret == -EAGAIN);
+
+	if (ret) {
+		cq = ERR_PTR(-ENOMEM);
+		ehca_err(device, "Can't allocate new idr entry. device=%p",
+			 device);
+		goto create_cq_exit1;
+	}
+
+	/*
+	 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
+	 * for receiving errors CQEs.
+	 */
+	param.nr_cqe = cqe + additional_cqe;
+	h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_alloc_resource_cq() failed "
+			 "h_ret=%lx device=%p", h_ret, device);
+		cq = ERR_PTR(ehca2ib_return_code(h_ret));
+		goto create_cq_exit2;
+	}
+
+	/* A zero return from ipz_queue_ctor() is treated as failure here. */
+	ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
+				EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+	if (!ipz_rc) {
+		ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
+			 ipz_rc, device);
+		cq = ERR_PTR(-EINVAL);
+		goto create_cq_exit3;
+	}
+
+	/* Register every queue page, one page per call. */
+	for (counter = 0; counter < param.act_pages; counter++) {
+		vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+		if (!vpage) {
+			ehca_err(device, "ipz_qpageit_get_inc() "
+				 "returns NULL device=%p", device);
+			cq = ERR_PTR(-EAGAIN);
+			goto create_cq_exit4;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_cq(adapter_handle,
+						 my_cq->ipz_cq_handle,
+						 &my_cq->pf,
+						 0,
+						 0,
+						 rpage,
+						 1,
+						 my_cq->galpas.
+						 kernel);
+
+		if (h_ret < H_SUCCESS) {
+			ehca_err(device, "hipz_h_register_rpage_cq() failed "
+				 "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
+				 "act_pages=%i", my_cq, my_cq->cq_number,
+				 h_ret, counter, param.act_pages);
+			cq = ERR_PTR(-EINVAL);
+			goto create_cq_exit4;
+		}
+
+		if (counter == (param.act_pages - 1)) {
+			/*
+			 * Last page: the call must report H_SUCCESS and the
+			 * page iterator must be exhausted.
+			 */
+			vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+			if ((h_ret != H_SUCCESS) || vpage) {
+				ehca_err(device, "Registration of pages not "
+					 "complete ehca_cq=%p cq_num=%x "
+					 "h_ret=%lx", my_cq, my_cq->cq_number,
+					 h_ret);
+				cq = ERR_PTR(-EAGAIN);
+				goto create_cq_exit4;
+			}
+		} else {
+			/* Every page but the last must yield H_PAGE_REGISTERED. */
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(device, "Registration of page failed "
+					 "ehca_cq=%p cq_num=%x h_ret=%lx "
+					 "counter=%i act_pages=%i",
+					 my_cq, my_cq->cq_number,
+					 h_ret, counter, param.act_pages);
+				cq = ERR_PTR(-ENOMEM);
+				goto create_cq_exit4;
+			}
+		}
+	}
+
+	ipz_qeit_reset(&my_cq->ipz_queue);
+
+	gal = my_cq->galpas.kernel;
+	cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
+	ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+		 my_cq, my_cq->cq_number, cqx_fec);
+
+	/* Report the usable depth, i.e. without the spare error entries. */
+	my_cq->ib_cq.cqe = my_cq->nr_of_entries =
+		param.act_nr_of_entries - additional_cqe;
+	my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
+
+	for (i = 0; i < QP_HASHTAB_LEN; i++)
+		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+
+	if (context) {
+		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
+		struct ehca_create_cq_resp resp;
+		struct vm_area_struct *vma;
+		memset(&resp, 0, sizeof(resp));
+		resp.cq_number = my_cq->cq_number;
+		resp.token = my_cq->token;
+		resp.ipz_queue.qe_size = ipz_queue->qe_size;
+		resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
+		resp.ipz_queue.queue_length = ipz_queue->queue_length;
+		resp.ipz_queue.pagesize = ipz_queue->pagesize;
+		resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
+				       ipz_queue->queue_length,
+				       (void**)&resp.ipz_queue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap queue pages");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit4;
+		}
+		my_cq->uspace_queue = resp.ipz_queue.queue;
+		resp.galpas = my_cq->galpas;
+		ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
+					 (void**)&resp.galpas.kernel.fw_handle,
+					 &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap fw_handle");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit5;
+		}
+		my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			ehca_err(device, "Copy to udata failed.");
+			/*
+			 * Without this the unwind path below would free my_cq
+			 * and still hand the caller a pointer into it.
+			 */
+			cq = ERR_PTR(-EFAULT);
+			goto create_cq_exit6;
+		}
+	}
+
+	return cq;
+
+	/* Error unwinding: each label undoes one setup step, newest first. */
+create_cq_exit6:
+	ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+
+create_cq_exit5:
+	ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
+
+create_cq_exit4:
+	ipz_queue_dtor(&my_cq->ipz_queue);
+
+create_cq_exit3:
+	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+	if (h_ret != H_SUCCESS)
+		ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
+			 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
+
+create_cq_exit2:
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+create_cq_exit1:
+	kmem_cache_free(cq_cache, my_cq);
+
+	return cq;
+}
+
+/*
+ * Destroy a completion queue created by ehca_create_cq().
+ *
+ * A CQ that has a user-space queue mapping may only be destroyed by the
+ * thread group recorded in my_cq->ownpid; other callers get -EINVAL before
+ * anything is torn down.
+ *
+ * Otherwise the function waits until my_cq->nr_callbacks drops to zero,
+ * removes the idr token, unmaps the user mappings (if any), destroys the
+ * CQ resource via hipz_h_destroy_cq() - forcibly if the first attempt
+ * reports H_R_STATE - and finally frees the queue pages and the ehca_cq.
+ *
+ * Returns 0 on success, -EINVAL for a foreign caller, or the translated
+ * hcall error when the resource could not be destroyed (in which case the
+ * queue pages and the ehca_cq are left allocated).
+ */
+int ehca_destroy_cq(struct ib_cq *cq)
+{
+	u64 h_ret;
+	int ret;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	int cq_num = my_cq->cq_number;
+	struct ib_device *device = cq->device;
+	struct ehca_shca *shca = container_of(device, struct ehca_shca,
+					      ib_device);
+	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+
+	/*
+	 * Check ownership first: rejecting the caller after the idr entry
+	 * has been removed would leave a half torn down CQ behind.
+	 */
+	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+		ehca_err(device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_cq->ownpid);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	while (my_cq->nr_callbacks) {
+		/*
+		 * yield() must not run with a spinlock held and interrupts
+		 * disabled, so drop the lock while giving up the CPU.
+		 */
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		yield();
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	}
+
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	/* un-mmap if vma alloc */
+	if (my_cq->uspace_queue ) {
+		ret = ehca_munmap(my_cq->uspace_queue,
+				  my_cq->ipz_queue.queue_length);
+		if (ret)
+			ehca_err(device, "Could not munmap queue ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+		ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+		if (ret)
+			ehca_err(device, "Could not munmap fwh ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+	}
+
+	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
+	if (h_ret == H_R_STATE) {
+		/* cq in err: read err data and destroy it forcibly */
+		ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err "
+			 "state. Try to delete it forcibly.",
+			 my_cq, cq_num, my_cq->ipz_cq_handle.handle);
+		ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
+		h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+		if (h_ret == H_SUCCESS)
+			ehca_dbg(device, "cq_num=%x deleted successfully.",
+				 cq_num);
+	}
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
+			 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
+		return ehca2ib_return_code(h_ret);
+	}
+	ipz_queue_dtor(&my_cq->ipz_queue);
+	kmem_cache_free(cq_cache, my_cq);
+
+	return 0;
+}
+
+/*
+ * Resize a completion queue - not implemented.
+ *
+ * After the ownership check the function only logs an error and fails;
+ * @cqe and @udata are not used.
+ *
+ * Returns -EINVAL when the CQ has a user-space queue mapping and the
+ * caller's tgid differs from my_cq->ownpid, -EFAULT in every other case.
+ */
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	u32 caller_pid = current->tgid;
+	int foreign_caller = my_cq->uspace_queue &&
+			     my_cq->ownpid != caller_pid;
+
+	if (!foreign_caller) {
+		/* TODO: proper resize needs to be done */
+		ehca_err(cq->device, "not implemented yet");
+		return -EFAULT;
+	}
+
+	ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
+		 caller_pid, my_cq->ownpid);
+	return -EINVAL;
+}
+
+/*
+ * Create the slab cache used for struct ehca_cq allocations.
+ *
+ * Returns 0 on success, -ENOMEM when the cache could not be created.
+ */
+int ehca_init_cq_cache(void)
+{
+	cq_cache = kmem_cache_create("ehca_cache_cq",
+				     sizeof(struct ehca_cq), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+
+	return cq_cache ? 0 : -ENOMEM;
+}
+
+/*
+ * Destroy the ehca_cq slab cache; does nothing when the cache was never
+ * created.
+ */
+void ehca_cleanup_cq_cache(void)
+{
+	if (!cq_cache)
+		return;
+
+	kmem_cache_destroy(cq_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
new file mode 100644
index 0000000000000..5281dec66f127
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -0,0 +1,185 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Event queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_qes.h"
+#include "hcp_if.h"
+#include "ipz_pt_fn.h"
+
+/*
+ * Create an event queue (EHCA_EQ) or notification event queue (EHCA_NEQ).
+ *
+ * @shca:   adapter the queue belongs to
+ * @eq:     queue object to initialise
+ * @type:   EHCA_EQ or EHCA_NEQ; anything else is rejected
+ * @length: requested queue length, must not be zero
+ *
+ * Allocates the EQ resource, constructs and registers its queue pages,
+ * requests the interrupt for eq->ist and initialises the tasklet matching
+ * @type. eq->is_initialized is set to 1 only on success.
+ *
+ * Returns 0 on success and -EINVAL on any failure. A failing interrupt
+ * request is only logged; it does not fail the call.
+ */
+int ehca_create_eq(struct ehca_shca *shca,
+		   struct ehca_eq *eq,
+		   const enum ehca_eq_type type, const u32 length)
+{
+	u64 ret;
+	u32 nr_pages;
+	u32 i;
+	int irq_ret;
+	void *vpage;
+	struct ib_device *ib_dev = &shca->ib_device;
+
+	spin_lock_init(&eq->spinlock);
+	eq->is_initialized = 0;
+
+	if (type != EHCA_EQ && type != EHCA_NEQ) {
+		ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
+		return -EINVAL;
+	}
+	if (!length) {
+		ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+				       &eq->pf,
+				       type,
+				       length,
+				       &eq->ipz_eq_handle,
+				       &eq->length,
+				       &nr_pages, &eq->ist);
+
+	if (ret != H_SUCCESS) {
+		ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	/* A zero return from ipz_queue_ctor() is treated as failure here. */
+	ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
+			     EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+	if (!ret) {
+		ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
+		goto create_eq_exit1;
+	}
+
+	/* Register every queue page, one page per call. */
+	for (i = 0; i < nr_pages; i++) {
+		u64 rpage;
+
+		if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
+			ret = H_RESOURCE;
+			goto create_eq_exit2;
+		}
+
+		rpage = virt_to_abs(vpage);
+		ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+					       eq->ipz_eq_handle,
+					       &eq->pf,
+					       0, 0, rpage, 1);
+
+		if (i == (nr_pages - 1)) {
+			/* last page */
+			vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+			if (ret != H_SUCCESS || vpage)
+				goto create_eq_exit2;
+		} else {
+			if (ret != H_PAGE_REGISTERED || !vpage)
+				goto create_eq_exit2;
+		}
+	}
+
+	ipz_qeit_reset(&eq->ipz_queue);
+
+	/*
+	 * register interrupt handlers and initialize work queues
+	 *
+	 * The request result is kept in a signed int: stored in the u64
+	 * "ret" a negative errno could never satisfy "< 0" and the failure
+	 * would go unreported.
+	 */
+	if (type == EHCA_EQ) {
+		irq_ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
+					      SA_INTERRUPT, "ehca_eq",
+					      (void *)shca);
+		if (irq_ret < 0)
+			ehca_err(ib_dev, "Can't map interrupt handler.");
+
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+	} else if (type == EHCA_NEQ) {
+		irq_ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
+					      SA_INTERRUPT, "ehca_neq",
+					      (void *)shca);
+		if (irq_ret < 0)
+			ehca_err(ib_dev, "Can't map interrupt handler.");
+
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+	}
+
+	eq->is_initialized = 1;
+
+	return 0;
+
+	/* Error unwinding: free the queue pages, then the EQ resource. */
+create_eq_exit2:
+	ipz_queue_dtor(&eq->ipz_queue);
+
+create_eq_exit1:
+	hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+	return -EINVAL;
+}
+
+/*
+ * Fetch the next entry from an event queue.
+ *
+ * The queue iterator is advanced under eq->spinlock with interrupts
+ * disabled; @shca is not used.
+ *
+ * Returns whatever ipz_eqit_eq_get_inc_valid() yields for the queue.
+ * NOTE(review): presumably NULL when no valid entry is pending - confirm
+ * against ipz_pt_fn.h.
+ */
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	unsigned long irq_flags;
+	void *entry;
+
+	spin_lock_irqsave(&eq->spinlock, irq_flags);
+	entry = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
+	spin_unlock_irqrestore(&eq->spinlock, irq_flags);
+
+	return entry;
+}
+
+/*
+ * Destroy an event queue created by ehca_create_eq().
+ *
+ * Releases the interrupt registered for eq->ist, destroys the EQ resource
+ * via hipz_h_destroy_eq() under eq->spinlock and, if that succeeded, frees
+ * the queue pages.
+ *
+ * Returns 0 on success, -EINVAL when the EQ resource could not be
+ * destroyed (the queue pages are then left allocated).
+ */
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	unsigned long flags;
+	u64 h_ret;
+
+	/*
+	 * Release the interrupt before taking eq->spinlock: freeing an IRQ
+	 * must not be done with a spinlock held and interrupts disabled.
+	 */
+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+	spin_lock_irqsave(&eq->spinlock, flags);
+	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+	spin_unlock_irqrestore(&eq->spinlock, flags);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't free EQ resources.");
+		return -EINVAL;
+	}
+	ipz_queue_dtor(&eq->ipz_queue);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
new file mode 100644
index 0000000000000..5eae6ac484259
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -0,0 +1,241 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HCA query functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+/*
+ * Fill @props with the device attributes reported by hipz_h_query_hca().
+ *
+ * A zeroed control block of H_CB_ALIGNMENT bytes is allocated for the
+ * query and freed again before returning. @props is cleared and filled
+ * only when the query succeeded. SRQ limits are reported as 0 and
+ * max_pkeys as 16.
+ *
+ * Returns 0 on success, -ENOMEM when the control block could not be
+ * allocated, -EINVAL when the query failed.
+ */
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_hca *hca_info;
+	int rc = 0;
+
+	hca_info = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!hca_info) {
+		ehca_err(ibdev, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, hca_info) != H_SUCCESS) {
+		ehca_err(ibdev, "Can't query device properties");
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	memset(props, 0, sizeof(struct ib_device_attr));
+
+	/* identification */
+	props->fw_ver         = hca_info->hw_ver;
+	props->hw_ver         = hca_info->hw_ver;
+	props->vendor_id      = hca_info->vendor_id >> 8;
+	props->vendor_part_id = hca_info->vendor_part_id >> 16;
+	props->local_ca_ack_delay = hca_info->local_ca_ack_delay;
+
+	/* queue pair and completion queue limits */
+	props->max_qp     = min_t(int, hca_info->max_qp, INT_MAX);
+	props->max_qp_wr  = min_t(int, hca_info->max_wqes_wq, INT_MAX);
+	props->max_sge    = min_t(int, hca_info->max_sge, INT_MAX);
+	props->max_sge_rd = min_t(int, hca_info->max_sge_rd, INT_MAX);
+	props->max_cq     = min_t(int, hca_info->max_cq, INT_MAX);
+	props->max_cqe    = min_t(int, hca_info->max_cqe, INT_MAX);
+	props->max_raw_ipv6_qp =
+		min_t(int, hca_info->max_raw_ipv6_qp, INT_MAX);
+	props->max_raw_ethy_qp =
+		min_t(int, hca_info->max_raw_ethy_qp, INT_MAX);
+
+	/* memory, protection domain and address handle limits */
+	props->max_mr_size = hca_info->max_mr_size;
+	props->max_mr      = min_t(int, hca_info->max_mr, INT_MAX);
+	props->max_mw      = min_t(int, hca_info->max_mw, INT_MAX);
+	props->max_fmr     = min_t(int, hca_info->max_mr, INT_MAX);
+	props->max_pd      = min_t(int, hca_info->max_pd, INT_MAX);
+	props->max_ah      = min_t(int, hca_info->max_ah, INT_MAX);
+
+	/* shared receive queue limits are reported as zero */
+	props->max_srq     = 0;
+	props->max_srq_wr  = 0;
+	props->max_srq_sge = 0;
+
+	props->max_pkeys   = 16;
+
+	/* multicast limits */
+	props->max_mcast_grp =
+		min_t(int, hca_info->max_mcast_grp, INT_MAX);
+	props->max_mcast_qp_attach =
+		min_t(int, hca_info->max_mcast_qp_attach, INT_MAX);
+	props->max_total_mcast_qp_attach =
+		min_t(int, hca_info->max_total_mcast_qp_attach, INT_MAX);
+
+out_free:
+	kfree(hca_info);
+
+	return rc;
+}
+
+/*
+ * Fill @props with the attributes of port @port as reported by
+ * hipz_h_query_port().
+ *
+ * A zeroed control block of H_CB_ALIGNMENT bytes is allocated for the
+ * query and freed again before returning. @props is cleared and filled
+ * only when the query succeeded. max_mtu and active_mtu are both derived
+ * from the reported max_mtu code (0x1..0x5); an unknown code is logged and
+ * leaves both at 0. active_width is always IB_WIDTH_12X and active_speed
+ * always 0x1.
+ *
+ * Returns 0 on success, -ENOMEM when the control block could not be
+ * allocated, -EINVAL when the query failed.
+ */
+int ehca_query_port(struct ib_device *ibdev,
+		    u8 port, struct ib_port_attr *props)
+{
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_port *port_info;
+	int mtu = 0;
+	int rc = 0;
+
+	port_info = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!port_info) {
+		ehca_err(ibdev, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, port_info)
+	    != H_SUCCESS) {
+		ehca_err(ibdev, "Can't query port properties");
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	memset(props, 0, sizeof(struct ib_port_attr));
+	props->state = port_info->state;
+
+	switch (port_info->max_mtu) {
+	case 0x1:
+		mtu = IB_MTU_256;
+		break;
+	case 0x2:
+		mtu = IB_MTU_512;
+		break;
+	case 0x3:
+		mtu = IB_MTU_1024;
+		break;
+	case 0x4:
+		mtu = IB_MTU_2048;
+		break;
+	case 0x5:
+		mtu = IB_MTU_4096;
+		break;
+	default:
+		/* mtu stays 0, matching the cleared @props */
+		ehca_err(ibdev, "Unknown MTU size: %x.",
+			 port_info->max_mtu);
+		break;
+	}
+	props->max_mtu    = mtu;
+	props->active_mtu = mtu;
+
+	props->lid             = port_info->lid;
+	props->lmc             = port_info->lmc;
+	props->sm_lid          = port_info->sm_lid;
+	props->sm_sl           = port_info->sm_sl;
+	props->gid_tbl_len     = port_info->gid_tbl_len;
+	props->pkey_tbl_len    = port_info->pkey_tbl_len;
+	props->max_msg_sz      = port_info->max_msg_sz;
+	props->bad_pkey_cntr   = port_info->bad_pkey_cntr;
+	props->qkey_viol_cntr  = port_info->qkey_viol_cntr;
+	props->subnet_timeout  = port_info->subnet_timeout;
+	props->init_type_reply = port_info->init_type_reply;
+
+	props->active_width    = IB_WIDTH_12X;
+	props->active_speed    = 0x1;
+
+out_free:
+	kfree(port_info);
+
+	return rc;
+}
+
+/*
+ * Read one entry of the P_Key table of port @port.
+ *
+ * @ibdev: IB device; must be the ib_device member of a struct ehca_shca
+ * @port:  port number passed on to hipz_h_query_port()
+ * @index: table index; must be below the number of pkey_entries
+ * @pkey:  receives the 16-bit table entry
+ *
+ * A zeroed control block of H_CB_ALIGNMENT bytes is allocated for the
+ * query and freed again before returning.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range index or a failed
+ * query, -ENOMEM when the control block could not be allocated.
+ */
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+	struct hipz_query_port *rblock;
+
+	/* Valid indices are 0 .. table size - 1; "index > 16" let 16 through. */
+	if (index >= ARRAY_SIZE(rblock->pkey_entries)) {
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_pkey1;
+	}
+
+	/*
+	 * Index the element, not the array: "&rblock->pkey_entries + index"
+	 * advances in units of the whole table instead of one entry.
+	 */
+	memcpy(pkey, &rblock->pkey_entries[index], sizeof(u16));
+
+query_pkey1:
+	kfree(rblock);
+
+	return ret;
+}
+
+/*
+ * Read one GID of port @port.
+ *
+ * @ibdev: IB device; must be the ib_device member of a struct ehca_shca
+ * @port:  port number passed on to hipz_h_query_port()
+ * @index: GUID table index; must be non-negative and below the number of
+ *         guid_entries
+ * @gid:   receives the GID: bytes 0-7 are the reported gid_prefix, bytes
+ *         8-15 the selected guid_entries element
+ *
+ * A zeroed control block of H_CB_ALIGNMENT bytes is allocated for the
+ * query and freed again before returning.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range index or a failed
+ * query, -ENOMEM when the control block could not be allocated.
+ */
+int ehca_query_gid(struct ib_device *ibdev, u8 port,
+		   int index, union ib_gid *gid)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_port *rblock;
+
+	/*
+	 * index is signed: reject negative values as well, and bound it by
+	 * the real table size so no element outside guid_entries is read.
+	 */
+	if (index < 0 || index >= ARRAY_SIZE(rblock->guid_entries)) {
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_gid1;
+	}
+
+	memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
+	memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
+
+query_gid1:
+	kfree(rblock);
+
+	return ret;
+}
+
+/*
+ * Modify port attributes - not implemented.
+ *
+ * None of the arguments are used; the function always returns -EFAULT.
+ */
+int ehca_modify_port(struct ib_device *ibdev,
+		     u8 port, int port_modify_mask,
+		     struct ib_port_modify *props)
+{
+	/* Not implemented yet */
+	return -EFAULT;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644
index 0000000000000..2a65b5be19790
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -0,0 +1,762 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Functions for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/err.h>
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+/*
+ * Field selectors for a 64-bit event queue entry (EQE); the fields are
+ * extracted with EHCA_BMASK_GET() in this file.
+ */
+#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
+#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
+#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)
+#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)
+#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)
+
+/* Field selectors for a notification event queue entry (NEQE) */
+#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)
+#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
+#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+
+/*
+ * Field selectors for the block filled in by hipz_h_error_data():
+ * the length is taken from word 0, the type from word 2.
+ */
+#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
+#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static void queue_comp_task(struct ehca_cq *__cq);
+
+static struct ehca_comp_pool* pool;
+static struct notifier_block comp_pool_callback_nb;
+
+#endif
+
+/*
+ * Run the completion handler registered on @cq, if there is one.
+ * The handler is called with cq->cb_lock held.
+ */
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+	if (cq->ib_cq.comp_handler) {
+		spin_lock(&cq->cb_lock);
+		cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
+		spin_unlock(&cq->cb_lock);
+	}
+}
+
+/*
+ * Log the error data block @rblock that was fetched for a QP or CQ.
+ *
+ * @data is interpreted according to the type field in rblock[2]:
+ * a struct ehca_qp for type 0x1, a struct ehca_cq for type 0x4.
+ * For a QP nothing at all is printed when rblock[6] is zero.
+ * @length is the number of bytes of @rblock handed to ehca_dmp().
+ */
+static void print_error_data(struct ehca_shca *shca, void *data,
+			     u64 *rblock, int length)
+{
+	u64 resource = rblock[1];
+	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+
+	if (type == 0x1) {
+		/* Queue Pair */
+		struct ehca_qp *qp = data;
+
+		/* only print error data if AER is set */
+		if (rblock[6] == 0)
+			return;
+
+		ehca_err(&shca->ib_device,
+			 "QP 0x%x (resource=%lx) has errors.",
+			 qp->ib_qp.qp_num, resource);
+	} else if (type == 0x4) {
+		/* Completion Queue */
+		struct ehca_cq *cq = data;
+
+		ehca_err(&shca->ib_device,
+			 "CQ 0x%x (resource=%lx) has errors.",
+			 cq->cq_number, resource);
+	} else {
+		ehca_err(&shca->ib_device,
+			 "Unknown errror type: %lx on %s.",
+			 type, shca->ib_device.name);
+	}
+
+	ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
+		 "---------------------------------------------------");
+	ehca_dmp(rblock, length, "resource=%lx", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data end "
+		 "----------------------------------------------------");
+}
+
+/*
+ * ehca_error_data - fetch and log the error data of a resource
+ * @shca:     adapter the resource belongs to
+ * @data:     QP or CQ object, passed through to print_error_data()
+ * @resource: resource handle given to hipz_h_error_data()
+ *
+ * Returns -ENOMEM if the buffer cannot be allocated, otherwise the result
+ * of hipz_h_error_data() converted to int.
+ */
+int ehca_error_data(struct ehca_shca *shca, void *data,
+		    u64 resource)
+{
+	unsigned long hret;
+	unsigned long block_count;
+	u64 *rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	hret = hipz_h_error_data(shca->ipz_hca_handle, resource,
+				 rblock, &block_count);
+
+	if (hret == H_SUCCESS) {
+		int length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+		/* never dump more than one page */
+		if (length > PAGE_SIZE)
+			length = PAGE_SIZE;
+
+		print_error_data(shca, data, rblock, length);
+	} else if (hret == H_R_STATE) {
+		ehca_err(&shca->ib_device,
+			 "No error data is available: %lx.", resource);
+	} else {
+		ehca_err(&shca->ib_device,
+			 "Error data could not be fetched: %lx", resource);
+	}
+
+	kfree(rblock);
+
+	return hret;
+}
+
+/*
+ * Handle an affiliated QP event: look the QP up by the token in @eqe,
+ * log its error data and, if the QP has an event handler, call it with an
+ * event of type @event_type. Unknown tokens are ignored.
+ */
+static void qp_event_callback(struct ehca_shca *shca,
+			      u64 eqe,
+			      enum ib_event_type event_type)
+{
+	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+	struct ehca_qp *qp;
+	struct ib_event event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	qp = idr_find(&ehca_qp_idr, token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+	if (!qp)
+		return;
+
+	ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+
+	if (qp->ib_qp.event_handler) {
+		event.device     = &shca->ib_device;
+		event.event      = event_type;
+		event.element.qp = &qp->ib_qp;
+
+		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+	}
+}
+
+/*
+ * Handle an affiliated CQ event: look the CQ up by the token in @eqe and
+ * log its error data. Unknown tokens are ignored.
+ */
+static void cq_event_callback(struct ehca_shca *shca,
+					  u64 eqe)
+{
+	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+	struct ehca_cq *cq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	cq = idr_find(&ehca_cq_idr, token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	if (cq)
+		ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+}
+
+/*
+ * Dispatch a non-completion event queue entry by its identifier field.
+ *
+ * QP-related identifiers (0x02-0x06) are forwarded to qp_event_callback()
+ * with the matching IB event type, CQ errors (0x07, 0x08) go to
+ * cq_event_callback(). All remaining identifiers are only logged.
+ */
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+	switch (identifier) {
+	case 0x02: /* path migrated */
+		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+		break;
+	case 0x03: /* communication established */
+		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+		break;
+	case 0x04: /* send queue drained */
+		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+		break;
+	case 0x05: /* QP error */
+	case 0x06: /* QP error */
+		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+		break;
+	case 0x07: /* CQ error */
+	case 0x08: /* CQ error */
+		cq_event_callback(shca, eqe);
+		break;
+	case 0x09: /* MRMWPTE error */
+		ehca_err(&shca->ib_device, "MRMWPTE error.");
+		break;
+	case 0x0A: /* port event */
+		ehca_err(&shca->ib_device, "Port event.");
+		break;
+	case 0x0B: /* MR access error */
+		ehca_err(&shca->ib_device, "MR access error.");
+		break;
+	case 0x0C: /* EQ error */
+		ehca_err(&shca->ib_device, "EQ error.");
+		break;
+	case 0x0D: /* P/Q_Key mismatch */
+		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
+		break;
+	case 0x10: /* sampling complete */
+		ehca_err(&shca->ib_device, "Sampling complete.");
+		break;
+	case 0x11: /* unaffiliated access error */
+		ehca_err(&shca->ib_device, "Unaffiliated access error.");
+		break;
+	case 0x12: /* path migrating error */
+		ehca_err(&shca->ib_device, "Path migration error.");
+		break;
+	case 0x13: /* interface trace stopped */
+		ehca_err(&shca->ib_device, "Interface trace stopped.");
+		break;
+	/* 0x14 has no handling of its own and is reported as unknown */
+	case 0x14: /* first error capture info available */
+	default:
+		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
+			 identifier, shca->ib_device.name);
+		break;
+	}
+
+	return;
+}
+
+/*
+ * Store @state as the new port_state of @port (1-based) in @shca and hand
+ * an IB event of type @type for that port to ib_dispatch_event().
+ */
+static void ehca_port_state_event(struct ehca_shca *shca, u8 port,
+				  enum ib_event_type type, int state)
+{
+	struct ib_event event;
+
+	event.device = &shca->ib_device;
+	event.event = type;
+	event.element.port_num = port;
+	shca->sport[port - 1].port_state = state;
+	ib_dispatch_event(&event);
+}
+
+/*
+ * Evaluate the event code of a notification event queue entry.
+ *
+ * Port availability changes (0x30) are turned into PORT_ACTIVE or PORT_ERR
+ * events; a disruptive configuration change (0x31) is reported as PORT_ERR
+ * immediately followed by PORT_ACTIVE. Other codes are only logged.
+ */
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+
+	switch (ec) {
+	case 0x30: /* port availability change */
+		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+			ehca_info(&shca->ib_device,
+				  "port %x is active.", port);
+			ehca_port_state_event(shca, port,
+					      IB_EVENT_PORT_ACTIVE,
+					      IB_PORT_ACTIVE);
+		} else {
+			ehca_info(&shca->ib_device,
+				  "port %x is inactive.", port);
+			ehca_port_state_event(shca, port,
+					      IB_EVENT_PORT_ERR,
+					      IB_PORT_DOWN);
+		}
+		break;
+	case 0x31:
+		/* port configuration change
+		 * disruptive change is caused by
+		 * LID, PKEY or SM change
+		 */
+		ehca_warn(&shca->ib_device,
+			  "disruptive port %x configuration change", port);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is inactive.", port);
+		ehca_port_state_event(shca, port,
+				      IB_EVENT_PORT_ERR, IB_PORT_DOWN);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is active.", port);
+		ehca_port_state_event(shca, port,
+				      IB_EVENT_PORT_ACTIVE, IB_PORT_ACTIVE);
+		break;
+	case 0x32: /* adapter malfunction */
+		ehca_err(&shca->ib_device, "Adapter malfunction.");
+		break;
+	case 0x33:  /* trace stopped */
+		ehca_err(&shca->ib_device, "Traced stopped.");
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+			 ec, shca->ib_device.name);
+		break;
+	}
+}
+
+/*
+ * Write 0 to the cqx_ep register of @cq through its kernel galpa and then
+ * read the register back.
+ */
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
+	struct h_galpa gal = cq->galpas.kernel;
+	u64 readback;
+
+	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
+	/*
+	 * The loaded value is not used.
+	 * NOTE(review): presumably the load is there to make sure the store
+	 * has reached the adapter - confirm before removing it.
+	 */
+	readback = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
+}
+
+/*
+ * Interrupt handler for the notification event queue: defers all work to
+ * the NEQ tasklet of the adapter passed in @dev_id.
+ */
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *shca = dev_id;
+
+	tasklet_hi_schedule(&shca->neq.interrupt_task);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Tasklet body for the notification event queue.
+ *
+ * Polls shca->neq until it is empty, passing every entry that is not a
+ * completion event to parse_ec(), then resets the notification events
+ * through hipz_h_reset_event(). @data is the struct ehca_shca pointer.
+ */
+void ehca_tasklet_neq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca *)data;
+	struct ehca_eqe *eqe;
+	u64 hret;
+
+	while ((eqe = ehca_poll_eq(shca, &shca->neq)) != NULL) {
+		if (EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
+			continue;
+
+		parse_ec(shca, eqe->entry);
+	}
+
+	hret = hipz_h_reset_event(shca->ipz_hca_handle,
+				  shca->neq.ipz_eq_handle,
+				  0xFFFFFFFFFFFFFFFFL);
+	if (hret != H_SUCCESS)
+		ehca_err(&shca->ib_device, "Can't clear notification events.");
+}
+
+/*
+ * Interrupt handler for the event queue: defers all work to the EQ
+ * tasklet of the adapter passed in @dev_id.
+ */
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *shca = dev_id;
+
+	tasklet_hi_schedule(&shca->eq.interrupt_task);
+	return IRQ_HANDLED;
+}
+
+/*
+ * ehca_tasklet_eq - tasklet body for the event queue
+ * @data: the struct ehca_shca the tasklet belongs to
+ *
+ * Drains shca->eq. For a completion event the CQ is looked up in
+ * ehca_cq_idr by the token carried in the entry, its EQ-pending register is
+ * reset and the completion is either queued to a completion task
+ * (CONFIG_INFINIBAND_EHCA_SCALING) or delivered directly. All other entries
+ * are handed to parse_identifier().
+ *
+ * With hw_level >= 2 the interrupt state is queried after each drain and
+ * polling goes on while it is non-zero; once 100 queries have been made the
+ * state is forced to 0 so that the tasklet terminates.
+ */
+void ehca_tasklet_eq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)data;
+	struct ehca_eqe *eqe;
+	int int_state;
+	int query_cnt = 0;
+
+	do {
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		if ((shca->hw_level >= 2) && eqe)
+			int_state = 1;
+		else
+			int_state = 0;
+
+		while ((int_state == 1) || eqe) {
+			while (eqe) {
+				u64 eqe_value = eqe->entry;
+
+				ehca_dbg(&shca->ib_device,
+					 "eqe_value=%lx", eqe_value);
+
+				/* TODO: better structure */
+				if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
+						   eqe_value)) {
+					unsigned long flags;
+					u32 token;
+					struct ehca_cq *cq;
+
+					ehca_dbg(&shca->ib_device,
+						 "... completion event");
+					token =
+						EHCA_BMASK_GET(EQE_CQ_TOKEN,
+							       eqe_value);
+					spin_lock_irqsave(&ehca_cq_idr_lock,
+							  flags);
+					cq = idr_find(&ehca_cq_idr, token);
+
+					if (cq == NULL) {
+						/*
+						 * The lock was taken with
+						 * irqsave, so the saved flags
+						 * must be restored here too.
+						 */
+						spin_unlock_irqrestore(
+							&ehca_cq_idr_lock,
+							flags);
+						break;
+					}
+
+					reset_eq_pending(cq);
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+					queue_comp_task(cq);
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+#else
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+					comp_event_callback(cq);
+#endif
+				} else {
+					ehca_dbg(&shca->ib_device,
+						 "... non completion event");
+					parse_identifier(shca, eqe_value);
+				}
+				eqe =
+					(struct ehca_eqe *)ehca_poll_eq(shca,
+								    &shca->eq);
+			}
+
+			if (shca->hw_level >= 2) {
+				int_state =
+				    hipz_h_query_int_state(shca->ipz_hca_handle,
+							   shca->eq.ist);
+				query_cnt++;
+				iosync();
+				/* give up after 100 state queries */
+				if (query_cnt >= 100) {
+					query_cnt = 0;
+					int_state = 0;
+				}
+			}
+			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		}
+	} while (int_state != 0);
+
+	return;
+}
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+/*
+ * Advance @pool's round-robin cursor to the next CPU in cpu_online_map,
+ * wrapping to the first online CPU at the end of the map, and return it.
+ *
+ * The result is captured in a local while last_cpu_lock is held, so the
+ * value returned is the one this call selected even if another caller
+ * moves the cursor right after the lock is dropped.
+ */
+static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
+{
+	int cpu;
+	unsigned long flags_last_cpu;
+
+	if (ehca_debug_level)
+		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+
+	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
+	cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (cpu == NR_CPUS)
+		cpu = first_cpu(cpu_online_map);
+	pool->last_cpu = cpu;
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+
+	return cpu;
+}
+
+/*
+ * Account one more pending completion callback for @__cq on the
+ * completion task @cct.
+ *
+ * The callback counter is always incremented. Only when it was zero before
+ * is the CQ linked into cct->cq_list, cct->cq_jobs raised and the task's
+ * wait queue woken. Both cct->task_lock and __cq->task_lock are held
+ * (in that order) while this happens.
+ */
+static void __queue_comp_task(struct ehca_cq *__cq,
+			      struct ehca_cpu_comp_task *cct)
+{
+	unsigned long cct_flags;
+	unsigned long cq_flags;
+
+	spin_lock_irqsave(&cct->task_lock, cct_flags);
+	spin_lock_irqsave(&__cq->task_lock, cq_flags);
+
+	if (__cq->nr_callbacks++ == 0) {
+		/* first pending callback: the CQ is not on any list yet */
+		list_add_tail(&__cq->entry, &cct->cq_list);
+		cct->cq_jobs++;
+		wake_up(&cct->wait_queue);
+	}
+
+	spin_unlock_irqrestore(&__cq->task_lock, cq_flags);
+	spin_unlock_irqrestore(&cct->task_lock, cct_flags);
+}
+
+/*
+ * Pick a completion task for @__cq and queue the CQ on it.
+ *
+ * The next online CPU in round-robin order is tried first; if its task
+ * already has CQs queued, the CPU after that is used instead without a
+ * further check. The whole selection runs between get_cpu() and put_cpu().
+ */
+static void queue_comp_task(struct ehca_cq *__cq)
+{
+	struct ehca_cpu_comp_task *target;
+	int this_cpu;
+	int chosen;
+
+	this_cpu = get_cpu();
+
+	chosen = find_next_online_cpu(pool);
+	BUG_ON(!cpu_online(chosen));
+	target = per_cpu_ptr(pool->cpu_comp_tasks, chosen);
+
+	if (cct_busy(target)) {
+		chosen = find_next_online_cpu(pool);
+		target = per_cpu_ptr(pool->cpu_comp_tasks, chosen);
+	}
+
+	__queue_comp_task(__cq, target);
+
+	put_cpu();
+}
+
+/*
+ * Work off the CQ list of the completion task @cct.
+ *
+ * For the CQ at the head of the list the completion handler is called once
+ * with cct->task_lock dropped, then the CQ's callback counter is
+ * decremented. The CQ is unlinked (and cq_jobs lowered) only when the
+ * counter reaches zero, so a CQ with several pending callbacks stays at the
+ * head and is handled again on the next iteration.
+ */
+static void run_comp_task(struct ehca_cpu_comp_task* cct)
+{
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+	unsigned long flags_cq;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	while (!list_empty(&cct->cq_list)) {
+		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+		/* the handler runs without the list lock held */
+		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		comp_event_callback(cq);
+		spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		cq->nr_callbacks--;
+		if (cq->nr_callbacks == 0) {
+			/* NOTE(review): unlinks the current list head, which
+			 * is assumed to still be cq after the lock was
+			 * re-taken - confirm */
+			list_del_init(cct->cq_list.next);
+			cct->cq_jobs--;
+		}
+		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
+
+	}
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+	return;
+}
+
+/*
+ * Thread function of a per-CPU completion task; @__cct is the task's
+ * struct ehca_cpu_comp_task.
+ *
+ * Loops until kthread_should_stop(): sleeps on cct->wait_queue while
+ * cq_list is empty and calls run_comp_task() whenever the list has
+ * entries. Always returns 0.
+ */
+static int comp_task(void *__cct)
+{
+	struct ehca_cpu_comp_task* cct = __cct;
+	DECLARE_WAITQUEUE(wait, current);
+
+	/* the task state is set before cq_list is tested below */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while(!kthread_should_stop()) {
+		add_wait_queue(&cct->wait_queue, &wait);
+
+		if (list_empty(&cct->cq_list))
+			schedule();
+		else
+			__set_current_state(TASK_RUNNING);
+
+		remove_wait_queue(&cct->wait_queue, &wait);
+
+		if (!list_empty(&cct->cq_list))
+			run_comp_task(__cct);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+/*
+ * Initialise the completion task slot of @cpu in @pool and create its
+ * kernel thread "ehca_comp/<cpu>". The thread is created but neither bound
+ * to a CPU nor woken here.
+ *
+ * Returns the new task, or NULL if the thread could not be created.
+ * kthread_create() reports failure with an ERR_PTR() value; it is turned
+ * into NULL here because the callers in this file test the result (and
+ * cct->task) against NULL only.
+ */
+static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
+					    int cpu)
+{
+	struct ehca_cpu_comp_task *cct;
+	struct task_struct *task;
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	spin_lock_init(&cct->task_lock);
+	INIT_LIST_HEAD(&cct->cq_list);
+	init_waitqueue_head(&cct->wait_queue);
+
+	task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
+	if (IS_ERR(task))
+		task = NULL;
+	cct->task = task;
+
+	return task;
+}
+
+/*
+ * Detach the completion thread of @cpu from @pool and stop it.
+ *
+ * The task pointer and the job counter of the slot are cleared under
+ * task_lock; kthread_stop() is called afterwards, outside the lock, and
+ * only if a task was recorded.
+ */
+static void destroy_comp_task(struct ehca_comp_pool *pool,
+			      int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	struct task_struct *victim;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cct->task_lock, flags);
+	victim = cct->task;
+	cct->task = NULL;
+	cct->cq_jobs = 0;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
+
+	if (!victim)
+		return;
+
+	kthread_stop(victim);
+}
+
+/*
+ * Move the CQs still queued on the completion task of @cpu over to the
+ * completion task of the CPU this function runs on.
+ *
+ * The pending list is first spliced onto a private list head, which leaves
+ * cct->cq_list empty; the entries therefore have to be taken from the
+ * private list, not from cct->cq_list.
+ */
+static void take_over_work(struct ehca_comp_pool *pool,
+			   int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	LIST_HEAD(list);
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+	list_splice_init(&cct->cq_list, &list);
+
+	while(!list_empty(&list)) {
+	       cq = list_entry(list.next, struct ehca_cq, entry);
+
+	       list_del(&cq->entry);
+	       /*
+		* NOTE(review): __queue_comp_task() links a CQ only when its
+		* nr_callbacks is 0, while CQs taken from this list may
+		* already have a non-zero count - verify that they really
+		* end up on the new task's list.
+		*/
+	       __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+						 smp_processor_id()));
+	}
+
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+}
+
+/*
+ * CPU notifier callback that keeps the completion task pool in step with
+ * CPU hotplug. @hcpu carries the number of the affected CPU.
+ *
+ *  CPU_UP_PREPARE   - create the completion task for the CPU
+ *  CPU_UP_CANCELED  - bind the task to an online CPU, then destroy it
+ *  CPU_ONLINE       - bind the task to the CPU and wake it
+ *  CPU_DEAD         - destroy the task and take over its queued CQs
+ *
+ * Returns NOTIFY_BAD if the task cannot be created, NOTIFY_OK otherwise.
+ */
+static int comp_pool_callback(struct notifier_block *nfb,
+			      unsigned long action,
+			      void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct ehca_cpu_comp_task *cct;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
+		/* NOTE(review): only a NULL return counts as failure here */
+		if(!create_comp_task(pool, cpu)) {
+			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_UP_CANCELED:
+		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		/* NOTE(review): cct->task is used without a NULL check */
+		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
+		destroy_comp_task(pool, cpu);
+		break;
+	case CPU_ONLINE:
+		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		kthread_bind(cct->task, cpu);
+		wake_up_process(cct->task);
+		break;
+	case CPU_DOWN_PREPARE:
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
+		break;
+	case CPU_DOWN_FAILED:
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
+		break;
+	case CPU_DEAD:
+		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
+		destroy_comp_task(pool, cpu);
+		take_over_work(pool, cpu);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+#endif
+
+/*
+ * ehca_create_comp_pool - set up the pool of per-CPU completion tasks
+ *
+ * With CONFIG_INFINIBAND_EHCA_SCALING the pool and its per-CPU slots are
+ * allocated, one completion task is created, bound and started for every
+ * online CPU, and the CPU hotplug notifier is registered. Without that
+ * option the function does nothing.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails. A task that
+ * cannot be created is skipped without failing the call.
+ */
+int ehca_create_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int cpu;
+	struct task_struct *task;
+
+	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
+	if (pool == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&pool->last_cpu_lock);
+	pool->last_cpu = any_online_cpu(cpu_online_map);
+
+	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
+	if (pool->cpu_comp_tasks == NULL) {
+		kfree(pool);
+		/* do not leave the file-scope pointer dangling */
+		pool = NULL;
+		return -ENOMEM;
+	}
+
+	for_each_online_cpu(cpu) {
+		task = create_comp_task(pool, cpu);
+		if (task) {
+			kthread_bind(task, cpu);
+			wake_up_process(task);
+		}
+	}
+
+	comp_pool_callback_nb.notifier_call = comp_pool_callback;
+	comp_pool_callback_nb.priority =0;
+	register_cpu_notifier(&comp_pool_callback_nb);
+#endif
+
+	return 0;
+}
+
+/*
+ * ehca_destroy_comp_pool - tear down the pool of per-CPU completion tasks
+ *
+ * With CONFIG_INFINIBAND_EHCA_SCALING the CPU hotplug notifier is
+ * unregistered, the completion task of every online CPU is stopped, and the
+ * memory obtained in ehca_create_comp_pool() (per-CPU slots and the pool
+ * itself) is released. Without that option the function does nothing.
+ */
+void ehca_destroy_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int i;
+
+	unregister_cpu_notifier(&comp_pool_callback_nb);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_online(i))
+			destroy_comp_task(pool, i);
+	}
+
+	/* all tasks are stopped, nothing refers to the pool any more */
+	free_percpu(pool->cpu_comp_tasks);
+	kfree(pool);
+	pool = NULL;
+#endif
+
+	return;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
new file mode 100644
index 0000000000000..85bf1fe16fe41
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -0,0 +1,77 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions and structs for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IRQ_H
+#define __EHCA_IRQ_H
+
+
+struct ehca_shca;
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/atomic.h>
+
+int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_neq(unsigned long data);
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_eq(unsigned long data);
+
+/* State of the completion task that serves one CPU */
+struct ehca_cpu_comp_task {
+	wait_queue_head_t wait_queue;	/* the task sleeps here while idle */
+	struct list_head cq_list;	/* CQs with pending callbacks */
+	struct task_struct *task;	/* kernel thread, NULL once destroyed */
+	spinlock_t task_lock;		/* taken around cq_list/cq_jobs updates */
+	int cq_jobs;			/* number of CQs linked into cq_list */
+};
+
+/* Pool of completion tasks, one slot per CPU */
+struct ehca_comp_pool {
+	struct ehca_cpu_comp_task *cpu_comp_tasks; /* per-CPU allocation */
+	int last_cpu;			/* CPU picked by the last round-robin step */
+	spinlock_t last_cpu_lock;	/* held while last_cpu is advanced */
+};
+
+int ehca_create_comp_pool(void);
+void ehca_destroy_comp_pool(void);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
new file mode 100644
index 0000000000000..bbdc437f51673
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -0,0 +1,181 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions for internal functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Dietmar Decker <ddecker@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IVERBS_H__
+#define __EHCA_IVERBS_H__
+
+#include "ehca_classes.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
+
+int ehca_query_port(struct ib_device *ibdev, u8 port,
+		    struct ib_port_attr *props);
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
+		   union ib_gid *gid);
+
+int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
+		     struct ib_port_modify *props);
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata);
+
+int ehca_dealloc_pd(struct ib_pd *pd);
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_destroy_ah(struct ib_ah *ah);
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+			       struct ib_phys_buf *phys_buf_array,
+			       int num_phys_buf,
+			       int mr_access_flags, u64 *iova_start);
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+			       struct ib_umem *region,
+			       int mr_access_flags, struct ib_udata *udata);
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+		       int mr_rereg_mask,
+		       struct ib_pd *pd,
+		       struct ib_phys_buf *phys_buf_array,
+		       int num_phys_buf, int mr_access_flags, u64 *iova_start);
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+int ehca_dereg_mr(struct ib_mr *mr);
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+
+int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+		 struct ib_mw_bind *mw_bind);
+
+int ehca_dealloc_mw(struct ib_mw *mw);
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+			      int mr_access_flags,
+			      struct ib_fmr_attr *fmr_attr);
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+		      u64 *page_list, int list_len, u64 iova);
+
+int ehca_unmap_fmr(struct list_head *fmr_list);
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr);
+
+/* Kind of event queue requested from ehca_create_eq() */
+enum ehca_eq_type {
+	EHCA_EQ = 0, /* Event Queue              */
+	EHCA_NEQ     /* Notification Event Queue */
+};
+
+int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
+		   enum ehca_eq_type type, const u32 length);
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata);
+
+int ehca_destroy_cq(struct ib_cq *cq);
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+
+int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *init_attr,
+			     struct ib_udata *udata);
+
+int ehca_destroy_qp(struct ib_qp *qp);
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+
+int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr);
+
+int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr);
+
+u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
+		    struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata);
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context);
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+void ehca_poll_eqs(unsigned long data);
+
+int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
+		     struct vm_area_struct **vma);
+
+int ehca_mmap_register(u64 physical,void **mapped,
+		       struct vm_area_struct **vma);
+
+int ehca_munmap(unsigned long addr, size_t len);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
new file mode 100644
index 0000000000000..2a99f2d13cdbf
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -0,0 +1,818 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  module start stop, hca detection
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION("SVNEHCA_0016");
+
+int ehca_open_aqp1     = 0;
+int ehca_debug_level   = 0;
+int ehca_hw_level      = 0;
+int ehca_nr_ports      = 2;
+int ehca_use_hp_mr     = 0;
+int ehca_port_act_time = 30;
+int ehca_poll_all_eqs  = 1;
+int ehca_static_rate   = -1;
+
+module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
+module_param_named(debug_level,   ehca_debug_level,   int, 0);
+module_param_named(hw_level,      ehca_hw_level,      int, 0);
+module_param_named(nr_ports,      ehca_nr_ports,      int, 0);
+module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
+module_param_named(port_act_time, ehca_port_act_time, int, 0);
+module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
+module_param_named(static_rate,   ehca_static_rate,   int, 0);
+
+MODULE_PARM_DESC(open_aqp1,
+		 "AQP1 on startup (0: no (default), 1: yes)");
+MODULE_PARM_DESC(debug_level,
+		 "debug level"
+		 " (0: no debug traces (default), 1: with debug traces)");
+MODULE_PARM_DESC(hw_level,
+		 "hardware level"
+		 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+MODULE_PARM_DESC(nr_ports,
+		 "number of connected ports (default: 2)");
+MODULE_PARM_DESC(use_hp_mr,
+		 "high performance MRs (0: no (default), 1: yes)");
+MODULE_PARM_DESC(port_act_time,
+		 "time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+		 "polls all event queues periodically"
+		 " (0: no, 1: yes (default))");
+MODULE_PARM_DESC(static_rate,
+		 "set permanent static rate (default: disabled)");
+
+spinlock_t ehca_qp_idr_lock;
+spinlock_t ehca_cq_idr_lock;
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+static struct list_head shca_list; /* list of all registered ehcas */
+static spinlock_t shca_list_lock;
+
+static struct timer_list poll_eqs_timer;
+
+static int ehca_create_slab_caches(void)
+{
+	int ret;
+
+	ret = ehca_init_pd_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create PD SLAB cache.");
+		return ret;
+	}
+
+	ret = ehca_init_cq_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create CQ SLAB cache.");
+		goto create_slab_caches2;
+	}
+
+	ret = ehca_init_qp_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create QP SLAB cache.");
+		goto create_slab_caches3;
+	}
+
+	ret = ehca_init_av_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create AV SLAB cache.");
+		goto create_slab_caches4;
+	}
+
+	ret = ehca_init_mrmw_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create MR&MW SLAB cache.");
+		goto create_slab_caches5;
+	}
+
+	return 0;
+
+create_slab_caches5:
+	ehca_cleanup_av_cache();
+
+create_slab_caches4:
+	ehca_cleanup_qp_cache();
+
+create_slab_caches3:
+	ehca_cleanup_cq_cache();
+
+create_slab_caches2:
+	ehca_cleanup_pd_cache();
+
+	return ret;
+}
+
+static void ehca_destroy_slab_caches(void)
+{
+	ehca_cleanup_mrmw_cache();
+	ehca_cleanup_av_cache();
+	ehca_cleanup_qp_cache();
+	ehca_cleanup_cq_cache();
+	ehca_cleanup_pd_cache();
+}
+
+#define EHCA_HCAAVER  EHCA_BMASK_IBM(32,39)
+#define EHCA_REVID    EHCA_BMASK_IBM(40,63)
+
+int ehca_sense_attributes(struct ehca_shca *shca)
+{
+	int ret = 0;
+	u64 h_ret;
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_gen_err("Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+	if (h_ret != H_SUCCESS) {
+		ehca_gen_err("Cannot query device properties. h_ret=%lx",
+			     h_ret);
+		ret = -EPERM;
+		goto num_ports1;
+	}
+
+	if (ehca_nr_ports == 1)
+		shca->num_ports = 1;
+	else
+		shca->num_ports = (u8)rblock->num_ports;
+
+	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
+
+	if (ehca_hw_level == 0) {
+		u32 hcaaver;
+		u32 revid;
+
+		hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+		revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
+
+		if ((hcaaver == 1) && (revid == 0))
+			shca->hw_level = 0;
+		else if ((hcaaver == 1) && (revid == 1))
+			shca->hw_level = 1;
+		else if ((hcaaver == 1) && (revid == 2))
+			shca->hw_level = 2;
+	}
+	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
+
+	shca->sport[0].rate = IB_RATE_30_GBPS;
+	shca->sport[1].rate = IB_RATE_30_GBPS;
+
+num_ports1:
+	kfree(rblock);
+	return ret;
+}
+
+static int init_node_guid(struct ehca_shca *shca)
+{
+	int ret = 0;
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		ret = -EINVAL;
+		goto init_node_guid1;
+	}
+
+	memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
+
+init_node_guid1:
+	kfree(rblock);
+	return ret;
+}
+
+int ehca_register_device(struct ehca_shca *shca)
+{
+	int ret;
+
+	ret = init_node_guid(shca);
+	if (ret)
+		return ret;
+
+	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
+	shca->ib_device.owner               = THIS_MODULE;
+
+	shca->ib_device.uverbs_abi_ver	    = 5;
+	shca->ib_device.uverbs_cmd_mask	    =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+	shca->ib_device.node_type           = IB_NODE_CA;
+	shca->ib_device.phys_port_cnt       = shca->num_ports;
+	shca->ib_device.dma_device          = &shca->ibmebus_dev->ofdev.dev;
+	shca->ib_device.query_device        = ehca_query_device;
+	shca->ib_device.query_port          = ehca_query_port;
+	shca->ib_device.query_gid           = ehca_query_gid;
+	shca->ib_device.query_pkey          = ehca_query_pkey;
+	/* shca->ib_device.modify_device    = ehca_modify_device;   */
+	shca->ib_device.modify_port         = ehca_modify_port;
+	shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
+	shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
+	shca->ib_device.alloc_pd            = ehca_alloc_pd;
+	shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
+	shca->ib_device.create_ah	    = ehca_create_ah;
+	/* shca->ib_device.modify_ah	    = ehca_modify_ah;	    */
+	shca->ib_device.query_ah	    = ehca_query_ah;
+	shca->ib_device.destroy_ah	    = ehca_destroy_ah;
+	shca->ib_device.create_qp	    = ehca_create_qp;
+	shca->ib_device.modify_qp	    = ehca_modify_qp;
+	shca->ib_device.query_qp	    = ehca_query_qp;
+	shca->ib_device.destroy_qp	    = ehca_destroy_qp;
+	shca->ib_device.post_send	    = ehca_post_send;
+	shca->ib_device.post_recv	    = ehca_post_recv;
+	shca->ib_device.create_cq	    = ehca_create_cq;
+	shca->ib_device.destroy_cq	    = ehca_destroy_cq;
+	shca->ib_device.resize_cq	    = ehca_resize_cq;
+	shca->ib_device.poll_cq		    = ehca_poll_cq;
+	/* shca->ib_device.peek_cq	    = ehca_peek_cq;	    */
+	shca->ib_device.req_notify_cq	    = ehca_req_notify_cq;
+	/* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
+	shca->ib_device.get_dma_mr	    = ehca_get_dma_mr;
+	shca->ib_device.reg_phys_mr	    = ehca_reg_phys_mr;
+	shca->ib_device.reg_user_mr	    = ehca_reg_user_mr;
+	shca->ib_device.query_mr	    = ehca_query_mr;
+	shca->ib_device.dereg_mr	    = ehca_dereg_mr;
+	shca->ib_device.rereg_phys_mr	    = ehca_rereg_phys_mr;
+	shca->ib_device.alloc_mw	    = ehca_alloc_mw;
+	shca->ib_device.bind_mw		    = ehca_bind_mw;
+	shca->ib_device.dealloc_mw	    = ehca_dealloc_mw;
+	shca->ib_device.alloc_fmr	    = ehca_alloc_fmr;
+	shca->ib_device.map_phys_fmr	    = ehca_map_phys_fmr;
+	shca->ib_device.unmap_fmr	    = ehca_unmap_fmr;
+	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
+	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
+	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
+	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
+	shca->ib_device.mmap		    = ehca_mmap;
+
+	ret = ib_register_device(&shca->ib_device);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "ib_register_device() failed ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * Create AQP1 (GSI QP plus its CQ) for 1-based @port; 0 or negative errno.
+ */
+static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+{
+	struct ehca_sport *sport = &shca->sport[port - 1];
+	struct ib_cq *ibcq;
+	struct ib_qp *ibqp;
+	struct ib_qp_init_attr qp_init_attr;
+	int ret;
+
+	if (sport->ibcq_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
+		return -EPERM;
+	}
+
+	/* check the QP slot up front so no CQ is created just to be destroyed */
+	if (sport->ibqp_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+		return -EPERM;
+	}
+
+	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+	if (IS_ERR(ibcq)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
+		return PTR_ERR(ibcq);
+	}
+	sport->ibcq_aqp1 = ibcq;
+
+	memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
+	qp_init_attr.send_cq          = ibcq;
+	qp_init_attr.recv_cq          = ibcq;
+	qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
+	qp_init_attr.cap.max_send_wr  = 100;
+	qp_init_attr.cap.max_recv_wr  = 100;
+	qp_init_attr.cap.max_send_sge = 2;
+	qp_init_attr.cap.max_recv_sge = 1;
+	qp_init_attr.qp_type          = IB_QPT_GSI;
+	qp_init_attr.port_num         = port;
+	qp_init_attr.qp_context       = NULL;
+	qp_init_attr.event_handler    = NULL;
+	qp_init_attr.srq              = NULL;
+
+	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
+	if (IS_ERR(ibqp)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
+		ret = PTR_ERR(ibqp);
+		ib_destroy_cq(ibcq);
+		sport->ibcq_aqp1 = NULL; /* don't leave a stale CQ pointer */
+		return ret;
+	}
+	sport->ibqp_aqp1 = ibqp;
+	return 0;
+}
+
+static int ehca_destroy_aqp1(struct ehca_sport *sport)
+{
+	int ret;
+
+	ret = ib_destroy_qp(sport->ibqp_aqp1);
+	if (ret) {
+		ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
+		return ret;
+	}
+
+	ret = ib_destroy_cq(sport->ibcq_aqp1);
+	if (ret)
+		ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
+
+	return ret;
+}
+
+static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
+{
+	return  snprintf(buf, PAGE_SIZE, "%d\n",
+			 ehca_debug_level);
+}
+
+/* sysfs store: a leading '0'..'9' sets ehca_debug_level; consumes all input */
+static ssize_t ehca_store_debug_level(struct device_driver *ddp,
+				      const char *buf, size_t count)
+{
+	if (count && *buf >= '0' && *buf <= '9')
+		ehca_debug_level = *buf - '0';
+	return count;	/* a short return makes writers resend leftover bytes */
+}
+
+DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+	    ehca_show_debug_level, ehca_store_debug_level);
+
+void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_create_file(&drv->driver, &driver_attr_debug_level);
+}
+
+void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_remove_file(&drv->driver, &driver_attr_debug_level);
+}
+
+#define EHCA_RESOURCE_ATTR(name)                                           \
+static ssize_t  ehca_show_##name(struct device *dev,                       \
+				 struct device_attribute *attr,            \
+				 char *buf)                                \
+{									   \
+	struct ehca_shca *shca;						   \
+	struct hipz_query_hca *rblock;				           \
+	int data;                                                          \
+									   \
+	shca = dev->driver_data;					   \
+									   \
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);			   \
+	if (!rblock) {						           \
+		dev_err(dev, "Can't allocate rblock memory.");		   \
+		return 0;						   \
+	}								   \
+									   \
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
+		dev_err(dev, "Can't query device properties");	   	   \
+		kfree(rblock);					   	   \
+		return 0;					   	   \
+	}								   \
+									   \
+	data = rblock->name;                                               \
+	kfree(rblock);                                                     \
+									   \
+	if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))	   \
+		return snprintf(buf, 256, "1\n");			   \
+	else								   \
+		return snprintf(buf, 256, "%d\n", data);		   \
+									   \
+}									   \
+static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
+
+EHCA_RESOURCE_ATTR(num_ports);
+EHCA_RESOURCE_ATTR(hw_ver);
+EHCA_RESOURCE_ATTR(max_eq);
+EHCA_RESOURCE_ATTR(cur_eq);
+EHCA_RESOURCE_ATTR(max_cq);
+EHCA_RESOURCE_ATTR(cur_cq);
+EHCA_RESOURCE_ATTR(max_qp);
+EHCA_RESOURCE_ATTR(cur_qp);
+EHCA_RESOURCE_ATTR(max_mr);
+EHCA_RESOURCE_ATTR(cur_mr);
+EHCA_RESOURCE_ATTR(max_mw);
+EHCA_RESOURCE_ATTR(cur_mw);
+EHCA_RESOURCE_ATTR(max_pd);
+EHCA_RESOURCE_ATTR(max_ah);
+
+static ssize_t ehca_show_adapter_handle(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ehca_shca *shca = dev->driver_data;
+
+	return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
+
+}
+static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+
+
+void ehca_create_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+/* Set up one eHCA adapter; returns 0 or the error of the step that failed. */
+static int __devinit ehca_probe(struct ibmebus_dev *dev,
+				const struct of_device_id *id)
+{
+	struct ehca_shca *shca;
+	u64 *handle;
+	struct ib_pd *ibpd;
+	int ret, rc; /* rc: cleanup status only, ret keeps the probe error */
+
+	handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+	if (!handle) {
+		ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	if (!(*handle)) {
+		ehca_gen_err("Wrong eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
+	if (!shca) {
+		ehca_gen_err("Cannot allocate shca memory.");
+		return -ENOMEM;
+	}
+
+	shca->ibmebus_dev = dev;
+	shca->ipz_hca_handle.handle = *handle;
+	dev->ofdev.dev.driver_data = shca;
+
+	ret = ehca_sense_attributes(shca);
+	if (ret < 0) {
+		ehca_gen_err("Cannot sense eHCA attributes.");
+		goto probe1;
+	}
+
+	ret = ehca_register_device(shca);
+	if (ret) {
+		ehca_gen_err("Cannot register Infiniband device");
+		goto probe1;
+	}
+
+	/* create event queues */
+	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create EQ.");
+		goto probe2;
+	}
+
+	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create NEQ.");
+		goto probe3;
+	}
+
+	/* create internal protection domain */
+	ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+	if (IS_ERR(ibpd)) {
+		ehca_err(&shca->ib_device, "Cannot create internal PD.");
+		ret = PTR_ERR(ibpd);
+		goto probe4;
+	}
+
+	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
+	shca->pd->ib_pd.device = &shca->ib_device;
+
+	/* create internal max MR */
+	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
+			 ret);
+		goto probe5;
+	}
+
+	/* create AQP1 for port 1 */
+	if (ehca_open_aqp1 == 1) {
+		shca->sport[0].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 1);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 1.");
+			goto probe6;
+		}
+	}
+
+	/* create AQP1 for port 2 */
+	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
+		shca->sport[1].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 2);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 2.");
+			goto probe7;
+		}
+	}
+
+	ehca_create_device_sysfs(dev);
+
+	spin_lock(&shca_list_lock);
+	list_add(&shca->shca_list, &shca_list);
+	spin_unlock(&shca_list_lock);
+
+	return 0;
+
+probe7:
+	rc = ehca_destroy_aqp1(&shca->sport[0]);
+	if (rc)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy AQP1 for port 1. ret=%x", rc);
+
+probe6:
+	rc = ehca_dereg_internal_maxmr(shca);
+	if (rc)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", rc);
+
+probe5:
+	rc = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (rc)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", rc);
+
+probe4:
+	rc = ehca_destroy_eq(shca, &shca->neq);
+	if (rc)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy NEQ. ret=%x", rc);
+
+probe3:
+	rc = ehca_destroy_eq(shca, &shca->eq);
+	if (rc)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy EQ. ret=%x", rc);
+
+probe2:
+	ib_unregister_device(&shca->ib_device);
+
+probe1:
+	ib_dealloc_device(&shca->ib_device);
+
+	return ret;
+}
+
+/* Tear down one adapter, releasing what ehca_probe() set up for it. */
+static int __devexit ehca_remove(struct ibmebus_dev *dev)
+{
+	struct ehca_shca *shca = dev->ofdev.dev.driver_data;
+	int ret;
+
+	ehca_remove_device_sysfs(dev);
+
+	/* unlink first: the EQ poller walks this list, shca is dealloc'd below */
+	spin_lock(&shca_list_lock);
+	list_del(&shca->shca_list);
+	spin_unlock(&shca_list_lock);
+
+	if (ehca_open_aqp1 == 1) {
+		int i;
+		for (i = 0; i < shca->num_ports; i++) {
+			ret = ehca_destroy_aqp1(&shca->sport[i]);
+			if (ret)
+				ehca_err(&shca->ib_device,
+					 "Cannot destroy AQP1 for port %x "
+					 "ret=%x", i + 1, ret);
+		}
+	}
+
+	ib_unregister_device(&shca->ib_device);
+
+	ret = ehca_dereg_internal_maxmr(shca);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", ret);
+	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", ret);
+
+	ret = ehca_destroy_eq(shca, &shca->eq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
+	ret = ehca_destroy_eq(shca, &shca->neq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Cannot destroy NEQ. ret=%x", ret);
+
+	ib_dealloc_device(&shca->ib_device);
+
+	return ret;
+}
+
+static struct of_device_id ehca_device_table[] =
+{
+	{
+		.name       = "lhca",
+		.compatible = "IBM,lhca",
+	},
+	{},
+};
+
+static struct ibmebus_driver ehca_driver = {
+	.name     = "ehca",
+	.id_table = ehca_device_table,
+	.probe    = ehca_probe,
+	.remove   = ehca_remove,
+};
+
+void ehca_poll_eqs(unsigned long data)
+{
+	struct ehca_shca *shca;
+
+	spin_lock(&shca_list_lock);
+	list_for_each_entry(shca, &shca_list, shca_list) {
+		if (shca->eq.is_initialized)
+			ehca_tasklet_eq((unsigned long)(void*)shca);
+	}
+	mod_timer(&poll_eqs_timer, jiffies + HZ);
+	spin_unlock(&shca_list_lock);
+}
+
+/* Module init: comp pool, SLAB caches, bus driver, optional EQ poll timer. */
+int __init ehca_module_init(void)
+{
+	int ret;
+
+	printk(KERN_INFO "eHCA Infiniband Device Driver "
+	                 "(Rel.: SVNEHCA_0016)\n");
+	idr_init(&ehca_qp_idr);
+	idr_init(&ehca_cq_idr);
+	spin_lock_init(&ehca_qp_idr_lock);
+	spin_lock_init(&ehca_cq_idr_lock);
+	INIT_LIST_HEAD(&shca_list);
+	spin_lock_init(&shca_list_lock);
+
+	if ((ret = ehca_create_comp_pool())) {
+		ehca_gen_err("Cannot create comp pool.");
+		return ret;
+	}
+
+	if ((ret = ehca_create_slab_caches())) {
+		ehca_gen_err("Cannot create SLAB caches");
+		ret = -ENOMEM;
+		goto module_init1;
+	}
+
+	if ((ret = ibmebus_register_driver(&ehca_driver))) {
+		ehca_gen_err("Cannot register eHCA device driver");
+		ret = -EINVAL;
+		goto module_init2;
+	}
+
+	ehca_create_driver_sysfs(&ehca_driver);
+
+	if (ehca_poll_all_eqs != 1) {
+		ehca_gen_err("WARNING!!!");
+		ehca_gen_err("It is possible to lose interrupts.");
+	} else {
+		init_timer(&poll_eqs_timer);
+		poll_eqs_timer.function = ehca_poll_eqs;
+		poll_eqs_timer.expires = jiffies + HZ;
+		add_timer(&poll_eqs_timer);
+	}
+
+	return 0;
+
+module_init2:
+	ehca_destroy_slab_caches();
+
+module_init1:
+	ehca_destroy_comp_pool();
+	return ret;
+}
+
+/* Module exit: stop the EQ poll timer, then undo ehca_module_init(). */
+void __exit ehca_module_exit(void)
+{
+	if (ehca_poll_all_eqs == 1)
+		del_timer_sync(&poll_eqs_timer);
+
+	ehca_remove_driver_sysfs(&ehca_driver);
+	ibmebus_unregister_driver(&ehca_driver);
+
+	ehca_destroy_slab_caches();
+	ehca_destroy_comp_pool();
+
+	idr_destroy(&ehca_cq_idr);
+	idr_destroy(&ehca_qp_idr);
+}
+
+module_init(ehca_module_init);
+module_exit(ehca_module_exit);
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
new file mode 100644
index 0000000000000..32a870660bfe7
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -0,0 +1,131 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  mcast  functions
+ *
+ *  Authors: Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define MAX_MC_LID 0xFFFE
+#define MIN_MC_LID 0xC000	/* Multicast limits */
+#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
+#define EHCA_VALID_MULTICAST_LID(lid) \
+	(((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
+
+/* Attach UD QP @ibqp to the multicast group given by @gid and @lid. */
+int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid multicast gid");
+		return -EINVAL;
+	} else if (!EHCA_VALID_MULTICAST_LID(lid)) {
+		ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
+
+/* Detach UD QP @ibqp from the multicast group given by @gid and @lid. */
+int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
+		return -EINVAL;
+	}
+
+	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+		ehca_err(ibqp->device, "invalid multicast gid");
+		return -EINVAL;
+	} else if (!EHCA_VALID_MULTICAST_LID(lid)) {
+		ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
+				   my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+
+	return ehca2ib_return_code(h_ret);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
new file mode 100644
index 0000000000000..5ca65441e1da5
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -0,0 +1,2261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "hcp_if.h"
+#include "hipz_hw.h"
+
+static struct kmem_cache *mr_cache;
+static struct kmem_cache *mw_cache;
+
+static struct ehca_mr *ehca_mr_new(void)
+{
+	/* returns a zeroed ehca_mr from mr_cache with mrlock set up, or NULL */
+	struct ehca_mr *me = kmem_cache_zalloc(mr_cache, SLAB_KERNEL);
+
+	if (!me) {
+		ehca_gen_err("alloc failed");
+		return NULL;
+	}
+	spin_lock_init(&me->mrlock);
+
+	return me;
+}
+
+static void ehca_mr_delete(struct ehca_mr *me)
+{
+	kmem_cache_free(mr_cache, me); /* give the object back to mr_cache */
+}
+
+static struct ehca_mw *ehca_mw_new(void)
+{
+	/* returns a zeroed ehca_mw from mw_cache with mwlock set up, or NULL */
+	struct ehca_mw *me = kmem_cache_zalloc(mw_cache, SLAB_KERNEL);
+
+	if (!me) {
+		ehca_gen_err("alloc failed");
+		return NULL;
+	}
+	spin_lock_init(&me->mwlock);
+
+	return me;
+}
+
+static void ehca_mw_delete(struct ehca_mw *me)
+{
+	kmem_cache_free(mw_cache, me); /* give the object back to mw_cache */
+}
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
+{
+	struct ib_mr *ib_mr;
+	int ret;
+	struct ehca_mr *e_maxmr;
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+
+	/* the DMA MR is a max-MR at KERNELBASE; requires shca->maxmr */
+	if (!shca->maxmr) {
+		ehca_err(&shca->ib_device, "no internal max-MR exist!");
+		ib_mr = ERR_PTR(-EINVAL);
+		goto get_dma_mr_exit0;
+	}
+	e_maxmr = ehca_mr_new();
+	if (!e_maxmr) {
+		ehca_err(&shca->ib_device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto get_dma_mr_exit0;
+	}
+	ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, mr_access_flags,
+			     e_pd, &e_maxmr->ib.ib_mr.lkey,
+			     &e_maxmr->ib.ib_mr.rkey);
+	if (ret) {
+		/* registration failed: do not leak the fresh ehca_mr */
+		ehca_mr_delete(e_maxmr);
+		ib_mr = ERR_PTR(ret);
+		goto get_dma_mr_exit0;
+	}
+	ib_mr = &e_maxmr->ib.ib_mr;
+
+get_dma_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
+			 PTR_ERR(ib_mr), pd, mr_access_flags);
+	return ib_mr;
+} /* end ehca_get_dma_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+			       struct ib_phys_buf *phys_buf_array,
+			       int num_phys_buf,
+			       int mr_access_flags,
+			       u64 *iova_start)
+{
+	struct ib_mr *ib_mr;
+	int ret;
+	struct ehca_mr *e_mr;
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+	u64 size;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 num_pages_mr;
+	u32 num_pages_4k; /* 4k portion "pages" */
+	/* register phys_buf_array as MR on pd; returns ib_mr or ERR_PTR() */
+	if ((num_phys_buf <= 0) || !phys_buf_array) {
+		ehca_err(pd->device, "bad input values: num_phys_buf=%x "
+			 "phys_buf_array=%p", num_phys_buf, phys_buf_array);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+
+	/* check physical buffer list and calculate size */
+	ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
+					    iova_start, &size);
+	if (ret) {
+		ib_mr = ERR_PTR(ret);
+		goto reg_phys_mr_exit0;
+	}
+	if ((size == 0) ||
+	    (((u64)iova_start + size) < (u64)iova_start)) {
+		ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
+			 size, iova_start);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(pd->device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto reg_phys_mr_exit0;
+	}
+
+	/* determine number of MR pages */
+	num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	/* register MR on HCA */
+	if (ehca_mr_is_maxmr(size, iova_start)) {
+		e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+		ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
+				     e_pd, &e_mr->ib.ib_mr.lkey,
+				     &e_mr->ib.ib_mr.rkey);
+		if (ret) {
+			ib_mr = ERR_PTR(ret);
+			goto reg_phys_mr_exit1;
+		}
+	} else {
+		pginfo.type           = EHCA_MR_PGI_PHYS;
+		pginfo.num_pages      = num_pages_mr;
+		pginfo.num_4k         = num_pages_4k;
+		pginfo.num_phys_buf   = num_phys_buf;
+		pginfo.phys_buf_array = phys_buf_array;
+		pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+					 EHCA_PAGESIZE);
+
+		ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
+				  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+				  &e_mr->ib.ib_mr.rkey);
+		if (ret) {
+			ib_mr = ERR_PTR(ret);
+			goto reg_phys_mr_exit1;
+		}
+	}
+
+	/* successful registration of all pages */
+	return &e_mr->ib.ib_mr;
+	/* exit1 frees e_mr in addition; exit0 only logs the error code */
+reg_phys_mr_exit1:
+	ehca_mr_delete(e_mr);
+reg_phys_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
+			 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
+			 PTR_ERR(ib_mr), pd, phys_buf_array,
+			 num_phys_buf, mr_access_flags, iova_start);
+	return ib_mr;
+} /* end ehca_reg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+			       struct ib_umem *region,
+			       int mr_access_flags,
+			       struct ib_udata *udata)
+{
+	struct ib_mr *ib_mr;
+	struct ehca_mr *e_mr;
+	struct ehca_shca *shca; /* set from pd->device after the NULL check */
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	int ret;
+	u32 num_pages_mr;
+	u32 num_pages_4k; /* 4k portion "pages" */
+
+	/* register user memory "region" as MR on pd; ib_mr or ERR_PTR() */
+	if (!pd) {
+		ehca_gen_err("bad pd=%p", pd);
+		return ERR_PTR(-EFAULT);
+	}
+	shca = container_of(pd->device, struct ehca_shca, ib_device);
+	if (!region) {
+		ehca_err(pd->device, "bad input values: region=%p", region);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if (region->page_size != PAGE_SIZE) {
+		ehca_err(pd->device, "page size not supported, "
+			 "region->page_size=%x", region->page_size);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if ((region->length == 0) ||
+	    ((region->virt_base + region->length) < region->virt_base)) {
+		ehca_err(pd->device, "bad input values: length=%lx "
+			 "virt_base=%lx", region->length, region->virt_base);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(pd->device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto reg_user_mr_exit0;
+	}
+
+	/* determine number of MR pages */
+	num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	/* register MR on HCA */
+	pginfo.type       = EHCA_MR_PGI_USER;
+	pginfo.num_pages  = num_pages_mr;
+	pginfo.num_4k     = num_pages_4k;
+	pginfo.region     = region;
+	pginfo.next_4k	  = region->offset / EHCA_PAGESIZE;
+	pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
+					       (&region->chunk_list),
+					       list);
+
+	ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
+			  region->length, mr_access_flags, e_pd, &pginfo,
+			  &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+	if (ret) {
+		ib_mr = ERR_PTR(ret);
+		goto reg_user_mr_exit1;
+	}
+
+	/* successful registration of all pages */
+	return &e_mr->ib.ib_mr;
+
+reg_user_mr_exit1:
+	ehca_mr_delete(e_mr);
+reg_user_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+			 " udata=%p",
+			 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+	return ib_mr;
+} /* end ehca_reg_user_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+		       int mr_rereg_mask,
+		       struct ib_pd *pd,
+		       struct ib_phys_buf *phys_buf_array,
+		       int num_phys_buf,
+		       int mr_access_flags,
+		       u64 *iova_start)
+{
+	int ret;
+
+	struct ehca_shca *shca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+	u64 new_size;
+	u64 *new_start;
+	u32 new_acl;
+	struct ehca_pd *new_pd;
+	u32 tmp_lkey, tmp_rkey;
+	unsigned long sl_flags;
+	u32 num_pages_mr = 0;
+	u32 num_pages_4k = 0; /* 4k portion "pages" */
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 cur_pid = current->tgid;
+	/* change translation/ACL/PD of mr per mr_rereg_mask; 0 on success */
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    (my_pd->ownpid != cur_pid)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
+		/* TODO not supported, because PHYP rereg hCall needs pages */
+		ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
+			 "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	if (mr_rereg_mask & IB_MR_REREG_PD) {
+		if (!pd) {
+			ehca_err(mr->device, "rereg with bad pd, pd=%p "
+				 "mr_rereg_mask=%x", pd, mr_rereg_mask);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+	}
+
+	if ((mr_rereg_mask &
+	     ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
+	    (mr_rereg_mask == 0)) {
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	/* check other parameters */
+	if (e_mr == shca->maxmr) {
+		/* should be impossible, however reject to be sure */
+		ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
+			 "shca->maxmr=%p mr->lkey=%x",
+			 mr, shca->maxmr, mr->lkey);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
+		if (e_mr->flags & EHCA_MR_FLAG_FMR) {
+			ehca_err(mr->device, "not supported for FMR, mr=%p "
+				 "flags=%x", mr, e_mr->flags);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+		if (!phys_buf_array || num_phys_buf <= 0) {
+			ehca_err(mr->device, "bad input values: mr_rereg_mask=%x"
+				 " phys_buf_array=%p num_phys_buf=%x",
+				 mr_rereg_mask, phys_buf_array, num_phys_buf);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+	}
+	if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&	/* change ACL */
+	    (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	     ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
+			 "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	/* set requested values dependent on rereg request */
+	spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+	new_start = e_mr->start;  /* new == old address */
+	new_size  = e_mr->size;	  /* new == old length */
+	new_acl   = e_mr->acl;	  /* new == old access control */
+	new_pd    = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/
+
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+		new_start = iova_start;	/* change address */
+		/* check physical buffer list and calculate size */
+		ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
+						    num_phys_buf, iova_start,
+						    &new_size);
+		if (ret)
+			goto rereg_phys_mr_exit1;
+		if ((new_size == 0) ||
+		    (((u64)iova_start + new_size) < (u64)iova_start)) {
+			ehca_err(mr->device, "bad input values: new_size=%lx "
+				 "iova_start=%p", new_size, iova_start);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit1;
+		}
+		num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size +
+				 PAGE_SIZE - 1) / PAGE_SIZE);
+		num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size +
+				 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+		pginfo.type           = EHCA_MR_PGI_PHYS;
+		pginfo.num_pages      = num_pages_mr;
+		pginfo.num_4k         = num_pages_4k;
+		pginfo.num_phys_buf   = num_phys_buf;
+		pginfo.phys_buf_array = phys_buf_array;
+		pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+					 EHCA_PAGESIZE);
+	}
+	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+		new_acl = mr_access_flags;
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		new_pd = container_of(pd, struct ehca_pd, ib_pd);
+	/* NOTE(review): runs under mrlock with IRQs off - confirm no sleeping */
+	ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
+			    new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+	if (ret)
+		goto rereg_phys_mr_exit1;
+
+	/* successful reregistration */
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		mr->pd = pd;
+	mr->lkey = tmp_lkey;
+	mr->rkey = tmp_rkey;
+	/* exit1 releases mrlock; exit0 is for failures before locking */
+rereg_phys_mr_exit1:
+	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+rereg_phys_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
+			 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
+			 "iova_start=%p",
+			 ret, mr, mr_rereg_mask, pd, phys_buf_array,
+			 num_phys_buf, mr_access_flags, iova_start);
+	return ret;
+} /* end ehca_rereg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+	struct ehca_pd *e_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_shca *shca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	u32 caller_tgid = current->tgid;
+	unsigned long irq_flags;
+	u64 h_ret;
+	int ret = 0;
+
+	/* a PD that has a user context may only be used by its owner */
+	if (e_pd->ib_pd.uobject && e_pd->ib_pd.uobject->context &&
+	    (e_pd->ownpid != caller_tgid)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 caller_tgid, e_pd->ownpid);
+		ret = -EINVAL;
+		goto query_mr_exit0;
+	}
+	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+		ret = -EINVAL;
+		goto query_mr_exit0;
+	}
+
+	/* mr_attr is cleared first, so it stays zeroed if the query fails */
+	memset(mr_attr, 0, sizeof(struct ib_mr_attr));
+	spin_lock_irqsave(&e_mr->mrlock, irq_flags);
+	h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
+	if (h_ret == H_SUCCESS) {
+		mr_attr->pd               = mr->pd;
+		mr_attr->device_virt_addr = hipzout.vaddr;
+		mr_attr->size             = hipzout.len;
+		mr_attr->lkey             = hipzout.lkey;
+		mr_attr->rkey             = hipzout.rkey;
+		ehca_mrmw_reverse_map_acl(&hipzout.acl,
+					  &mr_attr->mr_access_flags);
+	} else {
+		ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
+			 "hca_hndl=%lx mr_hndl=%lx lkey=%x",
+			 h_ret, mr, shca->ipz_hca_handle.handle,
+			 e_mr->ipz_mr_handle.handle, mr->lkey);
+		ret = ehca_mrmw_map_hrc_query_mr(h_ret);
+	}
+	spin_unlock_irqrestore(&e_mr->mrlock, irq_flags);
+
+query_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
+			 ret, mr, mr_attr);
+	return ret;
+} /* end ehca_query_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_mr(struct ib_mr *mr)
+{
+	struct ehca_pd *e_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_shca *shca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	u32 caller_tgid = current->tgid;
+	u64 h_ret;
+	int ret = 0;
+
+	/* a PD that has a user context may only be used by its owner */
+	if (e_pd->ib_pd.uobject && e_pd->ib_pd.uobject->context &&
+	    (e_pd->ownpid != caller_tgid)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 caller_tgid, e_pd->ownpid);
+		ret = -EINVAL;
+		goto dereg_mr_exit0;
+	}
+	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+		ret = -EINVAL;
+		goto dereg_mr_exit0;
+	}
+	if (e_mr == shca->maxmr) {
+		/* should be impossible, however reject to be sure */
+		ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
+			 "shca->maxmr=%p mr->lkey=%x",
+			 mr, shca->maxmr, mr->lkey);
+		ret = -EINVAL;
+		goto dereg_mr_exit0;
+	}
+
+	/* TODO: BUSY: MR still has bound window(s) */
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
+			 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
+			 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
+			 e_mr->ipz_mr_handle.handle, mr->lkey);
+		ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+		goto dereg_mr_exit0;
+	}
+	/* the hipz free succeeded, so the driver object can go as well */
+	ehca_mr_delete(e_mr);
+
+dereg_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
+	return ret;
+} /* end ehca_dereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+{
+	struct ehca_mw_hipzout_parms hipzout = {{0},0};
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_mw *e_mw;
+	struct ib_mw *ib_mw;
+	u64 h_ret;
+
+	/* allocate the driver object first, then the MW resource via hipz */
+	e_mw = ehca_mw_new();
+	if (!e_mw) {
+		ib_mw = ERR_PTR(-ENOMEM);
+		goto alloc_mw_exit0;
+	}
+
+	h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
+					 e_pd->fw_pd, &hipzout);
+	if (h_ret == H_SUCCESS) {
+		/* keep the handle and rkey returned in hipzout */
+		e_mw->ipz_mw_handle = hipzout.handle;
+		e_mw->ib_mw.rkey    = hipzout.rkey;
+		return &e_mw->ib_mw;
+	}
+
+	ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
+		 "shca=%p hca_hndl=%lx mw=%p",
+		 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
+	ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret));
+	ehca_mw_delete(e_mw);
+
+alloc_mw_exit0:
+	if (IS_ERR(ib_mw))
+		ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd);
+	return ib_mw;
+} /* end ehca_alloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_bind_mw(struct ib_qp *qp,
+		 struct ib_mw *mw,
+		 struct ib_mw_bind *mw_bind)
+{
+	/* TODO: not supported up to now; all arguments are ignored */
+	ehca_gen_err("bind MW currently not supported by HCAD");
+
+	return -EPERM;
+} /* end ehca_bind_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_mw(struct ib_mw *mw)
+{
+	struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
+	struct ehca_shca *shca =
+		container_of(mw->device, struct ehca_shca, ib_device);
+	u64 h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
+
+	/* e_mw is only freed once the MW resource itself was released */
+	if (h_ret == H_SUCCESS) {
+		ehca_mw_delete(e_mw);
+		return 0;
+	}
+
+	ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
+		 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
+		 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
+		 e_mw->ipz_mw_handle.handle);
+	return ehca_mrmw_map_hrc_free_mw(h_ret);
+} /* end ehca_dealloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+			      int mr_access_flags,
+			      struct ib_fmr_attr *fmr_attr)
+{
+	struct ib_fmr *ib_fmr;
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_mr *e_fmr;
+	int ret;
+	u32 tmp_lkey, tmp_rkey;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+
+	/* allocate an FMR on pd (ib_fmr or ERR_PTR()); check parameters first */
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if (mr_access_flags & IB_ACCESS_MW_BIND) {
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
+		ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
+			 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
+			 fmr_attr->max_pages, fmr_attr->max_maps,
+			 fmr_attr->page_shift);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) &&
+	    ((1 << fmr_attr->page_shift) != PAGE_SIZE)) {
+		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
+			 fmr_attr->page_shift);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+
+	e_fmr = ehca_mr_new();
+	if (!e_fmr) {
+		ib_fmr = ERR_PTR(-ENOMEM);
+		goto alloc_fmr_exit0;
+	}
+	e_fmr->flags |= EHCA_MR_FLAG_FMR;
+
+	/* register MR on HCA; size in u64 so max_pages << shift cannot wrap */
+	ret = ehca_reg_mr(shca, e_fmr, NULL,
+			  (u64)fmr_attr->max_pages << fmr_attr->page_shift,
+			  mr_access_flags, e_pd, &pginfo,
+			  &tmp_lkey, &tmp_rkey);
+	if (ret) {
+		ib_fmr = ERR_PTR(ret);
+		goto alloc_fmr_exit1;
+	}
+
+	/* successful */
+	e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
+	e_fmr->fmr_max_pages = fmr_attr->max_pages;
+	e_fmr->fmr_max_maps = fmr_attr->max_maps;
+	e_fmr->fmr_map_cnt = 0;
+	return &e_fmr->ib.ib_fmr;
+
+alloc_fmr_exit1:
+	ehca_mr_delete(e_fmr);
+alloc_fmr_exit0:
+	if (IS_ERR(ib_fmr))
+		ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
+			 "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
+			 mr_access_flags, fmr_attr);
+	return ib_fmr;
+} /* end ehca_alloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+		      u64 *page_list,
+		      int list_len,
+		      u64 iova)
+{
+	int ret;
+	struct ehca_shca *shca =
+		container_of(fmr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+	struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 tmp_lkey, tmp_rkey;
+
+	/* map list_len pages of page_list into fmr at iova; 0 on success */
+	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+			 e_fmr, e_fmr->flags);
+		ret = -EINVAL;
+		goto map_phys_fmr_exit0;
+	}
+	ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
+	if (ret)
+		goto map_phys_fmr_exit0;
+	if (iova % e_fmr->fmr_page_size) {
+		/* only whole-numbered pages */
+		ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
+			 iova, e_fmr->fmr_page_size);
+		ret = -EINVAL;
+		goto map_phys_fmr_exit0;
+	}
+	if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
+		/* HCAD does not limit the maps, however trace this anyway */
+		ehca_info(fmr->device, "map limit exceeded, fmr=%p "
+			  "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
+			  fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
+	}
+
+	pginfo.type      = EHCA_MR_PGI_FMR;
+	pginfo.num_pages = list_len;
+	pginfo.num_4k    = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
+	pginfo.page_list = page_list;
+	pginfo.next_4k   = ((iova & (e_fmr->fmr_page_size-1)) /
+			    EHCA_PAGESIZE);
+	/* length in 64 bit: list_len * page size may exceed 32 bit */
+	ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova,
+			    (u64)list_len * e_fmr->fmr_page_size,
+			    e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+	if (ret)
+		goto map_phys_fmr_exit0;
+
+	/* successful reregistration */
+	e_fmr->fmr_map_cnt++;
+	e_fmr->ib.ib_fmr.lkey = tmp_lkey;
+	e_fmr->ib.ib_fmr.rkey = tmp_rkey;
+	return 0;
+
+map_phys_fmr_exit0:
+	if (ret)
+		ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
+			 "iova=%lx", ret, fmr, page_list, list_len, iova);
+	return ret;
+} /* end ehca_map_phys_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_fmr(struct list_head *fmr_list)
+{
+	int ret = 0;
+	struct ib_fmr *ib_fmr;
+	struct ehca_shca *shca = NULL;
+	struct ehca_shca *prev_shca;
+	struct ehca_mr *e_fmr;
+	u32 num_fmr = 0;
+	u32 unmap_fmr_cnt = 0;
+	/* validate every FMR in fmr_list first, then unmap them one by one */
+	/* check all FMR belong to same SHCA, and check internal flag */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		prev_shca = shca;
+		if (!ib_fmr) {
+			ehca_gen_err("bad fmr=%p in list", ib_fmr);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		shca = container_of(ib_fmr->device, struct ehca_shca,
+				    ib_device);
+		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+		if ((shca != prev_shca) && prev_shca) {
+			ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
+				 "prev_shca=%p e_fmr=%p",
+				 shca, prev_shca, e_fmr);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+			ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
+				 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		num_fmr++;
+	}
+
+	/* loop over all FMRs to unmap */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		unmap_fmr_cnt++;
+		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+		shca = container_of(ib_fmr->device, struct ehca_shca,
+				    ib_device);
+		ret = ehca_unmap_one_fmr(shca, e_fmr);
+		if (ret) {
+			/* unmap failed, stop unmapping of rest of FMRs */
+			ehca_err(&shca->ib_device, "unmap of one FMR failed, "
+				 "stop rest, e_fmr=%p num_fmr=%x "
+				 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
+				 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
+			goto unmap_fmr_exit0;
+		}
+	}
+
+unmap_fmr_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
+			     ret, fmr_list, num_fmr, unmap_fmr_cnt);
+	return ret;
+} /* end ehca_unmap_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr)
+{
+	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+	struct ehca_shca *shca =
+		container_of(fmr->device, struct ehca_shca, ib_device);
+	u64 h_ret;
+	int ret;
+
+	/* only MRs flagged EHCA_MR_FLAG_FMR are accepted here */
+	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+			 e_fmr, e_fmr->flags);
+		ret = -EINVAL;
+		goto free_fmr_exit0;
+	}
+
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+	if (h_ret == H_SUCCESS) {
+		/* successful deregistration */
+		ehca_mr_delete(e_fmr);
+		return 0;
+	}
+	ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
+		 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
+		 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+		 e_fmr->ipz_mr_handle.handle, fmr->lkey);
+	ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+
+free_fmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr);
+	return ret;
+} /* end ehca_dealloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr(struct ehca_shca *shca,
+		struct ehca_mr *e_mr,
+		u64 *iova_start,
+		u64 size,
+		int acl,
+		struct ehca_pd *e_pd,
+		struct ehca_mr_pginfo *pginfo,
+		u32 *lkey, /*OUT*/
+		u32 *rkey) /*OUT*/
+{
+	int ret;
+	u64 h_ret;
+	u32 hipz_acl;
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+	/* allocate MR resource via hipz and register its pages from pginfo */
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+	if (ehca_use_hp_mr == 1)
+	        hipz_acl |= 0x00000001;
+
+	h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
+					 (u64)iova_start, size, hipz_acl,
+					 e_pd->fw_pd, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
+			 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
+		ret = ehca_mrmw_map_hrc_alloc(h_ret);
+		goto ehca_reg_mr_exit0;
+	}
+
+	e_mr->ipz_mr_handle = hipzout.handle;
+
+	ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+	if (ret)
+		goto ehca_reg_mr_exit1;
+
+	/* successful registration */
+	e_mr->num_pages = pginfo->num_pages;
+	e_mr->num_4k    = pginfo->num_4k;
+	e_mr->start     = iova_start;
+	e_mr->size      = size;
+	e_mr->acl       = acl;
+	*lkey = hipzout.lkey;
+	*rkey = hipzout.rkey;
+	return 0;
+	/* exit1: pages failed, free the MR resource again; exit0: only log */
+ehca_reg_mr_exit1:
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
+			 "pginfo=%p num_pages=%lx num_4k=%lx ret=%x",
+			 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
+			 hipzout.lkey, pginfo, pginfo->num_pages,
+			 pginfo->num_4k, ret);
+		ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
+			 "not recoverable");
+	}
+ehca_reg_mr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+			 "num_pages=%lx num_4k=%lx",
+			 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
+			 pginfo->num_pages, pginfo->num_4k);
+	return ret;
+} /* end ehca_reg_mr() */
+
+/*----------------------------------------------------------------------*/
+/* register the pages described by pginfo for e_mr, max 512 per hCall */
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+		       struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo)
+{
+	int ret = 0;
+	u64 h_ret;
+	u32 rnum;	/* number of pages passed with the current hCall */
+	u64 rpage;	/* address passed to hipz_h_register_rpage_mr() */
+	u32 i;		/* index of the current shot */
+	u64 *kpage;	/* page address list filled by ehca_set_pagebuf() */
+
+	kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!kpage) {
+		ehca_err(&shca->ib_device, "kpage alloc failed");
+		ret = -ENOMEM;
+		goto ehca_reg_mr_rpages_exit0;
+	}
+
+	/* max 512 pages per shot, i.e. ceil(num_4k / 512) shots in total */
+	for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) {
+
+		if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+			rnum = pginfo->num_4k % 512; /* last shot */
+			if (rnum == 0)
+				rnum = 512;      /* last shot is full */
+		} else
+			rnum = 512;
+
+		if (rnum > 1) { /* several pages: hand over the list in kpage */
+			ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage);
+			if (ret) {
+				ehca_err(&shca->ib_device, "ehca_set_pagebuf "
+					 "bad rc, ret=%x rnum=%x kpage=%p",
+					 ret, rnum, kpage);
+				ret = -EFAULT; /* helper's rc is only logged */
+				goto ehca_reg_mr_rpages_exit1;
+			}
+			rpage = virt_to_abs(kpage);
+			if (!rpage) {
+				ehca_err(&shca->ib_device, "kpage=%p i=%x",
+					 kpage, i);
+				ret = -EFAULT;
+				goto ehca_reg_mr_rpages_exit1;
+			}
+		} else {  /* rnum==1: the page address itself goes into rpage */
+			ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage);
+			if (ret) {
+				ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
+					 "bad rc, ret=%x i=%x", ret, i);
+				ret = -EFAULT; /* helper's rc is only logged */
+				goto ehca_reg_mr_rpages_exit1;
+			}
+		}
+
+		h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
+						 0, /* pagesize 4k */
+						 0, rpage, rnum);
+
+		if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+			/*
+			 * check for 'registration complete'==H_SUCCESS
+			 * and for 'page registered'==H_PAGE_REGISTERED
+			 */
+			if (h_ret != H_SUCCESS) {
+				ehca_err(&shca->ib_device, "last "
+					 "hipz_reg_rpage_mr failed, h_ret=%lx "
+					 "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
+					 " lkey=%x", h_ret, e_mr, i,
+					 shca->ipz_hca_handle.handle,
+					 e_mr->ipz_mr_handle.handle,
+					 e_mr->ib.ib_mr.lkey);
+				ret = ehca_mrmw_map_hrc_rrpg_last(h_ret);
+				break; /* leaves the loop, kpage is freed below */
+			} else
+				ret = 0;
+		} else if (h_ret != H_PAGE_REGISTERED) {
+			ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
+				 "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
+				 "mr_hndl=%lx", h_ret, e_mr, i,
+				 e_mr->ib.ib_mr.lkey,
+				 shca->ipz_hca_handle.handle,
+				 e_mr->ipz_mr_handle.handle);
+			ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret);
+			break; /* leaves the loop, kpage is freed below */
+		} else
+			ret = 0;
+	} /* end for(i) */
+
+
+ehca_reg_mr_rpages_exit1:
+	kfree(kpage);
+ehca_reg_mr_rpages_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
+			 "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo,
+			 pginfo->num_pages, pginfo->num_4k);
+	return ret;
+} /* end ehca_reg_mr_rpages() */
+
+/*----------------------------------------------------------------------*/
+/* reregister e_mr with a single hCall, returns -EAGAIN if that hCall fails */
+inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
+				struct ehca_mr *e_mr,
+				u64 *iova_start,
+				u64 size,
+				u32 acl,
+				struct ehca_pd *e_pd,
+				struct ehca_mr_pginfo *pginfo,
+				u32 *lkey, /*OUT*/
+				u32 *rkey) /*OUT*/
+{
+	int ret;
+	u64 h_ret;
+	u32 hipz_acl;
+	u64 *kpage;	/* page address list filled by ehca_set_pagebuf() */
+	u64 rpage;	/* virt_to_abs(kpage), passed to the hCall */
+	struct ehca_mr_pginfo pginfo_save; /* copy of *pginfo for -EAGAIN */
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!kpage) {
+		ehca_err(&shca->ib_device, "kpage alloc failed");
+		ret = -ENOMEM;
+		goto ehca_rereg_mr_rereg1_exit0;
+	}
+
+	pginfo_save = *pginfo; /* put back below if the hCall fails */
+	ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage);
+	if (ret) {
+		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
+			 "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p",
+			 e_mr, pginfo, pginfo->type, pginfo->num_pages,
+			 pginfo->num_4k,kpage);
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	rpage = virt_to_abs(kpage);
+	if (!rpage) {
+		ehca_err(&shca->ib_device, "kpage=%p", kpage);
+		ret = -EFAULT;
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
+				      (u64)iova_start, size, hipz_acl,
+				      e_pd->fw_pd, rpage, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		/*
+		 * reregistration unsuccessful, try it again with the 3 hCalls,
+		 * e.g. this is required in case H_MR_CONDITION
+		 * (MW bound or MR is shared)
+		 */
+		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
+			  "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
+		*pginfo = pginfo_save;
+		ret = -EAGAIN;
+	} else if ((u64*)hipzout.vaddr != iova_start) {
+		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
+			 "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
+			 "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
+			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
+			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
+		ret = -EFAULT;
+	} else {
+		/*
+		 * successful reregistration
+		 * note: start and start_out are identical for eServer HCAs
+		 */
+		e_mr->num_pages = pginfo->num_pages;
+		e_mr->num_4k    = pginfo->num_4k;
+		e_mr->start     = iova_start;
+		e_mr->size      = size;
+		e_mr->acl       = acl;
+		*lkey = hipzout.lkey;
+		*rkey = hipzout.rkey;
+	}
+
+ehca_rereg_mr_rereg1_exit1:
+	kfree(kpage);
+ehca_rereg_mr_rereg1_exit0:
+	if ( ret && (ret != -EAGAIN) ) /* -EAGAIN was already logged as warning */
+		ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
+			 "pginfo=%p num_pages=%lx num_4k=%lx",
+			 ret, *lkey, *rkey, pginfo, pginfo->num_pages,
+			 pginfo->num_4k);
+	return ret;
+} /* end ehca_rereg_mr_rereg1() */
+
+/*----------------------------------------------------------------------*/
+/* reregister e_mr: by one hCall if possible, else free it and register anew */
+int ehca_rereg_mr(struct ehca_shca *shca,
+		  struct ehca_mr *e_mr,
+		  u64 *iova_start,
+		  u64 size,
+		  int acl,
+		  struct ehca_pd *e_pd,
+		  struct ehca_mr_pginfo *pginfo,
+		  u32 *lkey,
+		  u32 *rkey)
+{
+	int ret = 0;
+	u64 h_ret;
+	int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
+	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
+
+	/* first determine reregistration hCall(s) */
+	if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) ||
+	    (pginfo->num_4k > e_mr->num_4k)) {
+		ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx "
+			 "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k);
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+	}
+
+	if (e_mr->flags & EHCA_MR_FLAG_MAXMR) {	/* check for max-MR */
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+		e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; /* flag is dropped here */
+		ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
+			 e_mr);
+	}
+
+	if (rereg_1_hcall) {
+		ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
+					   acl, e_pd, pginfo, lkey, rkey);
+		if (ret) {
+			if (ret == -EAGAIN) /* fall back to the 3 hCalls */
+				rereg_3_hcall = 1;
+			else
+				goto ehca_rereg_mr_exit0;
+		}
+	}
+
+	if (rereg_3_hcall) {
+		struct ehca_mr save_mr;
+
+		/* first deregister old MR */
+		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+				 "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+				 "mr->lkey=%x",
+				 h_ret, e_mr, shca->ipz_hca_handle.handle,
+				 e_mr->ipz_mr_handle.handle,
+				 e_mr->ib.ib_mr.lkey);
+			ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+			goto ehca_rereg_mr_exit0;
+		}
+		/* clean ehca_mr_t, without changing struct ib_mr and lock */
+		save_mr = *e_mr; /* kept for the restore below */
+		ehca_mr_deletenew(e_mr);
+
+		/* set some MR values */
+		e_mr->flags = save_mr.flags;
+		e_mr->fmr_page_size = save_mr.fmr_page_size;
+		e_mr->fmr_max_pages = save_mr.fmr_max_pages;
+		e_mr->fmr_max_maps = save_mr.fmr_max_maps;
+		e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
+
+		ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
+				      e_pd, pginfo, lkey, rkey);
+		if (ret) { /* restore e_mr from flags up to the struct's end */
+			u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
+			memcpy(&e_mr->flags, &(save_mr.flags),
+			       sizeof(struct ehca_mr) - offset);
+			goto ehca_rereg_mr_exit0;
+		}
+	}
+
+ehca_rereg_mr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+			 "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+			 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
+			 acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey,
+			 rereg_1_hcall, rereg_3_hcall);
+	return ret;
+} /* end ehca_rereg_mr() */
+
+/*----------------------------------------------------------------------*/
+/* unmap an FMR: reregister it with length 0, or free and register it anew */
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+		       struct ehca_mr *e_fmr)
+{
+	int ret = 0;
+	u64 h_ret;
+	int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
+	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
+	struct ehca_pd *e_pd =
+		container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
+	struct ehca_mr save_fmr; /* state of e_fmr before it gets cleaned */
+	u32 tmp_lkey = 0, tmp_rkey = 0; /* preset, they are logged on error */
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	/* first check if reregistration hCall can be used for unmap */
+	if (e_fmr->fmr_max_pages > 512) {
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+	}
+
+	if (rereg_1_hcall) {
+		/*
+		 * note: after using rereg hcall with len=0,
+		 * rereg hcall must be used again for registering pages
+		 */
+		h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
+					      0, 0, e_pd->fw_pd, 0, &hipzout);
+		if (h_ret != H_SUCCESS) {
+			/*
+			 * should not happen, because length checked above,
+			 * FMRs are not shared and no MW bound to FMRs
+			 */
+			ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
+				 "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
+				 "mr_hndl=%lx lkey=%x lkey_out=%x",
+				 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+				 e_fmr->ipz_mr_handle.handle,
+				 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
+			rereg_3_hcall = 1;
+		} else {
+			/* successful reregistration */
+			e_fmr->start = NULL;
+			e_fmr->size = 0;
+			tmp_lkey = hipzout.lkey;
+			tmp_rkey = hipzout.rkey;
+		}
+	}
+
+	if (rereg_3_hcall) {
+		/* first free old FMR */
+		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+				 "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+				 "lkey=%x",
+				 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+				 e_fmr->ipz_mr_handle.handle,
+				 e_fmr->ib.ib_fmr.lkey);
+			ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+			goto ehca_unmap_one_fmr_exit0;
+		}
+		/* clean ehca_mr_t, without changing lock */
+		save_fmr = *e_fmr;
+		ehca_mr_deletenew(e_fmr);
+
+		/* set some MR values */
+		e_fmr->flags = save_fmr.flags;
+		e_fmr->fmr_page_size = save_fmr.fmr_page_size;
+		e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
+		e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
+		e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
+		e_fmr->acl = save_fmr.acl;
+
+		/* register anew without any pages, sized for fmr_max_pages */
+		pginfo.type      = EHCA_MR_PGI_FMR;
+		pginfo.num_pages = 0;
+		pginfo.num_4k    = 0;
+		ret = ehca_reg_mr(shca, e_fmr, NULL,
+				  (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
+				  e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
+				  &tmp_rkey);
+		if (ret) {
+			/* roll back: restore flags up to struct end from save_fmr */
+			u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
+			memcpy(&e_fmr->flags, &(save_fmr.flags),
+			       sizeof(struct ehca_mr) - offset);
+			goto ehca_unmap_one_fmr_exit0;
+		}
+	}
+
+ehca_unmap_one_fmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
+			 "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x",
+			 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages,
+			 rereg_1_hcall, rereg_3_hcall);
+	return ret;
+} /* end ehca_unmap_one_fmr() */
+
+/*----------------------------------------------------------------------*/
+/* register e_newmr as a shared MR of e_origmr via hipz_h_register_smr() */
+int ehca_reg_smr(struct ehca_shca *shca,
+		 struct ehca_mr *e_origmr,
+		 struct ehca_mr *e_newmr,
+		 u64 *iova_start,
+		 int acl,
+		 struct ehca_pd *e_pd,
+		 u32 *lkey, /*OUT*/
+		 u32 *rkey) /*OUT*/
+{
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+	u32 hipz_acl;
+	u64 h_ret;
+	int ret;
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
+				    &hipzout);
+	if (h_ret == H_SUCCESS) {
+		/* sizes come from the original MR, handle and keys from hipzout */
+		e_newmr->ipz_mr_handle = hipzout.handle;
+		e_newmr->num_pages = e_origmr->num_pages;
+		e_newmr->num_4k = e_origmr->num_4k;
+		e_newmr->size = e_origmr->size;
+		e_newmr->start = iova_start;
+		e_newmr->acl = acl;
+
+		*lkey = hipzout.lkey;
+		*rkey = hipzout.rkey;
+		return 0;
+	}
+
+	/* the hCall failed: log it, then map and log its return code */
+	ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+		 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
+		 "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+		 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
+		 shca->ipz_hca_handle.handle,
+		 e_origmr->ipz_mr_handle.handle,
+		 e_origmr->ib.ib_mr.lkey);
+	ret = ehca_mrmw_map_hrc_reg_smr(h_ret);
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
+			 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
+			 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
+	return ret;
+} /* end ehca_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/* register internal max-MR to internal SHCA, returned through *e_maxmr */
+int ehca_reg_internal_maxmr(
+	struct ehca_shca *shca,
+	struct ehca_pd *e_pd,
+	struct ehca_mr **e_maxmr)  /*OUT*/
+{
+	int ret;
+	struct ehca_mr *e_mr;
+	u64 *iova_start;
+	u64 size_maxmr;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	struct ib_phys_buf ib_pbuf; /* the single buffer describing the MR */
+	u32 num_pages_mr; /* PAGE_SIZE pages */
+	u32 num_pages_4k; /* 4k portion "pages" */
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(&shca->ib_device, "out of memory");
+		ret = -ENOMEM;
+		goto ehca_reg_internal_maxmr_exit0;
+	}
+	e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+
+	/* register internal max-MR on HCA */
+	size_maxmr = (u64)high_memory - PAGE_OFFSET;
+	iova_start = (u64*)KERNELBASE;
+	ib_pbuf.addr = 0; /* one buffer, starting at address 0 */
+	ib_pbuf.size = size_maxmr;
+	num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	pginfo.type           = EHCA_MR_PGI_PHYS;
+	pginfo.num_pages      = num_pages_mr;
+	pginfo.num_4k         = num_pages_4k;
+	pginfo.num_phys_buf   = 1;
+	pginfo.phys_buf_array = &ib_pbuf;
+
+	ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
+			  &pginfo, &e_mr->ib.ib_mr.lkey,
+			  &e_mr->ib.ib_mr.rkey);
+	if (ret) {
+		ehca_err(&shca->ib_device, "reg of internal max MR failed, "
+			 "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x "
+			 "num_pages_4k=%x", e_mr, iova_start, size_maxmr,
+			 num_pages_mr, num_pages_4k);
+		goto ehca_reg_internal_maxmr_exit1;
+	}
+
+	/* successful registration of all pages */
+	e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
+	e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
+	e_mr->ib.ib_mr.uobject = NULL;
+	atomic_inc(&(e_pd->ib_pd.usecnt)); /* the max-MR is a user of the PD */
+	atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
+	*e_maxmr = e_mr;
+	return 0;
+
+ehca_reg_internal_maxmr_exit1:
+	ehca_mr_delete(e_mr); /* *e_maxmr is left untouched on failure */
+ehca_reg_internal_maxmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
+			 ret, shca, e_pd, e_maxmr);
+	return ret;
+} /* end ehca_reg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+/* register e_newmr as a shared MR on top of the internal max-MR of shca */
+int ehca_reg_maxmr(struct ehca_shca *shca,
+		   struct ehca_mr *e_newmr,
+		   u64 *iova_start,
+		   int acl,
+		   struct ehca_pd *e_pd,
+		   u32 *lkey,
+		   u32 *rkey)
+{
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+	struct ehca_mr *e_origmr = shca->maxmr;
+	u32 hipz_acl;
+	u64 h_ret;
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
+				    &hipzout);
+	if (h_ret == H_SUCCESS) {
+		/* sizes come from the max-MR, handle and keys from hipzout */
+		e_newmr->ipz_mr_handle = hipzout.handle;
+		e_newmr->num_pages = e_origmr->num_pages;
+		e_newmr->num_4k = e_origmr->num_4k;
+		e_newmr->size = e_origmr->size;
+		e_newmr->start = iova_start;
+		e_newmr->acl = acl;
+		*lkey = hipzout.lkey;
+		*rkey = hipzout.rkey;
+		return 0;
+	}
+	ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+		 "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+		 h_ret, e_origmr, shca->ipz_hca_handle.handle,
+		 e_origmr->ipz_mr_handle.handle,
+		 e_origmr->ib.ib_mr.lkey);
+	return ehca_mrmw_map_hrc_reg_smr(h_ret);
+} /* end ehca_reg_maxmr() */
+
+/*----------------------------------------------------------------------*/
+/* deregister the internal max-MR of shca; on success shca->maxmr is NULL */
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
+{
+	struct ehca_mr *e_maxmr = shca->maxmr;
+	struct ib_pd *ib_pd;
+	int ret;
+
+	if (e_maxmr) {
+		ib_pd = e_maxmr->ib.ib_mr.pd;
+		/* remove internal max-MR indication from SHCA */
+		shca->maxmr = NULL;
+
+		ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
+		if (!ret) {
+			/* the max-MR no longer counts as user of its PD */
+			atomic_dec(&ib_pd->usecnt);
+		} else {
+			ehca_err(&shca->ib_device, "dereg internal max-MR "
+				 "failed, ret=%x e_maxmr=%p shca=%p lkey=%x",
+				 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
+			/* deregistration failed, put the indication back */
+			shca->maxmr = e_maxmr;
+		}
+	} else {
+		/* nothing to deregister */
+		ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
+			 ret, shca, shca->maxmr);
+	return ret;
+} /* end ehca_dereg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check physical buffer array of MR verbs for validity and return the
+ * summed buffer sizes in *size; returns 0 if valid, -EINVAL otherwise
+ */
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+				  int num_phys_buf,
+				  u64 *iova_start,
+				  u64 *size)
+{
+	struct ib_phys_buf *pbuf = phys_buf_array;
+	u64 size_count = 0;
+	u32 i;
+
+	if (num_phys_buf <= 0) { /* < 0 would wrap in the u32 compares below */
+		ehca_gen_err("bad phys buf array len, num_phys_buf=%d",
+			     num_phys_buf);
+		return -EINVAL;
+	}
+	/* check first buffer */
+	if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
+		ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
+			     "pbuf->addr=%lx pbuf->size=%lx",
+			     iova_start, pbuf->addr, pbuf->size);
+		return -EINVAL;
+	}
+	if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && (num_phys_buf > 1)) {
+		ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
+			     "pbuf->size=%lx", pbuf->addr, pbuf->size);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < num_phys_buf; i++) {
+		if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
+			ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
+				     "pbuf->size=%lx",
+				     i, pbuf->addr, pbuf->size);
+			return -EINVAL;
+		}
+		if (((i > 0) &&	/* not 1st */
+		     (i < (num_phys_buf - 1)) &&	/* not last */
+		     (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
+			ehca_gen_err("bad size, i=%x pbuf->size=%lx",
+				     i, pbuf->size);
+			return -EINVAL;
+		}
+		size_count += pbuf->size;
+		pbuf++;
+	}
+
+	*size = size_count;
+	return 0;
+} /* end ehca_mr_chk_buf_and_calc_size() */
+
+/*----------------------------------------------------------------------*/
+
+/* check page list of map FMR verb: valid length and aligned pages */
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+			     u64 *page_list,
+			     int list_len)
+{
+	u32 idx;
+
+	/* the list must not be empty nor longer than fmr_max_pages */
+	if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
+		ehca_gen_err("bad list_len, list_len=%x "
+			     "e_fmr->fmr_max_pages=%x fmr=%p",
+			     list_len, e_fmr->fmr_max_pages, e_fmr);
+		return -EINVAL;
+	}
+
+	/* each page must be aligned */
+	for (idx = 0; idx < list_len; idx++) {
+		u64 *entry = &page_list[idx];
+
+		if ((*entry % e_fmr->fmr_page_size) == 0)
+			continue;
+		ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
+			     "fmr_page_size=%x", idx, *entry, entry, e_fmr,
+			     e_fmr->fmr_page_size);
+		return -EINVAL;
+	}
+
+	return 0;
+} /* end ehca_fmr_check_page_list() */
+
+/*----------------------------------------------------------------------*/
+
+/* setup page buffer from page info: up to 'number' addresses into kpage */
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+		     struct ehca_mr_pginfo *pginfo,
+		     u32 number,
+		     u64 *kpage)
+{
+	int ret = 0;
+	struct ib_umem_chunk *prev_chunk;
+	struct ib_umem_chunk *chunk;
+	struct ib_phys_buf *pbuf;
+	u64 *fmrlist;
+	u64 num4k, pgaddr, offs4k;
+	u32 i = 0;	/* PHYS, FMR: entries written; USER: page_list index */
+	u32 j = 0;	/* USER: entries written */
+
+	if (pginfo->type == EHCA_MR_PGI_PHYS) {
+		/* loop over desired phys_buf_array entries */
+		while (i < number) {
+			pbuf   = pginfo->phys_buf_array + pginfo->next_buf;
+			num4k  = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size +
+				  EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+			offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+			while (pginfo->next_4k < offs4k + num4k) {
+				/* sanity check */
+				if ((pginfo->page_cnt >= pginfo->num_pages) ||
+				    (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+					ehca_gen_err("page_cnt >= num_pages, "
+						     "page_cnt=%lx "
+						     "num_pages=%lx "
+						     "page_4k_cnt=%lx "
+						     "num_4k=%lx i=%x",
+						     pginfo->page_cnt,
+						     pginfo->num_pages,
+						     pginfo->page_4k_cnt,
+						     pginfo->num_4k, i);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				*kpage = phys_to_abs(
+					(pbuf->addr & EHCA_PAGEMASK)
+					+ (pginfo->next_4k * EHCA_PAGESIZE));
+				if ( !(*kpage) && pbuf->addr ) {
+					ehca_gen_err("pbuf->addr=%lx "
+						     "pbuf->size=%lx "
+						     "next_4k=%lx", pbuf->addr,
+						     pbuf->size,
+						     pginfo->next_4k);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				(pginfo->page_4k_cnt)++;
+				(pginfo->next_4k)++;
+				if (pginfo->next_4k %
+				    (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+					(pginfo->page_cnt)++;
+				kpage++;
+				i++;
+				if (i >= number) break;
+			}
+			if (pginfo->next_4k >= offs4k + num4k) {
+				(pginfo->next_buf)++; /* this buffer is used up */
+				pginfo->next_4k = 0;
+			}
+		}
+	} else if (pginfo->type == EHCA_MR_PGI_USER) {
+		/* loop over desired chunk entries */
+		chunk      = pginfo->next_chunk;
+		prev_chunk = pginfo->next_chunk;
+		list_for_each_entry_continue(chunk,
+					     (&(pginfo->region->chunk_list)),
+					     list) {
+			for (i = pginfo->next_nmap; i < chunk->nmap; ) {
+				pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+					   << PAGE_SHIFT );
+				*kpage = phys_to_abs(pgaddr +
+						     (pginfo->next_4k *
+						      EHCA_PAGESIZE));
+				if ( !(*kpage) ) {
+					ehca_gen_err("pgaddr=%lx "
+						     "chunk->page_list[i]=%lx "
+						     "i=%x next_4k=%lx mr=%p",
+						     pgaddr,
+						     (u64)sg_dma_address(
+							     &chunk->
+							     page_list[i]),
+						     i, pginfo->next_4k, e_mr);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				(pginfo->page_4k_cnt)++;
+				(pginfo->next_4k)++;
+				kpage++;
+				if (pginfo->next_4k %
+				    (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+					(pginfo->page_cnt)++;
+					(pginfo->next_nmap)++;
+					pginfo->next_4k = 0;
+					i++; /* go on with the next page_list entry */
+				}
+				j++;
+				if (j >= number) break;
+			}
+			if ((pginfo->next_nmap >= chunk->nmap) &&
+			    (j >= number)) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+				break;
+			} else if (pginfo->next_nmap >= chunk->nmap) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+			} else if (j >= number)
+				break;
+			else
+				prev_chunk = chunk;
+		}
+		pginfo->next_chunk = /* read back as start chunk on next call */
+			list_prepare_entry(prev_chunk,
+					   (&(pginfo->region->chunk_list)),
+					   list);
+	} else if (pginfo->type == EHCA_MR_PGI_FMR) {
+		/* loop over desired page_list entries */
+		fmrlist = pginfo->page_list + pginfo->next_listelem;
+		for (i = 0; i < number; i++) {
+			*kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+					     pginfo->next_4k * EHCA_PAGESIZE);
+			if ( !(*kpage) ) {
+				ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+					     "next_listelem=%lx next_4k=%lx",
+					     *fmrlist, fmrlist,
+					     pginfo->next_listelem,
+					     pginfo->next_4k);
+				ret = -EFAULT;
+				goto ehca_set_pagebuf_exit0;
+			}
+			(pginfo->page_4k_cnt)++;
+			(pginfo->next_4k)++;
+			kpage++;
+			if (pginfo->next_4k %
+			    (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+				(pginfo->page_cnt)++;
+				(pginfo->next_listelem)++;
+				fmrlist++; /* this FMR page is used up */
+				pginfo->next_4k = 0;
+			}
+		}
+	} else {
+		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+		ret = -EFAULT;
+		goto ehca_set_pagebuf_exit0;
+	}
+
+ehca_set_pagebuf_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+			     "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
+			     "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
+			     "next_listelem=%lx region=%p next_chunk=%p "
+			     "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
+			     pginfo->num_pages, pginfo->num_4k,
+			     pginfo->next_buf, pginfo->next_4k, number, kpage,
+			     pginfo->page_cnt, pginfo->page_4k_cnt, i,
+			     pginfo->next_listelem, pginfo->region,
+			     pginfo->next_chunk, pginfo->next_nmap);
+	return ret;
+} /* end ehca_set_pagebuf() */
+
+/*----------------------------------------------------------------------*/
+
+/* setup 1 page from page info: the next page address is stored in *rpage */
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo,
+		       u64 *rpage)
+{
+	int ret = 0;
+	struct ib_phys_buf *tmp_pbuf;
+	u64 *fmrlist;
+	struct ib_umem_chunk *chunk;
+	struct ib_umem_chunk *prev_chunk;
+	u64 pgaddr, num4k, offs4k;
+
+	if (pginfo->type == EHCA_MR_PGI_PHYS) {
+		/* sanity check */
+		if ((pginfo->page_cnt >= pginfo->num_pages) ||
+		    (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+			ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx "
+				     "num_pages=%lx page_4k_cnt=%lx num_4k=%lx",
+				     pginfo->page_cnt, pginfo->num_pages,
+				     pginfo->page_4k_cnt, pginfo->num_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf;
+		num4k  = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size +
+			  EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+		offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+		*rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) +
+				     (pginfo->next_4k * EHCA_PAGESIZE));
+		if ( !(*rpage) && tmp_pbuf->addr ) {
+			ehca_gen_err("tmp_pbuf->addr=%lx"
+				     " tmp_pbuf->size=%lx next_4k=%lx",
+				     tmp_pbuf->addr, tmp_pbuf->size,
+				     pginfo->next_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		(pginfo->page_4k_cnt)++;
+		(pginfo->next_4k)++;
+		if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+			(pginfo->page_cnt)++;
+		if (pginfo->next_4k >= offs4k + num4k) {
+			(pginfo->next_buf)++; /* this buffer is used up */
+			pginfo->next_4k = 0;
+		}
+	} else if (pginfo->type == EHCA_MR_PGI_USER) {
+		chunk      = pginfo->next_chunk;
+		prev_chunk = pginfo->next_chunk;
+		list_for_each_entry_continue(chunk,
+					     (&(pginfo->region->chunk_list)),
+					     list) {
+			pgaddr = ( page_to_pfn(chunk->page_list[
+						       pginfo->next_nmap].page)
+				   << PAGE_SHIFT);
+			*rpage = phys_to_abs(pgaddr +
+					     (pginfo->next_4k * EHCA_PAGESIZE));
+			if ( !(*rpage) ) {
+				ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
+					     " next_nmap=%lx next_4k=%lx mr=%p",
+					     pgaddr, (u64)sg_dma_address(
+						     &chunk->page_list[
+							     pginfo->
+							     next_nmap]),
+					     pginfo->next_nmap, pginfo->next_4k,
+					     e_mr);
+				ret = -EFAULT;
+				goto ehca_set_pagebuf_1_exit0;
+			}
+			(pginfo->page_4k_cnt)++;
+			(pginfo->next_4k)++;
+			if (pginfo->next_4k %
+			    (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+				(pginfo->page_cnt)++;
+				(pginfo->next_nmap)++;
+				pginfo->next_4k = 0;
+			}
+			if (pginfo->next_nmap >= chunk->nmap) {
+				pginfo->next_nmap = 0; /* this chunk is used up */
+				prev_chunk = chunk;
+			}
+			break; /* only one entry is set up per call */
+		}
+		pginfo->next_chunk = /* read back as start chunk on next call */
+			list_prepare_entry(prev_chunk,
+					   (&(pginfo->region->chunk_list)),
+					   list);
+	} else if (pginfo->type == EHCA_MR_PGI_FMR) {
+		fmrlist = pginfo->page_list + pginfo->next_listelem;
+		*rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+				     pginfo->next_4k * EHCA_PAGESIZE);
+		if ( !(*rpage) ) {
+			ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+				     "next_listelem=%lx next_4k=%lx",
+				     *fmrlist, fmrlist, pginfo->next_listelem,
+				     pginfo->next_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		(pginfo->page_4k_cnt)++;
+		(pginfo->next_4k)++;
+		if (pginfo->next_4k %
+		    (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+			(pginfo->page_cnt)++;
+			(pginfo->next_listelem)++; /* this FMR page is used up */
+			pginfo->next_4k = 0;
+		}
+	} else {
+		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+		ret = -EFAULT;
+		goto ehca_set_pagebuf_1_exit0;
+	}
+
+ehca_set_pagebuf_1_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+			     "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
+			     "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
+			     "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
+			     pginfo, pginfo->type, pginfo->num_pages,
+			     pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
+			     rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
+			     pginfo->next_listelem, pginfo->region,
+			     pginfo->next_chunk, pginfo->next_nmap);
+	return ret;
+} /* end ehca_set_pagebuf_1() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * tell whether an MR is a max-MR, i.e. uses whole memory:
+ * returns 1 for a max-MR, else 0
+ */
+int ehca_mr_is_maxmr(u64 size,
+		     u64 *iova_start)
+{
+	/* both the size and the start address have to fit */
+	if (size != ((u64)high_memory - PAGE_OFFSET))
+		return 0;
+	if (iova_start != (void*)KERNELBASE)
+		return 0;
+	ehca_gen_dbg("this is a max-MR");
+	return 1;
+} /* end ehca_mr_is_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * translate IB access flags into HIPZ access control; used for MR and MW
+ */
+void ehca_mrmw_map_acl(int ib_acl,
+		       u32 *hipz_acl)
+{
+	u32 acl = 0;
+
+	acl |= (ib_acl & IB_ACCESS_REMOTE_READ) ? HIPZ_ACCESSCTRL_R_READ : 0;
+	acl |= (ib_acl & IB_ACCESS_REMOTE_WRITE) ? HIPZ_ACCESSCTRL_R_WRITE : 0;
+	acl |= (ib_acl & IB_ACCESS_REMOTE_ATOMIC) ?
+		HIPZ_ACCESSCTRL_R_ATOMIC : 0;
+	acl |= (ib_acl & IB_ACCESS_LOCAL_WRITE) ? HIPZ_ACCESSCTRL_L_WRITE : 0;
+	acl |= (ib_acl & IB_ACCESS_MW_BIND) ? HIPZ_ACCESSCTRL_MW_BIND : 0;
+	*hipz_acl = acl;
+} /* end ehca_mrmw_map_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/* set page size in hipz access control for MR/MW: nothing is changed */
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/
+{
+	/* HCA supports only 4k, so *hipz_acl is left as it is */
+} /* end ehca_mrmw_set_pgsize_hipz_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * translate HIPZ access control back into IB access flags.
+ * This routine is used for MR and MW.
+ */
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+			       int *ib_acl) /*OUT*/
+{
+	*ib_acl = 0;
+	*ib_acl |= (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) ?
+		IB_ACCESS_REMOTE_READ : 0;
+	*ib_acl |= (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) ?
+		IB_ACCESS_REMOTE_WRITE : 0;
+	*ib_acl |= (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) ?
+		IB_ACCESS_REMOTE_ATOMIC : 0;
+	*ib_acl |= (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) ?
+		IB_ACCESS_LOCAL_WRITE : 0;
+	*ib_acl |= (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) ?
+		IB_ACCESS_MW_BIND : 0;
+} /* end ehca_mrmw_reverse_map_acl() */
+
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR/MW allocations:
+ * 0 on H_SUCCESS, -EBUSY on H_BUSY, -EINVAL for any other code.
+ * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
+ */
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)	/* successful completion */
+		return 0;
+	if (hipz_rc == H_BUSY)		/* long busy */
+		return -EBUSY;
+	/*
+	 * all other codes end up as -EINVAL, among them:
+	 *   H_ADAPTER_PARM          invalid adapter handle
+	 *   H_RT_PARM               invalid resource type
+	 *   H_NOT_ENOUGH_RESOURCES  insufficient resources
+	 *   H_MLENGTH_PARM          invalid memory length
+	 *   H_MEM_ACCESS_PARM       invalid access controls
+	 *   H_CONSTRAINED           resource constraint
+	 */
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_alloc() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR register rpage
+ * Used for hipz_h_register_rpage_mr at registering last page.
+ * Returns 0 on H_SUCCESS, -EBUSY on H_BUSY and -EINVAL for any
+ * other code.
+ */
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)	/* registration complete */
+		return 0;
+	if (hipz_rc == H_BUSY)		/* long busy */
+		return -EBUSY;
+
+	/*
+	 * all other codes end up as -EINVAL, among them:
+	 *   H_PAGE_REGISTERED  page registered
+	 *   H_ADAPTER_PARM     invalid adapter handle
+	 *   H_RH_PARM          invalid resource handle
+	 *   H_PARAMETER        invalid logical address,
+	 *                      or count zero or greater 512
+	 *   H_TABLE_FULL       page table full
+	 *   H_HARDWARE         HCA not operational
+	 * (H_QT_PARM, invalid queue type, is not among the listed codes)
+	 */
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_rrpg_last() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Map HIPZ rc to IB retcodes for hipz_h_register_rpage_mr when registering
+ * a page that is not the last: H_PAGE_REGISTERED is success, H_SUCCESS not.
+ */
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_PAGE_REGISTERED:	/* page registered */
+		return 0;
+	case H_SUCCESS:         /* registration complete */
+	case H_ADAPTER_PARM:    /* invalid adapter handle */
+	case H_RH_PARM:         /* invalid resource handle */
+/*	case H_QT_PARM:            invalid queue type */
+	case H_PARAMETER:       /*
+				 * invalid logical address,
+				 * or count zero or greater than 512
+				 */
+	case H_TABLE_FULL:      /* page table full */
+	case H_HARDWARE:        /* HCA not operational */
+		return -EINVAL;
+	case H_BUSY:            /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
+
+/*----------------------------------------------------------------------*/
+
+/* Map HIPZ rc of hipz_mr_query to IB retcodes: 0, -EBUSY or -EINVAL. */
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:	             /* successful completion */
+		return 0;
+	case H_ADAPTER_PARM:         /* invalid adapter handle */
+	case H_RH_PARM:              /* invalid resource handle */
+		return -EINVAL;
+	case H_BUSY:                 /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_query_mr() */
+
+/*----------------------------------------------------------------------*/
+/*----------------------------------------------------------------------*/
+
+/*
+ * Map HIPZ rc to IB retcodes for freeing an MR (hipz_h_free_resource_mr):
+ * H_RESOURCE (in use) and H_BUSY give -EBUSY, any other error -EINVAL.
+ */
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:      /* resource freed */
+		return 0;
+	case H_ADAPTER_PARM: /* invalid adapter handle */
+	case H_RH_PARM:      /* invalid resource handle */
+	case H_R_STATE:      /* invalid resource state */
+	case H_HARDWARE:     /* HCA not operational */
+		return -EINVAL;
+	case H_RESOURCE:     /* Resource in use */
+	case H_BUSY:         /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_free_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Map HIPZ rc to IB retcodes for freeing an MW (hipz_h_free_resource_mw):
+ * H_RESOURCE (in use) and H_BUSY give -EBUSY, any other error -EINVAL.
+ */
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:	     /* resource freed */
+		return 0;
+	case H_ADAPTER_PARM: /* invalid adapter handle */
+	case H_RH_PARM:      /* invalid resource handle */
+	case H_R_STATE:      /* invalid resource state */
+	case H_HARDWARE:     /* HCA not operational */
+		return -EINVAL;
+	case H_RESOURCE:     /* Resource in use */
+	case H_BUSY:         /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_free_mw() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Map HIPZ rc to IB retcodes for SMR registrations (hipz_h_register_smr):
+ * 0 on H_SUCCESS, -EBUSY on H_BUSY, -EINVAL for everything else.
+ */
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
+{
+	switch (hipz_rc) {
+	case H_SUCCESS:	             /* successful completion */
+		return 0;
+	case H_ADAPTER_PARM:         /* invalid adapter handle */
+	case H_RH_PARM:              /* invalid resource handle */
+	case H_MEM_PARM:             /* invalid MR virtual address */
+	case H_MEM_ACCESS_PARM:      /* invalid access controls */
+	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+		return -EINVAL;
+	case H_BUSY:                 /* long busy */
+		return -EBUSY;
+	default:
+		return -EINVAL;
+	}
+} /* end ehca_mrmw_map_hrc_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * MR destructor and constructor
+ * used in Reregister MR verb, resets the fields of struct ehca_mr assigned
+ * below to 0/NULL; struct ib_mr and the spinlock are not touched here
+ */
+void ehca_mr_deletenew(struct ehca_mr *mr)
+{
+	mr->flags         = 0;
+	mr->num_pages     = 0;
+	mr->num_4k        = 0;
+	mr->acl           = 0;
+	mr->start         = NULL;
+	mr->fmr_page_size = 0;
+	mr->fmr_max_pages = 0;
+	mr->fmr_max_maps  = 0;
+	mr->fmr_map_cnt   = 0;
+	memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
+	memset(&mr->galpas, 0, sizeof(mr->galpas));
+	mr->nr_of_pages   = 0;
+	mr->pagearray     = NULL;
+} /* end ehca_mr_deletenew() */
+
+int ehca_init_mrmw_cache(void) /* creates the MR and MW slab caches */
+{
+	mr_cache = kmem_cache_create("ehca_cache_mr",
+				     sizeof(struct ehca_mr), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!mr_cache)
+		return -ENOMEM;
+	mw_cache = kmem_cache_create("ehca_cache_mw",
+				     sizeof(struct ehca_mw), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!mw_cache) {
+		kmem_cache_destroy(mr_cache); /* roll back the MR cache */
+		mr_cache = NULL; /* so the cleanup routine skips it */
+		return -ENOMEM;
+	}
+	return 0; /* both caches exist */
+}
+
+void ehca_cleanup_mrmw_cache(void) /* destroys the MR and MW slab caches */
+{
+	if (mr_cache) /* NULL if creation failed or was rolled back */
+		kmem_cache_destroy(mr_cache);
+	if (mw_cache) /* NULL if creation failed */
+		kmem_cache_destroy(mw_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
new file mode 100644
index 0000000000000..d936e40a57484
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -0,0 +1,140 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW declarations and inline functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EHCA_MRMW_H_
+#define _EHCA_MRMW_H_
+
+int ehca_reg_mr(struct ehca_shca *shca,
+		struct ehca_mr *e_mr,
+		u64 *iova_start,
+		u64 size,
+		int acl,
+		struct ehca_pd *e_pd,
+		struct ehca_mr_pginfo *pginfo,
+		u32 *lkey,
+		u32 *rkey);
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+		       struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo);
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+		  struct ehca_mr *e_mr,
+		  u64 *iova_start,
+		  u64 size,
+		  int mr_access_flags,
+		  struct ehca_pd *e_pd,
+		  struct ehca_mr_pginfo *pginfo,
+		  u32 *lkey,
+		  u32 *rkey);
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+		       struct ehca_mr *e_fmr);
+
+int ehca_reg_smr(struct ehca_shca *shca,
+		 struct ehca_mr *e_origmr,
+		 struct ehca_mr *e_newmr,
+		 u64 *iova_start,
+		 int acl,
+		 struct ehca_pd *e_pd,
+		 u32 *lkey,
+		 u32 *rkey);
+
+int ehca_reg_internal_maxmr(struct ehca_shca *shca,
+			    struct ehca_pd *e_pd,
+			    struct ehca_mr **maxmr);
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+		   struct ehca_mr *e_newmr,
+		   u64 *iova_start,
+		   int acl,
+		   struct ehca_pd *e_pd,
+		   u32 *lkey,
+		   u32 *rkey);
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
+
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+				  int num_phys_buf,
+				  u64 *iova_start,
+				  u64 *size);
+
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+			     u64 *page_list,
+			     int list_len);
+
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+		     struct ehca_mr_pginfo *pginfo,
+		     u32 number,
+		     u64 *kpage);
+
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo,
+		       u64 *rpage);
+
+int ehca_mr_is_maxmr(u64 size,
+		     u64 *iova_start);
+
+void ehca_mrmw_map_acl(int ib_acl,
+		       u32 *hipz_acl);
+
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
+
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+			       int *ib_acl);
+
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
+
+void ehca_mr_deletenew(struct ehca_mr *mr);
+
+#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
new file mode 100644
index 0000000000000..2c3cdc6f7b39e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -0,0 +1,114 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  PD functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+
+static struct kmem_cache *pd_cache;
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context, struct ib_udata *udata)
+{
+	struct ehca_pd *pd;
+
+	pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+	if (!pd) {
+		ehca_err(device, "device=%p context=%p out of memory",
+			 device, context);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(pd, 0, sizeof(struct ehca_pd));
+	pd->ownpid = current->tgid; /* compared in ehca_dealloc_pd() */
+
+	/*
+	 * Kernel PD: when context == NULL
+	 * User   PD: when context != NULL
+	 */
+	if (!context) {
+		/*
+		 * Kernel PDs always reuse the fw_pd value of shca->pd
+		 * (per the original note, created in ehca_shca_reopen())
+		 */
+		struct ehca_shca *shca = container_of(device, struct ehca_shca,
+						      ib_device);
+		pd->fw_pd.value = shca->pd->fw_pd.value;
+	} else
+		pd->fw_pd.value = (u64)pd; /* user PD: use own address */
+
+	return &pd->ib_pd;
+}
+
+int ehca_dealloc_pd(struct ib_pd *pd) /* frees a PD from ehca_alloc_pd() */
+{
+	u32 cur_pid = current->tgid;
+	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) { /* user PD: owner tgid only */
+		ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	kmem_cache_free(pd_cache,
+			container_of(pd, struct ehca_pd, ib_pd));
+
+	return 0;
+}
+
+int ehca_init_pd_cache(void) /* creates the slab cache for struct ehca_pd */
+{
+	pd_cache = kmem_cache_create("ehca_cache_pd",
+				     sizeof(struct ehca_pd), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!pd_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_pd_cache(void) /* destroys the PD slab cache, if any */
+{
+	if (pd_cache) /* NULL if creation failed */
+		kmem_cache_destroy(pd_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
new file mode 100644
index 0000000000000..8707d297ce4c6
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -0,0 +1,259 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Hardware request structures
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _EHCA_QES_H_
+#define _EHCA_QES_H_
+
+#include "ehca_tools.h"
+
+/* virtual scatter gather entry to specify remote addresses with length */
+struct ehca_vsgentry {
+	u64 vaddr;
+	u32 lkey;
+	u32 length;
+};
+
+#define GRH_FLAG_MASK        EHCA_BMASK_IBM(7,7)
+#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM(0,3)
+#define GRH_TCLASS_MASK      EHCA_BMASK_IBM(4,12)
+#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13,31)
+#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32,47)
+#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48,55)
+#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56,63)
+
+/*
+ * Unreliable Datagram Address Vector Format
+ * see IBTA Vol1 chapter 8.3 Global Routing Header
+ */
+struct ehca_ud_av {
+	u8 sl;
+	u8 lnh;
+	u16 dlid;
+	u8 reserved1;
+	u8 reserved2;
+	u8 reserved3;
+	u8 slid_path_bits;
+	u8 reserved4;
+	u8 ipd;
+	u8 reserved5;
+	u8 pmtu;
+	u32 reserved6;
+	u64 reserved7;
+	union {
+		struct {
+			u64 word_0; /* always set to 6  */
+			/*should be 0x1B for IB transport */
+			u64 word_1;
+			u64 word_2;
+			u64 word_3;
+			u64 word_4;
+		} grh;
+		struct {
+			u32 wd_0;
+			u32 wd_1;
+			/* DWord_1 --> SGID */
+
+			u32 sgid_wd3;
+			u32 sgid_wd2;
+
+			u32 sgid_wd1;
+			u32 sgid_wd0;
+			/* DWord_3 --> DGID */
+
+			u32 dgid_wd3;
+			u32 dgid_wd2;
+
+			u32 dgid_wd1;
+			u32 dgid_wd0;
+		} grh_l;
+	};
+};
+
+/* maximum number of sg entries allowed in a WQE */
+#define MAX_WQE_SG_ENTRIES 252
+
+#define WQE_OPTYPE_SEND             0x80
+#define WQE_OPTYPE_RDMAREAD         0x40
+#define WQE_OPTYPE_RDMAWRITE        0x20
+#define WQE_OPTYPE_CMPSWAP          0x10
+#define WQE_OPTYPE_FETCHADD         0x08
+#define WQE_OPTYPE_BIND             0x04
+
+#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
+#define WQE_WRFLAG_FENCE            0x40
+#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
+#define WQE_WRFLAG_SOLIC_EVENT      0x10
+
+#define WQEF_CACHE_HINT             0x80
+#define WQEF_CACHE_HINT_RD_WR       0x40
+#define WQEF_TIMED_WQE              0x20
+#define WQEF_PURGE                  0x08
+#define WQEF_HIGH_NIBBLE            0xF0
+
+#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
+#define MW_BIND_ACCESSCTRL_R_READ    0x20
+#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
+
+struct ehca_wqe {
+	u64 work_request_id;
+	u8 optype;
+	u8 wr_flag;
+	u16 pkeyi;
+	u8 wqef;
+	u8 nr_of_data_seg;
+	u16 wqe_provided_slid;
+	u32 destination_qp_number;
+	u32 resync_psn_sqp;
+	u32 local_ee_context_qkey;
+	u32 immediate_data;
+	union {
+		struct {
+			u64 remote_virtual_adress;
+			u32 rkey;
+			u32 reserved;
+			u64 atomic_1st_op_dma_len;
+			u64 atomic_2nd_op;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+
+		} nud;
+		struct {
+			u64 ehca_ud_av_ptr;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} ud_avp;
+		struct {
+			struct ehca_ud_av ud_av;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
+						     2];
+		} ud_av;
+		struct {
+			u64 reserved0;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} all_rcv;
+
+		struct {
+			u64 reserved;
+			u32 rkey;
+			u32 old_rkey;
+			u64 reserved1;
+			u64 reserved2;
+			u64 virtual_address;
+			u32 reserved3;
+			u32 length;
+			u32 reserved4;
+			u16 reserved5;
+			u8 reserved6;
+			u8 lr_ctl;
+			u32 lkey;
+			u32 reserved7;
+			u64 reserved8;
+			u64 reserved9;
+			u64 reserved10;
+			u64 reserved11;
+		} bind;
+		struct {
+			u64 reserved12;
+			u64 reserved13;
+			u32 size;
+			u32 start;
+		} inline_data;
+	} u;
+
+};
+
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
+#define WC_IMM_DATA     EHCA_BMASK_IBM(1,1)
+#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2,2)
+#define WC_SE_BIT       EHCA_BMASK_IBM(3,3)
+#define WC_STATUS_ERROR_BIT 0x80000000
+#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
+#define WC_STATUS_PURGE_BIT 0x10
+
+struct ehca_cqe {
+	u64 work_request_id;
+	u8 optype;
+	u8 w_completion_flags;
+	u16 reserved1;
+	u32 nr_bytes_transferred;
+	u32 immediate_data;
+	u32 local_qp_number;
+	u8 freed_resource_count;
+	u8 service_level;
+	u16 wqe_count;
+	u32 qp_token;
+	u32 qkey_ee_token;
+	u32 remote_qp_number;
+	u16 dlid;
+	u16 rlid;
+	u16 reserved2;
+	u16 pkey_index;
+	u32 cqe_timestamp;
+	u32 wqe_timestamp;
+	u8 wqe_timestamp_valid;
+	u8 reserved3;
+	u8 reserved4;
+	u8 cqe_flags;
+	u32 status;
+};
+
+struct ehca_eqe {
+	u64 entry;
+};
+
+struct ehca_mrte {
+	u64 starting_va;
+	u64 length; /* length of memory region in bytes*/
+	u32 pd;
+	u8 key_instance;
+	u8 pagesize;
+	u8 mr_control;
+	u8 local_remote_access_ctrl;
+	u8 reserved[0x20 - 0x18];
+	u64 at_pointer[4];
+};
+#endif /*_EHCA_QES_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
new file mode 100644
index 0000000000000..4b27bedc6c244
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -0,0 +1,1506 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  QP functions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static struct kmem_cache *qp_cache;
+
+/*
+ * attributes not supported by query qp
+ */
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
+				     IB_QP_MAX_QP_RD_ATOMIC   | \
+				     IB_QP_ACCESS_FLAGS       | \
+				     IB_QP_EN_SQD_ASYNC_NOTIFY)
+
+/*
+ * ehca (internal) qp state values
+ */
+enum ehca_qp_state {
+	EHCA_QPS_RESET = 1,
+	EHCA_QPS_INIT = 2,
+	EHCA_QPS_RTR = 3,
+	EHCA_QPS_RTS = 5,
+	EHCA_QPS_SQD = 6,
+	EHCA_QPS_SQE = 8,
+	EHCA_QPS_ERR = 128
+};
+
+/*
+ * qp state transitions as defined by IB Arch Rel 1.1 page 431
+ */
+enum ib_qp_statetrans {
+	IB_QPST_ANY2RESET,
+	IB_QPST_ANY2ERR,
+	IB_QPST_RESET2INIT,
+	IB_QPST_INIT2RTR,
+	IB_QPST_INIT2INIT,
+	IB_QPST_RTR2RTS,
+	IB_QPST_RTS2SQD,
+	IB_QPST_RTS2RTS,
+	IB_QPST_SQD2RTS,
+	IB_QPST_SQE2RTS,
+	IB_QPST_SQD2SQD,
+	IB_QPST_MAX	/* nr of transitions, this must be last!!! */
+};
+
+/*
+ * ib2ehca_qp_state maps IB to ehca qp_state: returns the ehca qp state for
+ * the given ib qp state; unknown states are logged and yield -EINVAL
+ */
+static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
+{
+	switch (ib_qp_state) {
+	case IB_QPS_RESET:
+		return EHCA_QPS_RESET;
+	case IB_QPS_INIT:
+		return EHCA_QPS_INIT;
+	case IB_QPS_RTR:
+		return EHCA_QPS_RTR;
+	case IB_QPS_RTS:
+		return EHCA_QPS_RTS;
+	case IB_QPS_SQD:
+		return EHCA_QPS_SQD;
+	case IB_QPS_SQE:
+		return EHCA_QPS_SQE;
+	case IB_QPS_ERR:
+		return EHCA_QPS_ERR;
+	default:
+		ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
+		return -EINVAL;
+	}
+}
+
+/*
+ * ehca2ib_qp_state maps ehca to IB qp_state: returns the ib qp state for
+ * the given ehca qp state; unknown states are logged and yield -EINVAL
+ */
+static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
+						ehca_qp_state)
+{
+	switch (ehca_qp_state) {
+	case EHCA_QPS_RESET:
+		return IB_QPS_RESET;
+	case EHCA_QPS_INIT:
+		return IB_QPS_INIT;
+	case EHCA_QPS_RTR:
+		return IB_QPS_RTR;
+	case EHCA_QPS_RTS:
+		return IB_QPS_RTS;
+	case EHCA_QPS_SQD:
+		return IB_QPS_SQD;
+	case EHCA_QPS_SQE:
+		return IB_QPS_SQE;
+	case EHCA_QPS_ERR:
+		return IB_QPS_ERR;
+	default:
+		ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
+		return -EINVAL;
+	}
+}
+
+/*
+ * ehca_qp_type used as index for req_attr and opt_attr of
+ * struct ehca_modqp_statetrans
+ */
+enum ehca_qp_type {
+	QPT_RC = 0,
+	QPT_UC = 1,
+	QPT_UD = 2,
+	QPT_SQP = 3,
+	QPT_MAX
+};
+
+/*
+ * ib2ehcaqptype maps IB to ehca qp_type (SMI and GSI both become QPT_SQP);
+ * unsupported ib qp types are logged and yield -EINVAL
+ */
+static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
+{
+	switch (ibqptype) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		return QPT_SQP;
+	case IB_QPT_RC:
+		return QPT_RC;
+	case IB_QPT_UC:
+		return QPT_UC;
+	case IB_QPT_UD:
+		return QPT_UD;
+	default:
+		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+		return -EINVAL;
+	}
+}
+
+static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
+							 int ib_tostate)
+{
+	int index = -EINVAL; /* kept for transitions not handled below */
+	switch (ib_tostate) {
+	case IB_QPS_RESET: /* reachable from any state */
+		index = IB_QPST_ANY2RESET;
+		break;
+	case IB_QPS_INIT:
+		switch (ib_fromstate) {
+		case IB_QPS_RESET:
+			index = IB_QPST_RESET2INIT;
+			break;
+		case IB_QPS_INIT:
+			index = IB_QPST_INIT2INIT;
+			break;
+		}
+		break;
+	case IB_QPS_RTR:
+		if (ib_fromstate == IB_QPS_INIT)
+			index = IB_QPST_INIT2RTR;
+		break;
+	case IB_QPS_RTS:
+		switch (ib_fromstate) {
+		case IB_QPS_RTR:
+			index = IB_QPST_RTR2RTS;
+			break;
+		case IB_QPS_RTS:
+			index = IB_QPST_RTS2RTS;
+			break;
+		case IB_QPS_SQD:
+			index = IB_QPST_SQD2RTS;
+			break;
+		case IB_QPS_SQE:
+			index = IB_QPST_SQE2RTS;
+			break;
+		}
+		break;
+	case IB_QPS_SQD:
+		if (ib_fromstate == IB_QPS_RTS)
+			index = IB_QPST_RTS2SQD;
+		break;
+	case IB_QPS_SQE: /* no transition into SQE is mapped */
+		break;
+	case IB_QPS_ERR: /* reachable from any state */
+		index = IB_QPST_ANY2ERR;
+		break;
+	default:
+		break;
+	}
+	return index;
+}
+
+enum ehca_service_type {
+	ST_RC = 0,
+	ST_UC = 1,
+	ST_RD = 2,
+	ST_UD = 3
+};
+
+/*
+ * ibqptype2servicetype returns hcp service type corresponding to given
+ * ib qp type used by create_qp(); -EINVAL for raw and unknown qp types
+ */
+static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
+{
+	switch (ibqptype) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		return ST_UD;
+	case IB_QPT_RC:
+		return ST_RC;
+	case IB_QPT_UC:
+		return ST_UC;
+	case IB_QPT_UD:
+		return ST_UD;
+	case IB_QPT_RAW_IPV6:
+		return -EINVAL;
+	case IB_QPT_RAW_ETY:
+		return -EINVAL;
+	default:
+		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * init_qp_queues constructs the send and receive queues of my_qp and
+ * registers every queue page with the HCA.  Returns 0 on success or an
+ * error code; on failure the queues constructed so far are destroyed.
+ */
+static inline int init_qp_queues(struct ehca_shca *shca,
+				 struct ehca_qp *my_qp,
+				 int nr_sq_pages,
+				 int nr_rq_pages,
+				 int swqe_size,
+				 int rwqe_size,
+				 int nr_send_sges, int nr_receive_sges)
+{
+	int ret, cnt, ipz_rc;
+	void *vpage;
+	u64 rpage, h_ret;
+	struct ib_device *ib_dev = &shca->ib_device;
+	struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
+
+	ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, nr_sq_pages,
+				EHCA_PAGESIZE, swqe_size, nr_send_sges);
+	if (!ipz_rc) {
+		ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
+			 ipz_rc);
+		return -EBUSY;
+	}
+
+	ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, nr_rq_pages,
+				EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
+	if (!ipz_rc) {
+		ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
+			 ipz_rc);
+		ret = -EBUSY;
+		goto init_qp_queues0;
+	}
+	/* register SQ pages */
+	for (cnt = 0; cnt < nr_sq_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+		if (!vpage) {
+			ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+				 "failed p_vpage= %p", vpage);
+			ret = -EINVAL;
+			goto init_qp_queues1;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+						 my_qp->ipz_qp_handle,
+						 &my_qp->pf, 0, 0,
+						 rpage, 1,
+						 my_qp->galpas.kernel);
+		/* h_ret is u64: compare signed so negative rc's are caught */
+		if ((s64)h_ret < H_SUCCESS) {
+			ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
+				 " failed rc=%lx", h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto init_qp_queues1;
+		}
+	}
+
+	ipz_qeit_reset(&my_qp->ipz_squeue);
+
+	/* register RQ pages */
+	for (cnt = 0; cnt < nr_rq_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+		if (!vpage) {
+			ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
+				 "failed p_vpage = %p", vpage);
+			ret = -EINVAL;
+			goto init_qp_queues1;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+						 my_qp->ipz_qp_handle,
+						 &my_qp->pf, 0, 1,
+						 rpage, 1,my_qp->galpas.kernel);
+		if ((s64)h_ret < H_SUCCESS) { /* signed compare, as for SQ */
+			ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
+				 "rc=%lx", h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto init_qp_queues1;
+		}
+		if (cnt == (nr_rq_pages - 1)) {	/* last page! */
+			if (h_ret != H_SUCCESS) {
+				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+					 "h_ret= %lx ", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queues1;
+			}
+			vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+			if (vpage) {
+				ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+					 "should not succeed vpage=%p", vpage);
+				ret = -EINVAL;
+				goto init_qp_queues1;
+			}
+		} else {
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+					 "h_ret= %lx ", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queues1;
+			}
+		}
+	}
+
+	ipz_qeit_reset(&my_qp->ipz_rqueue);
+
+	return 0;
+
+init_qp_queues1:
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+init_qp_queues0:
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+	return ret;
+}
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *init_attr,
+			     struct ib_udata *udata)
+{	/* returns &my_qp->ib_qp on success, ERR_PTR(errno) on failure */
+	static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
+	static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
+	struct ehca_qp *my_qp;
+	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+	struct ib_ucontext *context = NULL;
+	u64 h_ret;
+	int max_send_sge, max_recv_sge, ret;
+
+	/* h_call's in/out parameters */
+	struct ehca_alloc_qp_parms parms;
+	u32 swqe_size = 0, rwqe_size = 0;
+	u8 daqp_completion, isdaqp;
+	unsigned long flags;
+
+	if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
+		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
+		ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
+			 init_attr->sq_sig_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* save daqp completion bits (mask 0x60 of qp_type) */
+	daqp_completion = init_attr->qp_type & 0x60;
+	/* save daqp bit (mask 0x80 of qp_type) */
+	isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
+	init_attr->qp_type = init_attr->qp_type & 0x1F; /* strip the flags */
+
+	if (init_attr->qp_type != IB_QPT_UD &&
+	    init_attr->qp_type != IB_QPT_SMI &&
+	    init_attr->qp_type != IB_QPT_GSI &&
+	    init_attr->qp_type != IB_QPT_UC &&
+	    init_attr->qp_type != IB_QPT_RC) {
+		ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+	if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
+	    && isdaqp) {
+		ehca_err(pd->device, "unsupported LL QP Type=%x",
+			 init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	} else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
+		   (init_attr->cap.max_send_wr > 255 ||
+		    init_attr->cap.max_recv_wr > 255 )) {
+		       ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
+				"or max_rq_wr=%x for QP Type=%x",
+				init_attr->cap.max_send_wr,
+				init_attr->cap.max_recv_wr,init_attr->qp_type);
+		       return ERR_PTR(-EINVAL);
+	} else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
+		  init_attr->cap.max_send_wr > 255) {
+		ehca_err(pd->device,
+			 "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
+			 init_attr->cap.max_send_wr, init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (pd->uobject && udata) /* user space caller */
+		context = pd->uobject->context;
+
+	my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
+	if (!my_qp) {
+		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(my_qp, 0, sizeof(struct ehca_qp));
+	memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
+	spin_lock_init(&my_qp->spinlock_s);
+	spin_lock_init(&my_qp->spinlock_r);
+
+	my_qp->recv_cq =
+		container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+	my_qp->send_cq =
+		container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
+
+	my_qp->init_attr = *init_attr;
+
+	do { /* reserve a token for my_qp; retried while idr reports -EAGAIN */
+		if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
+			ret = -ENOMEM;
+			ehca_err(pd->device, "Can't reserve idr resources.");
+			goto create_qp_exit0;
+		}
+
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+	} while (ret == -EAGAIN);
+
+	if (ret) {
+		ret = -ENOMEM;
+		ehca_err(pd->device, "Can't allocate new idr entry.");
+		goto create_qp_exit0;
+	}
+
+	parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+	if (parms.servicetype < 0) {
+		ret = -EINVAL;
+		ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
+		goto create_qp_exit1; /* token is held by now: drop it too */
+	}
+
+	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+		parms.sigtype = HCALL_SIGT_EVERY;
+	else
+		parms.sigtype = HCALL_SIGT_BY_WQE;
+
+	/* UD_AV CIRCUMVENTION: ask for 2 extra SGEs on UD/GSI/SMI */
+	max_send_sge = init_attr->cap.max_send_sge;
+	max_recv_sge = init_attr->cap.max_recv_sge;
+	if (IB_QPT_UD == init_attr->qp_type ||
+	    IB_QPT_GSI == init_attr->qp_type ||
+	    IB_QPT_SMI == init_attr->qp_type) {
+		max_send_sge += 2;
+		max_recv_sge += 2;
+	}
+
+	parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
+	parms.daqp_ctrl = isdaqp | daqp_completion;
+	parms.pd = my_pd->fw_pd;
+	parms.max_recv_sge = max_recv_sge;
+	parms.max_send_sge = max_send_sge;
+
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
+			 h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto create_qp_exit1;
+	}
+
+	switch (init_attr->qp_type) { /* derive send/recv wqe sizes */
+	case IB_QPT_RC:
+	        if (isdaqp == 0) {
+			swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+					     (parms.act_nr_send_sges)]);
+			rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+					     (parms.act_nr_recv_sges)]);
+		} else { /* for daqp we need to use msg size, not wqe size */
+		        swqe_size = da_rc_msg_size[max_send_sge]; /* NOTE(review): index not range-checked */
+			rwqe_size = da_rc_msg_size[max_recv_sge];
+			parms.act_nr_send_sges = 1;
+			parms.act_nr_recv_sges = 1;
+		}
+		break;
+	case IB_QPT_UC:
+		swqe_size = offsetof(struct ehca_wqe,
+				     u.nud.sg_list[parms.act_nr_send_sges]);
+		rwqe_size = offsetof(struct ehca_wqe,
+				     u.nud.sg_list[parms.act_nr_recv_sges]);
+		break;
+
+	case IB_QPT_UD:
+	case IB_QPT_GSI:
+	case IB_QPT_SMI:
+		/* UD circumvention: take back the 2 SGEs added above */
+		parms.act_nr_recv_sges -= 2;
+		parms.act_nr_send_sges -= 2;
+		if (isdaqp) {
+		        swqe_size = da_ud_sq_msg_size[max_send_sge]; /* NOTE(review): index not range-checked */
+			rwqe_size = da_rc_msg_size[max_recv_sge];
+			parms.act_nr_send_sges = 1;
+			parms.act_nr_recv_sges = 1;
+		} else {
+			swqe_size = offsetof(struct ehca_wqe,
+					     u.ud_av.sg_list[parms.act_nr_send_sges]);
+			rwqe_size = offsetof(struct ehca_wqe,
+					     u.ud_av.sg_list[parms.act_nr_recv_sges]);
+		}
+
+		if (IB_QPT_GSI == init_attr->qp_type ||
+		    IB_QPT_SMI == init_attr->qp_type) {
+			parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
+			parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
+			parms.act_nr_send_sges = init_attr->cap.max_send_sge;
+			parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
+			my_qp->real_qp_num =
+				(init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+		}
+
+		break;
+
+	default:
+		break;
+	}
+
+	/* initializes r/squeue and registers queue pages */
+	ret = init_qp_queues(shca, my_qp,
+			     parms.nr_sq_pages, parms.nr_rq_pages,
+			     swqe_size, rwqe_size,
+			     parms.act_nr_send_sges, parms.act_nr_recv_sges);
+	if (ret) {
+		ehca_err(pd->device,
+			 "Couldn't initialize r/squeue and pages ret=%x", ret);
+		goto create_qp_exit2;
+	}
+
+	my_qp->ib_qp.pd = &my_pd->ib_pd;
+	my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+	my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+	my_qp->ib_qp.send_cq = init_attr->send_cq;
+
+	my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+	my_qp->ib_qp.qp_type = init_attr->qp_type;
+
+	my_qp->qp_type = init_attr->qp_type;
+	my_qp->ib_qp.srq = init_attr->srq;
+
+	my_qp->ib_qp.qp_context = init_attr->qp_context;
+	my_qp->ib_qp.event_handler = init_attr->event_handler;
+
+	init_attr->cap.max_inline_data = 0; /* not supported yet */
+	init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
+	init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
+	init_attr->cap.max_send_sge = parms.act_nr_send_sges;
+	init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+
+	/* NOTE: define_apq0() not supported yet */
+	if (init_attr->qp_type == IB_QPT_GSI) {
+		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
+				 h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto create_qp_exit3;
+		}
+	}
+	if (init_attr->send_cq) {
+		struct ehca_cq *cq = container_of(init_attr->send_cq,
+						  struct ehca_cq, ib_cq);
+		ret = ehca_cq_assign_qp(cq, my_qp);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
+				 ret);
+			goto create_qp_exit3;
+		}
+		my_qp->send_cq = cq; /* NOTE(review): not unassigned on later error paths */
+	}
+	/* copy queues, galpa data to user space */
+	if (context && udata) {
+		struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
+		struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
+		struct ehca_create_qp_resp resp;
+		struct vm_area_struct * vma;
+		memset(&resp, 0, sizeof(resp));
+
+		resp.qp_num = my_qp->real_qp_num;
+		resp.token = my_qp->token;
+		resp.qp_type = my_qp->qp_type;
+		resp.qkey = my_qp->qkey;
+		resp.real_qp_num = my_qp->real_qp_num;
+		/* rqueue properties */
+		resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
+		resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
+		resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
+		resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
+		resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
+				       ipz_rqueue->queue_length,
+				       (void**)&resp.ipz_rqueue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap rqueue pages");
+			goto create_qp_exit3;
+		}
+		my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
+		/* squeue properties */
+		resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
+		resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
+		resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
+		resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
+		resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
+				       ipz_squeue->queue_length,
+				       (void**)&resp.ipz_squeue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap squeue pages");
+			goto create_qp_exit4;
+		}
+		my_qp->uspace_squeue = resp.ipz_squeue.queue;
+		/* fw_handle */
+		resp.galpas = my_qp->galpas;
+		ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
+					 (void**)&resp.galpas.kernel.fw_handle,
+					 &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap fw_handle");
+			goto create_qp_exit5;
+		}
+		my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+
+		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+			ehca_err(pd->device, "Copy to udata failed");
+			ret = -EINVAL;
+			goto create_qp_exit6;
+		}
+	}
+
+	return &my_qp->ib_qp;
+
+create_qp_exit6: /* labels fall through: each undoes one setup step */
+	ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+
+create_qp_exit5:
+	ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
+
+create_qp_exit4:
+	ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
+
+create_qp_exit3:
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+
+create_qp_exit2:
+	hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+
+create_qp_exit1: /* release the idr token */
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	idr_remove(&ehca_qp_idr, my_qp->token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+create_qp_exit0:
+	kmem_cache_free(qp_cache, my_qp);
+	return ERR_PTR(ret);
+}
+
+/*
+ * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts:
+ * sets purge bit of bad wqe and subsequent wqes to avoid reentering sqe.
+ * Returns 0 or converted h_call error; wqe count goes to *bad_wqe_cnt.
+ */
+static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
+			   int *bad_wqe_cnt)
+{
+	u64 h_ret;
+	struct ipz_queue *squeue;
+	void *bad_send_wqe_p, *bad_send_wqe_v;
+	void *squeue_start_p, *squeue_end_p;
+	void *squeue_start_v, *squeue_end_v;
+	struct ehca_wqe *wqe;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* get send wqe pointer */
+	h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+					   my_qp->ipz_qp_handle, &my_qp->pf,
+					   &bad_send_wqe_p, NULL, 2);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
+			 " ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, qp_num, h_ret);
+		return ehca2ib_return_code(h_ret);
+	}
+	bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); /* clear bit 63 */
+	ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
+		 qp_num, bad_send_wqe_p);
+	/* convert wqe pointer to vadr */
+	bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
+	if (ehca_debug_level)
+		ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
+	squeue = &my_qp->ipz_squeue;
+	squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L));
+	squeue_end_p = squeue_start_p+squeue->queue_length;
+	squeue_start_v = abs_to_virt((u64)squeue_start_p);
+	squeue_end_v = abs_to_virt((u64)squeue_end_p);
+	ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p",
+		 qp_num, squeue_start_v, squeue_end_v);
+
+	/* loop sets wqe's purge bit; it ends at a wqe with optype or wqef 0xff */
+	wqe = (struct ehca_wqe*)bad_send_wqe_v;
+	*bad_wqe_cnt = 0;
+	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
+		if (ehca_debug_level)
+			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
+		wqe->nr_of_data_seg = 0; /* suppress data access */
+		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
+		wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size);
+		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
+		if ((void*)wqe >= squeue_end_v) { /* wrap to queue start */
+			wqe = squeue_start_v;
+		}
+	}
+	/*
+	 * bad wqe will be reprocessed and ignored when poll_cq() is called,
+	 *  i.e. nr of wqes with flush error status is one less
+	 */
+	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
+		 qp_num, (*bad_wqe_cnt)-1);
+	wqe->wqef = 0; /* reset flag of the wqe the loop stopped at */
+
+	return 0;
+}
+
+/*
+ * internal_modify_qp: modify_qp worker with circumvention to handle aqp0;
+ * returns 0 on success, an error code otherwise. smi_reset2init marks the
+ * internal reset-to-init call for smi and must always be zero if called from
+ * ehca_modify_qp()! Introduced to avoid recursion of ehca_modify_qp()!
+ */
+static int internal_modify_qp(struct ib_qp *ibqp,
+			      struct ib_qp_attr *attr,
+			      int attr_mask, int smi_reset2init)
+{
+	enum ib_qp_state qp_cur_state, qp_new_state;
+	int cnt, qp_attr_idx, ret = 0;
+	enum ib_qp_statetrans statetrans;
+	struct hcp_modify_qp_control_block *mqpcb;
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca =
+		container_of(ibqp->pd->device, struct ehca_shca, ib_device);
+	u64 update_mask;
+	u64 h_ret;
+	int bad_wqe_cnt = 0;
+	int squeue_locked = 0;
+	unsigned long spl_flags = 0;
+
+	/* do query_qp to obtain current attr values */
+	mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (mqpcb == NULL) {
+		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				mqpcb, my_qp->galpas.kernel);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(ibqp->device, "hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, ibqp->qp_num, h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto modify_qp_exit1;
+	}
+
+	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
+
+	if (qp_cur_state == -EINVAL) {	/* invalid qp state */
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 mqpcb->qp_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+	/*
+	 * circumvention to set aqp0 initial state to init
+	 * as expected by IB spec
+	 */
+	if (smi_reset2init == 0 &&
+	    ibqp->qp_type == IB_QPT_SMI &&
+	    qp_cur_state == IB_QPS_RESET &&
+	    (attr_mask & IB_QP_STATE) &&
+	    attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
+		struct ib_qp_attr smiqp_attr = {
+			.qp_state = IB_QPS_INIT,
+			.port_num = my_qp->init_attr.port_num,
+			.pkey_index = 0,
+			.qkey = 0
+		};
+		int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
+			IB_QP_PKEY_INDEX | IB_QP_QKEY;
+		int smirc = internal_modify_qp(
+			ibqp, &smiqp_attr, smiqp_attr_mask, 1);
+		if (smirc) {
+			ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
+				 "ehca_modify_qp() rc=%x", smirc);
+			ret = smirc; /* hand back the error, not an h_call code */
+			goto modify_qp_exit1;
+		}
+		qp_cur_state = IB_QPS_INIT;
+		ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
+	}
+	/* is transmitted current state  equal to "real" current state */
+	if ((attr_mask & IB_QP_CUR_STATE) &&
+	    qp_cur_state != attr->cur_qp_state) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device,
+			 "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
+			 " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
+			 attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x "
+		 "new qp_state=%x attribute_mask=%x",
+		 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
+
+	qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
+	if (!smi_reset2init &&
+	    !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
+				attr_mask)) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device,
+			 "Invalid qp transition new_state=%x cur_state=%x "
+			 "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
+			 qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
+		goto modify_qp_exit1;
+	}
+
+	if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state)))
+		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	else {
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "Invalid new qp state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 qp_new_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	/* retrieve state transition struct to get req and opt attrs */
+	statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
+	if (statetrans < 0) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
+			 "new_qp_state=%x State_xsition=%x ehca_qp=%p "
+			 "qp_num=%x", qp_cur_state, qp_new_state,
+			 statetrans, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
+
+	if (qp_attr_idx < 0) {
+		ret = qp_attr_idx;
+		ehca_err(ibqp->device,
+			 "Invalid QP type=%x ehca_qp=%p qp_num=%x",
+			 ibqp->qp_type, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	ehca_dbg(ibqp->device,
+		 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
+		 my_qp, ibqp->qp_num, statetrans);
+
+	/* sqe -> rts: set purge bit of bad wqe before actual trans */
+	if ((my_qp->qp_type == IB_QPT_UD ||
+	     my_qp->qp_type == IB_QPT_GSI ||
+	     my_qp->qp_type == IB_QPT_SMI) &&
+	    statetrans == IB_QPST_SQE2RTS) {
+		/* mark next free wqe if kernel */
+		if (my_qp->uspace_squeue == 0) {
+			struct ehca_wqe *wqe;
+			/* lock send queue; released at modify_qp_exit2 */
+			spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+			squeue_locked = 1;
+			/* mark next free wqe */
+			wqe = (struct ehca_wqe*)
+				ipz_qeit_get(&my_qp->ipz_squeue);
+			wqe->optype = wqe->wqef = 0xff;
+			ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
+				 ibqp->qp_num, wqe);
+		}
+		ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
+		if (ret) {
+			ehca_err(ibqp->device, "prepare_sqe_rts() failed "
+				 "ehca_qp=%p qp_num=%x ret=%x",
+				 my_qp, ibqp->qp_num, ret);
+			goto modify_qp_exit2;
+		}
+	}
+
+	/*
+	 * enable RDMA_Atomic_Control if reset->init and reliable con
+	 * this is necessary since gen2 does not provide that flag,
+	 * but pHyp requires it
+	 */
+	if (statetrans == IB_QPST_RESET2INIT &&
+	    (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
+		mqpcb->rdma_atomic_ctrl = 3;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
+	}
+	/* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
+	if (statetrans == IB_QPST_INIT2RTR &&
+	    (ibqp->qp_type == IB_QPT_UC) &&
+	    !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
+		mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		mqpcb->prim_p_key_idx = attr->pkey_index;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+	}
+	if (attr_mask & IB_QP_PORT) {
+		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+			ret = -EINVAL;
+			ehca_err(ibqp->device, "Invalid port=%x. "
+				 "ehca_qp=%p qp_num=%x num_ports=%x",
+				 attr->port_num, my_qp, ibqp->qp_num,
+				 shca->num_ports);
+			goto modify_qp_exit2;
+		}
+		mqpcb->prim_phys_port = attr->port_num;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
+	}
+	if (attr_mask & IB_QP_QKEY) {
+		mqpcb->qkey = attr->qkey;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
+	}
+	if (attr_mask & IB_QP_AV) { /* primary address vector */
+		int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate);
+		int ehca_mult = ib_rate_to_mult(shca->sport[my_qp->
+						init_attr.port_num].rate); /* NOTE(review): ehca_destroy_qp() uses sport[port_num - 1] - confirm index */
+
+		mqpcb->dlid = attr->ah_attr.dlid;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
+		mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
+		mqpcb->service_level = attr->ah_attr.sl;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
+
+		if (ah_mult < ehca_mult)
+			mqpcb->max_static_rate = (ah_mult > 0) ?
+			((ehca_mult - 1) / ah_mult) : 0;
+		else
+			mqpcb->max_static_rate = 0;
+
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
+
+		/*
+		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
+		 * and DEST_GID otherwise pHyp will return H_ATTR_PARM!!!
+		 */
+		if (attr->ah_attr.ah_flags == IB_AH_GRH) {
+			mqpcb->send_grh_flag = 1 << 31;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+			mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
+
+			for (cnt = 0; cnt < 16; cnt++)
+				mqpcb->dest_gid.byte[cnt] =
+					attr->ah_attr.grh.dgid.raw[cnt];
+
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
+			mqpcb->flow_label = attr->ah_attr.grh.flow_label;
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
+			mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
+			mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
+		}
+	}
+
+	if (attr_mask & IB_QP_PATH_MTU) {
+		mqpcb->path_mtu = attr->path_mtu;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
+	}
+	if (attr_mask & IB_QP_TIMEOUT) {
+		mqpcb->timeout = attr->timeout;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
+	}
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		mqpcb->retry_count = attr->retry_cnt;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
+	}
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		mqpcb->rnr_retry_count = attr->rnr_retry;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
+	}
+	if (attr_mask & IB_QP_RQ_PSN) {
+		mqpcb->receive_psn = attr->rq_psn;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
+	}
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { /* value capped at 2 */
+		mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
+			attr->max_dest_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { /* value capped at 2 */
+		mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
+			attr->max_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET
+			(MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
+	}
+	if (attr_mask & IB_QP_ALT_PATH) { /* alternate address vector */
+		int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate);
+		int ehca_mult = ib_rate_to_mult(
+			shca->sport[my_qp->init_attr.port_num].rate);
+
+		mqpcb->dlid_al = attr->alt_ah_attr.dlid;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
+		mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
+		mqpcb->service_level_al = attr->alt_ah_attr.sl;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
+
+		if (ah_mult < ehca_mult)
+			mqpcb->max_static_rate_al = (ah_mult > 0) ?
+			((ehca_mult - 1) / ah_mult) : 0;
+		else
+			mqpcb->max_static_rate_al = 0;
+
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
+
+		/*
+		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
+		 * and DEST_GID otherwise pHyp will return H_ATTR_PARM!!!
+		 */
+		if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
+			mqpcb->send_grh_flag_al = 1 << 31;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
+			mqpcb->source_gid_idx_al =
+				attr->alt_ah_attr.grh.sgid_index;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
+
+			for (cnt = 0; cnt < 16; cnt++)
+				mqpcb->dest_gid_al.byte[cnt] =
+					attr->alt_ah_attr.grh.dgid.raw[cnt];
+
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
+			mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
+			mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
+			mqpcb->traffic_class_al =
+				attr->alt_ah_attr.grh.traffic_class;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
+		}
+	}
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+		mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
+	}
+
+	if (attr_mask & IB_QP_SQ_PSN) {
+		mqpcb->send_psn = attr->sq_psn;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
+	}
+
+	if (attr_mask & IB_QP_DEST_QPN) {
+		mqpcb->dest_qp_nr = attr->dest_qp_num;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
+	}
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE) {
+		mqpcb->path_migration_state = attr->path_mig_state;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
+	}
+
+	if (attr_mask & IB_QP_CAP) {
+		mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
+		mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
+		/* no support for max_send/recv_sge yet */
+	}
+
+	if (ehca_debug_level)
+		ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
+
+	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				 my_qp->ipz_qp_handle,
+				 &my_qp->pf,
+				 update_mask,
+				 mqpcb, my_qp->galpas.kernel);
+
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
+			 "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num);
+		goto modify_qp_exit2;
+	}
+
+	if ((my_qp->qp_type == IB_QPT_UD ||
+	     my_qp->qp_type == IB_QPT_GSI ||
+	     my_qp->qp_type == IB_QPT_SMI) &&
+	    statetrans == IB_QPST_SQE2RTS) {
+		/* doorbell to reprocessing wqes */
+		iosync(); /* serialize GAL register access */
+		hipz_update_sqa(my_qp, bad_wqe_cnt-1);
+		ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
+	}
+
+	if (statetrans == IB_QPST_RESET2INIT ||
+	    statetrans == IB_QPST_INIT2INIT) { /* second h_call: enable qp */
+		mqpcb->qp_enable = 1;
+		mqpcb->qp_state = EHCA_QPS_INIT;
+		update_mask = 0;
+		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+
+		h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+					 my_qp->ipz_qp_handle,
+					 &my_qp->pf,
+					 update_mask,
+					 mqpcb,
+					 my_qp->galpas.kernel);
+
+		if (h_ret != H_SUCCESS) {
+			ret = ehca2ib_return_code(h_ret);
+			ehca_err(ibqp->device, "ENABLE in context of "
+				 "RESET_2_INIT failed! Maybe you didn't get "
+				 "a LID h_ret=%lx ehca_qp=%p qp_num=%x",
+				 h_ret, my_qp, ibqp->qp_num);
+			goto modify_qp_exit2;
+		}
+	}
+
+	if (statetrans == IB_QPST_ANY2RESET) {
+		ipz_qeit_reset(&my_qp->ipz_rqueue);
+		ipz_qeit_reset(&my_qp->ipz_squeue);
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		my_qp->qkey = attr->qkey;
+
+modify_qp_exit2:
+	if (squeue_locked) { /* this means: sqe -> rts */
+		spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+		my_qp->sqerr_purgeflag = 1;
+	}
+
+modify_qp_exit1:
+	kfree(mqpcb);
+
+	return ret;
+}
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+{	/* pid check for user-owned PDs, then internal_modify_qp() */
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+					     ib_pd);
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) { /* PD has a user context, other pid */
+		ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	return internal_modify_qp(ibqp, attr, attr_mask, 0); /* smi_reset2init=0 */
+}
+
+int ehca_query_qp(struct ib_qp *qp,
+		  struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{	/* fills *qp_attr from hipz_h_query_qp(); returns 0 or an error code */
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+					     ib_pd);
+	struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
+					      ib_device);
+	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+	struct hcp_modify_qp_control_block *qpcb;
+	u32 cur_pid = current->tgid;
+	int cnt, ret = 0;
+	u64 h_ret;
+
+	if (my_pd->ib_pd.uobject  && my_pd->ib_pd.uobject->context  &&
+	    my_pd->ownpid != cur_pid) { /* PD has a user context, other pid */
+		ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
+		ehca_err(qp->device,"Invalid attribute mask "
+			 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
+			 my_qp, qp->qp_num, qp_attr_mask);
+		return -EINVAL;
+	}
+
+	qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL ); /* freed at query_qp_exit1 */
+	if (!qpcb) {
+		ehca_err(qp->device,"Out of memory for qpcb "
+			 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_qp(adapter_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				qpcb, my_qp->galpas.kernel);
+
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(qp->device,"hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, qp->qp_num, h_ret);
+		goto query_qp_exit1;
+	}
+
+	qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
+	qp_attr->qp_state = qp_attr->cur_qp_state;
+
+	if (qp_attr->cur_qp_state == -EINVAL) {
+		ret = -EINVAL;
+		ehca_err(qp->device,"Got invalid ehca_qp_state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 qpcb->qp_state, my_qp, qp->qp_num);
+		goto query_qp_exit1;
+	}
+
+	if (qp_attr->qp_state == IB_QPS_SQD)
+		qp_attr->sq_draining = 1; /* NOTE(review): never written otherwise */
+
+	qp_attr->qkey = qpcb->qkey;
+	qp_attr->path_mtu = qpcb->path_mtu;
+	qp_attr->path_mig_state = qpcb->path_migration_state;
+	qp_attr->rq_psn = qpcb->receive_psn;
+	qp_attr->sq_psn = qpcb->send_psn;
+	qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
+	qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
+	qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
+	/* UD_AV CIRCUMVENTION: report 2 SGEs less for UD */
+	if (my_qp->qp_type == IB_QPT_UD) {
+		qp_attr->cap.max_send_sge =
+			qpcb->actual_nr_sges_in_sq_wqe - 2;
+		qp_attr->cap.max_recv_sge =
+			qpcb->actual_nr_sges_in_rq_wqe - 2;
+	} else {
+		qp_attr->cap.max_send_sge =
+			qpcb->actual_nr_sges_in_sq_wqe;
+		qp_attr->cap.max_recv_sge =
+			qpcb->actual_nr_sges_in_rq_wqe;
+	}
+
+	qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
+	qp_attr->dest_qp_num = qpcb->dest_qp_nr;
+
+	qp_attr->pkey_index =
+		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
+
+	qp_attr->port_num =
+		EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
+
+	qp_attr->timeout = qpcb->timeout;
+	qp_attr->retry_cnt = qpcb->retry_count;
+	qp_attr->rnr_retry = qpcb->rnr_retry_count;
+
+	qp_attr->alt_pkey_index =
+		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); /* NOTE(review): PRIM mask on alt index - confirm intended */
+
+	qp_attr->alt_port_num = qpcb->alt_phys_port;
+	qp_attr->alt_timeout = qpcb->timeout_al;
+
+	/* primary av */
+	qp_attr->ah_attr.sl = qpcb->service_level;
+
+	if (qpcb->send_grh_flag) { /* ah_flags is left untouched otherwise */
+		qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+	}
+
+	qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
+	qp_attr->ah_attr.dlid = qpcb->dlid;
+	qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
+	qp_attr->ah_attr.port_num = qp_attr->port_num;
+
+	/* primary GRH */
+	qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
+	qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
+	qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
+	qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
+
+	for (cnt = 0; cnt < 16; cnt++)
+		qp_attr->ah_attr.grh.dgid.raw[cnt] =
+			qpcb->dest_gid.byte[cnt];
+
+	/* alternate AV */
+	qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
+	if (qpcb->send_grh_flag_al) { /* ah_flags is left untouched otherwise */
+		qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
+	}
+
+	qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
+	qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
+	qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
+
+	/* alternate GRH */
+	qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
+	qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
+	qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
+	qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
+
+	for (cnt = 0; cnt < 16; cnt++)
+		qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
+			qpcb->dest_gid_al.byte[cnt];
+
+	/* return init attributes given in ehca_create_qp */
+	if (qp_init_attr)
+		*qp_init_attr = my_qp->init_attr;
+
+	if (ehca_debug_level)
+		ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
+
+query_qp_exit1:
+	kfree(qpcb);
+
+	return ret;
+}
+
+int ehca_destroy_qp(struct ib_qp *ibqp)
+{	/* tears down the QP and frees my_qp; returns 0 or an error code */
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+					     ib_pd);
+	u32 cur_pid = current->tgid;
+	u32 qp_num = ibqp->qp_num;
+	int ret;
+	u64 h_ret;
+	u8 port_num;
+	enum ib_qp_type	qp_type;
+	unsigned long flags;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) { /* PD has a user context, other pid */
+		ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	if (my_qp->send_cq) {
+		ret = ehca_cq_unassign_qp(my_qp->send_cq,
+					      my_qp->real_qp_num);
+		if (ret) {
+			ehca_err(ibqp->device, "Couldn't unassign qp from "
+				 "send_cq ret=%x qp_num=%x cq_num=%x", ret,
+				 my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+			return ret;
+		}
+	}
+
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	idr_remove(&ehca_qp_idr, my_qp->token); /* drop token -> my_qp mapping */
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+	/* un-mmap if vma alloc; munmap failures are only logged */
+	if (my_qp->uspace_rqueue) {
+		ret = ehca_munmap(my_qp->uspace_rqueue,
+				  my_qp->ipz_rqueue.queue_length);
+		if (ret)
+			ehca_err(ibqp->device, "Could not munmap rqueue "
+				 "qp_num=%x", qp_num);
+		ret = ehca_munmap(my_qp->uspace_squeue,
+				  my_qp->ipz_squeue.queue_length);
+		if (ret)
+			ehca_err(ibqp->device, "Could not munmap squeue "
+				 "qp_num=%x", qp_num);
+		ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+		if (ret)
+			ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
+				 qp_num);
+	}
+
+	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
+		return ehca2ib_return_code(h_ret); /* queues and my_qp are not freed on this path */
+	}
+
+	port_num = my_qp->init_attr.port_num;
+	qp_type  = my_qp->init_attr.qp_type;
+
+	/* no support for IB_QPT_SMI yet */
+	if (qp_type == IB_QPT_GSI) { /* mark port down, dispatch PORT_ERR */
+		struct ib_event event;
+		ehca_info(ibqp->device, "device %s: port %x is inactive.",
+			  shca->ib_device.name, port_num);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ERR;
+		event.element.port_num = port_num;
+		shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
+		ib_dispatch_event(&event);
+	}
+
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+	kmem_cache_free(qp_cache, my_qp);
+	return 0;
+}
+
+int ehca_init_qp_cache(void)
+{	/* creates the slab cache for struct ehca_qp; 0 or -ENOMEM */
+	qp_cache = kmem_cache_create("ehca_cache_qp",
+				     sizeof(struct ehca_qp), 0,
+				     SLAB_HWCACHE_ALIGN,
+				     NULL, NULL);
+	if (!qp_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void ehca_cleanup_qp_cache(void)
+{	/* destroys qp_cache if it is non-NULL */
+	if (qp_cache)
+		kmem_cache_destroy(qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
new file mode 100644
index 0000000000000..b46bda1bf85d7
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -0,0 +1,653 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  post_send/recv, poll_cq, req_notify
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm-powerpc/system.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+/* Fill receive WQE @wqe_p from @recv_wr: zero the header, copy wr_id and SGEs.
+ * Returns 0, or -EINVAL when num_sge is outside 0..act_nr_of_sg. */
+static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+				  struct ehca_wqe *wqe_p,
+				  struct ib_recv_wr *recv_wr)
+{
+	u32 cnt_ds; /* u32 as in ehca_write_swqe(); a u8 could wrap below num_sge */
+	if (unlikely((recv_wr->num_sge < 0) ||
+		     (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
+		return -EINVAL; /* invalid SG list length */
+	}
+
+	/* clear wqe header until sglist */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+	wqe_p->work_request_id = recv_wr->wr_id;
+	wqe_p->nr_of_data_seg = recv_wr->num_sge;
+
+	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
+		const struct ib_sge *sge = &recv_wr->sg_list[cnt_ds];
+		wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = sge->addr;
+		wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = sge->lkey;
+		wqe_p->u.all_rcv.sg_list[cnt_ds].length = sge->length;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+	}
+
+	return 0;
+}
+
+#if defined(DEBUG_GSI_SEND_WR)
+
+/* need ib_mad struct */
+#include <rdma/ib_mad.h>
+
+/* Debug aid: log every WR of a UD send list, its MAD header and each SGE. */
+static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+{
+	int idx = 0, j; /* idx: position of the WR in the list, for the log only */
+	while (send_wr) {
+		struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
+		struct ib_sge *sge = send_wr->sg_list;
+		ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
+			     "send_flags=%x opcode=%x", idx, send_wr->wr_id,
+			     send_wr->num_sge, send_wr->send_flags,
+			     send_wr->opcode);
+		if (mad_hdr) {
+			ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+				     "mgmt_class=%x class_version=%x method=%x "
+				     "status=%x class_specific=%x tid=%lx "
+				     "attr_id=%x resv=%x attr_mod=%x",
+				     idx, mad_hdr->base_version,
+				     mad_hdr->mgmt_class,
+				     mad_hdr->class_version, mad_hdr->method,
+				     mad_hdr->status, mad_hdr->class_specific,
+				     mad_hdr->tid, mad_hdr->attr_id,
+				     mad_hdr->resv,
+				     mad_hdr->attr_mod);
+		}
+		for (j = 0; j < send_wr->num_sge; j++) {
+			u8 *data = (u8 *) abs_to_virt(sge->addr);
+			ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+				     "lkey=%x",
+				     idx, j, data, sge->length, sge->lkey);
+			/* assume length is n*16 */
+			ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+				 idx, j);
+			sge++;
+		} /* eof for j */
+		idx++;
+		send_wr = send_wr->next;
+	} /* eof while send_wr */
+}
+
+#endif /* DEBUG_GSI_SEND_WR */
+
+/*
+ * Build a send WQE in @wqe_p from @send_wr for queue pair @qp.
+ * Returns 0 on success, or -EINVAL for a bad SGE count, an unsupported
+ * opcode or QP type, or a UD-type request without an address handle.
+ */
+static inline int ehca_write_swqe(struct ehca_qp *qp,
+				  struct ehca_wqe *wqe_p,
+				  const struct ib_send_wr *send_wr)
+{
+	u32 idx;
+	u64 dma_length;
+	struct ehca_av *my_av;
+	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+
+	if (unlikely((send_wr->num_sge < 0) ||
+		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
+		return -EINVAL; /* invalid SG list length */
+	}
+
+	/* clear wqe header until sglist */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+	wqe_p->work_request_id = send_wr->wr_id;
+
+	switch (send_wr->opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_SEND;
+		break;
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
+		break;
+	case IB_WR_RDMA_READ:
+		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
+		break;
+	default:
+		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
+		return -EINVAL; /* invalid opcode */
+	}
+
+	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
+
+	wqe_p->wr_flag = 0;
+
+	if (send_wr->send_flags & IB_SEND_SIGNALED)
+		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+
+	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+		/* this might not work as long as HW does not support it */
+		wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
+	}
+
+	wqe_p->nr_of_data_seg = send_wr->num_sge;
+
+	switch (qp->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		/* no break is intentional here: SMI/GSI share the UD path */
+	case IB_QPT_UD:
+		/* IB 1.2 spec C10-15 compliance: high bit set -> use QP's qkey */
+		if (send_wr->wr.ud.remote_qkey & 0x80000000)
+			remote_qkey = qp->qkey;
+
+		wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+		wqe_p->local_ee_context_qkey = remote_qkey;
+		if (!send_wr->wr.ud.ah) {
+			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+			return -EINVAL;
+		}
+		my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+		wqe_p->u.ud_av.ud_av = my_av->av;
+
+		/* IB_SEND_INLINE is not checked since HW does not support it */
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.ud_av.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.ud_av.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.ud_av.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+		} /* eof for idx */
+		if (qp->qp_type == IB_QPT_SMI ||
+		    qp->qp_type == IB_QPT_GSI)
+			wqe_p->u.ud_av.ud_av.pmtu = 1;
+		if (qp->qp_type == IB_QPT_GSI) {
+			wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+#ifdef DEBUG_GSI_SEND_WR
+			trace_send_wr_ud(send_wr);
+#endif /* DEBUG_GSI_SEND_WR */
+		}
+		break;
+
+	case IB_QPT_UC:
+		if (send_wr->send_flags & IB_SEND_FENCE)
+			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
+		/* no break is intentional here */
+	case IB_QPT_RC:
+		/* TODO: atomic not implemented */
+		wqe_p->u.nud.remote_virtual_adress =
+			send_wr->wr.rdma.remote_addr;
+		wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+
+		/* IB_SEND_INLINE is not checked since HW does not support it; */
+		/* dma_length accumulates the byte count of all SGEs */
+		dma_length = 0;
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.nud.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.nud.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.nud.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+			dma_length += send_wr->sg_list[idx].length;
+		} /* eof idx */
+		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+
+		break;
+
+	default:
+		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
+		return -EINVAL;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
+	}
+	return 0;
+}
+
+/* map_ib_wc_status converts raw cqe_status to ib_wc_status (in *wc_status) */
+static inline void map_ib_wc_status(u32 cqe_status,
+				    enum ib_wc_status *wc_status)
+{
+	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
+		switch (cqe_status & 0x3F) {
+		case 0x01:
+		case 0x21:
+			*wc_status = IB_WC_LOC_LEN_ERR;
+			break;
+		case 0x02:
+		case 0x22:
+			*wc_status = IB_WC_LOC_QP_OP_ERR;
+			break;
+		case 0x03:
+		case 0x23:
+			*wc_status = IB_WC_LOC_EEC_OP_ERR;
+			break;
+		case 0x04:
+		case 0x24:
+			*wc_status = IB_WC_LOC_PROT_ERR;
+			break;
+		case 0x05:
+		case 0x25:
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		case 0x06:
+			*wc_status = IB_WC_MW_BIND_ERR;
+			break;
+		case 0x07: /* remote error - look into bits 20:24 */
+			switch ((cqe_status
+				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
+			case 0x0:
+				/* PSN Sequence Error: no matching status */
+				*wc_status = IB_WC_GENERAL_ERR;
+				break;
+			case 0x1:
+				*wc_status = IB_WC_REM_INV_REQ_ERR;
+				break;
+			case 0x2:
+				*wc_status = IB_WC_REM_ACCESS_ERR;
+				break;
+			case 0x3:
+				*wc_status = IB_WC_REM_OP_ERR;
+				break;
+			case 0x4:
+				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
+				break;
+			default: /* never leave *wc_status unset */
+				*wc_status = IB_WC_FATAL_ERR;
+				break;
+			}
+			break;
+		case 0x08:
+			*wc_status = IB_WC_RETRY_EXC_ERR;
+			break;
+		case 0x09:
+			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
+			break;
+		case 0x0A:
+		case 0x2D:
+			*wc_status = IB_WC_REM_ABORT_ERR;
+			break;
+		case 0x0B:
+		case 0x2E:
+			*wc_status = IB_WC_INV_EECN_ERR;
+			break;
+		case 0x0C:
+		case 0x2F:
+			*wc_status = IB_WC_INV_EEC_STATE_ERR;
+			break;
+		case 0x0D:
+			*wc_status = IB_WC_BAD_RESP_ERR;
+			break;
+		case 0x10:
+			/* WQE purged */
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		default:
+			*wc_status = IB_WC_FATAL_ERR;
+
+		}
+	} else
+		*wc_status = IB_WC_SUCCESS;
+}
+
+/* Post the chain @send_wr to the send queue of @qp; stops at the first WR
+ * that cannot be queued and reports it through @bad_send_wr (if non-NULL). */
+int ehca_post_send(struct ib_qp *qp,
+		   struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_send_wr *cur_send_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+
+	/* loop processes list of send reqs */
+	for (cur_send_wr = send_wr; cur_send_wr != NULL;
+	     cur_send_wr = cur_send_wr->next) {
+		u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_send_wr)
+				*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		/* write a SEND WQE into the QUEUE */
+		ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
+		/* on failure, reset the free entry pointer to the start value */
+		if (unlikely(ret)) {
+			my_qp->ipz_squeue.current_q_offset = start_offset;
+			if (bad_send_wr) /* optional, as in the overflow path */
+				*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		wqe_cnt++;
+		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+			 my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_send_wr */
+
+post_send_exit0:
+	/* UNLOCK the QUEUE */
+	spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_sqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+/* Post the chain @recv_wr to the receive queue of @qp; stops at the first WR
+ * that cannot be queued and reports it through @bad_recv_wr (if non-NULL). */
+int ehca_post_recv(struct ib_qp *qp,
+		   struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_recv_wr *cur_recv_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+
+	/* loop processes list of recv reqs */
+	for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
+	     cur_recv_wr = cur_recv_wr->next) {
+		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_recv_wr)
+				*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		/* write a RECV WQE into the QUEUE */
+		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+		/* on failure, reset the free entry pointer to the start value */
+		if (unlikely(ret)) {
+			my_qp->ipz_rqueue.current_q_offset = start_offset;
+			if (bad_recv_wr) /* optional, as in the overflow path */
+				*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		wqe_cnt++;
+		ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
+		     my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_recv_wr */
+
+post_recv_exit0:
+	spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_rqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+/*
+ * ib_wc_opcode table converts ehca wc opcode to ib. Zero marks an invalid
+ * opcode, so entries hold the ib opcode + 1 and the lookup result must be
+ * decremented. 256 slots: the index is presumably an 8-bit cqe field.
+ */
+static const u8 ib_wc_opcode[256] = {
+	[0x01] = IB_WC_RECV+1,
+	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
+	[0x04] = IB_WC_BIND_MW+1,
+	[0x08] = IB_WC_FETCH_ADD+1,
+	[0x10] = IB_WC_COMP_SWAP+1,
+	[0x20] = IB_WC_RDMA_WRITE+1,
+	[0x40] = IB_WC_RDMA_READ+1,
+	[0x80] = IB_WC_SEND+1
+};
+
+/* poll one valid CQE of @cq into @wc; returns 0, or -EAGAIN if CQ is empty */
+static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
+{
+	int ret = 0;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	struct ehca_cqe *cqe;
+	int cqe_count = 0;
+
+poll_cq_one_read_cqe:
+	cqe = (struct ehca_cqe *)
+		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
+	if (!cqe) {
+		ret = -EAGAIN;
+		ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
+			 "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
+		goto  poll_cq_one_exit0;
+	}
+
+	/* prevents loads being reordered across this point */
+	rmb();
+
+	cqe_count++;
+	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
+		struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
+		int purgeflag;
+		unsigned long spl_flags;
+		if (!qp) {
+			ehca_err(cq->device, "cq_num=%x qp_num=%x "
+				 "could not find qp -> ignore cqe",
+				 my_cq->cq_number, cqe->local_qp_number);
+			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
+				 my_cq->cq_number, cqe->local_qp_number);
+			/* ignore this purged cqe */
+			goto poll_cq_one_read_cqe;
+		}
+		spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+		purgeflag = qp->sqerr_purgeflag;
+		spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+
+		if (purgeflag) {
+			ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
+				 "src_qp=%x",
+				 cqe->local_qp_number, cqe->remote_qp_number);
+			if (ehca_debug_level)
+				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
+					 cqe->local_qp_number,
+					 cqe->remote_qp_number);
+			/*
+			 * ignore this to avoid double cqes of bad wqe
+			 * that caused sqe and turn off purge flag
+			 */
+			qp->sqerr_purgeflag = 0;
+			goto poll_cq_one_read_cqe;
+		}
+	}
+
+	/* tracing cqe */
+	if (ehca_debug_level) {
+		ehca_dbg(cq->device,
+			 "Received COMPLETION ehca_cq=%p cq_num=%x -----",
+			 my_cq, my_cq->cq_number);
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x -------------------------",
+			 my_cq, my_cq->cq_number);
+	}
+
+	/* we got a completion! */
+	wc->wr_id = cqe->work_request_id;
+
+	/* eval ib_wc_opcode */
+	wc->opcode = ib_wc_opcode[cqe->optype]-1;
+	if (unlikely(wc->opcode == -1)) {
+		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
+			 "ehca_cq=%p cq_num=%x",
+			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
+		/* dump cqe for other infos */
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		/* drop this entry (cqe_count still covers it) and repoll */
+		goto poll_cq_one_read_cqe;
+	}
+	/* eval ib_wc_status */
+	if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
+		/* complete with errors */
+		map_ib_wc_status(cqe->status, &wc->status);
+		wc->vendor_err = wc->status;
+	} else
+		wc->status = IB_WC_SUCCESS;
+
+	wc->qp_num = cqe->local_qp_number;
+	wc->byte_len = cqe->nr_bytes_transferred;
+	wc->pkey_index = cqe->pkey_index;
+	wc->slid = cqe->rlid;
+	wc->dlid_path_bits = cqe->dlid;
+	wc->src_qp = cqe->remote_qp_number;
+	wc->wc_flags = cqe->w_completion_flags;
+	wc->imm_data = cpu_to_be32(cqe->immediate_data);
+	wc->sl = cqe->service_level;
+
+	if (wc->status != IB_WC_SUCCESS)
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
+			 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
+			 "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
+			 cqe->status, cqe->local_qp_number,
+			 cqe->remote_qp_number, cqe->work_request_id, cqe);
+
+poll_cq_one_exit0:
+	if (cqe_count > 0)
+		hipz_update_feca(my_cq, cqe_count);
+
+	return ret;
+}
+
+/* Poll at most @num_entries CQEs of @cq into @wc[]; returns the number found */
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	unsigned long spl_flags;
+	int polled = 0;
+	int rc = 0;
+
+	if (num_entries < 1) {
+		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
+			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
+		return -EINVAL;
+	}
+
+	/* the CQ lock is held across the whole batch */
+	spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+	while (polled < num_entries) {
+		rc = ehca_poll_cq_one(cq, &wc[polled]);
+		if (rc)
+			break;
+		polled++;
+	}
+	spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+
+	/* an empty queue (-EAGAIN) is not an error: report what was polled */
+	if (rc == 0 || rc == -EAGAIN)
+		return polled;
+	return rc;
+}
+
+/* Request a completion notification on @cq: IB_CQ_SOLICITED goes through
+ * hipz_set_cqx_n0(), IB_CQ_NEXT_COMP through hipz_set_cqx_n1(); else -EINVAL */
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
+{
+	struct ehca_cq *my_cq;
+
+	if (cq_notify != IB_CQ_SOLICITED && cq_notify != IB_CQ_NEXT_COMP)
+		return -EINVAL;
+
+	my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	if (cq_notify == IB_CQ_SOLICITED)
+		hipz_set_cqx_n0(my_cq, 1);
+	else
+		hipz_set_cqx_n1(my_cq, 1);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
new file mode 100644
index 0000000000000..9f16e9c79394e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -0,0 +1,111 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  SQP functions
+ *
+ *  Authors: Khadija Souissi <souissi@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+
+/**
+ * ehca_define_sqp - define special queue pair 1 (GSI QP) and wait for its port
+ * @shca: HCA whose sport[] entry for the port tracks the port state
+ * @ehca_qp: queue pair handed to hipz_h_define_aqp1()
+ * @qp_init_attr: Queue pair init attributes with port and queue pair type
+ *
+ * Define Special Queue pair 0 (SMI QP) is still not supported. Returns
+ * H_SUCCESS, the hcall status, H_PARAMETER (qp_type) or H_HARDWARE (port down).
+ */
+u64 ehca_define_sqp(struct ehca_shca *shca,
+		    struct ehca_qp *ehca_qp,
+		    struct ib_qp_init_attr *qp_init_attr)
+{
+	u32 pma_qp_nr, bma_qp_nr;
+	u64 ret;
+	u8 port = qp_init_attr->port_num;
+	int counter;
+
+	shca->sport[port - 1].port_state = IB_PORT_DOWN;
+
+	switch (qp_init_attr->qp_type) {
+	case IB_QPT_SMI:
+		/* function not supported yet */
+		break;
+	case IB_QPT_GSI:
+		ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
+					 ehca_qp->ipz_qp_handle,
+					 ehca_qp->galpas.kernel,
+					 (u32) qp_init_attr->port_num,
+					 &pma_qp_nr, &bma_qp_nr);
+
+		if (ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device,
+				 "Can't define AQP1 for port %x. rc=%lx",
+				 port, ret);
+			return ret;
+		}
+		break;
+	default:
+		ehca_err(&shca->ib_device, "invalid qp_type=%x",
+			 qp_init_attr->qp_type);
+		return H_PARAMETER;
+	}
+
+	for (counter = 0;
+	     shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+	     counter < ehca_port_act_time; counter++) {
+		ehca_dbg(&shca->ib_device, "... wait until port %x is active",
+			 port);
+		msleep_interruptible(1000);
+	}
+
+	/* test the state, not the counter: the port may turn active late */
+	if (shca->sport[port - 1].port_state != IB_PORT_ACTIVE) {
+		ehca_err(&shca->ib_device, "Port %x is not active.", port);
+		return H_HARDWARE;
+	}
+
+	return H_SUCCESS;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
new file mode 100644
index 0000000000000..9f56bb846d93b
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -0,0 +1,172 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  auxiliary functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef EHCA_TOOLS_H
+#define EHCA_TOOLS_H
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/abs_addr.h>
+#include <asm/ibmebus.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+extern int ehca_debug_level;
+
+#define ehca_dbg(ib_dev, format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
+				   "PU%04x EHCA_DBG:%s " format "\n", \
+				   get_paca()->paca_index, __FUNCTION__, \
+				   ## arg); \
+	} while (0)
+/* ehca_dbg prints only if ehca_debug_level is set; info/warn/err always do */
+#define ehca_info(ib_dev, format, arg...) \
+	dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_warn(ib_dev, format, arg...) \
+	dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_err(ib_dev, format, arg...) \
+	dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/* ehca_gen_*: plain printk variants; use only if no ib_dev is available */
+#define ehca_gen_dbg(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+/* NOTE(review): unlike ehca_warn, this is debug-gated and logs at KERN_INFO */
+#define ehca_gen_warn(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+
+#define ehca_gen_err(format, arg...) \
+	printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/**
+ * ehca_dmp - printk a memory block in 16-byte rows; len should be n*16 bytes
+ * (a shorter tail is still read and printed as a full row). Line layout:
+ * EHCA_DMP:<function><format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
+ */
+#define ehca_dmp(adr, len, format, args...) \
+	do {				       \
+		unsigned int x;			      \
+		unsigned int l = (unsigned int)(len); \
+		unsigned char *deb = (unsigned char*)(adr);	\
+		for (x = 0; x < l; x += 16) { \
+			printk("EHCA_DMP:%s" format \
+			       " adr=%p ofs=%04x %016lx %016lx\n", \
+			       __FUNCTION__, ##args, deb, x, \
+			       *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
+			deb += 16; \
+		} \
+	} while (0)
+
+/* define a bitmask, little endian version: (shift position << 16) + width */
+#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
+
+/* define a bitmask, the ibm way (bit 0 is the MSB of a 64 bit value) */
+#define EHCA_BMASK_IBM(from,to) (((63-(to))<<16)+((to)-(from)+1))
+
+/* internal function, don't use: shift position stored in a mask */
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
+
+/* internal function, don't use: right-aligned ones, as wide as the field */
+#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
+
+/**
+ * EHCA_BMASK_SET - return value shifted and masked by mask
+ * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
+ * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
+ * in variable
+ */
+#define EHCA_BMASK_SET(mask,value) \
+	((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
+
+/**
+ * EHCA_BMASK_GET - extract a parameter from value by mask
+ */
+#define EHCA_BMASK_GET(mask,value) \
+	(EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
+
+
+/* Map an hcall status to 0 / -EBUSY / -ENOMEM; anything else is -EINVAL */
+static inline int ehca2ib_return_code(u64 ehca_rc)
+{
+	if (ehca_rc == H_SUCCESS)
+		return 0;
+
+	if (ehca_rc == H_BUSY)
+		return -EBUSY;
+
+	if (ehca_rc == H_NO_MEM)
+		return -ENOMEM;
+
+	return -EINVAL;
+}
+
+
+#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
new file mode 100644
index 0000000000000..e08764e4aef28
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -0,0 +1,392 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  userspace support verbs
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+/* Allocate a zeroed ehca_ucontext and return its embedded ib_ucontext,
+ * or ERR_PTR(-ENOMEM) if the allocation fails; udata is not used. */
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata)
+{
+	struct ehca_ucontext *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx)
+		return &ctx->ib_ucontext;
+
+	ehca_err(device, "Out of memory device=%p", device);
+	return ERR_PTR(-ENOMEM);
+}
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
+	return 0;
+}
+
+struct page *ehca_nopage(struct vm_area_struct *vma,
+			 unsigned long address, int *type)
+{
+	struct page *mypage = NULL;
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+	u64 offset;
+	void *vaddr;
+
+	switch (q_type) {
+	case 1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq) {
+			ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		if (cq->ownpid != cur_pid) {
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
+			ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp) {
+			ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {	/* rqueue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		} else if (rsrc_type == 3) {	/* squeue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return NOPAGE_SIGBUS;
+	}
+
+	if (!mypage) {
+		ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
+		return NOPAGE_SIGBUS;
+	}
+	get_page(mypage);
+
+	return mypage;
+}
+
+static struct vm_operations_struct ehcau_vm_ops = {
+	.nopage = ehca_nopage,
+};
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
+	u32 idr_handle = fileoffset >> 32;
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	u32 ret;
+	u64 vsize, physical;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+
+	switch (q_type) {
+	case  1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq)
+			return -EINVAL;
+
+		if (cq->ownpid != cur_pid) {
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return -ENOMEM;
+		}
+
+		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = cq->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(cq->ib_cq.device,
+				 "vsize=%lx physical=%lx", vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(cq->ib_cq.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* cq queue_addr */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		default:
+			ehca_err(cq->ib_cq.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp)
+			return -EINVAL;
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return -ENOMEM;
+		}
+
+		if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = qp->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
+				 vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(qp->ib_qp.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* qp rqueue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		case 3: /* qp squeue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops;
+			break;
+
+		default:
+			ehca_err(qp->ib_qp.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Create an anonymous shared mapping in current->mm served by ehcau_vm_ops. */
+int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
+		     struct vm_area_struct **vma)
+{
+	down_write(&current->mm->mmap_sem);
+	*mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS, foffset);
+	/* do_mmap() reports failure as an errno encoded in the address */
+	if (!(*mapped) || IS_ERR(*mapped)) {
+		up_write(&current->mm->mmap_sem);
+		ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
+			     foffset, length);
+		return -EINVAL;
+	}
+	/* mmap_sem is still held here: find_vma() must not run unlocked */
+	*vma = find_vma(current->mm, (u64)*mapped);
+	if (!(*vma)) {
+		do_munmap(current->mm, (u64)*mapped, length); /* undo mapping */
+		up_write(&current->mm->mmap_sem);
+		ehca_gen_err("couldn't find vma queue=%p", *mapped);
+		return -EINVAL;
+	}
+	(*vma)->vm_flags |= VM_RESERVED;
+	(*vma)->vm_ops = &ehcau_vm_ops;
+	up_write(&current->mm->mmap_sem);
+	return 0;
+}
+
+/* Map the page frame at "physical" uncached into current's address space. */
+int ehca_mmap_register(u64 physical, void **mapped,
+		       struct vm_area_struct **vma)
+{
+	int ret;
+	unsigned long vsize;
+	/* ehca hw supports only 4k page */
+	ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
+	if (ret) {
+		ehca_gen_err("couldn't mmap physical=%lx", physical);
+		return ret;
+	}
+	/* NOTE(review): the failure returns below keep the mapping; verify */
+	(*vma)->vm_flags |= VM_RESERVED;
+	vsize = (*vma)->vm_end - (*vma)->vm_start;
+	if (vsize != EHCA_PAGESIZE) {
+		ehca_gen_err("invalid vsize=%lx",
+			     (*vma)->vm_end - (*vma)->vm_start);
+		return -EINVAL;
+	}
+
+	(*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
+	(*vma)->vm_flags |= VM_IO | VM_RESERVED;
+
+	ret = remap_pfn_range((*vma), (*vma)->vm_start,
+			      physical >> PAGE_SHIFT, vsize,
+			      (*vma)->vm_page_prot);
+	if (ret) {
+		ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int ehca_munmap(unsigned long addr, size_t len) {
+	int ret = 0;	/* stays 0 when current has no mm */
+	struct mm_struct *mm = current->mm;
+	if (mm) {
+		down_write(&mm->mmap_sem);	/* held across do_munmap() */
+		ret = do_munmap(mm, addr, len);
+		up_write(&mm->mmap_sem);
+	}
+	return ret;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
new file mode 100644
index 0000000000000..3fb46e67df87f
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -0,0 +1,874 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/hvcall.h>
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hcp_phyp.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
+
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
+
+/* direct access qp controls */
+#define DAQP_CTRL_ENABLE    0x01
+#define DAQP_CTRL_SEND_COMP 0x20
+#define DAQP_CTRL_RECV_COMP 0x40
+
+/*
+ * Map an H_LONG_BUSY_ORDER_* code to the matching delay in milliseconds.
+ * H_LONG_BUSY_ORDER_1_MSEC and every unlisted value yield 1.
+ */
+static u32 get_longbusy_msecs(int longbusy_rc)
+{
+	if (longbusy_rc == H_LONG_BUSY_ORDER_10_MSEC)
+		return 10;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_100_MSEC)
+		return 100;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_1_SEC)
+		return 1000;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_10_SEC)
+		return 10000;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_100_SEC)
+		return 100000;
+
+	return 1;
+}
+
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+				    unsigned long arg1,
+				    unsigned long arg2,
+				    unsigned long arg3,
+				    unsigned long arg4,
+				    unsigned long arg5,
+				    unsigned long arg6,
+				    unsigned long arg7)
+{
+	long ret;
+	int i, sleep_msecs;
+	/* Issue the hcall, retrying while it answers with a long-busy status. */
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+
+	for (i = 0; i < 5; i++) {	/* at most 5 attempts */
+		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+					 arg5, arg6, arg7);
+		/* long-busy status: sleep for the hinted time, then retry */
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+		/* any other status is final; below H_SUCCESS it is logged */
+		if (ret < H_SUCCESS)
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx ",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
+		return ret;
+
+	}
+	/* every attempt ended in long-busy */
+	return H_BUSY;
+}
+
+static long ehca_plpar_hcall9(unsigned long opcode,
+			      unsigned long *outs, /* array of 9 outputs */
+			      unsigned long arg1,
+			      unsigned long arg2,
+			      unsigned long arg3,
+			      unsigned long arg4,
+			      unsigned long arg5,
+			      unsigned long arg6,
+			      unsigned long arg7,
+			      unsigned long arg8,
+			      unsigned long arg9)
+{
+	long ret;
+	int i, sleep_msecs;
+
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
+		     arg8, arg9);
+
+	for (i = 0; i < 5; i++) {
+		ret = plpar_hcall9(opcode, outs,
+				   arg1, arg2, arg3, arg4, arg5,
+				   arg6, arg7, arg8, arg9);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
+				     " arg9=%lx"
+				     " out1=%lx out2=%lx out3=%lx out4=%lx"
+				     " out5=%lx out6=%lx out7=%lx out8=%lx"
+				     " out9=%lx",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7, arg8, arg9,
+				     outs[0], outs[1], outs[2], outs[3],
+				     outs[4], outs[5], outs[6], outs[7],
+				     outs[8]);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
+			     "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
+			     "out9=%lx",
+			     opcode, ret, outs[0], outs[1], outs[2], outs[3],
+			     outs[4], outs[5], outs[6], outs[7], outs[8]);
+		return ret;
+
+	}
+
+	return H_BUSY;
+}
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 *act_nr_of_entries,
+			     u32 *act_pages,
+			     u32 *eq_ist)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 allocate_controls;
+
+	/* resource type */
+	allocate_controls = 3ULL;
+
+	/* ISN is associated */
+	if (neq_control != 1)
+		allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
+	else /* notification event queue */
+		allocate_controls = (1ULL << 63) | allocate_controls;
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,  /* r4 */
+				allocate_controls,      /* r5 */
+				number_of_entries,      /* r6 */
+				0, 0, 0, 0, 0, 0);
+	eq_handle->handle = outs[0];
+	*act_nr_of_entries = (u32)outs[3];
+	*act_pages = (u32)outs[4];
+	*eq_ist = (u32)outs[5];
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resource - ret=%lx ", ret);
+
+	return ret;
+}
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask)
+{
+	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+				       adapter_handle.handle, /* r4 */
+				       eq_handle.handle,      /* r5 */
+				       event_mask,	      /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,   /* r4  */
+				2,	                 /* r5  */
+				param->eq_handle.handle, /* r6  */
+				cq->token,	         /* r7  */
+				param->nr_cqe,           /* r8  */
+				0, 0, 0, 0);
+	cq->ipz_cq_handle.handle = outs[0];
+	param->act_nr_of_entries = (u32)outs[3];
+	param->act_pages = (u32)outs[4];
+
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+/* Allocate a QP (resource type 1) via H_ALLOC_RESOURCE; the hcall outputs are
+ * copied into qp/parms and, on H_SUCCESS only, the galpas are constructed. */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms)
+{
+	u64 ret, allocate_controls, max_r10_reg;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
+	u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
+	int daqp_ctrl = parms->daqp_ctrl;
+
+	allocate_controls =
+		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
+			       (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
+				 parms->ud_av_l_key_ctl)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
+
+	max_r10_reg =
+		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
+			       max_nr_send_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
+				 max_nr_receive_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
+				 parms->max_send_sge)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
+				 parms->max_recv_sge);
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,	           /* r4  */
+				allocate_controls,	           /* r5  */
+				qp->send_cq->ipz_cq_handle.handle,
+				qp->recv_cq->ipz_cq_handle.handle,
+				parms->ipz_eq_handle.handle,
+				((u64)qp->token << 32) | parms->pd.value,
+				max_r10_reg,	                   /* r10 */
+				parms->ud_av_l_key_ctl,            /* r11 */
+				0);
+	qp->ipz_qp_handle.handle = outs[0];
+	qp->real_qp_num = (u32)outs[1];
+	parms->act_nr_send_wqes =	/* WR count; SGE count is set below */
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
+	parms->act_nr_recv_wqes =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
+	parms->act_nr_send_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
+	parms->act_nr_recv_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
+	parms->nr_sq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
+	parms->nr_rq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block)
+{
+	u64 ret;
+	u64 r_cb = virt_to_abs(query_port_response_block);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response block not page aligned");
+		return H_PARAMETER;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+				      adapter_handle.handle, /* r4 */
+				      port_id,	             /* r5 */
+				      r_cb,	             /* r6 */
+				      0, 0, 0, 0);
+
+	if (ehca_debug_level)
+		ehca_dmp(query_port_response_block, 64, "response_block");
+
+	return ret;
+}
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock)
+{
+	u64 r_cb = virt_to_abs(query_hca_rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response_block=%p not page aligned",
+			     query_hca_rblock);
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_QUERY_HCA,
+				       adapter_handle.handle, /* r4 */
+				       r_cb,                  /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count)
+{
+	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+				       adapter_handle.handle,      /* r4  */
+				       queue_type | pagesize << 8, /* r5  */
+				       resource_handle,	           /* r6  */
+				       logical_address_of_page,    /* r7  */
+				       count,	                   /* r8  */
+				       0, 0);
+}
+
+/* Register a single EQ page; any count other than 1 yields H_PARAMETER. */
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     eq_handle.handle, logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+			   u32 ist)
+{
+	u64 ret;
+	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+				      adapter_handle.handle, /* r4 */
+				      ist,                   /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret != H_SUCCESS && ret != H_BUSY)
+		ehca_gen_err("Could not query interrupt state.");
+
+	return ret;
+}
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     cq_handle.handle, logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle,pagesize,queue_type,
+				     qp_handle.handle,logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe2processed,
+			       void **log_addr_next_rq_wqe2processed,
+			       int dis_and_get_function_code)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				dis_and_get_function_code, /* r5 */
+				qp_handle.handle,	   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (log_addr_next_sq_wqe2processed)
+		*log_addr_next_sq_wqe2processed = (void*)outs[0];
+	if (log_addr_next_rq_wqe2processed)
+		*log_addr_next_rq_wqe2processed = (void*)outs[1];
+
+	return ret;
+}
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				update_mask,	       /* r6 */
+				virt_to_abs(mqpcb),    /* r7 */
+				0, 0, 0, 0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Insufficient resources ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal)
+{
+	return ehca_plpar_hcall_norets(H_QUERY_QP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       virt_to_abs(qqpcb),    /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = hcp_galpas_dtor(&qp->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct qp->galpas");
+		return H_RESOURCE;
+	}
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				/* function code */
+				1,	                   /* r5 */
+				qp->ipz_qp_handle.handle,  /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (ret == H_HARDWARE)
+		ehca_gen_err("HCA not operational. ret=%lx", ret);
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      qp->ipz_qp_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource still in use. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port)
+{
+	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       port,                  /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				port,	               /* r6 */
+				0, 0, 0, 0, 0, 0);
+	*pma_qp_nr = (u32)outs[0];
+	*bma_qp_nr = (u32)outs[1];
+
+	if (ret == H_ALIAS_EXIST)
+		ehca_gen_err("AQP1 already exists. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	u64 ret;
+
+	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+				      adapter_handle.handle,  /* r4 */
+				      qp_handle.handle,       /* r5 */
+				      mcg_dlid,               /* r6 */
+				      interface_id,           /* r7 */
+				      subnet_prefix,          /* r8 */
+				      0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       mcg_dlid,              /* r6 */
+				       interface_id,          /* r7 */
+				       subnet_prefix,         /* r8 */
+				       0, 0);
+}
+
+/* Destruct cq->galpas, then free the CQ with the H_FREE_RESOURCE hcall. */
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq, u8 force_flag)
+{
+	u64 ret;
+
+	ret = hcp_galpas_dtor(&cq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct cq->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      cq->ipz_cq_handle.handle,  /* r5 */
+				      force_flag != 0 ? 1L : 0L, /* r6 */
+				      0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
+
+	return ret;
+}
+
+/*
+ * Destroy an EQ: destruct its galpas, then issue H_FREE_RESOURCE for the
+ * EQ handle. H_RESOURCE is returned if the galpas destructor fails.
+ */
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq)
+{
+	u64 hret;
+
+	if (hcp_galpas_dtor(&eq->galpas)) {
+		ehca_gen_err("Could not destruct eq->galpas");
+		return H_RESOURCE;
+	}
+
+	hret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle,
+				       eq->ipz_eq_handle.handle, 0, 0, 0, 0, 0);
+	if (hret == H_RESOURCE)
+		ehca_gen_err("Resource in use. ret=%lx ", hret);
+
+	return hret;
+}
+
+/*
+ * Allocate an MR via H_ALLOC_RESOURCE; the literal 5 in r5 presumably selects
+ * the MR resource type. handle, lkey and rkey are taken from outs[0], outs[2]
+ * and outs[3] whatever the hcall returned; "mr" itself is not referenced.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	const u64 acl = ((u64)access_ctrl) << 32ULL;	/* upper half of r8 */
+	u64 hret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				     adapter_handle.handle, 5, vaddr, length,
+				     acl, pd.value, 0, 0, 0);
+
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return hret;
+}
+
+/*
+ * Forward to hipz_h_register_rpage(); unaligned multi-page input is rejected.
+ */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	if (count > 1 && (logical_address_of_page & (EHCA_PAGESIZE - 1))) {
+		ehca_gen_err("logical_address_of_page not on a 4k boundary "
+			     "adapter_handle=%lx mr=%p mr_handle=%lx "
+			     "pagesize=%x queue_type=%x "
+			     "logical_address_of_page=%lx count=%lx",
+			     adapter_handle.handle, mr,
+			     mr->ipz_mr_handle.handle, pagesize, queue_type,
+			     logical_address_of_page, count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     mr->ipz_mr_handle.handle,
+				     logical_address_of_page, count);
+}
+
+/* Query an MR via H_QUERY_MR and unpack the outputs into *outparms. */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 hret = ehca_plpar_hcall9(H_QUERY_MR, outs, adapter_handle.handle,
+				     mr->ipz_mr_handle.handle,
+				     0, 0, 0, 0, 0, 0, 0);
+
+	outparms->len = outs[0];
+	outparms->vaddr = outs[1];
+	outparms->acl = outs[4] >> 32;
+	/* outs[5]: lkey in the upper 32 bits, rkey in the lower 32 bits */
+	outparms->lkey = (u32)(outs[5] >> 32);
+	outparms->rkey = (u32)(outs[5] & (0xffffffff));
+
+	return hret;
+}
+
+/* Free the MR via H_FREE_RESOURCE: r4 = adapter handle, r5 = MR handle. */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle,
+				       mr->ipz_mr_handle.handle,
+				       0, 0, 0, 0, 0);
+}
+
+/*
+ * Reregister an MR via H_REREGISTER_PMR. r8 carries access_ctrl in its upper
+ * 32 bits with pd.value ORed into it; vaddr, lkey and rkey are read back
+ * from outs[1], outs[2] and outs[3].
+ */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	const u64 acl_pd = (((u64)access_ctrl) << 32ULL) | pd.value;
+	u64 hret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+				     adapter_handle.handle,
+				     mr->ipz_mr_handle.handle, vaddr_in, length,
+				     acl_pd, mr_addr_cb, 0, 0, 0);
+
+	outparms->vaddr = outs[1];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return hret;
+}
+
+/*
+ * Register a shared MR (H_REGISTER_SMR) based on orig_mr's handle.
+ */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	const u64 acl = ((u64)access_ctrl) << 32ULL;	/* r7 */
+	u64 hret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+				     adapter_handle.handle,
+				     orig_mr->ipz_mr_handle.handle, vaddr_in,
+				     acl, pd.value, 0, 0, 0, 0);
+
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return hret;
+}
+
+/*
+ * Allocate an MW via H_ALLOC_RESOURCE (r5 = 6, r6 = pd.value).
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 hret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				     adapter_handle.handle, 6, pd.value,
+				     0, 0, 0, 0, 0, 0);
+
+	outparms->handle.handle = outs[0];
+	outparms->rkey = (u32)outs[3];
+
+	return hret;
+}
+
+/* Query an MW via H_QUERY_MW; only the rkey (outs[3]) is extracted. */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	/* r4 = adapter handle, r5 = MW handle, remaining inputs are zero */
+	u64 hret = ehca_plpar_hcall9(H_QUERY_MW, outs, adapter_handle.handle,
+				     mw->ipz_mw_handle.handle,
+				     0, 0, 0, 0, 0, 0, 0);
+
+	outparms->rkey = (u32)outs[3];
+
+	return hret;
+}
+
+/* Free the MW via H_FREE_RESOURCE: r4 = adapter handle, r5 = MW handle. */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle,
+				       mw->ipz_mw_handle.handle,
+				       0, 0, 0, 0, 0);
+}
+
+/*
+ * Issue H_ERROR_DATA for rblock, which must be EHCA_PAGESIZE aligned.
+ */
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count)
+{
+	const u64 abs_addr = virt_to_abs(rblock);
+
+	if ((abs_addr & (EHCA_PAGESIZE-1)) != 0) {
+		ehca_gen_err("rblock not page aligned.");
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_ERROR_DATA, adapter_handle.handle,
+				       ressource_handle, abs_addr, 0, 0, 0, 0);
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
new file mode 100644
index 0000000000000..587ebd470959b
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -0,0 +1,261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_IF_H__
+#define __HCP_IF_H__
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "hipz_hw.h"
+
+/*
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initializes
+ * resources and creates the empty EQPT (ring).
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 * act_nr_of_entries,
+			     u32 * act_pages,
+			     u32 * eq_ist);
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask);
+/*
+ * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW, initializes
+ * resources and creates the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initializes resources and creates empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms);
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage is the internal function, declared in hcp_if.h,
+ * that is used for all hcp_H_REGISTER_RPAGE calls.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count);
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
+			   hcp_adapter_handle,
+			   u32 ist);
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal);
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa);
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe_tb_processed,
+			       void **log_addr_next_rq_wqe_tb_processed,
+			       int dis_and_get_function_code);
+enum hcall_sigt {	/* presumably CQE signaling modes - TODO confirm */
+	HCALL_SIGT_NO_CQE = 0,
+	HCALL_SIGT_BY_WQE = 1,
+	HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal);
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port);
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr);
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag);
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq);
+
+/*
+ * hipz_h_alloc_resource_mr allocates MR resources in HW and FW and initializes
+ * the resources.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+/* hipz_h_query_mr queries MR in HW and FW */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mr frees MR resources in HW and FW */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr);
+
+/* hipz_h_reregister_pmr reregisters MR in HW and FW */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_smr registers a shared MR in HW and FW */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms);
+
+/*
+ * hipz_h_alloc_resource_mw allocates MW resources in HW and FW and initializes
+ * the resources.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_query_mw queries MW in HW and FW */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mw frees MW resources in HW and FW */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw);
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count);
+
+#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
new file mode 100644
index 0000000000000..0b1a4772c78a4
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -0,0 +1,80 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *   load store abstraction for ehca register access with tracing
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+int hcall_map_page(u64 physaddr, u64 *mapaddr)	/* maps one eHCA page */
+{
+	*mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
+	return *mapaddr ? 0 : -ENOMEM;	/* ioremap() returns NULL on failure */
+}
+
+int hcall_unmap_page(u64 mapaddr)	/* undoes hcall_map_page() */
+{
+	iounmap((volatile void __iomem*)mapaddr);
+	return 0;	/* always reports success */
+}
+
+/* Map the kernel page via hcall_map_page(); store paddr_user unmodified. */
+int hcp_galpas_ctor(struct h_galpas *galpas, u64 paddr_kernel, u64 paddr_user)
+{
+	int rc;
+
+	rc = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+	if (!rc)
+		galpas->user.fw_handle = paddr_user;
+
+	return rc;
+}
+
+/* Unmap the kernel page (if one is set) and clear both handles on success. */
+int hcp_galpas_dtor(struct h_galpas *galpas)
+{
+	int rc = 0;
+
+	if (galpas->kernel.fw_handle)
+		rc = hcall_unmap_page(galpas->kernel.fw_handle);
+	if (!rc)
+		galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
+
+	return rc;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
new file mode 100644
index 0000000000000..5305c2a3ed94a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -0,0 +1,90 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware calls
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_PHYP_H__
+#define __HCP_PHYP_H__
+
+
+/*
+ * eHCA page (mapped into memory):
+ * resource to access eHCA register pages in CPU address space
+ */
+struct h_galpa {
+	u64 fw_handle;
+	/* fw_handle: for pSeries this is a 64-bit memory address where
+	   I/O memory is mapped into CPU address space (kv) */
+};
+
+/*
+ * user and kernel galpas to access eHCA address space registers, all types
+ */
+struct h_galpas {
+	u32 pid;		/* PID of userspace galpa checking */
+	struct h_galpa user;	/* user space accessible resource,
+				   set to 0 if unused */
+	struct h_galpa kernel;	/* kernel space accessible resource,
+				   set to 0 if unused */
+};
+
+static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
+{
+	/* plain 64-bit volatile read at fw_handle + offset */
+	return *(volatile u64 __force *)(galpa.fw_handle + offset);
+}
+
+static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
+{
+	/* plain 64-bit volatile write at fw_handle + offset */
+	*(volatile u64 __force *)(galpa.fw_handle + offset) = value;
+}
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+		    u64 paddr_kernel, u64 paddr_user);
+
+int hcp_galpas_dtor(struct h_galpas *galpas);
+
+int hcall_map_page(u64 physaddr, u64 * mapaddr);
+
+int hcall_unmap_page(u64 mapaddr);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h
new file mode 100644
index 0000000000000..9dac93d021407
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns.h
@@ -0,0 +1,68 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_H__
+#define __HIPZ_FNS_H__
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+#include "hipz_fns_core.h"
+
+/* EQ accessors: "offset" is resolved through EQTEMM_OFFSET() */
+#define hipz_galpa_store_eq(gal, offset, value) \
+	hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
+#define hipz_galpa_load_eq(gal, offset) \
+	hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
+
+/* QPED accessors: "offset" is resolved through QPEDMM_OFFSET() */
+#define hipz_galpa_store_qped(gal, offset, value) \
+	hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
+#define hipz_galpa_load_qped(gal, offset) \
+	hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
+
+/* MRMW accessors: "offset" is resolved through MRMWMM_OFFSET() */
+#define hipz_galpa_store_mrmw(gal, offset, value) \
+	hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
+#define hipz_galpa_load_mrmw(gal, offset) \
+	hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
new file mode 100644
index 0000000000000..20898a153446a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -0,0 +1,100 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_CORE_H__
+#define __HIPZ_FNS_CORE_H__
+
+#include "hcp_phyp.h"
+#include "hipz_hw.h"
+
+/* CQ and QP register accessors (offsets via CQTEMM_/QPTEMM_OFFSET) */
+#define hipz_galpa_store_cq(gal, offset, value) \
+	hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
+#define hipz_galpa_load_cq(gal, offset) \
+	hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qp(gal, offset, value) \
+	hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
+#define hipz_galpa_load_qp(gal, offset) \
+	hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
+
+static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	const u64 adder = EHCA_BMASK_SET(QPX_SQADDER, nr_wqes);
+
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, adder); /* doorbell */
+}
+
+static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	const u64 adder = EHCA_BMASK_SET(QPX_RQADDER, nr_wqes);
+
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, adder); /* doorbell */
+}
+
+static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
+{
+	const u64 adder = EHCA_BMASK_SET(CQX_FECADDER, nr_cqes);
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, adder);
+}
+
+static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
+{
+	const u64 n0_bits =
+		EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, value);
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, n0_bits);
+	/* read the register back right after the store; value is not used */
+	(void)hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
+}
+
+static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
+{
+	const u64 n1_bits = EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value);
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, n1_bits);
+	/* read the register back right after the store; value is not used */
+	(void)hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
+}
+
+#endif /* __HIPZ_FNS_CORE_H__ */
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
new file mode 100644
index 0000000000000..3fc92b031c50c
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -0,0 +1,388 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  eHCA register definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_HW_H__
+#define __HIPZ_HW_H__
+
+#include "ehca_tools.h"
+
+/* QP Table Entry Memory Map */
+struct hipz_qptemm {
+	u64 qpx_hcr;
+	u64 qpx_c;
+	u64 qpx_herr;
+	u64 qpx_aer;
+/* 0x20*/
+	u64 qpx_sqa;
+	u64 qpx_sqc;
+	u64 qpx_rqa;
+	u64 qpx_rqc;
+/* 0x40*/
+	u64 qpx_st;
+	u64 qpx_pmstate;
+	u64 qpx_pmfa;
+	u64 qpx_pkey;
+/* 0x60*/
+	u64 qpx_pkeya;
+	u64 qpx_pkeyb;
+	u64 qpx_pkeyc;
+	u64 qpx_pkeyd;
+/* 0x80*/
+	u64 qpx_qkey;
+	u64 qpx_dqp;
+	u64 qpx_dlidp;
+	u64 qpx_portp;
+/* 0xa0*/
+	u64 qpx_slidp;
+	u64 qpx_slidpp;
+	u64 qpx_dlida;
+	u64 qpx_porta;
+/* 0xc0*/
+	u64 qpx_slida;
+	u64 qpx_slidpa;
+	u64 qpx_slvl;
+	u64 qpx_ipd;
+/* 0xe0*/
+	u64 qpx_mtu;
+	u64 qpx_lato;
+	u64 qpx_rlimit;
+	u64 qpx_rnrlimit;
+/* 0x100*/
+	u64 qpx_t;
+	u64 qpx_sqhp;
+	u64 qpx_sqptp;
+	u64 qpx_nspsn;
+/* 0x120*/
+	u64 qpx_nspsnhwm;
+	u64 reserved1;
+	u64 qpx_sdsi;
+	u64 qpx_sdsbc;
+/* 0x140*/
+	u64 qpx_sqwsize;
+	u64 qpx_sqwts;
+	u64 qpx_lsn;
+	u64 qpx_nssn;
+/* 0x160 */
+	u64 qpx_mor;
+	u64 qpx_cor;
+	u64 qpx_sqsize;
+	u64 qpx_erc;
+/* 0x180*/
+	u64 qpx_rnrrc;
+	u64 qpx_ernrwt;
+	u64 qpx_rnrresp;
+	u64 qpx_lmsna;
+/* 0x1a0 */
+	u64 qpx_sqhpc;
+	u64 qpx_sqcptp;
+	u64 qpx_sigt;
+	u64 qpx_wqecnt;
+/* 0x1c0*/
+	u64 qpx_rqhp;
+	u64 qpx_rqptp;
+	u64 qpx_rqsize;
+	u64 qpx_nrr;
+/* 0x1e0*/
+	u64 qpx_rdmac;
+	u64 qpx_nrpsn;
+	u64 qpx_lapsn;
+	u64 qpx_lcr;
+/* 0x200*/
+	u64 qpx_rwc;
+	u64 qpx_rwva;
+	u64 qpx_rdsi;
+	u64 qpx_rdsbc;
+/* 0x220*/
+	u64 qpx_rqwsize;
+	u64 qpx_crmsn;
+	u64 qpx_rdd;
+	u64 qpx_larpsn;
+/* 0x240*/
+	u64 qpx_pd;
+	u64 qpx_scqn;
+	u64 qpx_rcqn;
+	u64 qpx_aeqn;
+/* 0x260*/
+	u64 qpx_aaelog;
+	u64 qpx_ram;
+	u64 qpx_rdmaqe0;
+	u64 qpx_rdmaqe1;
+/* 0x280*/
+	u64 qpx_rdmaqe2;
+	u64 qpx_rdmaqe3;
+	u64 qpx_nrpsnhwm;
+/* 0x298*/
+	u64 reserved[(0x400 - 0x298) / 8];
+/* 0x400 extended data */
+	u64 reserved_ext[(0x500 - 0x400) / 8];
+/* 0x500 */
+	u64 reserved2[(0x1000 - 0x500) / 8];
+/* 0x1000      */
+};
+
+#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
+
+/* MRMWPT Entry Memory Map */
+struct hipz_mrmwmm {
+	/* 0x00 */
+	u64 mrx_hcr;
+
+	u64 mrx_c;
+	u64 mrx_herr;
+	u64 mrx_aer;
+	/* 0x20 */
+	u64 mrx_pp;
+	u64 reserved1;
+	u64 reserved2;
+	u64 reserved3;
+	/* 0x40 */
+	u64 reserved4[(0x200 - 0x40) / 8];
+	/* 0x200 */
+	u64 mrx_ctl[64];
+
+};
+
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
+
+struct hipz_qpedmm {
+	/* 0x00 */
+	u64 reserved0[(0x400) / 8];
+	/* 0x400 */
+	u64 qpedx_phh;
+	u64 qpedx_ppsgp;
+	/* 0x410 */
+	u64 qpedx_ppsgu;
+	u64 qpedx_ppdgp;
+	/* 0x420 */
+	u64 qpedx_ppdgu;
+	u64 qpedx_aph;
+	/* 0x430 */
+	u64 qpedx_apsgp;
+	u64 qpedx_apsgu;
+	/* 0x440 */
+	u64 qpedx_apdgp;
+	u64 qpedx_apdgu;
+	/* 0x450 */
+	u64 qpedx_apav;
+	u64 qpedx_apsav;
+	/* 0x460  */
+	u64 qpedx_hcr;
+	u64 reserved1[4];
+	/* 0x488 */
+	u64 qpedx_rrl0;
+	/* 0x490 */
+	u64 qpedx_rrrkey0;
+	u64 qpedx_rrva0;
+	/* 0x4a0 */
+	u64 reserved2;
+	u64 qpedx_rrl1;
+	/* 0x4b0 */
+	u64 qpedx_rrrkey1;
+	u64 qpedx_rrva1;
+	/* 0x4c0 */
+	u64 reserved3;
+	u64 qpedx_rrl2;
+	/* 0x4d0 */
+	u64 qpedx_rrrkey2;
+	u64 qpedx_rrva2;
+	/* 0x4e0 */
+	u64 reserved4;
+	u64 qpedx_rrl3;
+	/* 0x4f0 */
+	u64 qpedx_rrrkey3;
+	u64 qpedx_rrva3;
+};
+
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x)
+
+/* CQ Table Entry Memory Map */
+struct hipz_cqtemm {
+	u64 cqx_hcr;
+	u64 cqx_c;
+	u64 cqx_herr;
+	u64 cqx_aer;
+/* 0x20  */
+	u64 cqx_ptp;
+	u64 cqx_tp;
+	u64 cqx_fec;
+	u64 cqx_feca;
+/* 0x40  */
+	u64 cqx_ep;
+	u64 cqx_eq;
+/* 0x50  */
+	u64 reserved1;
+	u64 cqx_n0;
+/* 0x60  */
+	u64 cqx_n1;
+	u64 reserved2[(0x1000 - 0x68) / 8];	/* 0x68: pad up to 0x1000 */
+/* 0x1000 */
+};
+
+#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32,63)
+#define CQX_FECADDER              EHCA_BMASK_IBM(32,63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0)
+
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x)
+
+/* EQ Table Entry Memory Map */
+struct hipz_eqtemm {
+	u64 eqx_hcr;
+	u64 eqx_c;
+
+	u64 eqx_herr;
+	u64 eqx_aer;
+/* 0x20 */
+	u64 eqx_ptp;
+	u64 eqx_tp;
+	u64 eqx_ssba;
+	u64 eqx_psba;
+
+/* 0x40 */
+	u64 eqx_cec;
+	u64 eqx_meql;
+	u64 eqx_xisbi;
+	u64 eqx_xisc;
+/* 0x60 */
+	u64 eqx_it;
+
+};
+
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x)
+
+/* access control defines for MR/MW */
+#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
+#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
+#define HIPZ_ACCESSCTRL_R_READ   0x00200000
+#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
+#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
+
+/* query hca response block */
+struct hipz_query_hca {
+	u32 cur_reliable_dg;
+	u32 cur_qp;
+	u32 cur_cq;
+	u32 cur_eq;
+	u32 cur_mr;
+	u32 cur_mw;
+	u32 cur_ee_context;
+	u32 cur_mcast_grp;
+	u32 cur_qp_attached_mcast_grp;
+	u32 reserved1;
+	u32 cur_ipv6_qp;
+	u32 cur_eth_qp;
+	u32 cur_hp_mr;
+	u32 reserved2[3];
+	u32 max_rd_domain;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_eq;
+	u32 max_mr;
+	u32 max_hp_mr;
+	u32 max_mw;
+	u32 max_mrwpte;
+	u32 max_special_mrwpte;
+	u32 max_rd_ee_context;
+	u32 max_mcast_grp;
+	u32 max_total_mcast_qp_attach;
+	u32 max_mcast_qp_attach;
+	u32 max_raw_ipv6_qp;
+	u32 max_raw_ethy_qp;
+	u32 internal_clock_frequency;
+	u32 max_pd;
+	u32 max_ah;
+	u32 max_cqe;
+	u32 max_wqes_wq;
+	u32 max_partitions;
+	u32 max_rr_ee_context;
+	u32 max_rr_qp;
+	u32 max_rr_hca;
+	u32 max_act_wqs_ee_context;
+	u32 max_act_wqs_qp;
+	u32 max_sge;
+	u32 max_sge_rd;
+	u32 memory_page_size_supported;
+	u64 max_mr_size;
+	u32 local_ca_ack_delay;
+	u32 num_ports;
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	u64 node_guid;
+	u64 hca_cap_indicators;
+	u32 data_counter_register_size;
+	u32 max_shared_rq;
+	u32 max_isns_eq;
+	u32 max_neq;
+} __attribute__ ((packed));
+
+/* query port response block */
+struct hipz_query_port {
+	u32 state;
+	u32 bad_pkey_cntr;
+	u32 lmc;
+	u32 lid;
+	u32 subnet_timeout;
+	u32 qkey_viol_cntr;
+	u32 sm_sl;
+	u32 sm_lid;
+	u32 capability_mask;
+	u32 init_type_reply;
+	u32 pkey_tbl_len;
+	u32 gid_tbl_len;
+	u64 gid_prefix;
+	u32 port_nr;
+	u16 pkey_entries[16];
+	u8  reserved1[32];
+	u32 trent_size;
+	u32 trbuf_size;
+	u64 max_msg_sz;
+	u32 max_mtu;
+	u32 vl_cap;
+	u8  reserved2[1900];
+	u64 guid_entries[255];
+} __attribute__ ((packed));
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
new file mode 100644
index 0000000000000..e028ff1588ccb
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -0,0 +1,149 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ipz_pt_fn.h"
+
+/* return the current queue page and step the iterator one page, no wrap */
+void *ipz_qpageit_get_inc(struct ipz_queue *queue)
+{
+	void *page = ipz_qeit_get(queue);
+	u64 next_offset = queue->current_q_offset + queue->pagesize;
+
+	if (next_offset > queue->queue_length)
+		return NULL;	/* past the end: iterator stays where it is */
+	queue->current_q_offset = next_offset;
+	if (!(((u64)page) % EHCA_PAGESIZE))
+		return page;
+	ehca_gen_err("ERROR!! not at PAGE-Boundary");
+	return NULL;
+}
+
+/* return the current EQ entry, then advance by one entry with wrap-around */
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
+{
+	void *entry = ipz_qeit_get(queue);
+	u64 next_offset = queue->current_q_offset + queue->qe_size;
+
+	if (next_offset > (u64)(queue->queue_length - queue->qe_size)) {
+		next_offset = 0;	/* wrapped: flip the expected valid bit */
+		queue->toggle_state = (queue->toggle_state & 1) ^ 1;
+	}
+	queue->current_q_offset = next_offset;
+	return entry;
+}
+
+/* build @queue from nr_of_pages zeroed HCA pages; returns 1 if ok, 0 if not */
+int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
+{
+	int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int f = 0;
+
+	if (pagesize > PAGE_SIZE) {
+		ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
+			     "than kernel page size", pagesize);
+		return 0;
+	}
+	if (!pages_per_kpage) {
+		ehca_gen_err("FATAL ERROR: invalid kernel page size. "
+			     "pages_per_kpage=%x", pages_per_kpage);
+		return 0;
+	}
+	queue->queue_length = nr_of_pages * pagesize;
+	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	if (!queue->queue_pages) {
+		ehca_gen_err("ERROR!! didn't get the memory");
+		return 0;
+	}
+	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+	/*
+	 * allocate pages for queue:
+	 * outer loop allocates whole kernel pages (page aligned) and
+	 * inner loop divides a kernel page into smaller hca queue pages
+	 */
+	while (f < nr_of_pages) {
+		u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL);
+		int k;
+		if (!kpage)
+			goto ipz_queue_ctor_exit0; /*NOMEM*/
+		for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
+			(queue->queue_pages)[f] = (struct ipz_page *)kpage;
+			kpage += EHCA_PAGESIZE;
+			f++;
+		}
+	}
+
+	queue->current_q_offset = 0;
+	queue->qe_size = qe_size;
+	queue->act_nr_of_sg = nr_of_sg;
+	queue->pagesize = pagesize;
+	queue->toggle_state = 1;
+	return 1;
+
+ ipz_queue_ctor_exit0:
+	ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
+		     queue, f, nr_of_pages);
+	/* slots were zeroed above, so the first NULL ends the allocated part */
+	for (f = 0; f < nr_of_pages && (queue->queue_pages)[f];
+	     f += pages_per_kpage)
+		free_page((unsigned long)(queue->queue_pages)[f]);
+	vfree(queue->queue_pages);	/* also release the page array itself */
+	queue->queue_pages = NULL;	/* so ipz_queue_dtor() bails out */
+	return 0;
+}
+
+/* free the pages and the page array of @queue; returns 1 if ok, else 0 */
+int ipz_queue_dtor(struct ipz_queue *queue)
+{
+	int hca_pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int total_pages, idx;
+
+	if (!(queue && queue->queue_pages)) {
+		ehca_gen_dbg("queue or queue_pages is NULL");
+		return 0;
+	}
+	total_pages = queue->queue_length / queue->pagesize;
+	/* only every hca_pages_per_kpage-th slot is passed to free_page() */
+	for (idx = 0; idx < total_pages; idx += hca_pages_per_kpage)
+		free_page((unsigned long)queue->queue_pages[idx]);
+	vfree(queue->queue_pages);
+	return 1;
+}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
new file mode 100644
index 0000000000000..2f13509d5257a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -0,0 +1,247 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPZ_PT_FN_H__
+#define __IPZ_PT_FN_H__
+
+#define EHCA_PAGESHIFT   12
+#define EHCA_PAGESIZE   4096UL
+#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
+#define EHCA_PT_ENTRIES 512UL
+
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+
+/* struct generic ehca page */
+struct ipz_page {
+	u8 entries[EHCA_PAGESIZE];
+};
+
+/* struct generic queue in linux kernel virtual memory (kv) */
+struct ipz_queue {
+	u64 current_q_offset;	/* current queue entry */
+
+	struct ipz_page **queue_pages;	/* array of pages belonging to queue */
+	u32 qe_size;		/* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length;	/* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;	/* toggle flag - per page */
+	u32 dummy3;		/* 64 bit alignment */
+};
+
+/*
+ * return current Queue Entry for a certain q_offset
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
+{
+	struct ipz_page *current_page;
+	if (q_offset >= queue->queue_length)
+		return NULL;
+	current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
+	return  &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+}
+
+/*
+ * return current Queue Entry
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_get(struct ipz_queue *queue)
+{
+	return ipz_qeit_calc(queue, queue->current_q_offset);
+}
+
+/*
+ * return current Queue Page, increment Queue Page iterator from
+ * page to page in struct ipz_queue; past the last page it returns
+ * NULL and does NOT wrap
+ * returns address (kv) of Queue Page
+ * warning: don't use in parallel with ipz_qeit_get_inc()
+ */
+void *ipz_qpageit_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	queue->current_q_offset += queue->qe_size;
+	if (queue->current_q_offset >= queue->queue_length) {
+		queue->current_q_offset = 0;
+		/* toggle the valid flag */
+		queue->toggle_state = (~queue->toggle_state) & 1;
+	}
+
+	return ret;
+}
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
+{
+	struct ehca_cqe *cqe = ipz_qeit_get(queue);
+	u32 cqe_flags = cqe->cqe_flags;
+
+	if ((cqe_flags >> 7) != (queue->toggle_state & 1))
+		return NULL;
+
+	ipz_qeit_get_inc(queue);
+	return cqe;
+}
+
+/*
+ * returns and resets Queue Entry iterator
+ * returns address (kv) of first Queue Entry
+ */
+static inline void *ipz_qeit_reset(struct ipz_queue *queue)
+{
+	queue->current_q_offset = 0;
+	return ipz_qeit_get(queue);
+}
+
+/* struct generic page table */
+struct ipz_pt {
+	u64 entries[EHCA_PT_ENTRIES];
+};
+
+/* struct page table for a queue, only to be used in pf */
+struct ipz_qpt {
+	/* queue page tables (kv), use u64 because we know the element length */
+	u64 *qpts;
+	u32 n_qpts;
+	u32 n_ptes;       /*  number of page table entries */
+	u64 *current_pte_addr;
+};
+
+/*
+ * constructor for a struct ipz_queue, placement new for struct ipz_queue,
+ * new for all dependent data structures
+ * all QP Tables are the same
+ * flow:
+ *    allocate+pin queue
+ * see ipz_qpt_ctor()
+ * returns true if ok, false if out of memory
+ */
+int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size,
+		   const u32 nr_of_sg);
+
+/*
+ * destructor for a struct ipz_queue
+ *  -# free queue
+ *  see ipz_queue_ctor()
+ *  returns true if ok, false if queue was NULL-ptr or free failed
+ */
+int ipz_queue_dtor(struct ipz_queue *queue);
+
+/*
+ * constructor for a struct ipz_qpt,
+ * placement new for struct ipz_queue, new for all dependent data structures
+ * all QP Tables are the same,
+ * flow:
+ * -# allocate+pin queue
+ * -# initialise ptcb
+ * -# allocate+pin PTs
+ * -# link PTs to a ring, according to HCA Arch, set bit62 if needed
+ * -# the ring must have room for exactly nr_of_PTEs
+ * see ipz_qpt_ctor()
+ */
+void ipz_qpt_ctor(struct ipz_qpt *qpt,
+		  const u32 nr_of_qes,
+		  const u32 pagesize,
+		  const u32 qe_size,
+		  const u8 lowbyte, const u8 toggle,
+		  u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ * fix EQ page problems
+ */
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Event Queue Entry, increment Queue Entry iterator
+ * by one step in struct ipz_queue if valid, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	u32 qe = *(u8 *) ret;
+	if ((qe >> 7) != (queue->toggle_state & 1))
+		return NULL;
+	ipz_qeit_eq_get_inc(queue); /* this is a good one */
+	return ret;
+}
+
+/* returns address (GX) of first queue entry */
+static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
+{
+	return be64_to_cpu(qpt->qpts[0]);
+}
+
+/* returns address (kv) of first page of queue page table */
+static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
+{
+	return qpt->qpts;
+}
+
+#endif				/* __IPZ_PT_FN_H__ */
-- 
GitLab


From 64f817ba98095156149ba5991592d5d039f6da74 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Fri, 22 Sep 2006 15:22:24 -0700
Subject: [PATCH 0738/1063] IB/uverbs: Allow resize CQ operation to return
 driver-specific data

Add a ib_uverbs_resize_cq_resp.driver_data field so that low-level
drivers can return data from a resize CQ operation to userspace.  Have
ib_uverbs_resize_cq() only copy the cqe field, to avoid having to bump
the userspace ABI.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 3 +--
 include/rdma/ib_user_verbs.h         | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 8b6df7cec0bf9..deae43f31e79b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -879,11 +879,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 	if (ret)
 		goto out;
 
-	memset(&resp, 0, sizeof resp);
 	resp.cqe = cq->cqe;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
+			 &resp, sizeof resp.cqe))
 		ret = -EFAULT;
 
 out:
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 7b5372010f4b0..db1b814b62cca 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -275,6 +275,8 @@ struct ib_uverbs_resize_cq {
 
 struct ib_uverbs_resize_cq_resp {
 	__u32 cqe;
+	__u32 reserved;
+	__u64 driver_data[0];
 };
 
 struct ib_uverbs_poll_cq {
-- 
GitLab


From 9bc57e2d19db4da81c1150120658cc3658a99ed4 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralphc@pathscale.com>
Date: Fri, 11 Aug 2006 14:58:09 -0700
Subject: [PATCH 0739/1063] IB/uverbs: Pass userspace data to modify_srq and
 modify_qp methods

Pass a struct ib_udata to the low-level driver's ->modify_srq() and
->modify_qp() methods, so that it can get to the device-specific data
passed in by the userspace driver.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/uverbs_cmd.c      | 13 ++++++++++---
 drivers/infiniband/core/verbs.c           |  4 ++--
 drivers/infiniband/hw/ehca/ehca_iverbs.h  |  3 ++-
 drivers/infiniband/hw/ehca/ehca_qp.c      |  3 ++-
 drivers/infiniband/hw/ipath/ipath_qp.c    |  3 ++-
 drivers/infiniband/hw/ipath/ipath_srq.c   |  4 +++-
 drivers/infiniband/hw/ipath/ipath_verbs.h |  5 +++--
 drivers/infiniband/hw/mthca/mthca_dev.h   |  5 +++--
 drivers/infiniband/hw/mthca/mthca_qp.c    |  3 ++-
 drivers/infiniband/hw/mthca/mthca_srq.c   |  2 +-
 include/rdma/ib_verbs.h                   |  6 ++++--
 11 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index deae43f31e79b..3fcb5d189a234 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -841,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 err_copy:
 	idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
 
-
 err_free:
 	ib_destroy_cq(cq);
 
@@ -1273,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 			    int out_len)
 {
 	struct ib_uverbs_modify_qp cmd;
+	struct ib_udata            udata;
 	struct ib_qp              *qp;
 	struct ib_qp_attr         *attr;
 	int                        ret;
@@ -1280,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
@@ -1336,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
 
-	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+	ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
 
 	put_qp_read(qp);
 
@@ -2054,6 +2057,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 			     int out_len)
 {
 	struct ib_uverbs_modify_srq cmd;
+	struct ib_udata             udata;
 	struct ib_srq              *srq;
 	struct ib_srq_attr          attr;
 	int                         ret;
@@ -2061,6 +2065,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
 	if (!srq)
 		return -EINVAL;
@@ -2068,7 +2075,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	attr.max_wr    = cmd.max_wr;
 	attr.srq_limit = cmd.srq_limit;
 
-	ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+	ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
 
 	put_srq_read(srq);
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 468999c388033..06f98e9e14f98 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -231,7 +231,7 @@ int ib_modify_srq(struct ib_srq *srq,
 		  struct ib_srq_attr *srq_attr,
 		  enum ib_srq_attr_mask srq_attr_mask)
 {
-	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -547,7 +547,7 @@ int ib_modify_qp(struct ib_qp *qp,
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
-	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index bbdc437f51673..319c39d47f3a9 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -143,7 +143,8 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 
 int ehca_destroy_qp(struct ib_qp *qp);
 
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata);
 
 int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 4b27bedc6c244..4394123cdbd74 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1230,7 +1230,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 	return ret;
 }
 
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata)
 {
 	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
 	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 83e557be591ed..44c32d2db9909 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -426,11 +426,12 @@ void ipath_error_qp(struct ipath_qp *qp)
  * @ibqp: the queue pair who's attributes we're modifying
  * @attr: the new attributes
  * @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
  *
  * Returns 0 on success, otherwise returns an errno.
  */
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask)
+		    int attr_mask, struct ib_udata *udata)
 {
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	struct ipath_qp *qp = to_iqp(ibqp);
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index f760434660bd1..fa77da6667edb 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -188,9 +188,11 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
  * @ibsrq: the SRQ to modify
  * @attr: the new attributes of the SRQ
  * @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
  */
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
 	unsigned long flags;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 2df684727dc1b..698396778f001 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -579,7 +579,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 int ipath_destroy_qp(struct ib_qp *ibqp);
 
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask);
+		    int attr_mask, struct ib_udata *udata);
 
 int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		   int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -638,7 +638,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
 				struct ib_udata *udata);
 
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata);
 
 int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index f8160b8de0908..33bd0b8bfd130 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -506,7 +506,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 		    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
@@ -521,7 +521,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 		    enum ib_event_type event_type);
 int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 		   struct ib_qp_init_attr *qp_init_attr);
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata);
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			  struct ib_send_wr **bad_wr);
 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 2e8f6f36e0a5b..6d6ba4180a394 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -527,7 +527,8 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
 	return 0;
 }
 
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b60a9d79ae54a..0f316c87bf642 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -358,7 +358,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
 }
 
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ee1f3a355666d..61eed3996117a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -888,7 +888,8 @@ struct ib_device {
 						 struct ib_udata *udata);
 	int                        (*modify_srq)(struct ib_srq *srq,
 						 struct ib_srq_attr *srq_attr,
-						 enum ib_srq_attr_mask srq_attr_mask);
+						 enum ib_srq_attr_mask srq_attr_mask,
+						 struct ib_udata *udata);
 	int                        (*query_srq)(struct ib_srq *srq,
 						struct ib_srq_attr *srq_attr);
 	int                        (*destroy_srq)(struct ib_srq *srq);
@@ -900,7 +901,8 @@ struct ib_device {
 						struct ib_udata *udata);
 	int                        (*modify_qp)(struct ib_qp *qp,
 						struct ib_qp_attr *qp_attr,
-						int qp_attr_mask);
+						int qp_attr_mask,
+						struct ib_udata *udata);
 	int                        (*query_qp)(struct ib_qp *qp,
 					       struct ib_qp_attr *qp_attr,
 					       int qp_attr_mask,
-- 
GitLab


From 373d9915803aebbbf7fd3841efd9dac31c32e148 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Fri, 22 Sep 2006 15:22:26 -0700
Subject: [PATCH 0740/1063] IB/ipath: Performance improvements via mmap of
 queues

Improve performance of userspace post receive, post SRQ receive, and
poll CQ operations for ipath by allowing userspace to directly mmap()
receive queues and completion queues.  This eliminates the copying
between userspace and the kernel in the data path.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/Makefile      |   1 +
 drivers/infiniband/hw/ipath/ipath_cq.c    | 176 ++++++++++++----
 drivers/infiniband/hw/ipath/ipath_mmap.c  | 122 +++++++++++
 drivers/infiniband/hw/ipath/ipath_qp.c    | 156 ++++++++++----
 drivers/infiniband/hw/ipath/ipath_ruc.c   | 138 +++++++++----
 drivers/infiniband/hw/ipath/ipath_srq.c   | 240 ++++++++++++++--------
 drivers/infiniband/hw/ipath/ipath_ud.c    | 169 +++++++++------
 drivers/infiniband/hw/ipath/ipath_verbs.c |  50 ++---
 drivers/infiniband/hw/ipath/ipath_verbs.h | 115 ++++++-----
 9 files changed, 785 insertions(+), 382 deletions(-)
 create mode 100644 drivers/infiniband/hw/ipath/ipath_mmap.c

diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index b0bf72864130e..6bb43474d1047 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -25,6 +25,7 @@ ib_ipath-y := \
 	ipath_cq.o \
 	ipath_keys.o \
 	ipath_mad.o \
+	ipath_mmap.o \
 	ipath_mr.o \
 	ipath_qp.o \
 	ipath_rc.o \
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3efee341c9bcd..3c4c198a4514a 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,28 @@
  * @entry: work completion entry to add
  * @sig: true if @entry is a solicitated entry
  *
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
  */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
+	u32 head;
 	u32 next;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
-	if (cq->head == cq->ibcq.cqe)
+	/*
+	 * Note that the head pointer might be writable by user processes.
+	 * Take care to verify it is a sane value.
+	 */
+	head = wc->head;
+	if (head >= (unsigned) cq->ibcq.cqe) {
+		head = cq->ibcq.cqe;
 		next = 0;
-	else
-		next = cq->head + 1;
-	if (unlikely(next == cq->tail)) {
+	} else
+		next = head + 1;
+	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
 			struct ib_event ev;
@@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 		}
 		return;
 	}
-	cq->queue[cq->head] = *entry;
-	cq->head = next;
+	wc->queue[head] = *entry;
+	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
 	int npolled;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
 	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (cq->tail == cq->head)
+		if (wc->tail == wc->head)
 			break;
-		*entry = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+		*entry = wc->queue[wc->tail];
+		if (wc->tail >= cq->ibcq.cqe)
+			wc->tail = 0;
 		else
-			cq->tail++;
+			wc->tail++;
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
@@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	struct ipath_cq *cq;
-	struct ib_wc *wc;
+	struct ipath_cq_wc *wc;
 	struct ib_cq *ret;
 
 	if (entries > ib_ipath_max_cqes) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
-	/*
-	 * Need to use vmalloc() if we want to support large #s of
-	 * entries.
-	 */
+	/* Allocate the completion queue structure. */
 	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
+	 * Allocate the completion queue entries and head/tail pointers.
+	 * This is allocated separately so that it can be resized and
+	 * also mapped into user space.
+	 * We need to use vmalloc() in order to support mmap and large
+	 * numbers of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (entries + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
 	if (!wc) {
-		kfree(cq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_cq;
 	}
+
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) wc;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wc;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wc;
+		}
+		cq->ip = ip;
+		ip->context = context;
+		ip->obj = wc;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * entries);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		cq->ip = NULL;
+
 	/*
 	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 	 * The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 	cq->triggered = 0;
 	spin_lock_init(&cq->lock);
 	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	cq->head = 0;
-	cq->tail = 0;
+	wc->head = 0;
+	wc->tail = 0;
 	cq->queue = wc;
 
 	ret = &cq->ibcq;
 
 	dev->n_cqs_allocated++;
+	goto done;
 
-bail:
+bail_wc:
+	vfree(wc);
+
+bail_cq:
+	kfree(cq);
+
+done:
 	return ret;
 }
 
@@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
 
 	tasklet_kill(&cq->comptask);
 	dev->n_cqs_allocated--;
-	vfree(cq->queue);
+	if (cq->ip)
+		kref_put(&cq->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(cq->queue);
 	kfree(cq);
 
 	return 0;
@@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 	spin_lock_irqsave(&cq->lock, flags);
 	/*
 	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions.
+	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
 	 */
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = notify;
@@ -264,46 +319,81 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
-	struct ib_wc *wc, *old_wc;
-	u32 n;
+	struct ipath_cq_wc *old_wc = cq->queue;
+	struct ipath_cq_wc *wc;
+	u32 head, tail, n;
 	int ret;
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (cqe + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
 	if (!wc) {
 		ret = -ENOMEM;
 		goto bail;
 	}
 
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		__u64 offset = (__u64) wc;
+
+		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (ret)
+			goto bail;
+	}
+
 	spin_lock_irq(&cq->lock);
-	if (cq->head < cq->tail)
-		n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+	/*
+	 * Make sure head and tail are sane since they
+	 * might be user writable.
+	 */
+	head = old_wc->head;
+	if (head > (u32) cq->ibcq.cqe)
+		head = (u32) cq->ibcq.cqe;
+	tail = old_wc->tail;
+	if (tail > (u32) cq->ibcq.cqe)
+		tail = (u32) cq->ibcq.cqe;
+	if (head < tail)
+		n = cq->ibcq.cqe + 1 + head - tail;
 	else
-		n = cq->head - cq->tail;
+		n = head - tail;
 	if (unlikely((u32)cqe < n)) {
 		spin_unlock_irq(&cq->lock);
 		vfree(wc);
 		ret = -EOVERFLOW;
 		goto bail;
 	}
-	for (n = 0; cq->tail != cq->head; n++) {
-		wc[n] = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+	for (n = 0; tail != head; n++) {
+		wc->queue[n] = old_wc->queue[tail];
+		if (tail == (u32) cq->ibcq.cqe)
+			tail = 0;
 		else
-			cq->tail++;
+			tail++;
 	}
 	cq->ibcq.cqe = cqe;
-	cq->head = n;
-	cq->tail = 0;
-	old_wc = cq->queue;
+	wc->head = n;
+	wc->tail = 0;
 	cq->queue = wc;
 	spin_unlock_irq(&cq->lock);
 
 	vfree(old_wc);
 
+	if (cq->ip) {
+		struct ipath_ibdev *dev = to_idev(ibcq->device);
+		struct ipath_mmap_info *ip = cq->ip;
+
+		ip->obj = wc;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * cqe);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	}
+
 	ret = 0;
 
 bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644
index 0000000000000..11b7378ff2145
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+	struct ipath_mmap_info *ip =
+		container_of(ref, struct ipath_mmap_info, ref);
+
+	vfree(ip->obj);
+	kfree(ip);
+}
+
+/*
+ * open and close track how many times the queue memory (CQ, QP or SRQ)
+ * is mapped, so that it is not freed while a mapping still exists.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	kref_get(&ip->ref);
+	ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	ip->mmap_cnt--;
+	kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+	.open =     ipath_vma_open,
+	.close =    ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct ipath_ibdev *dev = to_idev(context->device);
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct ipath_mmap_info *ip, **pp;
+	int ret = -EINVAL;
+
+	/*
+	 * Search the device's list of objects waiting for a mmap call.
+	 * Normally, this list is very short since a call to create a
+	 * CQ, QP, or SRQ is soon followed by a call to mmap().
+	 */
+	spin_lock_irq(&dev->pending_lock);
+	for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+		/* Only the creator is allowed to mmap the object */
+		if (context != ip->context || (void *) offset != ip->obj)
+			continue;
+		/* Don't allow a mmap larger than the object. */
+		if (size > ip->size)
+			break;
+
+		*pp = ip->next;
+		spin_unlock_irq(&dev->pending_lock);
+
+		ret = remap_vmalloc_range(vma, ip->obj, 0);
+		if (ret)
+			goto done;
+		vma->vm_ops = &ipath_vm_ops;
+		vma->vm_private_data = ip;
+		ipath_vma_open(vma);
+		goto done;
+	}
+	spin_unlock_irq(&dev->pending_lock);
+done:
+	return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 44c32d2db9909..1ccfc909db1e7 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -35,7 +35,7 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 #define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@@ -44,19 +44,6 @@
 #define find_next_offset(map, off) find_next_zero_bit((map)->page, \
 						      BITS_PER_PAGE, off)
 
-#define TRANS_INVALID	0
-#define TRANS_ANY2RST	1
-#define TRANS_RST2INIT	2
-#define TRANS_INIT2INIT	3
-#define TRANS_INIT2RTR	4
-#define TRANS_RTR2RTS	5
-#define TRANS_RTS2RTS	6
-#define TRANS_SQERR2RTS	7
-#define TRANS_ANY2ERR	8
-#define TRANS_RTS2SQD	9  /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD	10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS	11 /* error if not drained */
-
 /*
  * Convert the AETH credit code into the number of credits.
  */
@@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
-	qp->r_rq.head = 0;
-	qp->r_rq.tail = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
 	qp->r_reuse_sge = 0;
 }
 
@@ -410,15 +399,32 @@ void ipath_error_qp(struct ipath_qp *qp)
 	qp->s_hdrwords = 0;
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 
-	wc.opcode = IB_WC_RECV;
-	spin_lock(&qp->r_rq.lock);
-	while (qp->r_rq.tail != qp->r_rq.head) {
-		wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
-		if (++qp->r_rq.tail >= qp->r_rq.size)
-			qp->r_rq.tail = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	if (qp->r_rq.wq) {
+		struct ipath_rwq *wq;
+		u32 head;
+		u32 tail;
+
+		spin_lock(&qp->r_rq.lock);
+
+		/* sanity check pointers before trusting them */
+		wq = qp->r_rq.wq;
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
+		wc.opcode = IB_WC_RECV;
+		while (tail != head) {
+			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+			if (++tail >= qp->r_rq.size)
+				tail = 0;
+			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+		}
+		wq->tail = tail;
+
+		spin_unlock(&qp->r_rq.lock);
 	}
-	spin_unlock(&qp->r_rq.lock);
 }
 
 /**
@@ -544,7 +550,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->dest_qp_num = qp->remote_qpn;
 	attr->qp_access_flags = qp->qp_access_flags;
 	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->r_rq.size - 1;
+	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
 	attr->cap.max_send_sge = qp->s_max_sge;
 	attr->cap.max_recv_sge = qp->r_rq.max_sge;
 	attr->cap.max_inline_data = 0;
@@ -597,13 +603,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
 	} else {
 		u32 min, max, x;
 		u32 credits;
-
+		struct ipath_rwq *wq = qp->r_rq.wq;
+		u32 head;
+		u32 tail;
+
+		/* sanity check pointers before trusting them */
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
 		/*
 		 * Compute the number of credits available (RWQEs).
 		 * XXX Not holding the r_rq.lock here so there is a small
 		 * chance that the pair of reads are not atomic.
 		 */
-		credits = qp->r_rq.head - qp->r_rq.tail;
+		credits = head - tail;
 		if ((int)credits < 0)
 			credits += qp->r_rq.size;
 		/*
@@ -680,27 +696,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 	case IB_QPT_UD:
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
-		qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+		sz = sizeof(*qp);
+		if (init_attr->srq) {
+			struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+			sz += sizeof(*qp->r_sg_list) *
+				srq->rq.max_sge;
+		} else
+			sz += sizeof(*qp->r_sg_list) *
+				init_attr->cap.max_recv_sge;
+		qp = kmalloc(sz, GFP_KERNEL);
 		if (!qp) {
-			vfree(swq);
 			ret = ERR_PTR(-ENOMEM);
-			goto bail;
+			goto bail_swq;
 		}
 		if (init_attr->srq) {
+			sz = 0;
 			qp->r_rq.size = 0;
 			qp->r_rq.max_sge = 0;
 			qp->r_rq.wq = NULL;
+			init_attr->cap.max_recv_wr = 0;
+			init_attr->cap.max_recv_sge = 0;
 		} else {
 			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 				sizeof(struct ipath_rwqe);
-			qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
+			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
 			if (!qp->r_rq.wq) {
-				kfree(qp);
-				vfree(swq);
 				ret = ERR_PTR(-ENOMEM);
-				goto bail;
+				goto bail_qp;
 			}
 		}
 
@@ -726,12 +752,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		err = ipath_alloc_qpn(&dev->qp_table, qp,
 				      init_attr->qp_type);
 		if (err) {
-			vfree(swq);
-			vfree(qp->r_rq.wq);
-			kfree(qp);
 			ret = ERR_PTR(err);
-			goto bail;
+			goto bail_rwq;
 		}
+		qp->ip = NULL;
 		ipath_reset_qp(qp);
 
 		/* Tell the core driver that the kernel SMA is present. */
@@ -748,8 +772,51 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 
 	init_attr->cap.max_inline_data = 0;
 
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) qp->r_rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_rwq;
+		}
+
+		if (qp->r_rq.wq) {
+			/* Allocate info for ipath_mmap(). */
+			ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+			if (!ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_rwq;
+			}
+			qp->ip = ip;
+			ip->context = ibpd->uobject->context;
+			ip->obj = qp->r_rq.wq;
+			kref_init(&ip->ref);
+			ip->mmap_cnt = 0;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	}
+
 	ret = &qp->ibqp;
+	goto bail;
 
+bail_rwq:
+	vfree(qp->r_rq.wq);
+bail_qp:
+	kfree(qp);
+bail_swq:
+	vfree(swq);
 bail:
 	return ret;
 }
@@ -773,11 +840,9 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	if (qp->ibqp.qp_type == IB_QPT_SMI)
 		ipath_layer_set_verbs_flags(dev->dd, 0);
 
-	spin_lock_irqsave(&qp->r_rq.lock, flags);
-	spin_lock(&qp->s_lock);
+	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 
 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
@@ -798,8 +863,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	if (atomic_read(&qp->refcount) != 0)
 		ipath_free_qp(&dev->qp_table, qp);
 
+	if (qp->ip)
+		kref_put(&qp->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(qp->r_rq.wq);
 	vfree(qp->s_wq);
-	vfree(qp->r_rq.wq);
 	kfree(qp);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 772bc59fb85c3..dd09420d677d8 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of milliseconds.
@@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 }
 
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	qp->r_len = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(&dev->lk_table,
+				   &qp->r_sg_list[j], &wqe->sg_list[i],
+				   IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		qp->r_len += wqe->sg_list[i].length;
+		j++;
+	}
+	qp->r_sge.sge = qp->r_sg_list[0];
+	qp->r_sge.sg_list = qp->r_sg_list + 1;
+	qp->r_sge.num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp_num = qp->ibqp.qp_num;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
+
 /**
  * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
@@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
 	struct ipath_rq *rq;
+	struct ipath_rwq *wq;
 	struct ipath_srq *srq;
 	struct ipath_rwqe *wqe;
-	int ret = 1;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	int ret;
 
-	if (!qp->ibqp.srq) {
+	if (qp->ibqp.srq) {
+		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
+		rq = &srq->rq;
+	} else {
+		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
-		spin_lock_irqsave(&rq->lock, flags);
-
-		if (unlikely(rq->tail == rq->head)) {
-			ret = 0;
-			goto done;
-		}
-		wqe = get_rwqe_ptr(rq, rq->tail);
-		qp->r_wr_id = wqe->wr_id;
-		if (!wr_id_only) {
-			qp->r_sge.sge = wqe->sg_list[0];
-			qp->r_sge.sg_list = wqe->sg_list + 1;
-			qp->r_sge.num_sge = wqe->num_sge;
-			qp->r_len = wqe->length;
-		}
-		if (++rq->tail >= rq->size)
-			rq->tail = 0;
-		goto done;
 	}
 
-	srq = to_isrq(qp->ibqp.srq);
-	rq = &srq->rq;
 	spin_lock_irqsave(&rq->lock, flags);
-
-	if (unlikely(rq->tail == rq->head)) {
-		ret = 0;
-		goto done;
-	}
-	wqe = get_rwqe_ptr(rq, rq->tail);
+	wq = rq->wq;
+	tail = wq->tail;
+	/* Validate tail before using it since it is user writable. */
+	if (tail >= rq->size)
+		tail = 0;
+	do {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			ret = 0;
+			goto bail;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+	} while (!wr_id_only && !init_sge(qp, wqe));
 	qp->r_wr_id = wqe->wr_id;
-	if (!wr_id_only) {
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->num_sge;
-		qp->r_len = wqe->length;
-	}
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq->ibsrq.event_handler) {
-		struct ib_event ev;
+	wq->tail = tail;
+
+	ret = 1;
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
+			struct ib_event ev;
+
 			srq->limit = 0;
 			spin_unlock_irqrestore(&rq->lock, flags);
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 			goto bail;
 		}
 	}
-
-done:
 	spin_unlock_irqrestore(&rq->lock, flags);
+
 bail:
 	return ret;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index fa77da6667edb..941e866d9517b 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			   struct ib_recv_wr **bad_wr)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_ibdev *dev = to_idev(ibsrq->device);
+	struct ipath_rwq *wq;
 	unsigned long flags;
 	int ret;
 
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > srq->rq.max_sge) {
+		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&srq->rq.lock, flags);
-		next = srq->rq.head + 1;
+		wq = srq->rq.wq;
+		next = wq->head + 1;
 		if (next >= srq->rq.size)
 			next = 0;
-		if (next == srq->rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&srq->rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+		wqe = get_rwqe_ptr(&srq->rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(srq->ibsrq.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(&dev->lk_table,
-					   &wqe->sg_list[j],
-					   &wr->sg_list[i],
-					   IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		srq->rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&srq->rq.lock, flags);
 	}
 	ret = 0;
@@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
 
 	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	if (srq_init_attr->attr.max_wr == 0) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
 	    (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
 	if (!srq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
 	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+	srq->rq.max_sge = srq_init_attr->attr.max_sge;
+	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
 		sizeof(struct ipath_rwqe);
-	srq->rq.wq = vmalloc(srq->rq.size * sz);
+	srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
 	if (!srq->rq.wq) {
-		kfree(srq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_srq;
 	}
 
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) srq->rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wq;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wq;
+		}
+		srq->ip = ip;
+		ip->context = ibpd->uobject->context;
+		ip->obj = srq->rq.wq;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+				      srq->rq.size * sz);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		srq->ip = NULL;
+
 	/*
 	 * ib_create_srq() will initialize srq->ibsrq.
 	 */
 	spin_lock_init(&srq->rq.lock);
-	srq->rq.head = 0;
-	srq->rq.tail = 0;
+	srq->rq.wq->head = 0;
+	srq->rq.wq->tail = 0;
 	srq->rq.max_sge = srq_init_attr->attr.max_sge;
 	srq->limit = srq_init_attr->attr.srq_limit;
 
+	dev->n_srqs_allocated++;
+
 	ret = &srq->ibsrq;
+	goto done;
 
-	dev->n_srqs_allocated++;
+bail_wq:
+	vfree(srq->rq.wq);
 
-bail:
+bail_srq:
+	kfree(srq);
+
+done:
 	return ret;
 }
 
@@ -195,78 +210,123 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		     struct ib_udata *udata)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	unsigned long flags;
-	int ret;
+	int ret = 0;
 
-	if (attr_mask & IB_SRQ_MAX_WR)
-		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-		    (attr->max_sge > srq->rq.max_sge)) {
-			ret = -EINVAL;
-			goto bail;
-		}
+	if (attr_mask & IB_SRQ_MAX_WR) {
+		struct ipath_rwq *owq;
+		struct ipath_rwq *wq;
+		struct ipath_rwqe *p;
+		u32 sz, size, n, head, tail;
 
-	if (attr_mask & IB_SRQ_LIMIT)
-		if (attr->srq_limit >= srq->rq.size) {
+		/* Check that the requested sizes are below the limits. */
+		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+		    ((attr_mask & IB_SRQ_LIMIT) ?
+		     attr->srq_limit : srq->limit) > attr->max_wr) {
 			ret = -EINVAL;
 			goto bail;
 		}
 
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct ipath_rwqe *wq, *p;
-		u32 sz, size, n;
-
 		sz = sizeof(struct ipath_rwqe) +
-			attr->max_sge * sizeof(struct ipath_sge);
+			srq->rq.max_sge * sizeof(struct ib_sge);
 		size = attr->max_wr + 1;
-		wq = vmalloc(size * sz);
+		wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
 		if (!wq) {
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		if (srq->rq.head < srq->rq.tail)
-			n = srq->rq.size + srq->rq.head - srq->rq.tail;
+		/*
+		 * Return the address of the RWQ as the offset to mmap.
+		 * See ipath_mmap() for details.
+		 */
+		if (udata && udata->inlen >= sizeof(__u64)) {
+			__u64 offset_addr;
+			__u64 offset = (__u64) wq;
+
+			ret = ib_copy_from_udata(&offset_addr, udata,
+						 sizeof(offset_addr));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+			udata->outbuf = (void __user *) offset_addr;
+			ret = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+		}
+
+		spin_lock_irq(&srq->rq.lock);
+		/*
+		 * Validate the head and tail index values and compute
+		 * the number of remaining WQEs.
+		 */
+		owq = srq->rq.wq;
+		head = owq->head;
+		if (head >= srq->rq.size)
+			head = 0;
+		tail = owq->tail;
+		if (tail >= srq->rq.size)
+			tail = 0;
+		n = head;
+		if (n < tail)
+			n += srq->rq.size - tail;
 		else
-			n = srq->rq.head - srq->rq.tail;
-		if (size <= n || size <= srq->limit) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			n -= tail;
+		if (size <= n) {
+			spin_unlock_irq(&srq->rq.lock);
 			vfree(wq);
 			ret = -EINVAL;
 			goto bail;
 		}
 		n = 0;
-		p = wq;
-		while (srq->rq.tail != srq->rq.head) {
+		p = wq->wq;
+		while (tail != head) {
 			struct ipath_rwqe *wqe;
 			int i;
 
-			wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+			wqe = get_rwqe_ptr(&srq->rq, tail);
 			p->wr_id = wqe->wr_id;
-			p->length = wqe->length;
 			p->num_sge = wqe->num_sge;
 			for (i = 0; i < wqe->num_sge; i++)
 				p->sg_list[i] = wqe->sg_list[i];
 			n++;
 			p = (struct ipath_rwqe *)((char *) p + sz);
-			if (++srq->rq.tail >= srq->rq.size)
-				srq->rq.tail = 0;
+			if (++tail >= srq->rq.size)
+				tail = 0;
 		}
-		vfree(srq->rq.wq);
 		srq->rq.wq = wq;
 		srq->rq.size = size;
-		srq->rq.head = n;
-		srq->rq.tail = 0;
-		srq->rq.max_sge = attr->max_sge;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-
-	if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		srq->limit = attr->srq_limit;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
+		wq->head = n;
+		wq->tail = 0;
+		if (attr_mask & IB_SRQ_LIMIT)
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
+
+		vfree(owq);
+
+		if (srq->ip) {
+			struct ipath_mmap_info *ip = srq->ip;
+			struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+			ip->obj = wq;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	} else if (attr_mask & IB_SRQ_LIMIT) {
+		spin_lock_irq(&srq->rq.lock);
+		if (attr->srq_limit >= srq->rq.size)
+			ret = -EINVAL;
+		else
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
 	}
-	ret = 0;
 
 bail:
 	return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 3466129af8043..82439fcfc2f85 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -34,7 +34,54 @@
 #include <rdma/ib_smi.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
+
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+		    u32 *lengthp, struct ipath_sge_state *ss)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	*lengthp = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(&dev->lk_table,
+				   j ? &ss->sg_list[j - 1] : &ss->sge,
+				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		*lengthp += wqe->sg_list[i].length;
+		j++;
+	}
+	ss->num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp_num = qp->ibqp.qp_num;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
 
 /**
  * ipath_ud_loopback - handle send on loopback QPs
@@ -46,6 +93,8 @@
  *
  * This is called from ipath_post_ud_send() to forward a WQE addressed
  * to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
  */
 static void ipath_ud_loopback(struct ipath_qp *sqp,
 			      struct ipath_sge_state *ss,
@@ -60,7 +109,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	struct ipath_srq *srq;
 	struct ipath_sge_state rsge;
 	struct ipath_sge *sge;
+	struct ipath_rwq *wq;
 	struct ipath_rwqe *wqe;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	u32 rlen;
 
 	qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
 	if (!qp)
@@ -94,6 +147,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 		wc->imm_data = 0;
 	}
 
+	if (wr->num_sge > 1) {
+		rsge.sg_list = kmalloc((wr->num_sge - 1) *
+					sizeof(struct ipath_sge),
+				       GFP_ATOMIC);
+	} else
+		rsge.sg_list = NULL;
+
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 * Note that it is safe to drop the lock after changing rq->tail
@@ -101,37 +161,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	 */
 	if (qp->ibqp.srq) {
 		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
 		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
 	}
+
 	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto done;
+	wq = rq->wq;
+	tail = wq->tail;
+	while (1) {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			dev->n_pkt_drops++;
+			goto bail_sge;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+		if (init_sge(qp, wqe, &rlen, &rsge))
+			break;
+		wq->tail = tail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc->byte_len > wqe->length) {
+	if (wc->byte_len > rlen) {
 		spin_unlock_irqrestore(&rq->lock, flags);
 		dev->n_pkt_drops++;
-		goto done;
+		goto bail_sge;
 	}
+	wq->tail = tail;
 	wc->wr_id = wqe->wr_id;
-	rsge.sge = wqe->sg_list[0];
-	rsge.sg_list = wqe->sg_list + 1;
-	rsge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
 			struct ib_event ev;
 
@@ -140,12 +215,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 		} else
 			spin_unlock_irqrestore(&rq->lock, flags);
 	} else
 		spin_unlock_irqrestore(&rq->lock, flags);
+
 	ah_attr = &to_iah(wr->wr.ud.ah)->attr;
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -186,7 +261,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	wc->src_qp = sqp->ibqp.qp_num;
 	/* XXX do we know which pkey matched? Only needed for GSI. */
 	wc->pkey_index = 0;
-	wc->slid = ipath_layer_get_lid(dev->dd) |
+	wc->slid = dev->dd->ipath_lid |
 		(ah_attr->src_path_bits &
 		 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
 	wc->sl = ah_attr->sl;
@@ -196,6 +271,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
 		       wr->send_flags & IB_SEND_SOLICITED);
 
+bail_sge:
+	kfree(rsge.sg_list);
 done:
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
@@ -433,13 +510,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	int opcode;
 	u32 hdrsize;
 	u32 pad;
-	unsigned long flags;
 	struct ib_wc wc;
 	u32 qkey;
 	u32 src_qp;
-	struct ipath_rq *rq;
-	struct ipath_srq *srq;
-	struct ipath_rwqe *wqe;
 	u16 dlid;
 	int header_in_data;
 
@@ -547,19 +620,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 
 	/*
 	 * Get the next work request entry to find where to put the data.
-	 * Note that it is safe to drop the lock after changing rq->tail
-	 * since ipath_post_receive() won't fill the empty slot.
 	 */
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		rq = &qp->r_rq;
-	}
-	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (qp->r_reuse_sge)
+		qp->r_reuse_sge = 0;
+	else if (!ipath_get_rwqe(qp, 0)) {
 		/*
 		 * Count VL15 packets dropped due to no receive buffer.
 		 * Otherwise, count them as buffer overruns since usually,
@@ -573,39 +637,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		goto bail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc.byte_len > wqe->length) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (wc.byte_len > qp->r_len) {
+		qp->r_reuse_sge = 1;
 		dev->n_pkt_drops++;
 		goto bail;
 	}
-	wc.wr_id = wqe->wr_id;
-	qp->r_sge.sge = wqe->sg_list[0];
-	qp->r_sge.sg_list = wqe->sg_list + 1;
-	qp->r_sge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
-		u32 n;
-
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
-		else
-			n = rq->head - rq->tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
-		} else
-			spin_unlock_irqrestore(&rq->lock, flags);
-	} else
-		spin_unlock_irqrestore(&rq->lock, flags);
 	if (has_grh) {
 		ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
 			       sizeof(struct ib_grh));
@@ -614,6 +650,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
 	ipath_copy_sge(&qp->r_sge, data,
 		       wc.byte_len - sizeof(struct ib_grh));
+	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;
 	wc.vendor_err = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index d70a9b6b52397..a2b4c70192d88 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -277,11 +277,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 			      struct ib_recv_wr **bad_wr)
 {
 	struct ipath_qp *qp = to_iqp(ibqp);
+	struct ipath_rwq *wq = qp->r_rq.wq;
 	unsigned long flags;
 	int ret;
 
 	/* Check that state is OK to post receive. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
 		*bad_wr = wr;
 		ret = -EINVAL;
 		goto bail;
@@ -290,59 +291,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > qp->r_rq.max_sge) {
+		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = qp->r_rq.head + 1;
+		next = wq->head + 1;
 		if (next >= qp->r_rq.size)
 			next = 0;
-		if (next == qp->r_rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
+		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(qp->ibqp.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(
-				    &to_idev(qp->ibqp.device)->lk_table,
-				    &wqe->sg_list[j], &wr->sg_list[i],
-				    IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		qp->r_rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 	}
 	ret = 0;
@@ -1137,6 +1110,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	dev->attach_mcast = ipath_multicast_attach;
 	dev->detach_mcast = ipath_multicast_detach;
 	dev->process_mad = ipath_process_mad;
+	dev->mmap = ipath_mmap;
 
 	snprintf(dev->node_desc, sizeof(dev->node_desc),
 		 IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 698396778f001..7d2ba72609f7a 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,6 +38,7 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <rdma/ib_pack.h>
 
 #include "ipath_layer.h"
@@ -50,7 +51,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IPATH_UVERBS_ABI_VERSION       1
+#define IPATH_UVERBS_ABI_VERSION       2
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -178,58 +179,41 @@ struct ipath_ah {
 };
 
 /*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table.  Each
- * struct ipath_cq/qp also has its own lock.  An individual qp lock
- * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed.  Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- *   indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
  */
+struct ipath_mmap_info {
+	struct ipath_mmap_info *next;
+	struct ib_ucontext *context;
+	void *obj;
+	struct kref ref;
+	unsigned size;
+	unsigned mmap_cnt;
+};
 
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct ipath_cq_wc {
+	u32 head;		/* index of next entry to fill */
+	u32 tail;		/* index of next ib_poll_cq() entry */
+	struct ib_wc queue[1];	/* this is actually size ibcq.cqe + 1 */
+};
+
+/*
+ * The completion queue structure.
+ */
 struct ipath_cq {
 	struct ib_cq ibcq;
 	struct tasklet_struct comptask;
 	spinlock_t lock;
 	u8 notify;
 	u8 triggered;
-	u32 head;		/* new records added to the head */
-	u32 tail;		/* poll_cq() reads from here. */
-	struct ib_wc *queue;	/* this is actually ibcq.cqe + 1 */
+	struct ipath_cq_wc *queue;
+	struct ipath_mmap_info *ip;
 };
 
 /*
@@ -248,28 +232,40 @@ struct ipath_swqe {
 
 /*
  * Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
  */
 struct ipath_rwqe {
 	u64 wr_id;
-	u32 length;		/* total length of data in sg_list */
 	u8 num_sge;
-	struct ipath_sge sg_list[0];
+	struct ib_sge sg_list[0];
 };
 
-struct ipath_rq {
-	spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
 	u32 head;		/* new work requests posted to the head */
 	u32 tail;		/* receives pull requests from here. */
+	struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+	struct ipath_rwq *wq;
+	spinlock_t lock;
 	u32 size;		/* size of RWQE array */
 	u8 max_sge;
-	struct ipath_rwqe *wq;	/* RWQE array */
 };
 
 struct ipath_srq {
 	struct ib_srq ibsrq;
 	struct ipath_rq rq;
+	struct ipath_mmap_info *ip;
 	/* send signal when number of RWQEs < limit */
 	u32 limit;
 };
@@ -293,6 +289,7 @@ struct ipath_qp {
 	atomic_t refcount;
 	wait_queue_head_t wait;
 	struct tasklet_struct s_task;
+	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
 	struct ipath_sge_state s_sge;	/* current send request data */
 	/* current RDMA read send data */
@@ -345,7 +342,8 @@ struct ipath_qp {
 	u32 s_ssn;		/* SSN of tail entry */
 	u32 s_lsn;		/* limit sequence number (credit) */
 	struct ipath_swqe *s_wq;	/* send work queue */
-	struct ipath_rq r_rq;	/* receive work queue */
+	struct ipath_rq r_rq;		/* receive work queue */
+	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
 /*
@@ -369,15 +367,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
 
 /*
  * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq.  This function does the array index computation.
+ * struct ipath_rwq.wq.  This function does the array index computation.
  */
 static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
 					      unsigned n)
 {
 	return (struct ipath_rwqe *)
-		((char *) rq->wq +
+		((char *) rq->wq->wq +
 		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ipath_sge)) * n);
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
 }
 
 /*
@@ -417,6 +415,7 @@ struct ipath_ibdev {
 	struct ib_device ibdev;
 	struct list_head dev_list;
 	struct ipath_devdata *dd;
+	struct ipath_mmap_info *pending_mmaps;
 	int ib_unit;		/* This is the device number */
 	u16 sm_lid;		/* in host order */
 	u8 sm_sl;
@@ -681,6 +680,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
 
 int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
 
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
 void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
-- 
GitLab


From eb9dc6f48dc7537ce53163109625bd992150e0cf Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:26 -0700
Subject: [PATCH 0741/1063] IB/ipath: More changes to support InfiniPath on
 PowerPC 970 systems

Ordering of writethrough store buffers needs to be forced, and we need
to use ifdef to get writethrough behavior for InfiniPath buffers, because
there is no generic way to specify that at this time (similar to code
in char/drm/drm_vm.c and block/z2ram.c).

Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/Makefile         |  1 +
 drivers/infiniband/hw/ipath/ipath_driver.c   |  6 +++
 drivers/infiniband/hw/ipath/ipath_file_ops.c |  7 +++
 drivers/infiniband/hw/ipath/ipath_wc_ppc64.c | 52 ++++++++++++++++++++
 4 files changed, 66 insertions(+)
 create mode 100644 drivers/infiniband/hw/ipath/ipath_wc_ppc64.c

diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index 6bb43474d1047..075e313941fe2 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -20,6 +20,7 @@ ipath_core-y := \
 	ipath_user_pages.o
 
 ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ipath_core-$(CONFIG_PPC64) += ipath_wc_ppc64.o
 
 ib_ipath-y := \
 	ipath_cq.o \
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index f98518d912b54..6ded914f9eb93 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -440,7 +440,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	}
 	dd->ipath_pcirev = rev;
 
+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	dd->ipath_kregbase = __ioremap(addr, len,
+		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
 	dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif
 
 	if (!dd->ipath_kregbase) {
 		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index bbaa70e57db1e..0b6e7679eefd5 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -985,6 +985,13 @@ static int mmap_piobufs(struct vm_area_struct *vma,
 	 * write combining behavior we want on the PIO buffers!
 	 */
 
+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
+
 	if (vma->vm_flags & VM_READ) {
 		dev_info(&dd->pcidev->dev,
 			 "Can't map piobufs as readable (flags=%lx)\n",
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 0000000000000..036fde662aa9b
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only.  Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
+ *
+ * PowerPC systems (at least those in the 970 processor family)
+ * write partially filled store buffers in address order, but will write
+ * completely filled store buffers in "random" order, and therefore must
+ * have serialization for correctness with current InfiniPath chips.
+ *
+ */
+int ipath_unordered_wc(void)
+{
+	return 1;
+}
-- 
GitLab


From c27fef26271d352b5546c33239edeb0dcb4fc0cc Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:27 -0700
Subject: [PATCH 0742/1063] IB/ipath: lock resource limit counters correctly

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_verbs.c     | 34 ++++++++++++++-----
 drivers/infiniband/hw/ipath/ipath_verbs.h     |  7 ++++
 .../infiniband/hw/ipath/ipath_verbs_mcast.c   |  5 +++
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index a2b4c70192d88..5b8ee65c6cd34 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -776,18 +776,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
 	 * we allow allocations of more than we report for this value.
 	 */
 
-	if (dev->n_pds_allocated == ib_ipath_max_pds) {
+	pd = kmalloc(sizeof *pd, GFP_KERNEL);
+	if (!pd) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
-	pd = kmalloc(sizeof *pd, GFP_KERNEL);
-	if (!pd) {
+	spin_lock(&dev->n_pds_lock);
+	if (dev->n_pds_allocated == ib_ipath_max_pds) {
+		spin_unlock(&dev->n_pds_lock);
+		kfree(pd);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
 	dev->n_pds_allocated++;
+	spin_unlock(&dev->n_pds_lock);
 
 	/* ib_alloc_pd() will initialize pd->ibpd. */
 	pd->user = udata != NULL;
@@ -803,7 +807,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd)
 	struct ipath_pd *pd = to_ipd(ibpd);
 	struct ipath_ibdev *dev = to_idev(ibpd->device);
 
+	spin_lock(&dev->n_pds_lock);
 	dev->n_pds_allocated--;
+	spin_unlock(&dev->n_pds_lock);
 
 	kfree(pd);
 
@@ -824,11 +830,6 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
 	struct ib_ah *ret;
 	struct ipath_ibdev *dev = to_idev(pd->device);
 
-	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
 	/* A multicast address requires a GRH (see ch. 8.4.1). */
 	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
 	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
@@ -854,7 +855,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
 		goto bail;
 	}
 
+	spin_lock(&dev->n_ahs_lock);
+	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
+		spin_unlock(&dev->n_ahs_lock);
+		kfree(ah);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
 	dev->n_ahs_allocated++;
+	spin_unlock(&dev->n_ahs_lock);
 
 	/* ib_create_ah() will initialize ah->ibah. */
 	ah->attr = *ah_attr;
@@ -876,7 +886,9 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
 	struct ipath_ibdev *dev = to_idev(ibah->device);
 	struct ipath_ah *ah = to_iah(ibah);
 
+	spin_lock(&dev->n_ahs_lock);
 	dev->n_ahs_allocated--;
+	spin_unlock(&dev->n_ahs_lock);
 
 	kfree(ah);
 
@@ -963,6 +975,12 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	dev = &idev->ibdev;
 
 	/* Only need to initialize non-zero fields. */
+	spin_lock_init(&idev->n_pds_lock);
+	spin_lock_init(&idev->n_ahs_lock);
+	spin_lock_init(&idev->n_cqs_lock);
+	spin_lock_init(&idev->n_srqs_lock);
+	spin_lock_init(&idev->n_mcast_grps_lock);
+
 	spin_lock_init(&idev->qp_table.lock);
 	spin_lock_init(&idev->lk_table.lock);
 	idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 7d2ba72609f7a..a9baa91014326 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -434,11 +434,18 @@ struct ipath_ibdev {
 	__be64 sys_image_guid;	/* in network order */
 	__be64 gid_prefix;	/* in network order */
 	__be64 mkey;
+
 	u32 n_pds_allocated;	/* number of PDs allocated for device */
+	spinlock_t n_pds_lock;
 	u32 n_ahs_allocated;	/* number of AHs allocated for device */
+	spinlock_t n_ahs_lock;
 	u32 n_cqs_allocated;	/* number of CQs allocated for device */
+	spinlock_t n_cqs_lock;
 	u32 n_srqs_allocated;	/* number of SRQs allocated for device */
+	spinlock_t n_srqs_lock;
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+	spinlock_t n_mcast_grps_lock;
+
 	u64 ipath_sword;	/* total dwords sent (sample result) */
 	u64 ipath_rword;	/* total dwords received (sample result) */
 	u64 ipath_spkts;	/* total packets sent (sample result) */
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index ee0e1d96d7234..cb35679e4a185 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -207,12 +207,15 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
 		goto bail;
 	}
 
+	spin_lock(&dev->n_mcast_grps_lock);
 	if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
+		spin_unlock(&dev->n_mcast_grps_lock);
 		ret = ENOMEM;
 		goto bail;
 	}
 
 	dev->n_mcast_grps_allocated++;
+	spin_unlock(&dev->n_mcast_grps_lock);
 
 	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
 
@@ -343,7 +346,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		atomic_dec(&mcast->refcount);
 		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
 		ipath_mcast_free(mcast);
+		spin_lock(&dev->n_mcast_grps_lock);
 		dev->n_mcast_grps_allocated--;
+		spin_unlock(&dev->n_mcast_grps_lock);
 	}
 
 	ret = 0;
-- 
GitLab


From 8e280d94e29af67035637fb957daba7ae0d23583 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:28 -0700
Subject: [PATCH 0743/1063] IB/ipath: fix for crash on module unload, if
 cfgports < portcnt

Allocate enough pointers for all possible ports, to avoid problems in
cleanup/unload.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_init_chip.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 414cdd1d80a6f..c63de8f0fa9ec 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -240,7 +240,11 @@ static int init_chip_first(struct ipath_devdata *dd,
 			  "only supports %u\n", ipath_cfgports,
 			  dd->ipath_portcnt);
 	}
-	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+	/*
+	 * Allocate full portcnt array, rather than just cfgports, because
+	 * cleanup iterates across all possible ports.
+	 */
+	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
 			       GFP_KERNEL);
 
 	if (!dd->ipath_pd) {
-- 
GitLab


From ba11203a11835737df980ef3dd3bd8325b9cc94e Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:29 -0700
Subject: [PATCH 0744/1063] IB/ipath: fix handling of kpiobufs

Change the comment so that it no longer implies the user can set
ipath_kpiobufs to zero.  Actually set ipath_kpiobufs from the module
parameter; previously the setter only altered the per-device
ipath_lastport_piobuf, which was overwritten during chip init.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_init_chip.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index c63de8f0fa9ec..75c3721367024 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -691,7 +691,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
 	if (ipath_kpiobufs == 0) {
-		/* not set by user, or set explictly to default  */
+		/* not set by user (this is default) */
 		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
 			kpiobufs = 32;
 		else
@@ -950,6 +950,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
 			dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
 	}
 
+	ipath_kpiobufs = val;
 	ret = 0;
 bail:
 	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-- 
GitLab


From 367fe711c5dc85dbc3265cf01e34d4d6fbd55f06 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:30 -0700
Subject: [PATCH 0745/1063] IB/ipath: drop requirement that PIO buffers be
 mmaped write-only

Some userlands try to mmap these pages read-write, so accommodate them.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_file_ops.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 0b6e7679eefd5..e999a46bef9be 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -992,15 +992,10 @@ static int mmap_piobufs(struct vm_area_struct *vma,
 	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
 #endif
 
-	if (vma->vm_flags & VM_READ) {
-		dev_info(&dd->pcidev->dev,
-			 "Can't map piobufs as readable (flags=%lx)\n",
-			 vma->vm_flags);
-		ret = -EPERM;
-		goto bail;
-	}
-
-	/* don't allow them to later change to readable with mprotect */
+	/*
+	 * don't allow them to later change to readable with mprotect (for when
+	 * not initially mapped readable, as is normally the case)
+	 */
 	vma->vm_flags &= ~VM_MAYREAD;
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 
-- 
GitLab


From b1c1b6a30eac88665a35a207cc5e6233090b9d65 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:31 -0700
Subject: [PATCH 0746/1063] IB/ipath: merge ipath_core and ib_ipath drivers

There is little point in keeping the two drivers separate, so we are
merging them.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/Makefile                |   2 +-
 drivers/infiniband/hw/ipath/Kconfig        |  21 +-
 drivers/infiniband/hw/ipath/Makefile       |  27 ++-
 drivers/infiniband/hw/ipath/ipath_driver.c |  13 +-
 drivers/infiniband/hw/ipath/ipath_intr.c   |   3 +-
 drivers/infiniband/hw/ipath/ipath_kernel.h |  11 +-
 drivers/infiniband/hw/ipath/ipath_layer.c  | 214 +--------------------
 drivers/infiniband/hw/ipath/ipath_layer.h  |   8 -
 drivers/infiniband/hw/ipath/ipath_verbs.c  |  45 ++---
 drivers/infiniband/hw/ipath/ipath_verbs.h  |  10 +
 10 files changed, 62 insertions(+), 292 deletions(-)

diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index 893bee0a50b57..08cff32d900ea 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
-obj-$(CONFIG_IPATH_CORE)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
 obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 1db9489f1e82b..574a678e7fdd0 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,16 +1,9 @@
-config IPATH_CORE
-	tristate "QLogic InfiniPath Driver"
-	depends on 64BIT && PCI_MSI && NET
-	---help---
-	This is a low-level driver for QLogic InfiniPath host channel
-	adapters (HCAs) based on the HT-400 and PE-800 chips.
-
 config INFINIBAND_IPATH
-	tristate "QLogic InfiniPath Verbs Driver"
-	depends on IPATH_CORE && INFINIBAND
+	tristate "QLogic InfiniPath Driver"
+	depends on PCI_MSI && 64BIT && INFINIBAND
 	---help---
-	This is a driver that provides InfiniBand verbs support for
-	QLogic InfiniPath host channel adapters (HCAs).  This
-	allows these devices to be used with both kernel upper level
-	protocols such as IP-over-InfiniBand as well as with userspace
-	applications (in conjunction with InfiniBand userspace access).
+	This is a driver for QLogic InfiniPath host channel adapters,
+	including InfiniBand verbs support.  This driver allows these
+	devices to be used with both kernel upper level protocols such
+	as IP-over-InfiniBand as well as with userspace applications
+	(in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index 075e313941fe2..690dc713e63ef 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,10 +1,10 @@
 EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
 	-DIPATH_KERN_TYPE=0
 
-obj-$(CONFIG_IPATH_CORE) += ipath_core.o
 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
 
-ipath_core-y := \
+ib_ipath-y := \
+	ipath_cq.o \
 	ipath_diag.o \
 	ipath_driver.o \
 	ipath_eeprom.o \
@@ -13,26 +13,23 @@ ipath_core-y := \
 	ipath_ht400.o \
 	ipath_init_chip.o \
 	ipath_intr.o \
-	ipath_layer.o \
-	ipath_pe800.o \
-	ipath_stats.o \
-	ipath_sysfs.o \
-	ipath_user_pages.o
-
-ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-ipath_core-$(CONFIG_PPC64) += ipath_wc_ppc64.o
-
-ib_ipath-y := \
-	ipath_cq.o \
 	ipath_keys.o \
+	ipath_layer.o \
 	ipath_mad.o \
 	ipath_mmap.o \
 	ipath_mr.o \
+	ipath_pe800.o \
 	ipath_qp.o \
 	ipath_rc.o \
 	ipath_ruc.o \
 	ipath_srq.o \
+	ipath_stats.o \
+	ipath_sysfs.o \
 	ipath_uc.o \
 	ipath_ud.o \
-	ipath_verbs.o \
-	ipath_verbs_mcast.o
+	ipath_user_pages.o \
+	ipath_verbs_mcast.o \
+	ipath_verbs.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 6ded914f9eb93..9af7406d6a623 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -40,6 +40,7 @@
 
 #include "ipath_kernel.h"
 #include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -51,8 +52,6 @@ const char *ipath_get_unit_name(int unit)
 	return iname;
 }
 
-EXPORT_SYMBOL_GPL(ipath_get_unit_name);
-
 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
 #define PFX IPATH_DRV_NAME ": "
 
@@ -510,6 +509,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	ipath_user_add(dd);
 	ipath_diag_add(dd);
 	ipath_layer_add(dd);
+	ipath_register_ib_device(dd);
 
 	goto bail;
 
@@ -538,6 +538,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
 		return;
 
 	dd = pci_get_drvdata(pdev);
+	ipath_unregister_ib_device(dd->verbs_dev);
 	ipath_layer_remove(dd);
 	ipath_diag_remove(dd);
 	ipath_user_remove(dd);
@@ -978,12 +979,8 @@ void ipath_kreceive(struct ipath_devdata *dd)
 		if (unlikely(eflags))
 			ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-				int ret = __ipath_verbs_rcv(dd, rc + 1,
-							    ebuf, tlen);
-				if (ret == -ENODEV)
-					ipath_cdbg(VERBOSE,
-						   "received IB packet, "
-						   "not SMA (QP=%x)\n", qp);
+				ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf,
+					     tlen);
 				if (dd->ipath_lli_counter)
 					dd->ipath_lli_counter--;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 280e732660a19..ed54f8f2945eb 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -35,6 +35,7 @@
 
 #include "ipath_kernel.h"
 #include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* These are all rcv-related errors which we want to count for stats */
@@ -712,7 +713,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
 	if (ret > 0)
 		goto set;
 
-	ret = __ipath_verbs_piobufavail(dd);
+	ret = ipath_ib_piobufavail(dd->verbs_dev);
 	if (ret > 0)
 		goto set;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e9f374fb641ef..f1931105adb37 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -132,12 +132,6 @@ struct _ipath_layer {
 	void *l_arg;
 };
 
-/* Verbs layer interface */
-struct _verbs_layer {
-	void *l_arg;
-	struct timer_list l_timer;
-};
-
 struct ipath_devdata {
 	struct list_head ipath_list;
 
@@ -198,7 +192,8 @@ struct ipath_devdata {
 	void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
 	/* fill out chip-specific fields */
 	int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-	struct _verbs_layer verbs_layer;
+	struct ipath_ibdev *verbs_dev;
+	struct timer_list verbs_timer;
 	/* total dwords sent (summed from counter) */
 	u64 ipath_sword;
 	/* total dwords rcvd (summed from counter) */
@@ -529,8 +524,6 @@ extern int ipath_layer_intr(struct ipath_devdata *, u32);
 extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
 			     struct sk_buff *);
 extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
-extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
 
 void ipath_layer_add(struct ipath_devdata *);
 void ipath_layer_remove(struct ipath_devdata *);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index b28c6f81c7312..acc32200cc0e8 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -42,26 +42,20 @@
 
 #include "ipath_kernel.h"
 #include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* Acquire before ipath_devs_lock. */
 static DEFINE_MUTEX(ipath_layer_mutex);
 
-static int ipath_verbs_registered;
-
 u16 ipath_layer_rcv_opcode;
 
 static int (*layer_intr)(void *, u32);
 static int (*layer_rcv)(void *, void *, struct sk_buff *);
 static int (*layer_rcv_lid)(void *, void *);
-static int (*verbs_piobufavail)(void *);
-static void (*verbs_rcv)(void *, void *, void *, u32);
 
 static void *(*layer_add_one)(int, struct ipath_devdata *);
 static void (*layer_remove_one)(void *);
-static void *(*verbs_add_one)(int, struct ipath_devdata *);
-static void (*verbs_remove_one)(void *);
-static void (*verbs_timer_cb)(void *);
 
 int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
 {
@@ -107,29 +101,6 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
 	return ret;
 }
 
-int __ipath_verbs_piobufavail(struct ipath_devdata *dd)
-{
-	int ret = -ENODEV;
-
-	if (dd->verbs_layer.l_arg && verbs_piobufavail)
-		ret = verbs_piobufavail(dd->verbs_layer.l_arg);
-
-	return ret;
-}
-
-int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
-		      u32 tlen)
-{
-	int ret = -ENODEV;
-
-	if (dd->verbs_layer.l_arg && verbs_rcv) {
-		verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
-		ret = 0;
-	}
-
-	return ret;
-}
-
 int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
 {
 	u32 lstate;
@@ -212,8 +183,6 @@ int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
 	return ret;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
-
 /**
  * ipath_layer_set_mtu - set the MTU
  * @dd: the infinipath device
@@ -298,8 +267,6 @@ int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
 	return ret;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
-
 int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
 {
 	dd->ipath_lid = arg;
@@ -315,8 +282,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_set_lid);
-
 int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
 {
 	/* XXX - need to inform anyone who cares this just happened. */
@@ -324,85 +289,56 @@ int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
-
 __be64 ipath_layer_get_guid(struct ipath_devdata *dd)
 {
 	return dd->ipath_guid;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
-
-u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
-{
-	return dd->ipath_nguid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
-
 u32 ipath_layer_get_majrev(struct ipath_devdata *dd)
 {
 	return dd->ipath_majrev;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_majrev);
-
 u32 ipath_layer_get_minrev(struct ipath_devdata *dd)
 {
 	return dd->ipath_minrev;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_minrev);
-
 u32 ipath_layer_get_pcirev(struct ipath_devdata *dd)
 {
 	return dd->ipath_pcirev;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev);
-
 u32 ipath_layer_get_flags(struct ipath_devdata *dd)
 {
 	return dd->ipath_flags;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
-
 struct device *ipath_layer_get_device(struct ipath_devdata *dd)
 {
 	return &dd->pcidev->dev;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_device);
-
 u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
 {
 	return dd->ipath_deviceid;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
-
 u32 ipath_layer_get_vendorid(struct ipath_devdata *dd)
 {
 	return dd->ipath_vendorid;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid);
-
 u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
 {
 	return dd->ipath_lastibcstat;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
-
 u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
 {
 	return dd->ipath_ibmtu;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
-
 void ipath_layer_add(struct ipath_devdata *dd)
 {
 	mutex_lock(&ipath_layer_mutex);
@@ -411,10 +347,6 @@ void ipath_layer_add(struct ipath_devdata *dd)
 		dd->ipath_layer.l_arg =
 			layer_add_one(dd->ipath_unit, dd);
 
-	if (verbs_add_one)
-		dd->verbs_layer.l_arg =
-			verbs_add_one(dd->ipath_unit, dd);
-
 	mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -427,11 +359,6 @@ void ipath_layer_remove(struct ipath_devdata *dd)
 		dd->ipath_layer.l_arg = NULL;
 	}
 
-	if (dd->verbs_layer.l_arg && verbs_remove_one) {
-		verbs_remove_one(dd->verbs_layer.l_arg);
-		dd->verbs_layer.l_arg = NULL;
-	}
-
 	mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -521,95 +448,10 @@ static void __ipath_verbs_timer(unsigned long arg)
 		ipath_kreceive(dd);
 
 	/* Handle verbs layer timeouts. */
-	if (dd->verbs_layer.l_arg && verbs_timer_cb)
-		verbs_timer_cb(dd->verbs_layer.l_arg);
-
-	mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
-}
-
-/**
- * ipath_verbs_register - verbs layer registration
- * @l_piobufavail: callback for when PIO buffers become available
- * @l_rcv: callback for receiving a packet
- * @l_timer_cb: timer callback
- * @ipath_devdata: device data structure is put here
- */
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-			 void (*l_remove)(void *arg),
-			 int (*l_piobufavail) (void *arg),
-			 void (*l_rcv) (void *arg, void *rhdr,
-					void *data, u32 tlen),
-			 void (*l_timer_cb) (void *arg))
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	mutex_lock(&ipath_layer_mutex);
-
-	verbs_add_one = l_add;
-	verbs_remove_one = l_remove;
-	verbs_piobufavail = l_piobufavail;
-	verbs_rcv = l_rcv;
-	verbs_timer_cb = l_timer_cb;
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		if (!(dd->ipath_flags & IPATH_INITTED))
-			continue;
-
-		if (dd->verbs_layer.l_arg)
-			continue;
-
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-		dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	mutex_unlock(&ipath_layer_mutex);
-
-	ipath_verbs_registered = 1;
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_register);
-
-void ipath_verbs_unregister(void)
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	mutex_lock(&ipath_layer_mutex);
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		*dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-
-		if (dd->verbs_layer.l_arg && verbs_remove_one) {
-			spin_unlock_irqrestore(&ipath_devs_lock, flags);
-			verbs_remove_one(dd->verbs_layer.l_arg);
-			spin_lock_irqsave(&ipath_devs_lock, flags);
-			dd->verbs_layer.l_arg = NULL;
-		}
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	verbs_add_one = NULL;
-	verbs_remove_one = NULL;
-	verbs_piobufavail = NULL;
-	verbs_rcv = NULL;
-	verbs_timer_cb = NULL;
-
-	ipath_verbs_registered = 0;
-
-	mutex_unlock(&ipath_layer_mutex);
+	ipath_ib_timer(dd->verbs_dev);
+	mod_timer(&dd->verbs_timer, jiffies + 1);
 }
 
-EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
-
 int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
 {
 	int ret;
@@ -703,8 +545,6 @@ u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
 	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
-
 static void update_sge(struct ipath_sge_state *ss, u32 length)
 {
 	struct ipath_sge *sge = &ss->sge;
@@ -981,8 +821,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
 	return ret;
 }
 
-EXPORT_SYMBOL_GPL(ipath_verbs_send);
-
 int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
 				  u64 *rwords, u64 *spkts, u64 *rpkts,
 				  u64 *xmit_wait)
@@ -1007,8 +845,6 @@ int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
 	return ret;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
-
 /**
  * ipath_layer_get_counters - get various chip counters
  * @dd: the infinipath device
@@ -1069,8 +905,6 @@ int ipath_layer_get_counters(struct ipath_devdata *dd,
 	return ret;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
-
 int ipath_layer_want_buffer(struct ipath_devdata *dd)
 {
 	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
@@ -1080,8 +914,6 @@ int ipath_layer_want_buffer(struct ipath_devdata *dd)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
-
 int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
 {
 	int ret = 0;
@@ -1174,30 +1006,26 @@ int ipath_layer_enable_timer(struct ipath_devdata *dd)
 				 (u64) (1 << 2));
 	}
 
-	init_timer(&dd->verbs_layer.l_timer);
-	dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
-	dd->verbs_layer.l_timer.data = (unsigned long)dd;
-	dd->verbs_layer.l_timer.expires = jiffies + 1;
-	add_timer(&dd->verbs_layer.l_timer);
+	init_timer(&dd->verbs_timer);
+	dd->verbs_timer.function = __ipath_verbs_timer;
+	dd->verbs_timer.data = (unsigned long)dd;
+	dd->verbs_timer.expires = jiffies + 1;
+	add_timer(&dd->verbs_timer);
 
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
-
 int ipath_layer_disable_timer(struct ipath_devdata *dd)
 {
 	/* Disable GPIO bit 2 interrupt */
 	if (dd->ipath_flags & IPATH_GPIO_INTR)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
 
-	del_timer_sync(&dd->verbs_layer.l_timer);
+	del_timer_sync(&dd->verbs_timer);
 
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
-
 /**
  * ipath_layer_set_verbs_flags - set the verbs layer flags
  * @dd: the infinipath device
@@ -1225,8 +1053,6 @@ int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
-
 /**
  * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
  * @dd: the infinipath device
@@ -1236,8 +1062,6 @@ unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
 	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
-
 /**
  * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
  * @dd: the infinipath device
@@ -1255,8 +1079,6 @@ unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
 	return ret;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
-
 /**
  * ipath_layer_get_pkeys - return the PKEY table for port 0
  * @dd: the infinipath device
@@ -1271,8 +1093,6 @@ int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
-
 /**
  * rm_pkey - decrecment the reference count for the given PKEY
  * @dd: the infinipath device
@@ -1419,8 +1239,6 @@ int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
-
 /**
  * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
  * @dd: the infinipath device
@@ -1432,8 +1250,6 @@ int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
 	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
-
 /**
  * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
  * @dd: the infinipath device
@@ -1453,8 +1269,6 @@ int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
-
 int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
 {
 	return (dd->ipath_ibcctrl >>
@@ -1462,8 +1276,6 @@ int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
 		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
-
 /**
  * ipath_layer_set_phyerrthreshold - set the physical error threshold
  * @dd: the infinipath device
@@ -1489,8 +1301,6 @@ int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
-
 int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
 {
 	return (dd->ipath_ibcctrl >>
@@ -1498,8 +1308,6 @@ int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
 		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
-
 /**
  * ipath_layer_set_overrunthreshold - set the overrun threshold
  * @dd: the infinipath device
@@ -1525,17 +1333,13 @@ int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
-
 int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
 			      size_t namelen)
 {
 	return dd->ipath_f_get_boardname(dd, name, namelen);
 }
-EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
 
 u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
 {
 	return dd->ipath_rcvhdrentsize;
 }
-EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 71485096fcaca..57c990a5715fc 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -114,14 +114,7 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 				      struct sk_buff *),
 			 u16 rcv_opcode,
 			 int (*l_rcv_lid)(void *, void *));
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-			 void (*l_remove)(void *arg),
-			 int (*l_piobufavail)(void *arg),
-			 void (*l_rcv)(void *arg, void *rhdr,
-				       void *data, u32 tlen),
-			 void (*l_timer_cb)(void *arg));
 void ipath_layer_unregister(void);
-void ipath_verbs_unregister(void);
 int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
 u16 ipath_layer_get_lid(struct ipath_devdata *dd);
 int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
@@ -145,7 +138,6 @@ int ipath_layer_get_counters(struct ipath_devdata *dd,
 int ipath_layer_want_buffer(struct ipath_devdata *dd);
 int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
 __be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_nguid(struct ipath_devdata *);
 u32 ipath_layer_get_majrev(struct ipath_devdata *);
 u32 ipath_layer_get_minrev(struct ipath_devdata *);
 u32 ipath_layer_get_pcirev(struct ipath_devdata *);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 5b8ee65c6cd34..15edec9227e43 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -368,7 +368,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
 }
 
 /**
- * ipath_ib_rcv - process and incoming packet
+ * ipath_ib_rcv - process an incoming packet
  * @arg: the device pointer
  * @rhdr: the header of the packet
  * @data: the packet data
@@ -377,9 +377,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
  * This is called from ipath_kreceive() to process an incoming packet at
  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
  */
-static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
+void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
+		  u32 tlen)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_ib_header *hdr = rhdr;
 	struct ipath_other_headers *ohdr;
 	struct ipath_qp *qp;
@@ -468,9 +468,8 @@ bail:;
  * This is called from ipath_do_rcv_timer() at interrupt level to check for
  * QPs which need retransmits and to collect performance numbers.
  */
-static void ipath_ib_timer(void *arg)
+void ipath_ib_timer(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_qp *resend = NULL;
 	struct list_head *last;
 	struct ipath_qp *qp;
@@ -564,9 +563,8 @@ static void ipath_ib_timer(void *arg)
  * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
  * return zero).
  */
-static int ipath_ib_piobufavail(void *arg)
+int ipath_ib_piobufavail(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_qp *qp;
 	unsigned long flags;
 
@@ -957,11 +955,10 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev);
 
 /**
  * ipath_register_ib_device - register our device with the infiniband core
- * @unit: the device number to register
  * @dd: the device data structure
  * Return the allocated ipath_ibdev pointer or NULL on error.
  */
-static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
+int ipath_register_ib_device(struct ipath_devdata *dd)
 {
 	struct ipath_layer_counters cntrs;
 	struct ipath_ibdev *idev;
@@ -969,8 +966,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	int ret;
 
 	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-	if (idev == NULL)
+	if (idev == NULL) {
+		ret = -ENOMEM;
 		goto bail;
+	}
 
 	dev = &idev->ibdev;
 
@@ -1047,7 +1046,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 	if (!sys_image_guid)
 		sys_image_guid = ipath_layer_get_guid(dd);
 	idev->sys_image_guid = sys_image_guid;
-	idev->ib_unit = unit;
+	idev->ib_unit = dd->ipath_unit;
 	idev->dd = dd;
 
 	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
@@ -1153,16 +1152,16 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 err_qp:
 	ib_dealloc_device(dev);
 	_VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
-		     unit, -ret);
+		     dd->ipath_unit, -ret);
 	idev = NULL;
 
 bail:
-	return idev;
+	dd->verbs_dev = idev;
+	return ret;
 }
 
-static void ipath_unregister_ib_device(void *arg)
+void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ib_device *ibdev = &dev->ibdev;
 
 	ipath_layer_disable_timer(dev->dd);
@@ -1193,19 +1192,6 @@ static void ipath_unregister_ib_device(void *arg)
 	ib_dealloc_device(ibdev);
 }
 
-static int __init ipath_verbs_init(void)
-{
-	return ipath_verbs_register(ipath_register_ib_device,
-				    ipath_unregister_ib_device,
-				    ipath_ib_piobufavail, ipath_ib_rcv,
-				    ipath_ib_timer);
-}
-
-static void __exit ipath_verbs_cleanup(void)
-{
-	ipath_verbs_unregister();
-}
-
 static ssize_t show_rev(struct class_device *cdev, char *buf)
 {
 	struct ipath_ibdev *dev =
@@ -1297,6 +1283,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
 bail:
 	return ret;
 }
-
-module_init(ipath_verbs_init);
-module_exit(ipath_verbs_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index a9baa91014326..d6faa4ba6067b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -711,6 +711,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 		      u32 pmtu, u32 *bth0p, u32 *bth2p);
 
+int ipath_register_ib_device(struct ipath_devdata *);
+
+void ipath_unregister_ib_device(struct ipath_ibdev *);
+
+void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
+
+int ipath_ib_piobufavail(struct ipath_ibdev *);
+
+void ipath_ib_timer(struct ipath_ibdev *);
+
 extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
 
 extern const u8 ipath_cvt_physportstate[];
-- 
GitLab


From 34b2aafea38efdf02cd8107a6e1057e2a297c447 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:32 -0700
Subject: [PATCH 0747/1063] IB/ipath: simplify layering code

A lot of ipath layer code was only called in one place. Now that the
ipath_core and ib_ipath drivers are merged, it's more sensible to simply
inline the simple stuff that the layer code was doing.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_diag.c     |   1 -
 drivers/infiniband/hw/ipath/ipath_driver.c   | 275 ++++--
 drivers/infiniband/hw/ipath/ipath_file_ops.c |   1 -
 drivers/infiniband/hw/ipath/ipath_intr.c     |   7 -
 drivers/infiniband/hw/ipath/ipath_kernel.h   |  15 +-
 drivers/infiniband/hw/ipath/ipath_layer.c    | 978 +------------------
 drivers/infiniband/hw/ipath/ipath_layer.h    | 104 --
 drivers/infiniband/hw/ipath/ipath_mad.c      | 339 ++++++-
 drivers/infiniband/hw/ipath/ipath_mr.c       |  12 +
 drivers/infiniband/hw/ipath/ipath_qp.c       |  34 +-
 drivers/infiniband/hw/ipath/ipath_rc.c       |   9 +-
 drivers/infiniband/hw/ipath/ipath_ruc.c      |  22 +-
 drivers/infiniband/hw/ipath/ipath_sysfs.c    |   6 +-
 drivers/infiniband/hw/ipath/ipath_uc.c       |   5 +-
 drivers/infiniband/hw/ipath/ipath_ud.c       |  13 +-
 drivers/infiniband/hw/ipath/ipath_verbs.c    | 525 +++++++++-
 drivers/infiniband/hw/ipath/ipath_verbs.h    | 109 ++-
 17 files changed, 1126 insertions(+), 1329 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 147dd89e21c90..5d77a74aa57b1 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -45,7 +45,6 @@
 #include <asm/uaccess.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 int ipath_diag_inuse;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 9af7406d6a623..958cc9b33c8fc 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,7 +39,6 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
@@ -508,7 +507,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	ipathfs_add_device(dd);
 	ipath_user_add(dd);
 	ipath_diag_add(dd);
-	ipath_layer_add(dd);
 	ipath_register_ib_device(dd);
 
 	goto bail;
@@ -539,7 +537,6 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
 
 	dd = pci_get_drvdata(pdev);
 	ipath_unregister_ib_device(dd->verbs_dev);
-	ipath_layer_remove(dd);
 	ipath_diag_remove(dd);
 	ipath_user_remove(dd);
 	ipathfs_remove_device(dd);
@@ -614,11 +611,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
  *
  * wait up to msecs milliseconds for IB link state change to occur for
  * now, take the easy polling route.  Currently used only by
- * ipath_layer_set_linkstate.  Returns 0 if state reached, otherwise
+ * ipath_set_linkstate.  Returns 0 if state reached, otherwise
  * -ETIMEDOUT state can have multiple states set, for any of several
  * transitions.
  */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
+				int msecs)
 {
 	dd->ipath_sma_state_wanted = state;
 	wait_event_interruptible_timeout(ipath_sma_state_wait,
@@ -814,58 +812,6 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
 	return skb;
 }
 
-/**
- * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
- * @dd: the infinipath device
- * @etail: the sk_buff number
- * @tlen: the total packet length
- * @hdr: the ethernet header
- *
- * Separate routine for better overall optimization
- */
-static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
-			    u32 tlen, struct ether_header *hdr)
-{
-	u32 elen;
-	u8 pad, *bthbytes;
-	struct sk_buff *skb, *nskb;
-
-	if (dd->ipath_port0_skbs &&
-			hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) {
-		/*
-		 * Allocate a new sk_buff to replace the one we give
-		 * to the network stack.
-		 */
-		nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
-		if (!nskb) {
-			/* count OK packets that we drop */
-			ipath_stats.sps_krdrops++;
-			return;
-		}
-
-		bthbytes = (u8 *) hdr->bth;
-		pad = (bthbytes[1] >> 4) & 3;
-		/* +CRC32 */
-		elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
-
-		skb = dd->ipath_port0_skbs[etail];
-		dd->ipath_port0_skbs[etail] = nskb;
-		skb_put(skb, elen);
-
-		dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
-				    ((char __iomem *) dd->ipath_kregbase
-				     + dd->ipath_rcvegrbase), 0,
-				    virt_to_phys(nskb->data));
-
-		__ipath_layer_rcv(dd, hdr, skb);
-
-		/* another ether packet received */
-		ipath_stats.sps_ether_rpkts++;
-	}
-	else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP)
-		__ipath_layer_rcv_lid(dd, hdr);
-}
-
 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
 			     u32 eflags,
 			     u32 l,
@@ -979,22 +925,17 @@ void ipath_kreceive(struct ipath_devdata *dd)
 		if (unlikely(eflags))
 			ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-				ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf,
-					     tlen);
-				if (dd->ipath_lli_counter)
-					dd->ipath_lli_counter--;
-
-		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-			if (qp == IPATH_KD_QP &&
-			    bthbytes[0] == ipath_layer_rcv_opcode &&
-			    ebuf)
-				ipath_rcv_layer(dd, etail, tlen,
-						(struct ether_header *)hdr);
-			else
-				ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-					   "qp=%x), len %x; ignored\n",
-					   etype, bthbytes[0], qp, tlen);
+			ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+			if (dd->ipath_lli_counter)
+				dd->ipath_lli_counter--;
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		}
+		else if (etype == RCVHQ_RCV_TYPE_EAGER)
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
 			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
 				  be32_to_cpu(hdr->bth[0]) & 0xff);
@@ -1320,13 +1261,6 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
 		goto bail;
 	}
 
-	if (updated)
-		/*
-		 * ran out of bufs, now some (at least this one we just
-		 * got) are now available, so tell the layered driver.
-		 */
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-
 	/*
 	 * set next starting place.  Since it's just an optimization,
 	 * it doesn't matter who wins on this, so no locking
@@ -1503,7 +1437,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
 	return ret;
 }
 
-void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 {
 	static const char *what[4] = {
 		[0] = "DOWN",
@@ -1537,6 +1471,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 			 dd->ipath_ibcctrl | which);
 }
 
+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+	u32 lstate;
+	int ret;
+
+	switch (newstate) {
+	case IPATH_IB_LINKDOWN:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKDOWN_SLEEP:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKDOWN_DISABLE:
+		ipath_set_ib_lstate(dd,
+				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case IPATH_IB_LINKINIT:
+		if (dd->ipath_flags & IPATH_LINKINIT) {
+			ret = 0;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		lstate = IPATH_LINKINIT;
+		break;
+
+	case IPATH_IB_LINKARM:
+		if (dd->ipath_flags & IPATH_LINKARMED) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(dd->ipath_flags &
+		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		/*
+		 * Since the port can transition to ACTIVE by receiving
+		 * a non VL 15 packet, wait for either state.
+		 */
+		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
+		break;
+
+	case IPATH_IB_LINKACTIVE:
+		if (dd->ipath_flags & IPATH_LINKACTIVE) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		lstate = IPATH_LINKACTIVE;
+		break;
+
+	default:
+		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+		ret = -EINVAL;
+		goto bail;
+	}
+	ret = ipath_wait_linkstate(dd, lstate, 2000);
+
+bail:
+	return ret;
+}
+
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU
+ *
+ * We can handle "any" incoming size; the issue here is whether we
+ * need to restrict our outgoing size.  @arg must be a valid IB MTU
+ * (256, 512, 1024, 2048 or 4096) or -EINVAL is returned; we don't
+ * deal with what happens to programs that are already running when
+ * the size changes.  NOTE: changing the MTU will usually cause the
+ * IBC to go back to link initialize (IPATH_IBSTATE_INIT) state...
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+	u32 piosize;
+	int changed = 0;
+	int ret;
+
+	/*
+	 * mtu is IB data payload max.  It's the largest power of 2 less
+	 * than piosize (or even larger, since it only really controls the
+	 * largest we can receive; we can send the max of the mtu and
+	 * piosize).  We check that it's one of the valid IB sizes.
+	 */
+	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+	    arg != 4096) {
+		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+		ret = -EINVAL;
+		goto bail;
+	}
+	if (dd->ipath_ibmtu == arg) {
+		ret = 0;        /* same as current */
+		goto bail;
+	}
+
+	piosize = dd->ipath_ibmaxlen;
+	dd->ipath_ibmtu = arg;
+
+	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+		/* Only if it's not the initial value (or reset to it) */
+		if (piosize != dd->ipath_init_ibmaxlen) {
+			dd->ipath_ibmaxlen = piosize;
+			changed = 1;
+		}
+	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+		piosize = arg + IPATH_PIO_MAXIBHDR;
+		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+			   arg);
+		dd->ipath_ibmaxlen = piosize;
+		changed = 1;
+	}
+
+	if (changed) {
+		/*
+		 * set the IBC maxpktlength to the size of our pio
+		 * buffers in words
+		 */
+		u64 ibc = dd->ipath_ibcctrl;
+		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
+
+		piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
+		dd->ipath_ibmaxlen = piosize;
+		piosize /= sizeof(u32); /* in words */
+		/*
+		 * for ICRC, which we only send in diag test pkt mode, and
+		 * we don't need to worry about that for mtu
+		 */
+		piosize += 1;
+
+		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+		dd->ipath_ibcctrl = ibc;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		dd->ipath_f_tidtemplate(dd);
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+{
+	dd->ipath_lid = arg;
+	dd->ipath_lmc = lmc;
+
+	return 0;
+}
+
 /**
  * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
  * @dd: the infinipath device
@@ -1640,13 +1748,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
 			    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
 
-	/*
-	 * we are shutting down, so tell the layered driver.  We don't do
-	 * this on just a link state change, much like ethernet, a cable
-	 * unplug, etc. doesn't change driver state
-	 */
-	ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
-
 	/* disable IBC */
 	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index e999a46bef9be..f865ce89b73f1 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -39,7 +39,6 @@
 #include <asm/pgtable.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 static int ipath_open(struct inode *, struct file *);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index ed54f8f2945eb..250e2a9f01bb6 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -34,7 +34,6 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
@@ -290,8 +289,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 		*dd->ipath_statusp |=
 			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
 		dd->ipath_f_setextled(dd, lstate, ltstate);
-
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
 	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
 		/*
 		 * set INIT and DOWN.  Down is checked by most of the other
@@ -709,10 +706,6 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
 {
 	int ret;
 
-	ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-	if (ret > 0)
-		goto set;
-
 	ret = ipath_ib_piobufavail(dd->verbs_dev);
 	if (ret > 0)
 		goto set;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index f1931105adb37..999249b7f27f8 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -518,16 +518,6 @@ extern struct list_head ipath_dev_list;
 extern spinlock_t ipath_devs_lock;
 extern struct ipath_devdata *ipath_lookup(int unit);
 
-extern u16 ipath_layer_rcv_opcode;
-extern int __ipath_layer_intr(struct ipath_devdata *, u32);
-extern int ipath_layer_intr(struct ipath_devdata *, u32);
-extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
-			     struct sk_buff *);
-extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-
-void ipath_layer_add(struct ipath_devdata *);
-void ipath_layer_remove(struct ipath_devdata *);
-
 int ipath_init_chip(struct ipath_devdata *, int);
 int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
@@ -575,12 +565,13 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
 
 int ipath_parse_ushort(const char *str, unsigned short *valp);
 
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-void ipath_set_ib_lstate(struct ipath_devdata *, int);
 void ipath_kreceive(struct ipath_devdata *);
 int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
 int ipath_reset_device(int);
 void ipath_get_faststats(unsigned long);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
 
 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index acc32200cc0e8..10f578e2aed62 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -101,242 +101,14 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
 	return ret;
 }
 
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+void ipath_layer_lid_changed(struct ipath_devdata *dd)
 {
-	u32 lstate;
-	int ret;
-
-	switch (newstate) {
-	case IPATH_IB_LINKDOWN:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_SLEEP:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_DISABLE:
-		ipath_set_ib_lstate(dd,
-				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKINIT:
-		if (dd->ipath_flags & IPATH_LINKINIT) {
-			ret = 0;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		lstate = IPATH_LINKINIT;
-		break;
-
-	case IPATH_IB_LINKARM:
-		if (dd->ipath_flags & IPATH_LINKARMED) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags &
-		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		/*
-		 * Since the port can transition to ACTIVE by receiving
-		 * a non VL 15 packet, wait for either state.
-		 */
-		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-		break;
-
-	case IPATH_IB_LINKACTIVE:
-		if (dd->ipath_flags & IPATH_LINKACTIVE) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		lstate = IPATH_LINKACTIVE;
-		break;
-
-	default:
-		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-		ret = -EINVAL;
-		goto bail;
-	}
-	ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_layer_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
- */
-int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-	u32 piosize;
-	int changed = 0;
-	int ret;
-
-	/*
-	 * mtu is IB data payload max.  It's the largest power of 2 less
-	 * than piosize (or even larger, since it only really controls the
-	 * largest we can receive; we can send the max of the mtu and
-	 * piosize).  We check that it's one of the valid IB sizes.
-	 */
-	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-	    arg != 4096) {
-		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (dd->ipath_ibmtu == arg) {
-		ret = 0;	/* same as current */
-		goto bail;
-	}
-
-	piosize = dd->ipath_ibmaxlen;
-	dd->ipath_ibmtu = arg;
-
-	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-		/* Only if it's not the initial value (or reset to it) */
-		if (piosize != dd->ipath_init_ibmaxlen) {
-			dd->ipath_ibmaxlen = piosize;
-			changed = 1;
-		}
-	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-		piosize = arg + IPATH_PIO_MAXIBHDR;
-		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-			   arg);
-		dd->ipath_ibmaxlen = piosize;
-		changed = 1;
-	}
-
-	if (changed) {
-		/*
-		 * set the IBC maxpktlength to the size of our pio
-		 * buffers in words
-		 */
-		u64 ibc = dd->ipath_ibcctrl;
-		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
-		piosize = piosize - 2 * sizeof(u32);	/* ignore pbc */
-		dd->ipath_ibmaxlen = piosize;
-		piosize /= sizeof(u32);	/* in words */
-		/*
-		 * for ICRC, which we only send in diag test pkt mode, and
-		 * we don't need to worry about that for mtu
-		 */
-		piosize += 1;
-
-		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-		dd->ipath_ibcctrl = ibc;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-		dd->ipath_f_tidtemplate(dd);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
-{
-	dd->ipath_lid = arg;
-	dd->ipath_lmc = lmc;
-
 	mutex_lock(&ipath_layer_mutex);
 
 	if (dd->ipath_layer.l_arg && layer_intr)
 		layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
 
 	mutex_unlock(&ipath_layer_mutex);
-
-	return 0;
-}
-
-int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
-{
-	/* XXX - need to inform anyone who cares this just happened. */
-	dd->ipath_guid = guid;
-	return 0;
-}
-
-__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
-{
-	return dd->ipath_guid;
-}
-
-u32 ipath_layer_get_majrev(struct ipath_devdata *dd)
-{
-	return dd->ipath_majrev;
-}
-
-u32 ipath_layer_get_minrev(struct ipath_devdata *dd)
-{
-	return dd->ipath_minrev;
-}
-
-u32 ipath_layer_get_pcirev(struct ipath_devdata *dd)
-{
-	return dd->ipath_pcirev;
-}
-
-u32 ipath_layer_get_flags(struct ipath_devdata *dd)
-{
-	return dd->ipath_flags;
-}
-
-struct device *ipath_layer_get_device(struct ipath_devdata *dd)
-{
-	return &dd->pcidev->dev;
-}
-
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
-{
-	return dd->ipath_deviceid;
-}
-
-u32 ipath_layer_get_vendorid(struct ipath_devdata *dd)
-{
-	return dd->ipath_vendorid;
-}
-
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
-{
-	return dd->ipath_lastibcstat;
-}
-
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
-{
-	return dd->ipath_ibmtu;
 }
 
 void ipath_layer_add(struct ipath_devdata *dd)
@@ -436,22 +208,6 @@ void ipath_layer_unregister(void)
 
 EXPORT_SYMBOL_GPL(ipath_layer_unregister);
 
-static void __ipath_verbs_timer(unsigned long arg)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-	/*
-	 * If port 0 receive packet interrupts are not available, or
-	 * can be missed, poll the receive queue
-	 */
-	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
-		ipath_kreceive(dd);
-
-	/* Handle verbs layer timeouts. */
-	ipath_ib_timer(dd->verbs_dev);
-	mod_timer(&dd->verbs_timer, jiffies + 1);
-}
-
 int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
 {
 	int ret;
@@ -540,380 +296,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
 
 EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
 
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
-{
-	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	sge->vaddr += length;
-	sge->length -= length;
-	sge->sge_length -= length;
-	if (sge->sge_length == 0) {
-		if (--ss->num_sge)
-			*sge = *ss->sg_list++;
-	} else if (sge->length == 0 && sge->mr != NULL) {
-		if (++sge->n >= IPATH_SEGSZ) {
-			if (++sge->m >= sge->mr->mapsz)
-				return;
-			sge->n = 0;
-		}
-		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-	}
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-		    u32 length)
-{
-	u32 extra = 0;
-	u32 data = 0;
-	u32 last;
-
-	while (1) {
-		u32 len = ss->sge.length;
-		u32 off;
-
-		BUG_ON(len == 0);
-		if (len > length)
-			len = length;
-		if (len > ss->sge.sge_length)
-			len = ss->sge.sge_length;
-		/* If the source address is not aligned, try to align it. */
-		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-		if (off) {
-			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-					    ~(sizeof(u32) - 1));
-			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-			u32 y;
-
-			y = sizeof(u32) - off;
-			if (len > y)
-				len = y;
-			if (len + extra >= sizeof(u32)) {
-				data |= set_upper_bits(v, extra *
-						       BITS_PER_BYTE);
-				len = sizeof(u32) - extra;
-				if (len == length) {
-					last = data;
-					break;
-				}
-				__raw_writel(data, piobuf);
-				piobuf++;
-				extra = 0;
-				data = 0;
-			} else {
-				/* Clear unused upper bytes */
-				data |= clear_upper_bytes(v, len, extra);
-				if (len == length) {
-					last = data;
-					break;
-				}
-				extra += len;
-			}
-		} else if (extra) {
-			/* Source address is aligned. */
-			u32 *addr = (u32 *) ss->sge.vaddr;
-			int shift = extra * BITS_PER_BYTE;
-			int ushift = 32 - shift;
-			u32 l = len;
-
-			while (l >= sizeof(u32)) {
-				u32 v = *addr;
-
-				data |= set_upper_bits(v, shift);
-				__raw_writel(data, piobuf);
-				data = get_upper_bits(v, ushift);
-				piobuf++;
-				addr++;
-				l -= sizeof(u32);
-			}
-			/*
-			 * We still have 'extra' number of bytes leftover.
-			 */
-			if (l) {
-				u32 v = *addr;
-
-				if (l + extra >= sizeof(u32)) {
-					data |= set_upper_bits(v, shift);
-					len -= l + extra - sizeof(u32);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					__raw_writel(data, piobuf);
-					piobuf++;
-					extra = 0;
-					data = 0;
-				} else {
-					/* Clear unused upper bytes */
-					data |= clear_upper_bytes(v, l,
-								  extra);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					extra += l;
-				}
-			} else if (len == length) {
-				last = data;
-				break;
-			}
-		} else if (len == length) {
-			u32 w;
-
-			/*
-			 * Need to round up for the last dword in the
-			 * packet.
-			 */
-			w = (len + 3) >> 2;
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-			piobuf += w - 1;
-			last = ((u32 *) ss->sge.vaddr)[w - 1];
-			break;
-		} else {
-			u32 w = len >> 2;
-
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
-			piobuf += w;
-
-			extra = len & (sizeof(u32) - 1);
-			if (extra) {
-				u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-				/* Clear unused upper bytes */
-				data = clear_upper_bytes(v, extra, 0);
-			}
-		}
-		update_sge(ss, len);
-		length -= len;
-	}
-	/* Update address before sending packet. */
-	update_sge(ss, length);
-	/* must flush early everything before trigger word */
-	ipath_flush_wc();
-	__raw_writel(last, piobuf);
-	/* be sure trigger word is written */
-	ipath_flush_wc();
-}
-
-/**
- * ipath_verbs_send - send a packet from the verbs layer
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
- *
- * This is like ipath_sma_send_pkt() in that we need to be able to send
- * packets after the chip is initialized (MADs) but also like
- * ipath_layer_send_hdr() since its used by the verbs layer.
- */
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-		     u32 *hdr, u32 len, struct ipath_sge_state *ss)
-{
-	u32 __iomem *piobuf;
-	u32 plen;
-	int ret;
-
-	/* +1 is for the qword padding of pbc */
-	plen = hdrwords + ((len + 3) >> 2) + 1;
-	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
-		ipath_dbg("packet len 0x%x too long, failing\n", plen);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* Get a PIO buffer to use. */
-	piobuf = ipath_getpiobuf(dd, NULL);
-	if (unlikely(piobuf == NULL)) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	/*
-	 * Write len to control qword, no flags.
-	 * We have to flush after the PBC for correctness on some cpus
-	 * or WC buffer can be written out of order.
-	 */
-	writeq(plen, piobuf);
-	ipath_flush_wc();
-	piobuf += 2;
-	if (len == 0) {
-		/*
-		 * If there is just the header portion, must flush before
-		 * writing last word of header for correctness, and after
-		 * the last header word (trigger word).
-		 */
-		__iowrite32_copy(piobuf, hdr, hdrwords - 1);
-		ipath_flush_wc();
-		__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-		ipath_flush_wc();
-		ret = 0;
-		goto bail;
-	}
-
-	__iowrite32_copy(piobuf, hdr, hdrwords);
-	piobuf += hdrwords;
-
-	/* The common case is aligned and contained in one segment. */
-	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-		u32 w;
-		u32 *addr = (u32 *) ss->sge.vaddr;
-
-		/* Update address before sending packet. */
-		update_sge(ss, len);
-		/* Need to round up for the last dword in the packet. */
-		w = (len + 3) >> 2;
-		__iowrite32_copy(piobuf, addr, w - 1);
-		/* must flush early everything before trigger word */
-		ipath_flush_wc();
-		__raw_writel(addr[w - 1], piobuf + w - 1);
-		/* be sure trigger word is written */
-		ipath_flush_wc();
-		ret = 0;
-		goto bail;
-	}
-	copy_io(piobuf, ss, len);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-				  u64 *rwords, u64 *spkts, u64 *rpkts,
-				  u64 *xmit_wait)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_layer_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-			      struct ipath_layer_counters *cntrs)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	cntrs->symbol_error_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
-	cntrs->link_error_recovery_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
-	/*
-	 * The link downed counter counts when the other side downs the
-	 * connection.  We add in the number of times we downed the link
-	 * due to local link integrity errors to compensate.
-	 */
-	cntrs->link_downed_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
-	cntrs->port_rcv_errors =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
-	cntrs->port_rcv_remphys_errors =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
-	cntrs->port_xmit_discards =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
-	cntrs->port_xmit_data =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	cntrs->port_rcv_data =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	cntrs->port_xmit_packets =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	cntrs->port_rcv_packets =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
-	cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_layer_want_buffer(struct ipath_devdata *dd)
-{
-	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-
-	return 0;
-}
-
 int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
 {
 	int ret = 0;
@@ -985,361 +367,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
 }
 
 EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
-
-int ipath_layer_enable_timer(struct ipath_devdata *dd)
-{
-	/*
-	 * HT-400 has a design flaw where the chip and kernel idea
-	 * of the tail register don't always agree, and therefore we won't
-	 * get an interrupt on the next packet received.
-	 * If the board supports per packet receive interrupts, use it.
-	 * Otherwise, the timer function periodically checks for packets
-	 * to cover this case.
-	 * Either way, the timer is needed for verbs layer related
-	 * processing.
-	 */
-	if (dd->ipath_flags & IPATH_GPIO_INTR) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-				 0x2074076542310ULL);
-		/* Enable GPIO bit 2 interrupt */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 (u64) (1 << 2));
-	}
-
-	init_timer(&dd->verbs_timer);
-	dd->verbs_timer.function = __ipath_verbs_timer;
-	dd->verbs_timer.data = (unsigned long)dd;
-	dd->verbs_timer.expires = jiffies + 1;
-	add_timer(&dd->verbs_timer);
-
-	return 0;
-}
-
-int ipath_layer_disable_timer(struct ipath_devdata *dd)
-{
-	/* Disable GPIO bit 2 interrupt */
-	if (dd->ipath_flags & IPATH_GPIO_INTR)
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
-
-	del_timer_sync(&dd->verbs_timer);
-
-	return 0;
-}
-
-/**
- * ipath_layer_set_verbs_flags - set the verbs layer flags
- * @dd: the infinipath device
- * @flags: the flags to set
- */
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
-{
-	struct ipath_devdata *ss;
-	unsigned long lflags;
-
-	spin_lock_irqsave(&ipath_devs_lock, lflags);
-
-	list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
-		if (!(ss->ipath_flags & IPATH_INITTED))
-			continue;
-		if ((flags & IPATH_VERBS_KERNEL_SMA) &&
-		    !(*ss->ipath_statusp & IPATH_STATUS_SMA))
-			*ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-		else
-			*ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, lflags);
-
-	return 0;
-}
-
-/**
- * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
-{
-	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-/**
- * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-	unsigned ret;
-
-	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-		ret = 0;
-	else
-		ret = dd->ipath_pd[0]->port_pkeys[index];
-
-	return ret;
-}
-
-/**
- * ipath_layer_get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	struct ipath_portdata *pd = dd->ipath_pd[0];
-
-	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-	return 0;
-}
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (dd->ipath_pkeys[i] != key)
-			continue;
-		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-			dd->ipath_pkeys[i] = 0;
-			ret = 1;
-			goto bail;
-		}
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	u16 lkey = key & 0x7FFF;
-	int any = 0;
-	int ret;
-
-	if (lkey == 0x7FFF) {
-		ret = 0;
-		goto bail;
-	}
-
-	/* Look for an empty slot or a matching PKEY. */
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i]) {
-			any++;
-			continue;
-		}
-		/* If it matches exactly, try to increment the ref count */
-		if (dd->ipath_pkeys[i] == key) {
-			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-				ret = 0;
-				goto bail;
-			}
-			/* Lost the race. Look for an empty slot below. */
-			atomic_dec(&dd->ipath_pkeyrefs[i]);
-			any++;
-		}
-		/*
-		 * It makes no sense to have both the limited and unlimited
-		 * PKEY set at the same time since the unlimited one will
-		 * disable the limited one.
-		 */
-		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (!any) {
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i] &&
-		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-			/* for ipathstats, etc. */
-			ipath_stats.sps_pkeys[i] = lkey;
-			dd->ipath_pkeys[i] = key;
-			ret = 1;
-			goto bail;
-		}
-	}
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_layer_set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	struct ipath_portdata *pd;
-	int i;
-	int changed = 0;
-
-	pd = dd->ipath_pd[0];
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		u16 key = pkeys[i];
-		u16 okey = pd->port_pkeys[i];
-
-		if (key == okey)
-			continue;
-		/*
-		 * The value of this PKEY table entry is changing.
-		 * Remove the old entry in the hardware's array of PKEYs.
-		 */
-		if (okey & 0x7FFF)
-			changed |= rm_pkey(dd, okey);
-		if (key & 0x7FFF) {
-			int ret = add_pkey(dd, key);
-
-			if (ret < 0)
-				key = 0;
-			else
-				changed |= ret;
-		}
-		pd->port_pkeys[i] = key;
-	}
-	if (changed) {
-		u64 pkey;
-
-		pkey = (u64) dd->ipath_pkeys[0] |
-			((u64) dd->ipath_pkeys[1] << 16) |
-			((u64) dd->ipath_pkeys[2] << 32) |
-			((u64) dd->ipath_pkeys[3] << 48);
-		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-			   (unsigned long long) pkey);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-				 pkey);
-	}
-	return 0;
-}
-
-/**
- * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-/**
- * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-					 int sleep)
-{
-	if (sleep)
-		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	else
-		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl);
-	return 0;
-}
-
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-/**
- * ipath_layer_set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-/**
- * ipath_layer_set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen)
-{
-	return dd->ipath_f_get_boardname(dd, name, namelen);
-}
-
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
-{
-	return dd->ipath_rcvhdrentsize;
-}
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 57c990a5715fc..4a27ede49941b 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -40,73 +40,9 @@
  */
 
 struct sk_buff;
-struct ipath_sge_state;
 struct ipath_devdata;
 struct ether_header;
 
-struct ipath_layer_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-	struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-	u64 user_base;		/* User's address for this region */
-	u64 iova;		/* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;		/* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;		/* number of ipath_segs in all the arrays */
-	u32 mapsz;		/* size of the map array */
-	struct ipath_segarray *map[0];	/* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-	struct ipath_mregion *mr;
-	void *vaddr;		/* current pointer into the segment */
-	u32 sge_length;		/* length of the SGE */
-	u32 length;		/* remaining length of the segment */
-	u16 m;			/* current index: mr->map[m] */
-	u16 n;			/* current index: mr->map[m]->segs[n] */
-};
-
-struct ipath_sge_state {
-	struct ipath_sge *sg_list;	/* next SGE to be used if any */
-	struct ipath_sge sge;	/* progress state for the current SGE */
-	u8 num_sge;
-};
-
 int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 			 void (*l_remove)(void *),
 			 int (*l_intr)(void *, u32),
@@ -119,49 +55,9 @@ int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
 u16 ipath_layer_get_lid(struct ipath_devdata *dd);
 int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
 u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
-int ipath_layer_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
 int ipath_layer_send_hdr(struct ipath_devdata *dd,
 			 struct ether_header *hdr);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-		     u32 * hdr, u32 len, struct ipath_sge_state *ss);
 int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen);
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-				  u64 *rwords, u64 *spkts, u64 *rpkts,
-				  u64 *xmit_wait);
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-			     struct ipath_layer_counters *cntrs);
-int ipath_layer_want_buffer(struct ipath_devdata *dd);
-int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
-__be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_majrev(struct ipath_devdata *);
-u32 ipath_layer_get_minrev(struct ipath_devdata *);
-u32 ipath_layer_get_pcirev(struct ipath_devdata *);
-u32 ipath_layer_get_flags(struct ipath_devdata *dd);
-struct device *ipath_layer_get_device(struct ipath_devdata *dd);
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
-u32 ipath_layer_get_vendorid(struct ipath_devdata *);
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
-int ipath_layer_enable_timer(struct ipath_devdata *dd);
-int ipath_layer_disable_timer(struct ipath_devdata *dd);
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-					 int sleep);
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
 
 /* ipath_ether interrupt values */
 #define IPATH_LAYER_INT_IF_UP 0x2
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index d3402341b7d0d..72d1db89db8f6 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -101,15 +101,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
 	nip->num_ports = ibdev->phys_port_cnt;
 	/* This is already in network order */
 	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-	nip->node_guid = ipath_layer_get_guid(dd);
+	nip->node_guid = dd->ipath_guid;
 	nip->port_guid = nip->sys_guid;
-	nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
-	nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
-	majrev = ipath_layer_get_majrev(dd);
-	minrev = ipath_layer_get_minrev(dd);
+	nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+	nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+	majrev = dd->ipath_majrev;
+	minrev = dd->ipath_minrev;
 	nip->revision = cpu_to_be32((majrev << 16) | minrev);
 	nip->local_port_num = port;
-	vendor = ipath_layer_get_vendorid(dd);
+	vendor = dd->ipath_vendorid;
 	nip->vendor_id[0] = 0;
 	nip->vendor_id[1] = vendor >> 8;
 	nip->vendor_id[2] = vendor;
@@ -133,13 +133,89 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
 	 */
 	if (startgx == 0)
 		/* The first is a copy of the read-only HW GUID. */
-		*p = ipath_layer_get_guid(to_idev(ibdev)->dd);
+		*p = to_idev(ibdev)->dd->ipath_guid;
 	else
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return reply(smp);
 }
 
+
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
 static int recv_subn_get_portinfo(struct ib_smp *smp,
 				  struct ib_device *ibdev, u8 port)
 {
@@ -166,7 +242,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	    (dev->mkeyprot_resv_lmc >> 6) == 0)
 		pip->mkey = dev->mkey;
 	pip->gid_prefix = dev->gid_prefix;
-	lid = ipath_layer_get_lid(dev->dd);
+	lid = dev->dd->ipath_lid;
 	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
 	pip->sm_lid = cpu_to_be16(dev->sm_lid);
 	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@@ -177,14 +253,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	pip->link_width_supported = 3;	/* 1x or 4x */
 	pip->link_width_active = 2;	/* 4x */
 	pip->linkspeed_portstate = 0x10;	/* 2.5Gbps */
-	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+	ibcstat = dev->dd->ipath_lastibcstat;
 	pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
 	pip->portphysstate_linkdown =
 		(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
-		(ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
+		(get_linkdowndefaultstate(dev->dd) ? 1 : 2);
 	pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
 	pip->linkspeedactive_enabled = 0x11;	/* 2.5Gbps, 2.5Gbps */
-	switch (ipath_layer_get_ibmtu(dev->dd)) {
+	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@@ -217,7 +293,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
 	pip->pkey_violations =
-		cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
+		cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
 			     dev->z_pkey_violations) & 0xFFFF);
 	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
 	/* Only the hardware GUID is supported for now */
@@ -226,8 +302,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
-		(ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
-		ipath_layer_get_overrunthreshold(dev->dd);
+		(get_phyerrthreshold(dev->dd) << 4) |
+		get_overrunthreshold(dev->dd);
 	/* pip->max_credit_hint; */
 	/* pip->link_roundtrip_latency[3]; */
 
@@ -237,6 +313,20 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	return ret;
 }
 
+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
+{
+	struct ipath_portdata *pd = dd->ipath_pd[0];
+
+	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+	return 0;
+}
+
 static int recv_subn_get_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@@ -249,9 +339,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp,
 	memset(smp->data, 0, sizeof(smp->data));
 	if (startpx == 0) {
 		struct ipath_ibdev *dev = to_idev(ibdev);
-		unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+		unsigned i, n = ipath_get_npkeys(dev->dd);
 
-		ipath_layer_get_pkeys(dev->dd, p);
+		get_pkeys(dev->dd, p);
 
 		for (i = 0; i < n; i++)
 			q[i] = cpu_to_be16(p[i]);
@@ -268,6 +358,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
 	return recv_subn_get_guidinfo(smp, ibdev);
 }
 
+/**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+	if (sleep)
+		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	else
+		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+			 dd->ipath_ibcctrl);
+	return 0;
+}
+
 /**
  * recv_subn_set_portinfo - set port information
  * @smp: the incoming SM packet
@@ -290,7 +398,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	u8 state;
 	u16 lstate;
 	u32 mtu;
-	int ret;
+	int ret, ore;
 
 	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
 		goto err;
@@ -304,7 +412,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
 	lid = be16_to_cpu(pip->lid);
-	if (lid != ipath_layer_get_lid(dev->dd)) {
+	if (lid != dev->dd->ipath_lid) {
 		/* Must be a valid unicast LID address. */
 		if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
 			goto err;
@@ -342,11 +450,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case 0: /* NOP */
 		break;
 	case 1: /* SLEEP */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
+		if (set_linkdowndefaultstate(dev->dd, 1))
 			goto err;
 		break;
 	case 2: /* POLL */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
+		if (set_linkdowndefaultstate(dev->dd, 0))
 			goto err;
 		break;
 	default:
@@ -376,7 +484,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 		/* XXX We have already partially updated our state! */
 		goto err;
 	}
-	ipath_layer_set_mtu(dev->dd, mtu);
+	ipath_set_mtu(dev->dd, mtu);
 
 	dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
 
@@ -392,20 +500,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	 * later.
 	 */
 	if (pip->pkey_violations == 0)
-		dev->z_pkey_violations =
-			ipath_layer_get_cr_errpkey(dev->dd);
+		dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
 
 	if (pip->qkey_violations == 0)
 		dev->qkey_violations = 0;
 
-	if (ipath_layer_set_phyerrthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors >> 4) & 0xF))
+	ore = pip->localphyerrors_overrunerrors;
+	if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
 		goto err;
 
-	if (ipath_layer_set_overrunthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors & 0xF)))
+	if (set_overrunthreshold(dev->dd, (ore & 0xF)))
 		goto err;
 
 	dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -423,7 +527,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	 * is down or is being set to down.
 	 */
 	state = pip->linkspeed_portstate & 0xF;
-	flags = ipath_layer_get_flags(dev->dd);
+	flags = dev->dd->ipath_flags;
 	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
 	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
 		goto err;
@@ -439,7 +543,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 		/* FALLTHROUGH */
 	case IB_PORT_DOWN:
 		if (lstate == 0)
-			if (ipath_layer_get_linkdowndefaultstate(dev->dd))
+			if (get_linkdowndefaultstate(dev->dd))
 				lstate = IPATH_IB_LINKDOWN_SLEEP;
 			else
 				lstate = IPATH_IB_LINKDOWN;
@@ -451,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 			lstate = IPATH_IB_LINKDOWN_DISABLE;
 		else
 			goto err;
-		ipath_layer_set_linkstate(dev->dd, lstate);
+		ipath_set_linkstate(dev->dd, lstate);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@@ -460,7 +564,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case IB_PORT_ARMED:
 		if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@@ -469,7 +573,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	case IB_PORT_ACTIVE:
 		if (!(flags & IPATH_LINKARMED))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
 		event.event = IB_EVENT_PORT_ACTIVE;
 		ib_dispatch_event(&event);
 		break;
@@ -493,6 +597,152 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
 	return ret;
 }
 
+/**
+ * rm_pkey - decrement the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY value
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (dd->ipath_pkeys[i] != key)
+			continue;
+		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+			dd->ipath_pkeys[i] = 0;
+			ret = 1;
+			goto bail;
+		}
+		break;
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	u16 lkey = key & 0x7FFF;
+	int any = 0;
+	int ret;
+
+	if (lkey == 0x7FFF) {
+		ret = 0;
+		goto bail;
+	}
+
+	/* Look for an empty slot or a matching PKEY. */
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i]) {
+			any++;
+			continue;
+		}
+		/* If it matches exactly, try to increment the ref count */
+		if (dd->ipath_pkeys[i] == key) {
+			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+				ret = 0;
+				goto bail;
+			}
+			/* Lost the race. Look for an empty slot below. */
+			atomic_dec(&dd->ipath_pkeyrefs[i]);
+			any++;
+		}
+		/*
+		 * It makes no sense to have both the limited and unlimited
+		 * PKEY set at the same time since the unlimited one will
+		 * disable the limited one.
+		 */
+		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+			ret = -EEXIST;
+			goto bail;
+		}
+	}
+	if (!any) {
+		ret = -EBUSY;
+		goto bail;
+	}
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i] &&
+		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+			/* for ipathstats, etc. */
+			ipath_stats.sps_pkeys[i] = lkey;
+			dd->ipath_pkeys[i] = key;
+			ret = 1;
+			goto bail;
+		}
+	}
+	ret = -EBUSY;
+
+bail:
+	return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+	struct ipath_portdata *pd;
+	int i;
+	int changed = 0;
+
+	pd = dd->ipath_pd[0];
+
+	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+		u16 key = pkeys[i];
+		u16 okey = pd->port_pkeys[i];
+
+		if (key == okey)
+			continue;
+		/*
+		 * The value of this PKEY table entry is changing.
+		 * Remove the old entry in the hardware's array of PKEYs.
+		 */
+		if (okey & 0x7FFF)
+			changed |= rm_pkey(dd, okey);
+		if (key & 0x7FFF) {
+			int ret = add_pkey(dd, key);
+
+			if (ret < 0)
+				key = 0;
+			else
+				changed |= ret;
+		}
+		pd->port_pkeys[i] = key;
+	}
+	if (changed) {
+		u64 pkey;
+
+		pkey = (u64) dd->ipath_pkeys[0] |
+			((u64) dd->ipath_pkeys[1] << 16) |
+			((u64) dd->ipath_pkeys[2] << 32) |
+			((u64) dd->ipath_pkeys[3] << 48);
+		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+			   (unsigned long long) pkey);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+				 pkey);
+	}
+	return 0;
+}
+
 static int recv_subn_set_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@@ -500,13 +750,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
 	__be16 *p = (__be16 *) smp->data;
 	u16 *q = (u16 *) smp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+	unsigned i, n = ipath_get_npkeys(dev->dd);
 
 	for (i = 0; i < n; i++)
 		q[i] = be16_to_cpu(p[i]);
 
-	if (startpx != 0 ||
-	    ipath_layer_set_pkeys(dev->dd, q) != 0)
+	if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return recv_subn_get_pkeytable(smp, ibdev);
@@ -844,10 +1093,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 	u8 port_select = p->port_select;
 
-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);
 
 	/* Adjust counters for any resets done. */
 	cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
@@ -944,8 +1193,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
 	u64 swords, rwords, spkts, rpkts, xwait;
 	u8 port_select = p->port_select;
 
-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);
 
 	/* Adjust counters for any resets done. */
 	swords -= dev->z_port_xmit_data;
@@ -978,13 +1227,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 
 	/*
 	 * Since the HW doesn't support clearing counters, we save the
 	 * current count and subtract it from future responses.
 	 */
-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);
 
 	if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
 		dev->z_symbol_error_counter = cntrs.symbol_error_counter;
@@ -1041,8 +1290,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	u64 swords, rwords, spkts, rpkts, xwait;
 
-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);
 
 	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
 		dev->z_port_xmit_data = swords;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 4ac31a5da3308..b36f6fb3e37a7 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -36,6 +36,18 @@
 
 #include "ipath_verbs.h"
 
+/* Fast memory region */
+struct ipath_fmr {
+	struct ib_fmr ibfmr;
+	u8 page_shift;
+	struct ipath_mregion mr;        /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
 /**
  * ipath_get_dma_mr - get a DMA memory region
  * @pd: protection domain for this memory region
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 1ccfc909db1e7..9228d59b59d46 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -461,7 +461,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 			goto inval;
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
+		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
 			goto inval;
 
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
@@ -644,6 +644,33 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
 	return cpu_to_be32(aeth);
 }
 
+/**
+ * set_verbs_flags - record the kernel SMA state in each device's status
+ * @dd: the infinipath device (unused: every initialized device is updated)
+ * @flags: IPATH_VERBS_KERNEL_SMA to advertise the kernel SMA, 0 to clear
+ *
+ * Walks ipath_dev_list under ipath_devs_lock.  Always returns 0.
+ */
+static int set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
+{
+	const int kernel_sma = (flags & IPATH_VERBS_KERNEL_SMA) != 0;
+	struct ipath_devdata *cur;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ipath_devs_lock, irqflags);
+	list_for_each_entry(cur, &ipath_dev_list, ipath_list) {
+		if (!(cur->ipath_flags & IPATH_INITTED))
+			continue;
+		if (kernel_sma && !(*cur->ipath_statusp & IPATH_STATUS_SMA))
+			*cur->ipath_statusp |= IPATH_STATUS_OIB_SMA;
+		else
+			*cur->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
+	}
+	spin_unlock_irqrestore(&ipath_devs_lock, irqflags);
+
+	return 0;
+}
+
 /**
  * ipath_create_qp - create a queue pair for a device
  * @ibpd: the protection domain who's device we create the queue pair for
@@ -760,8 +787,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 
 		/* Tell the core driver that the kernel SMA is present. */
 		if (init_attr->qp_type == IB_QPT_SMI)
-			ipath_layer_set_verbs_flags(dev->dd,
-						    IPATH_VERBS_KERNEL_SMA);
+			set_verbs_flags(dev->dd, IPATH_VERBS_KERNEL_SMA);
 		break;
 
 	default:
@@ -838,7 +864,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 
 	/* Tell the core driver that the kernel SMA is gone. */
 	if (qp->ibqp.qp_type == IB_QPT_SMI)
-		ipath_layer_set_verbs_flags(dev->dd, 0);
+		set_verbs_flags(dev->dd, 0);
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 774d1615ce2f1..a08654042c034 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
@@ -540,7 +540,7 @@ static void send_rc_ack(struct ipath_qp *qp)
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
@@ -557,7 +557,7 @@ static void send_rc_ack(struct ipath_qp *qp)
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-	hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
+	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
 	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@@ -1323,8 +1323,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		 * the eager header buffer size to 56 bytes so the last 4
 		 * bytes of the BTH header (PSN) is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			psn = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index dd09420d677d8..5c1da2d25e03e 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -470,6 +470,15 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
 		wake_up(&qp->wait);
 }
 
+/* Set PIOINTBUFAVAIL in ipath_sendctrl and write it to kr_sendctrl. */
+static int want_buffer(struct ipath_devdata *dd)
+{
+	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			 dd->ipath_sendctrl);
+	return 0;
+}
+
 /**
  * ipath_no_bufs_available - tell the layer driver we need buffers
  * @qp: the QP that caused the problem
@@ -486,7 +495,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 		list_add_tail(&qp->piowait, &dev->piowait);
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 	/*
-	 * Note that as soon as ipath_layer_want_buffer() is called and
+	 * Note that as soon as want_buffer() is called and
 	 * possibly before it returns, ipath_ib_piobufavail()
 	 * could be called.  If we are still in the tasklet function,
 	 * tasklet_hi_schedule() will not call us until the next time
@@ -496,7 +505,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 	 */
 	clear_bit(IPATH_S_BUSY, &qp->s_flags);
 	tasklet_unlock(&qp->s_task);
-	ipath_layer_want_buffer(dev->dd);
+	want_buffer(dev->dd);
 	dev->n_piowait++;
 }
 
@@ -611,7 +620,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
 	hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-	hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd);
+	hdr->sgid.global.interface_id = dev->dd->ipath_guid;
 	hdr->dgid = grh->dgid;
 
 	/* GRH header size in 32-bit words. */
@@ -643,8 +652,7 @@ void ipath_do_ruc_send(unsigned long data)
 	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
 		goto bail;
 
-	if (unlikely(qp->remote_ah_attr.dlid ==
-		     ipath_layer_get_lid(dev->dd))) {
+	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
 		ipath_ruc_loopback(qp);
 		goto clear;
 	}
@@ -711,8 +719,8 @@ void ipath_do_ruc_send(unsigned long data)
 	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
 				       SIZE_OF_CRC);
-	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
-	bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b98821d7801d9..7396a63840db4 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -35,7 +35,6 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 /**
@@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev,
 	unit = dd->ipath_unit;
 
 	dd->ipath_mlid = mlid;
-	ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
 
 	goto bail;
 invalid:
@@ -467,7 +465,7 @@ static ssize_t store_link_state(struct device *dev,
 	if (ret < 0)
 		goto invalid;
 
-	r = ipath_layer_set_linkstate(dd, state);
+	r = ipath_set_linkstate(dd, state);
 	if (r < 0) {
 		ret = r;
 		goto bail;
@@ -502,7 +500,7 @@ static ssize_t store_mtu(struct device *dev,
 	if (ret < 0)
 		goto invalid;
 
-	r = ipath_layer_set_mtu(dd, mtu);
+	r = ipath_set_mtu(dd, mtu);
 	if (r < 0)
 		ret = r;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index c33abea2d5a7c..0fd3cded16baf 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
@@ -261,8 +261,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		 * size to 56 bytes so the last 4 bytes of
 		 * the BTH header (PSN) is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			psn = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 82439fcfc2f85..6991d1d74e3ce 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -353,7 +353,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		ss.num_sge++;
 	}
 	/* Check for invalid packet size. */
-	if (len > ipath_layer_get_ibmtu(dev->dd)) {
+	if (len > dev->dd->ipath_ibmtu) {
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -375,7 +375,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		dev->n_unicast_xmit++;
 		lid = ah_attr->dlid &
 			~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-		if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
+		if (unlikely(lid == dev->dd->ipath_lid)) {
 			/*
 			 * Pass in an uninitialized ib_wc to save stack
 			 * space.
@@ -404,7 +404,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
 			dev->gid_prefix;
 		qp->s_hdr.u.l.grh.sgid.global.interface_id =
-			ipath_layer_get_guid(dev->dd);
+			dev->dd->ipath_guid;
 		qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
 		/*
 		 * Don't worry about sending to locally attached multicast
@@ -434,7 +434,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
 	qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);	/* DEST LID */
 	qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
-	lid = ipath_layer_get_lid(dev->dd);
+	lid = dev->dd->ipath_lid;
 	if (lid) {
 		lid |= ah_attr->src_path_bits &
 			((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
@@ -445,7 +445,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		bth0 |= 1 << 23;
 	bth0 |= extra_bytes << 20;
 	bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-		ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+		ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	/*
 	 * Use the multicast QP if the destination LID is a multicast LID.
@@ -531,8 +531,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		 * the eager header buffer size to 56 bytes so the last 12
 		 * bytes of the IB header is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			qkey = be32_to_cpu(((__be32 *) data)[1]);
 			src_qp = be32_to_cpu(((__be32 *) data)[2]);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 15edec9227e43..3c47620e98877 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -33,15 +33,13 @@
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
 #include <linux/utsname.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
-/* Not static, because we don't want the compiler removing it */
-const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
-
 static unsigned int ib_ipath_qp_table_size = 251;
 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -109,10 +107,6 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
 		   uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@pathscale.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
-
 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = 0,
 	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
@@ -125,6 +119,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 	[IB_QPS_ERR] = 0,
 };
 
+struct ipath_ucontext {
+	struct ib_ucontext ibucontext;	/* core object; see to_iucontext() */
+};
+
+static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
+						  *ibucontext)
+{
+	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
+}
+
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
@@ -400,7 +404,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
 	lid = be16_to_cpu(hdr->lrh[1]);
 	if (lid < IPATH_MULTICAST_LID_BASE) {
 		lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-		if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
+		if (unlikely(lid != dev->dd->ipath_lid)) {
 			dev->rcv_errors++;
 			goto bail;
 		}
@@ -511,19 +515,19 @@ void ipath_ib_timer(struct ipath_ibdev *dev)
 	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
 	    --dev->pma_sample_start == 0) {
 		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-		ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
-					      &dev->ipath_rword,
-					      &dev->ipath_spkts,
-					      &dev->ipath_rpkts,
-					      &dev->ipath_xmit_wait);
+		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
+					&dev->ipath_rword,
+					&dev->ipath_spkts,
+					&dev->ipath_rpkts,
+					&dev->ipath_xmit_wait);
 	}
 	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 		if (dev->pma_sample_interval == 0) {
 			u64 ta, tb, tc, td, te;
 
 			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-			ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
-						      &tc, &td, &te);
+			ipath_snapshot_counters(dev->dd, &ta, &tb,
+						&tc, &td, &te);
 
 			dev->ipath_sword = ta - dev->ipath_sword;
 			dev->ipath_rword = tb - dev->ipath_rword;
@@ -553,6 +557,362 @@ void ipath_ib_timer(struct ipath_ibdev *dev)
 	}
 }
 
+static void update_sge(struct ipath_sge_state *ss, u32 length)
+{
+	struct ipath_sge *sge = &ss->sge;
+
+	sge->vaddr += length;	/* consume length bytes of the current SGE */
+	sge->length -= length;
+	sge->sge_length -= length;
+	if (sge->sge_length == 0) {
+		if (--ss->num_sge)	/* more SGEs queued: load the next */
+			*sge = *ss->sg_list++;
+	} else if (sge->length == 0 && sge->mr != NULL) {
+		if (++sge->n >= IPATH_SEGSZ) {	/* end of this segarray */
+			if (++sge->m >= sge->mr->mapsz)
+				return;	/* no further map entries */
+			sge->n = 0;
+		}
+		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+	}
+}
+
+/* copy_io() helpers: byte shifts in memory order, so endian-specific. */
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;
+}
+
+/* Keep the first @n bytes of @data (memory order) at byte offset @off. */
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+	return data >> ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+}
+#else	/* big endian: same contracts, shift directions mirrored */
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+	return data << ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+}
+#endif	/* __LITTLE_ENDIAN */
+
+/*
+ * Copy @length bytes described by @ss to the PIO buffer @piobuf in 32-bit
+ * words, packing across unaligned or partial segments.  The final word is
+ * kept in 'last' and written only after ipath_flush_wc().
+ */
+static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
+		    u32 length)
+{
+	u32 extra = 0;	/* bytes of a partial word pending in 'data' */
+	u32 data = 0;	/* partial word being assembled */
+	u32 last;	/* final (trigger) word, written after the loop */
+
+	while (1) {
+		u32 len = ss->sge.length;
+		u32 off;
+
+		BUG_ON(len == 0);
+		if (len > length)
+			len = length;
+		if (len > ss->sge.sge_length)
+			len = ss->sge.sge_length;
+		/* If the source address is not aligned, try to align it. */
+		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+		if (off) {
+			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+					    ~(sizeof(u32) - 1));
+			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+			u32 y;
+
+			y = sizeof(u32) - off;
+			if (len > y)
+				len = y;
+			if (len + extra >= sizeof(u32)) {
+				data |= set_upper_bits(v, extra *
+						       BITS_PER_BYTE);
+				len = sizeof(u32) - extra;
+				if (len == length) {
+					last = data;
+					break;
+				}
+				__raw_writel(data, piobuf);
+				piobuf++;
+				extra = 0;
+				data = 0;
+			} else {
+				/* Clear unused upper bytes */
+				data |= clear_upper_bytes(v, len, extra);
+				if (len == length) {
+					last = data;
+					break;
+				}
+				extra += len;
+			}
+		} else if (extra) {
+			/* Source address is aligned. */
+			u32 *addr = (u32 *) ss->sge.vaddr;
+			int shift = extra * BITS_PER_BYTE;
+			int ushift = 32 - shift;
+			u32 l = len;
+
+			while (l >= sizeof(u32)) {
+				u32 v = *addr;
+
+				data |= set_upper_bits(v, shift);
+				__raw_writel(data, piobuf);
+				data = get_upper_bits(v, ushift);
+				piobuf++;
+				addr++;
+				l -= sizeof(u32);
+			}
+			/* We still have 'extra' number of bytes leftover. */
+			if (l) {
+				u32 v = *addr;
+
+				if (l + extra >= sizeof(u32)) {
+					data |= set_upper_bits(v, shift);
+					len -= l + extra - sizeof(u32);
+					if (len == length) {
+						last = data;
+						break;
+					}
+					__raw_writel(data, piobuf);
+					piobuf++;
+					extra = 0;
+					data = 0;
+				} else {
+					/* Clear unused upper bytes */
+					data |= clear_upper_bytes(v, l,
+								  extra);
+					if (len == length) {
+						last = data;
+						break;
+					}
+					extra += l;
+				}
+			} else if (len == length) {
+				last = data;
+				break;
+			}
+		} else if (len == length) {
+			u32 w;
+
+			/* Round up for the last dword in the packet. */
+			w = (len + 3) >> 2;
+			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
+			piobuf += w - 1;
+			last = ((u32 *) ss->sge.vaddr)[w - 1];
+			break;
+		} else {
+			u32 w = len >> 2;
+
+			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
+			piobuf += w;
+
+			extra = len & (sizeof(u32) - 1);
+			if (extra) {
+				u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+				/* Clear unused upper bytes */
+				data = clear_upper_bytes(v, extra, 0);
+			}
+		}
+		update_sge(ss, len);
+		length -= len;
+	}
+	/* Update address before sending packet. */
+	update_sge(ss, length);
+	/* must flush early everything before trigger word */
+	ipath_flush_wc();
+	__raw_writel(last, piobuf);
+	/* be sure trigger word is written */
+	ipath_flush_wc();
+}
+
+/**
+ * ipath_verbs_send - send a packet
+ * @dd: the infinipath device
+ * @hdrwords: the number of words in the header
+ * @hdr: the packet header
+ * @len: the length of the packet in bytes
+ * @ss: the SGE to send
+ *
+ * The last word written to the PIO buffer is the trigger word, so all
+ * earlier words are flushed before it is written.
+ *
+ * Returns 0 once the packet has been written to a PIO buffer, -EINVAL
+ * if it exceeds dd->ipath_ibmaxlen, or -EBUSY if no PIO buffer is free.
+ */
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+		     u32 *hdr, u32 len, struct ipath_sge_state *ss)
+{
+	u32 __iomem *piobuf;
+	u32 *payload;
+	u32 nwords;
+	u32 plen;
+
+	/* +1 is for the qword padding of pbc */
+	plen = hdrwords + ((len + 3) >> 2) + 1;
+	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
+		ipath_dbg("packet len 0x%x too long, failing\n", plen);
+		return -EINVAL;
+	}
+
+	/* Get a PIO buffer to use. */
+	piobuf = ipath_getpiobuf(dd, NULL);
+	if (unlikely(piobuf == NULL))
+		return -EBUSY;
+
+	/*
+	 * Write len to control qword, no flags.
+	 * We have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order.
+	 */
+	writeq(plen, piobuf);
+	ipath_flush_wc();
+	piobuf += 2;
+	if (len == 0) {
+		/*
+		 * If there is just the header portion, must flush before
+		 * writing last word of header for correctness, and after
+		 * the last header word (trigger word).
+		 */
+		__iowrite32_copy(piobuf, hdr, hdrwords - 1);
+		ipath_flush_wc();
+		__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+		ipath_flush_wc();
+		return 0;
+	}
+
+	__iowrite32_copy(piobuf, hdr, hdrwords);
+	piobuf += hdrwords;
+
+	/* Anything unaligned or spanning segments takes the slow path. */
+	if (unlikely(ss->num_sge != 1 || len > ss->sge.length ||
+		     ((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+		copy_io(piobuf, ss, len);
+		return 0;
+	}
+
+	/* The common case is aligned and contained in one segment. */
+	payload = (u32 *) ss->sge.vaddr;
+	/* Update address before sending packet. */
+	update_sge(ss, len);
+	/* Need to round up for the last dword in the packet. */
+	nwords = (len + 3) >> 2;
+	__iowrite32_copy(piobuf, payload, nwords - 1);
+	/* must flush early everything before trigger word */
+	ipath_flush_wc();
+	__raw_writel(payload[nwords - 1], piobuf + nwords - 1);
+	/* be sure trigger word is written */
+	ipath_flush_wc();
+
+	return 0;
+}
+
+/*
+ * Snapshot the word/packet send and receive counters and the send stall
+ * counter into the five outputs.  Returns 0, or -EINVAL with the outputs
+ * untouched when @dd is not IPATH_INITTED.
+ */
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+			    u64 *rwords, u64 *spkts, u64 *rpkts,
+			    u64 *xmit_wait)
+{
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+		return -EINVAL;
+	}
+
+	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
+
+	return 0;
+}
+
+/**
+ * ipath_get_counters - get various chip counters
+ * @dd: the infinipath device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ * Returns 0 on success, or -EINVAL (leaving @cntrs untouched) when the
+ * device is not IPATH_INITTED.
+ */
+int ipath_get_counters(struct ipath_devdata *dd,
+		       struct ipath_verbs_counters *cntrs)
+{
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+		return -EINVAL;
+	}
+
+	cntrs->symbol_error_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+	cntrs->link_error_recovery_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+	/*
+	 * The link downed counter counts when the other side downs the
+	 * connection.  We add in the number of times we downed the link
+	 * due to local link integrity errors to compensate.
+	 * NOTE(review): no such addition is made here - confirm where.
+	 */
+	cntrs->link_downed_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+	/* port_rcv_errors is the sum of nine receive error counters. */
+	cntrs->port_rcv_errors =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+	cntrs->port_rcv_remphys_errors =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
+	cntrs->port_xmit_discards =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
+	cntrs->port_xmit_data =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+	cntrs->port_rcv_data =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+	cntrs->port_xmit_packets =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+	cntrs->port_rcv_packets =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+	/* Read from driver state rather than a chip counter register. */
+	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
+	cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
+
+	return 0;
+}
+
 /**
  * ipath_ib_piobufavail - callback when a PIO buffer is available
  * @arg: the device pointer
@@ -595,9 +955,9 @@ static int ipath_query_device(struct ib_device *ibdev,
 		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
 		IB_DEVICE_SYS_IMAGE_GUID;
 	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id = ipath_layer_get_vendorid(dev->dd);
-	props->vendor_part_id = ipath_layer_get_deviceid(dev->dd);
-	props->hw_ver = ipath_layer_get_pcirev(dev->dd);
+	props->vendor_id = dev->dd->ipath_vendorid;
+	props->vendor_part_id = dev->dd->ipath_deviceid;
+	props->hw_ver = dev->dd->ipath_pcirev;
 
 	props->sys_image_guid = dev->sys_image_guid;
 
@@ -618,7 +978,7 @@ static int ipath_query_device(struct ib_device *ibdev,
 	props->max_srq_sge = ib_ipath_max_srq_sges;
 	/* props->local_ca_ack_delay */
 	props->atomic_cap = IB_ATOMIC_HCA;
-	props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
+	props->max_pkeys = ipath_get_npkeys(dev->dd);
 	props->max_mcast_grp = ib_ipath_max_mcast_grps;
 	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
@@ -643,12 +1003,17 @@ const u8 ipath_cvt_physportstate[16] = {
 	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
 };
 
+u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
+{
+	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); /* raw */
+}
+
 static int ipath_query_port(struct ib_device *ibdev,
 			    u8 port, struct ib_port_attr *props)
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	enum ib_mtu mtu;
-	u16 lid = ipath_layer_get_lid(dev->dd);
+	u16 lid = dev->dd->ipath_lid;
 	u64 ibcstat;
 
 	memset(props, 0, sizeof(*props));
@@ -656,16 +1021,16 @@ static int ipath_query_port(struct ib_device *ibdev,
 	props->lmc = dev->mkeyprot_resv_lmc & 7;
 	props->sm_lid = dev->sm_lid;
 	props->sm_sl = dev->sm_sl;
-	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+	ibcstat = dev->dd->ipath_lastibcstat;
 	props->state = ((ibcstat >> 4) & 0x3) + 1;
 	/* See phys_state_show() */
 	props->phys_state = ipath_cvt_physportstate[
-		ipath_layer_get_lastibcstat(dev->dd) & 0xf];
+		dev->dd->ipath_lastibcstat & 0xf];
 	props->port_cap_flags = dev->port_cap_flags;
 	props->gid_tbl_len = 1;
 	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
-	props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
+	props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
+	props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
 		dev->z_pkey_violations;
 	props->qkey_viol_cntr = dev->qkey_violations;
 	props->active_width = IB_WIDTH_4X;
@@ -675,7 +1040,7 @@ static int ipath_query_port(struct ib_device *ibdev,
 	props->init_type_reply = 0;
 
 	props->max_mtu = IB_MTU_4096;
-	switch (ipath_layer_get_ibmtu(dev->dd)) {
+	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@@ -734,7 +1099,7 @@ static int ipath_modify_port(struct ib_device *ibdev,
 	dev->port_cap_flags |= props->set_port_cap_mask;
 	dev->port_cap_flags &= ~props->clr_port_cap_mask;
 	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
 	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
 		dev->qkey_violations = 0;
 	return 0;
@@ -751,7 +1116,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port,
 		goto bail;
 	}
 	gid->global.subnet_prefix = dev->gid_prefix;
-	gid->global.interface_id = ipath_layer_get_guid(dev->dd);
+	gid->global.interface_id = dev->dd->ipath_guid;
 
 	ret = 0;
 
@@ -902,25 +1267,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 	return 0;
 }
 
+/**
+ * ipath_get_npkeys - return the size of the PKEY table for port 0
+ * @dd: the infinipath device (used only to name the port_pkeys array)
+ */
+unsigned ipath_get_npkeys(struct ipath_devdata *dd)
+{
+	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
+}
+
+/**
+ * ipath_get_pkey - look up one entry of the port 0 PKEY table
+ * @dd: the infinipath device
+ * @index: the PKEY index
+ *
+ * Returns the PKEY stored at @index, or 0 when @index lies outside
+ * the table.
+ */
+unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
+{
+	/* Out-of-range indices read as 0 rather than failing. */
+	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
+		return 0;
+
+	return dd->ipath_pd[0]->port_pkeys[index];
+}
+
 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 			    u16 *pkey)
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	int ret;
 
-	if (index >= ipath_layer_get_npkeys(dev->dd)) {
+	if (index >= ipath_get_npkeys(dev->dd)) {
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	*pkey = ipath_layer_get_pkey(dev->dd, index);
+	*pkey = ipath_get_pkey(dev->dd, index);
 	ret = 0;
 
 bail:
 	return ret;
 }
 
-
 /**
  * ipath_alloc_ucontext - allocate a ucontest
  * @ibdev: the infiniband device
@@ -953,6 +1343,63 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context)
 
 static int ipath_verbs_register_sysfs(struct ib_device *dev);
 
+/*
+ * Timer callback (@arg: the ipath_devdata); re-arms itself every jiffy.
+ */
+static void __verbs_timer(unsigned long arg)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
+
+	/* If port 0 rx interrupts are unavailable or can be missed, poll. */
+	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
+		ipath_kreceive(dd);
+
+	/* Handle verbs layer timeouts. */
+	ipath_ib_timer(dd->verbs_dev);
+
+	mod_timer(&dd->verbs_timer, jiffies + 1);
+}
+
+/* Enable the GPIO bit 2 interrupt if flagged, then start verbs_timer. */
+static int enable_timer(struct ipath_devdata *dd)
+{
+	/*
+	 * Early chips had a design flaw where the chip and kernel idea
+	 * of the tail register don't always agree, and therefore we won't
+	 * get an interrupt on the next packet received.
+	 * If the board supports per packet receive interrupts, use it.
+	 * Otherwise, the timer function periodically checks for packets
+	 * to cover this case.  Either way, the timer is needed for verbs
+	 * layer related processing.
+	 */
+	if (dd->ipath_flags & IPATH_GPIO_INTR) {
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
+				 0x2074076542310ULL);
+		/* Enable GPIO bit 2 interrupt */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+				 (u64) (1 << 2));
+	}
+
+	init_timer(&dd->verbs_timer);
+	dd->verbs_timer.function = __verbs_timer;
+	dd->verbs_timer.data = (unsigned long)dd;
+	dd->verbs_timer.expires = jiffies + 1;
+	add_timer(&dd->verbs_timer);
+
+	return 0;
+}
+
+static int disable_timer(struct ipath_devdata *dd)
+{
+	/* Disable GPIO bit 2 interrupt (writes 0 to the whole gpio_mask) */
+	if (dd->ipath_flags & IPATH_GPIO_INTR)
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+
+	del_timer_sync(&dd->verbs_timer);
+
+	return 0;
+}
+
 /**
  * ipath_register_ib_device - register our device with the infiniband core
  * @dd: the device data structure
@@ -960,7 +1407,7 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev);
  */
 int ipath_register_ib_device(struct ipath_devdata *dd)
 {
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 	struct ipath_ibdev *idev;
 	struct ib_device *dev;
 	int ret;
@@ -1020,7 +1467,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	idev->link_width_enabled = 3;	/* 1x or 4x */
 
 	/* Snapshot current HW counters to "clear" them. */
-	ipath_layer_get_counters(dd, &cntrs);
+	ipath_get_counters(dd, &cntrs);
 	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
 	idev->z_link_error_recovery_counter =
 		cntrs.link_error_recovery_counter;
@@ -1044,14 +1491,14 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	 * device types in the system, we can't be sure this is unique.
 	 */
 	if (!sys_image_guid)
-		sys_image_guid = ipath_layer_get_guid(dd);
+		sys_image_guid = dd->ipath_guid;
 	idev->sys_image_guid = sys_image_guid;
 	idev->ib_unit = dd->ipath_unit;
 	idev->dd = dd;
 
 	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
 	dev->owner = THIS_MODULE;
-	dev->node_guid = ipath_layer_get_guid(dd);
+	dev->node_guid = dd->ipath_guid;
 	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
 	dev->uverbs_cmd_mask =
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
@@ -1085,7 +1532,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
 	dev->node_type = IB_NODE_CA;
 	dev->phys_port_cnt = 1;
-	dev->dma_device = ipath_layer_get_device(dd);
+	dev->dma_device = &dd->pcidev->dev;
 	dev->class_dev.dev = dev->dma_device;
 	dev->query_device = ipath_query_device;
 	dev->modify_device = ipath_modify_device;
@@ -1139,7 +1586,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	if (ipath_verbs_register_sysfs(dev))
 		goto err_class;
 
-	ipath_layer_enable_timer(dd);
+	enable_timer(dd);
 
 	goto bail;
 
@@ -1164,7 +1611,7 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 {
 	struct ib_device *ibdev = &dev->ibdev;
 
-	ipath_layer_disable_timer(dev->dd);
+	disable_timer(dev->dd);
 
 	ib_unregister_device(ibdev);
 
@@ -1197,7 +1644,7 @@ static ssize_t show_rev(struct class_device *cdev, char *buf)
 	struct ipath_ibdev *dev =
 		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 
-	return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd));
+	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
 }
 
 static ssize_t show_hca(struct class_device *cdev, char *buf)
@@ -1206,7 +1653,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
 		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 	int ret;
 
-	ret = ipath_layer_get_boardname(dev->dd, buf, 128);
+	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
 	if (ret < 0)
 		goto bail;
 	strcat(buf, "\n");
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index d6faa4ba6067b..00f4cecc258ed 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -153,19 +153,6 @@ struct ipath_mcast {
 	int n_attached;
 };
 
-/* Memory region */
-struct ipath_mr {
-	struct ib_mr ibmr;
-	struct ipath_mregion mr;	/* must be last */
-};
-
-/* Fast memory region */
-struct ipath_fmr {
-	struct ib_fmr ibfmr;
-	u8 page_shift;
-	struct ipath_mregion mr;	/* must be last */
-};
-
 /* Protection domain */
 struct ipath_pd {
 	struct ib_pd ibpd;
@@ -216,6 +203,54 @@ struct ipath_cq {
 	struct ipath_mmap_info *ip;
 };
 
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct ipath_seg {
+	void *vaddr;
+	size_t length;
+};
+
+/* The number of ipath_segs that fit in a page. */
+#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
+
+struct ipath_segarray {
+	struct ipath_seg segs[IPATH_SEGSZ];
+};
+
+struct ipath_mregion {
+	u64 user_base;		/* User's address for this region */
+	u64 iova;		/* IB start address of this region */
+	size_t length;
+	u32 lkey;
+	u32 offset;		/* offset (bytes) to start of region */
+	int access_flags;
+	u32 max_segs;		/* number of ipath_segs in all the arrays */
+	u32 mapsz;		/* size of the map array */
+	struct ipath_segarray *map[0];	/* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct ipath_sge {
+	struct ipath_mregion *mr;
+	void *vaddr;		/* current pointer into the segment */
+	u32 sge_length;		/* length of the SGE */
+	u32 length;		/* remaining length of the segment */
+	u16 m;			/* current index: mr->map[m] */
+	u16 n;			/* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct ipath_mr {
+	struct ib_mr ibmr;
+	struct ipath_mregion mr;	/* must be last */
+};
+
 /*
  * Send work request queue entry.
  * The size of the sg_list is determined when the QP is created and stored
@@ -270,6 +305,12 @@ struct ipath_srq {
 	u32 limit;
 };
 
+struct ipath_sge_state {
+	struct ipath_sge *sg_list;      /* next SGE to be used if any */
+	struct ipath_sge sge;   /* progress state for the current SGE */
+	u8 num_sge;
+};
+
 /*
  * Variables prefixed with s_ are for the requester (sender).
  * Variables prefixed with r_ are for the responder (receiver).
@@ -500,8 +541,19 @@ struct ipath_ibdev {
 	struct ipath_opcode_stats opstats[128];
 };
 
-struct ipath_ucontext {
-	struct ib_ucontext ibucontext;
+struct ipath_verbs_counters {
+	u64 symbol_error_counter;
+	u64 link_error_recovery_counter;
+	u64 link_downed_counter;
+	u64 port_rcv_errors;
+	u64 port_rcv_remphys_errors;
+	u64 port_xmit_discards;
+	u64 port_xmit_data;
+	u64 port_rcv_data;
+	u64 port_xmit_packets;
+	u64 port_rcv_packets;
+	u32 local_link_integrity_errors;
+	u32 excessive_buffer_overrun_errors;
 };
 
 static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -509,11 +561,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct ipath_mr, ibmr);
 }
 
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
 static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
 {
 	return container_of(ibpd, struct ipath_pd, ibpd);
@@ -551,12 +598,6 @@ int ipath_process_mad(struct ib_device *ibdev,
 		      struct ib_grh *in_grh,
 		      struct ib_mad *in_mad, struct ib_mad *out_mad);
 
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
 /*
  * Compare the lower 24 bits of the two values.
  * Returns an integer <, ==, or > than zero.
@@ -568,6 +609,13 @@ static inline int ipath_cmp24(u32 a, u32 b)
 
 struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
 
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+			    u64 *rwords, u64 *spkts, u64 *rpkts,
+			    u64 *xmit_wait);
+
+int ipath_get_counters(struct ipath_devdata *dd,
+		       struct ipath_verbs_counters *cntrs);
+
 int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 
 int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
@@ -598,6 +646,9 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
 
 void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
 
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+		     u32 *hdr, u32 len, struct ipath_sge_state *ss);
+
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
 
 int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
@@ -721,6 +772,12 @@ int ipath_ib_piobufavail(struct ipath_ibdev *);
 
 void ipath_ib_timer(struct ipath_ibdev *);
 
+unsigned ipath_get_npkeys(struct ipath_devdata *);
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *);
+
+unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
+
 extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
 
 extern const u8 ipath_cvt_physportstate[];
-- 
GitLab


From b55f4f06c834a67f949a5219c5f97ffafa240989 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:33 -0700
Subject: [PATCH 0748/1063] IB/ipath: simplify debugging code after ipath_core
 and ib_ipath merger

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c |   4 +-
 drivers/infiniband/hw/ipath/ipath_kernel.h |   4 +-
 drivers/infiniband/hw/ipath/ipath_keys.c   |   3 +-
 drivers/infiniband/hw/ipath/ipath_qp.c     |  10 +-
 drivers/infiniband/hw/ipath/ipath_sysfs.c  |   2 +-
 drivers/infiniband/hw/ipath/ipath_verbs.c  |  19 +---
 drivers/infiniband/hw/ipath/ipath_verbs.h  |   1 -
 drivers/infiniband/hw/ipath/verbs_debug.h  | 108 ---------------------
 8 files changed, 17 insertions(+), 134 deletions(-)
 delete mode 100644 drivers/infiniband/hw/ipath/verbs_debug.h

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 958cc9b33c8fc..e6261bb9a8a10 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -58,7 +58,7 @@ const char *ipath_get_unit_name(int unit)
  * The size has to be longer than this string, so we can append
  * board/chip information to it in the init code.
  */
-const char ipath_core_version[] = IPATH_IDSTR "\n";
+const char ib_ipath_version[] = IPATH_IDSTR "\n";
 
 static struct idr unit_table;
 DEFINE_SPINLOCK(ipath_devs_lock);
@@ -1847,7 +1847,7 @@ static int __init infinipath_init(void)
 {
 	int ret;
 
-	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
 
 	/*
 	 * These must be called before the driver is registered with
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 999249b7f27f8..af342314b3689 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -785,7 +785,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
 
 struct device_driver;
 
-extern const char ipath_core_version[];
+extern const char ib_ipath_version[];
 
 int ipath_driver_create_group(struct device_driver *);
 void ipath_driver_remove_group(struct device_driver *);
@@ -815,7 +815,7 @@ const char *ipath_get_unit_name(int unit);
 
 extern struct mutex ipath_mutex;
 
-#define IPATH_DRV_NAME		"ipath_core"
+#define IPATH_DRV_NAME		"ib_ipath"
 #define IPATH_MAJOR		233
 #define IPATH_USER_MINOR_BASE	0
 #define IPATH_SMA_MINOR		128
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index a5ca279370aa2..ba1b93226caa4 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -34,6 +34,7 @@
 #include <asm/io.h>
 
 #include "ipath_verbs.h"
+#include "ipath_kernel.h"
 
 /**
  * ipath_alloc_lkey - allocate an lkey
@@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
 		r = (r + 1) & (rkt->max - 1);
 		if (r == n) {
 			spin_unlock_irqrestore(&rkt->lock, flags);
-			_VERBS_INFO("LKEY table full\n");
+			ipath_dbg(KERN_INFO "LKEY table full\n");
 			ret = 0;
 			goto bail;
 		}
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 9228d59b59d46..b86858e70f2a1 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -274,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
 				free_qpn(qpt, qp->ibqp.qp_num);
 			if (!atomic_dec_and_test(&qp->refcount) ||
 			    !ipath_destroy_qp(&qp->ibqp))
-				_VERBS_INFO("QP memory leak!\n");
+				ipath_dbg(KERN_INFO "QP memory leak!\n");
 			qp = nqp;
 		}
 	}
@@ -362,8 +362,8 @@ void ipath_error_qp(struct ipath_qp *qp)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
 
-	_VERBS_INFO("QP%d/%d in error state\n",
-		    qp->ibqp.qp_num, qp->remote_qpn);
+	ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+		  qp->ibqp.qp_num, qp->remote_qpn);
 
 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
@@ -945,8 +945,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 
-	_VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
-		    qp->ibqp.qp_num, qp->remote_qpn, wc->status);
+	ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 7396a63840db4..56f12202ff49b 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -75,7 +75,7 @@ int ipath_parse_ushort(const char *str, unsigned short *valp)
 static ssize_t show_version(struct device_driver *dev, char *buf)
 {
 	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
+	return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
 }
 
 static ssize_t show_num_units(struct device_driver *dev, char *buf)
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 3c47620e98877..1776330914e59 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -50,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
 MODULE_PARM_DESC(lkey_table_size,
 		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
 
-unsigned int ib_ipath_debug;	/* debug mask */
-module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "Verbs debug mask");
-
 static unsigned int ib_ipath_max_pds = 0xFFFF;
 module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_pds,
@@ -1598,8 +1594,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	kfree(idev->qp_table.table);
 err_qp:
 	ib_dealloc_device(dev);
-	_VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
-		     dd->ipath_unit, -ret);
+	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
 	idev = NULL;
 
 bail:
@@ -1618,17 +1613,13 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 	if (!list_empty(&dev->pending[0]) ||
 	    !list_empty(&dev->pending[1]) ||
 	    !list_empty(&dev->pending[2]))
-		_VERBS_ERROR("ipath%d pending list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "pending list not empty!\n");
 	if (!list_empty(&dev->piowait))
-		_VERBS_ERROR("ipath%d piowait list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "piowait list not empty!\n");
 	if (!list_empty(&dev->rnrwait))
-		_VERBS_ERROR("ipath%d rnrwait list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
 	if (!ipath_mcast_tree_empty())
-		_VERBS_ERROR("ipath%d multicast table memory leak!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
 	/*
 	 * Note that ipath_unregister_ib_device() can be called before all
 	 * the QPs are destroyed!
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 00f4cecc258ed..9cc0dbfe86029 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -42,7 +42,6 @@
 #include <rdma/ib_pack.h>
 
 #include "ipath_layer.h"
-#include "verbs_debug.h"
 
 #define QPN_MAX                 (1 << 24)
 #define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644
index 6186676f2a16f..0000000000000
--- a/drivers/infiniband/hw/ipath/verbs_debug.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _VERBS_DEBUG_H
-#define _VERBS_DEBUG_H
-
-/*
- * This file contains tracing code for the ib_ipath kernel module.
- */
-#ifndef _VERBS_DEBUGGING	/* tracing enabled or not */
-#define _VERBS_DEBUGGING 1
-#endif
-
-extern unsigned ib_ipath_debug;
-
-#define _VERBS_ERROR(fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_UNIT_ERROR(unit,fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#if _VERBS_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or
- * disable dynamically.
- * This can be set at modprobe time also:
- *      modprobe ib_path ib_ipath_debug=3
- */
-
-#define __VERBS_INFO        0x1	/* generic low verbosity stuff */
-#define __VERBS_DBG         0x2	/* generic debug */
-#define __VERBS_VDBG        0x4	/* verbose debug */
-#define __VERBS_SMADBG      0x8000	/* sma packet debug */
-
-#define _VERBS_INFO(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
-			printk(KERN_INFO "%s: " fmt,"ib_ipath", \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_DBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_VDBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_SMADBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#else /* ! _VERBS_DEBUGGING */
-
-#define _VERBS_INFO(fmt,...)
-#define _VERBS_DBG(fmt,...)
-#define _VERBS_VDBG(fmt,...)
-#define _VERBS_SMADBG(fmt,...)
-
-#endif /* _VERBS_DEBUGGING */
-
-#endif /* _VERBS_DEBUG_H */
-- 
GitLab


From 0fd41363e0785247b7c19127318abc8b5eacc86b Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:34 -0700
Subject: [PATCH 0749/1063] IB/ipath: remove stale references to userspace SMA

When we first submitted a userspace subnet management agent (SMA), it
was rejected, so we left it out of the final driver submission.  This
patch removes a number of vestigial references to it.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_common.h    | 16 ++-------
 drivers/infiniband/hw/ipath/ipath_debug.h     |  2 --
 drivers/infiniband/hw/ipath/ipath_driver.c    | 17 ++++-----
 drivers/infiniband/hw/ipath/ipath_file_ops.c  |  4 +--
 drivers/infiniband/hw/ipath/ipath_fs.c        |  4 +--
 drivers/infiniband/hw/ipath/ipath_init_chip.c | 12 +++----
 drivers/infiniband/hw/ipath/ipath_intr.c      | 14 ++++----
 drivers/infiniband/hw/ipath/ipath_kernel.h    | 11 ++----
 drivers/infiniband/hw/ipath/ipath_layer.c     |  3 --
 drivers/infiniband/hw/ipath/ipath_layer.h     |  3 --
 drivers/infiniband/hw/ipath/ipath_qp.c        | 35 -------------------
 drivers/infiniband/hw/ipath/ipath_stats.c     | 27 --------------
 drivers/infiniband/hw/ipath/ipath_sysfs.c     |  4 +--
 drivers/infiniband/hw/ipath/ipath_verbs.c     |  2 +-
 14 files changed, 35 insertions(+), 119 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 062bd392e7e55..f8df3b771c267 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -106,9 +106,9 @@ struct infinipath_stats {
 	__u64 sps_ether_spkts;
 	/* number of "ethernet" packets received by driver */
 	__u64 sps_ether_rpkts;
-	/* number of SMA packets sent by driver */
+	/* number of SMA packets sent by driver. Obsolete. */
 	__u64 sps_sma_spkts;
-	/* number of SMA packets received by driver */
+	/* number of SMA packets received by driver. Obsolete. */
 	__u64 sps_sma_rpkts;
 	/* number of times all ports rcvhdrq was full and packet dropped */
 	__u64 sps_hdrqfull;
@@ -138,7 +138,7 @@ struct infinipath_stats {
 	__u64 sps_pageunlocks;
 	/*
 	 * Number of packets dropped in kernel other than errors (ether
-	 * packets if ipath not configured, sma/mad, etc.)
+	 * packets if ipath not configured, etc.)
 	 */
 	__u64 sps_krdrops;
 	/* pad for future growth */
@@ -153,8 +153,6 @@ struct infinipath_stats {
 #define IPATH_STATUS_DISABLED      0x2	/* hardware disabled */
 /* Device has been disabled via admin request */
 #define IPATH_STATUS_ADMIN_DISABLED    0x4
-#define IPATH_STATUS_OIB_SMA       0x8	/* ipath_mad kernel SMA running */
-#define IPATH_STATUS_SMA          0x10	/* user SMA running */
 /* Chip has been found and initted */
 #define IPATH_STATUS_CHIP_PRESENT 0x20
 /* IB link is at ACTIVE, usable for data traffic */
@@ -465,14 +463,6 @@ struct __ipath_sendpkt {
 	struct ipath_iovec sps_iov[4];
 };
 
-/* Passed into SMA special file's ->read and ->write methods. */
-struct ipath_sma_pkt
-{
-	__u32 unit;	/* unit on which to send packet */
-	__u64 data;	/* address of payload in userspace */
-	__u32 len;	/* length of payload */
-};
-
 /*
  * Data layout in I2C flash (for GUID, etc.)
  * All fields are little-endian binary unless otherwise stated
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index f415beda0d32b..df69f0d80b8bf 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -60,7 +60,6 @@
 #define __IPATH_USER_SEND   0x1000	/* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
-#define __IPATH_SMADBG      0x8000	/* sma packet debug */
 #define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) gen debug */
 #define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings */
 #define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
@@ -84,7 +83,6 @@
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG     0x0
 #define __IPATH_EPKTDBG   0x0	/* print ethernet packet data */
-#define __IPATH_SMADBG    0x0   /* process startup (init)/exit messages */
 #define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
 #define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on   */
 #define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index e6261bb9a8a10..520c38f13868c 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -64,7 +64,7 @@ static struct idr unit_table;
 DEFINE_SPINLOCK(ipath_devs_lock);
 LIST_HEAD(ipath_dev_list);
 
-wait_queue_head_t ipath_sma_state_wait;
+wait_queue_head_t ipath_state_wait;
 
 unsigned ipath_debug = __IPATH_INFO;
 
@@ -618,15 +618,16 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
 static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
 				int msecs)
 {
-	dd->ipath_sma_state_wanted = state;
-	wait_event_interruptible_timeout(ipath_sma_state_wait,
+	dd->ipath_state_wanted = state;
+	wait_event_interruptible_timeout(ipath_state_wait,
 					 (dd->ipath_flags & state),
 					 msecs_to_jiffies(msecs));
-	dd->ipath_sma_state_wanted = 0;
+	dd->ipath_state_wanted = 0;
 
 	if (!(dd->ipath_flags & state)) {
 		u64 val;
-		ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+			   " ms\n",
 			   /* test INIT ahead of DOWN, both can be set */
 			   (state & IPATH_LINKINIT) ? "INIT" :
 			   ((state & IPATH_LINKDOWN) ? "DOWN" :
@@ -1155,7 +1156,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
  *
  * do appropriate marking as busy, etc.
  * returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_sma_send_pkt and ipath_layer_send
+ * Used by ipath_layer_send
  */
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
 {
@@ -1448,7 +1449,7 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 	int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
 			INFINIPATH_IBCC_LINKCMD_MASK;
 
-	ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+	ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
 		   "is %s\n", dd->ipath_unit,
 		   what[linkcmd],
 		   ipath_ibcstatus_str[
@@ -1457,7 +1458,7 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 			    INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
 			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
 	/* flush all queued sends when going to DOWN or INIT, to be sure that
-	 * they don't block SMA and other MAD packets */
+	 * they don't block MAD packets */
 	if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 				 INFINIPATH_S_ABORT);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index f865ce89b73f1..4080fed2dcd91 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1816,7 +1816,7 @@ int ipath_user_add(struct ipath_devdata *dd)
 		if (ret < 0) {
 			ipath_dev_err(dd, "Could not create wildcard "
 				      "minor: error %d\n", -ret);
-			goto bail_sma;
+			goto bail_user;
 		}
 
 		atomic_set(&user_setup, 1);
@@ -1832,7 +1832,7 @@ int ipath_user_add(struct ipath_devdata *dd)
 
 	goto bail;
 
-bail_sma:
+bail_user:
 	user_cleanup();
 bail:
 	return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 0936d8e8d7043..a5eb30a06a5cb 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -191,8 +191,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
 	portinfo[4] = (dd->ipath_lid << 16);
 
 	/*
-	 * Notimpl yet SMLID (should we store this in the driver, in case
-	 * SMA dies?)  CapabilityMask is 0, we don't support any of these
+	 * Notimpl yet SMLID.
+	 * CapabilityMask is 0, we don't support any of these
 	 * DiagCode is 0; we don't store any diag info for now Notimpl yet
 	 * M_KeyLeasePeriod (we don't support M_Key)
 	 */
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 75c3721367024..44669dc2e22d1 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
 MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
 
 /*
- * Number of buffers reserved for driver (layered drivers and SMA
- * send).  Reserved at end of buffer list.   Initialized based on
+ * Number of buffers reserved for driver (verbs and layered drivers.)
+ * Reserved at end of buffer list.   Initialized based on
  * number of PIO buffers if not set via module interface.
  * The problem with this is that it's global, but we'll use different
  * numbers for different chip types.  So the default value is not
@@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
  *
  * Allocate the eager TID buffers and program them into infinipath.
  * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like SMA
+ * memory, and either use the buffers as is for things like verbs
  * packets, or pass the buffers up to the ipath layered driver and
  * thence the network layer, replacing them as we do so (see
  * ipath_rcv_layer()).
@@ -450,9 +450,9 @@ static void enable_chip(struct ipath_devdata *dd,
 	u32 val;
 	int i;
 
-	if (!reinit) {
-		init_waitqueue_head(&ipath_sma_state_wait);
-	}
+	if (!reinit)
+		init_waitqueue_head(&ipath_state_wait);
+
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 250e2a9f01bb6..49bf7bb15b04b 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -201,7 +201,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 				  ib_linkstate(lstate));
 		}
 		else
-			ipath_cdbg(SMA, "Unit %u link state %s, last "
+			ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
 				   "was %s\n", dd->ipath_unit,
 				   ib_linkstate(lstate),
 				   ib_linkstate((unsigned)
@@ -213,7 +213,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 		if (lstate == IPATH_IBSTATE_INIT ||
 		    lstate == IPATH_IBSTATE_ARM ||
 		    lstate == IPATH_IBSTATE_ACTIVE)
-			ipath_cdbg(SMA, "Unit %u link state down"
+			ipath_cdbg(VERBOSE, "Unit %u link state down"
 				   " (state 0x%x), from %s\n",
 				   dd->ipath_unit,
 				   (u32)val & IPATH_IBSTATE_MASK,
@@ -269,7 +269,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 			     INFINIPATH_IBCS_LINKSTATE_MASK)
 			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
 				/* if from up to down be more vocal */
-				ipath_cdbg(SMA,
+				ipath_cdbg(VERBOSE,
 					   "Unit %u link now down (%s)\n",
 					   dd->ipath_unit,
 					   ipath_ibcstatus_str[ltstate]);
@@ -596,11 +596,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 
 	if (!noprint && *msg)
 		ipath_dev_err(dd, "%s error\n", msg);
-	if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
-		ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
-			   "waking\n", dd->ipath_sma_state_wanted,
+	if (dd->ipath_state_wanted & dd->ipath_flags) {
+		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+			   "waking\n", dd->ipath_state_wanted,
 			   dd->ipath_flags);
-		wake_up_interruptible(&ipath_sma_state_wait);
+		wake_up_interruptible(&ipath_state_wait);
 	}
 
 	return chkerrpkts;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index af342314b3689..a600347bba6a4 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -245,8 +245,8 @@ struct ipath_devdata {
 	u32 ipath_pioavregs;
 	/* IPATH_POLL, etc. */
 	u32 ipath_flags;
-	/* ipath_flags sma is waiting for */
-	u32 ipath_sma_state_wanted;
+	/* ipath_flags driver is waiting for */
+	u32 ipath_state_wanted;
 	/* last buffer for user use, first buf for kernel use is this
 	 * index. */
 	u32 ipath_lastport_piobuf;
@@ -306,10 +306,6 @@ struct ipath_devdata {
 	u32 ipath_pcibar0;
 	/* so we can rewrite it after a chip reset */
 	u32 ipath_pcibar1;
-	/* sequential tries for SMA send and no bufs */
-	u32 ipath_nosma_bufs;
-	/* duration (seconds) ipath_nosma_bufs set */
-	u32 ipath_nosma_secs;
 
 	/* HT/PCI Vendor ID (here for NodeInfo) */
 	u16 ipath_vendorid;
@@ -534,7 +530,7 @@ int ipath_diag_add(struct ipath_devdata *);
 void ipath_diag_remove(struct ipath_devdata *);
 void ipath_diag_bringup_link(struct ipath_devdata *);
 
-extern wait_queue_head_t ipath_sma_state_wait;
+extern wait_queue_head_t ipath_state_wait;
 
 int ipath_user_add(struct ipath_devdata *dd);
 void ipath_user_remove(struct ipath_devdata *dd);
@@ -818,7 +814,6 @@ extern struct mutex ipath_mutex;
 #define IPATH_DRV_NAME		"ib_ipath"
 #define IPATH_MAJOR		233
 #define IPATH_USER_MINOR_BASE	0
-#define IPATH_SMA_MINOR		128
 #define IPATH_DIAG_MINOR_BASE	129
 #define IPATH_NMINORS		255
 
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index 10f578e2aed62..e46aa4ed2a7e1 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -162,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 		if (dd->ipath_layer.l_arg)
 			continue;
 
-		if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
-			*dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-
 		spin_unlock_irqrestore(&ipath_devs_lock, flags);
 		dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
 		spin_lock_irqsave(&ipath_devs_lock, flags);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 4a27ede49941b..3854a4eae6847 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -66,9 +66,6 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
 #define IPATH_LAYER_INT_SEND_CONTINUE 0x10
 #define IPATH_LAYER_INT_BCAST 0x40
 
-/* _verbs_layer.l_flags */
-#define IPATH_VERBS_KERNEL_SMA 0x1
-
 extern unsigned ipath_debug; /* debugging bit mask */
 
 #endif				/* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index b86858e70f2a1..c0267cf8ca8ce 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -644,33 +644,6 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
 	return cpu_to_be32(aeth);
 }
 
-/**
- * set_verbs_flags - set the verbs layer flags
- * @dd: the infinipath device
- * @flags: the flags to set
- */
-static int set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
-{
-	struct ipath_devdata *ss;
-	unsigned long lflags;
-
-	spin_lock_irqsave(&ipath_devs_lock, lflags);
-
-	list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
-		if (!(ss->ipath_flags & IPATH_INITTED))
-			continue;
-		if ((flags & IPATH_VERBS_KERNEL_SMA) &&
-		    !(*ss->ipath_statusp & IPATH_STATUS_SMA))
-			*ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-		else
-			*ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, lflags);
-
-	return 0;
-}
-
 /**
  * ipath_create_qp - create a queue pair for a device
  * @ibpd: the protection domain who's device we create the queue pair for
@@ -784,10 +757,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		}
 		qp->ip = NULL;
 		ipath_reset_qp(qp);
-
-		/* Tell the core driver that the kernel SMA is present. */
-		if (init_attr->qp_type == IB_QPT_SMI)
-			set_verbs_flags(dev->dd, IPATH_VERBS_KERNEL_SMA);
 		break;
 
 	default:
@@ -862,10 +831,6 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	unsigned long flags;
 
-	/* Tell the core driver that the kernel SMA is gone. */
-	if (qp->ibqp.qp_type == IB_QPT_SMI)
-		set_verbs_flags(dev->dd, 0);
-
 	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
 	spin_unlock_irqrestore(&qp->s_lock, flags);
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 70351b7e35c0a..30a825928fcf3 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque)
 		}
 	}
 
-	if (dd->ipath_nosma_bufs) {
-		dd->ipath_nosma_secs += 5;
-		if (dd->ipath_nosma_secs >= 30) {
-			ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
-				   "cancelling pending sends\n",
-				   dd->ipath_nosma_secs);
-			/*
-			 * issue an abort as well, in case we have a packet
-			 * stuck in launch fifo.  This could corrupt an
-			 * outgoing user packet in the worst case,
-			 * but this is a pretty catastrophic, anyway.
-			 */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-					 INFINIPATH_S_ABORT);
-			ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-					     dd->ipath_piobcnt2k +
-					     dd->ipath_piobcnt4k -
-					     dd->ipath_lastport_piobuf);
-			/* start again, if necessary */
-			dd->ipath_nosma_secs = 0;
-		} else
-			ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
-				   "after %u seconds\n",
-				   dd->ipath_nosma_bufs,
-				   dd->ipath_nosma_secs);
-	}
-
 done:
 	mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 56f12202ff49b..8476dd3c7af44 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -107,8 +107,8 @@ static const char *ipath_status_str[] = {
 	"Initted",
 	"Disabled",
 	"Admin_Disabled",
-	"OIB_SMA",
-	"SMA",
+	"", /* This used to be the old "OIB_SMA" status. */
+	"", /* This used to be the old "SMA" status. */
 	"Present",
 	"IB_link_up",
 	"IB_configured",
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 1776330914e59..ab0006288b41e 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1573,7 +1573,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	dev->mmap = ipath_mmap;
 
 	snprintf(dev->node_desc, sizeof(dev->node_desc),
-		 IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
+		 IPATH_IDSTR " %s", system_utsname.nodename);
 
 	ret = ib_register_device(dev);
 	if (ret)
-- 
GitLab


From 32c0a26c8f91dbc2797175c2bdff42b54f66c71d Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:35 -0700
Subject: [PATCH 0750/1063] IB/ipath: trivial cleanups

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_kernel.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index a600347bba6a4..0ae2729c7ea27 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -528,7 +528,6 @@ void ipath_cdev_cleanup(struct cdev **cdevp,
 
 int ipath_diag_add(struct ipath_devdata *);
 void ipath_diag_remove(struct ipath_devdata *);
-void ipath_diag_bringup_link(struct ipath_devdata *);
 
 extern wait_queue_head_t ipath_state_wait;
 
-- 
GitLab


From 98341f261893acd7bc5abee5ddc35337ef49e457 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:36 -0700
Subject: [PATCH 0751/1063] IB/ipath: add new minor device to allow sending of
 diag packets

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_common.h |   7 +
 drivers/infiniband/hw/ipath/ipath_diag.c   | 153 +++++++++++++++++++++
 drivers/infiniband/hw/ipath/ipath_driver.c |  12 ++
 drivers/infiniband/hw/ipath/ipath_kernel.h |   4 +
 4 files changed, 176 insertions(+)

diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index f8df3b771c267..f577905e3acaa 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -463,6 +463,13 @@ struct __ipath_sendpkt {
 	struct ipath_iovec sps_iov[4];
 };
 
+/* Passed into diag data special file's ->write method. */
+struct ipath_diag_pkt {
+	__u32 unit;
+	__u64 data;
+	__u32 len;
+};
+
 /*
  * Data layout in I2C flash (for GUID, etc.)
  * All fields are little-endian binary unless otherwise stated
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 5d77a74aa57b1..28b6b46c106ae 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -41,6 +41,7 @@
  * through the /sys/bus/pci resource mmap interface.
  */
 
+#include <linux/io.h>
 #include <linux/pci.h>
 #include <asm/uaccess.h>
 
@@ -273,6 +274,174 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
 	return ret;
 }
 
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+	.owner = THIS_MODULE,
+	.write = ipath_diagpkt_write,
+};
+
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
+int __init ipath_diagpkt_add(void)
+{
+	return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+			       "ipath_diagpkt", &diagpkt_file_ops,
+			       &diagpkt_cdev, &diagpkt_class_dev);
+}
+
+void __exit ipath_diagpkt_remove(void)
+{
+	ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ *
+ * Copies a struct ipath_diag_pkt from @data, copies the dp.len bytes of
+ * packet it points at into a temporary buffer, and writes them to a PIO
+ * send buffer of unit dp.unit.  dp.len must be a non-zero multiple of 4.
+ *
+ * Returns sizeof(struct ipath_diag_pkt) on success, or a negative errno:
+ * -EINVAL (short write, bad length, or link state not INIT/ARM/ACTIVE),
+ * -EFAULT (bad user pointer), -ENODEV (unit missing or not initialized),
+ * -ENOMEM (no temporary buffer) or -EBUSY (no PIO buffer available).
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off)
+{
+	u32 __iomem *piobuf;
+	u32 plen, clen, pbufn;
+	struct ipath_diag_pkt dp;
+	u32 *tmpbuf = NULL;
+	struct ipath_devdata *dd;
+	ssize_t ret = 0;
+	u64 val;
+
+	if (count < sizeof(dp)) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	if (copy_from_user(&dp, data, sizeof(dp))) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	/*
+	 * send count must be an exact, non-zero number of dwords; a zero
+	 * length would make "clen - 1" below wrap around
+	 */
+	if (!dp.len || (dp.len & 3)) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	clen = dp.len >> 2;
+
+	dd = ipath_lookup(dp.unit);
+	if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+	    !dd->ipath_kregbase) {
+		ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+			   dp.unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+
+	if (ipath_diag_inuse && !diag_set_link &&
+	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+		diag_set_link = 1;
+		ipath_cdbg(VERBOSE, "Trying to set to set link active for "
+			   "diag pkt\n");
+		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+	}
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+	val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
+	if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
+	    val != IPATH_IBSTATE_ACTIVE) {
+		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+			   dd->ipath_unit, (unsigned long long) val);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/*
+	 * dp.len comes from userspace, so check it in 64 bits: the u32 sum
+	 * "plen + 4" could otherwise wrap and pass the limit check.
+	 */
+	if ((u64) dp.len + 2 * sizeof(u32) > dd->ipath_ibmaxlen) {
+		ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+			  dp.len, dd->ipath_ibmaxlen);
+		ret = -EINVAL;
+		goto bail;	/* before writing pbc */
+	}
+
+	/* need total length before first word written */
+	/* +1 word is for the qword padding */
+	plen = sizeof(u32) + dp.len;
+	tmpbuf = vmalloc(plen);
+	if (!tmpbuf) {
+		dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+			 "failing\n");
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	if (copy_from_user(tmpbuf,
+			   (const void __user *) (unsigned long) dp.data,
+			   dp.len)) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	piobuf = ipath_getpiobuf(dd, &pbufn);
+	if (!piobuf) {
+		ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
+			   dd->ipath_unit);
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	plen >>= 2;		/* in dwords */
+
+	if (ipath_debug & __IPATH_PKTDBG)
+		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+			   dd->ipath_unit, plen - 1, pbufn);
+
+	/* we have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order */
+	writeq(plen, piobuf);
+	ipath_flush_wc();
+	/* copy all but the trigger word, then flush, so it's written
+	 * to chip before trigger word, then write trigger word, then
+	 * flush again, so packet is sent. */
+	__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+	ipath_flush_wc();
+	__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+	ipath_flush_wc();
+
+	ret = sizeof(dp);
+
+bail:
+	vfree(tmpbuf);
+	return ret;
+}
+
 static int ipath_diag_release(struct inode *in, struct file *fp)
 {
 	mutex_lock(&ipath_mutex);
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 520c38f13868c..8c908b30984e1 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1881,8 +1881,18 @@ static int __init infinipath_init(void)
 		goto bail_group;
 	}
 
+	ret = ipath_diagpkt_add();
+	if (ret < 0) {
+		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+		       "diag data device: error %d\n", -ret);
+		goto bail_ipathfs;
+	}
+
 	goto bail;
 
+bail_ipathfs:
+	ipath_exit_ipathfs();
+
 bail_group:
 	ipath_driver_remove_group(&ipath_driver.driver);
 
@@ -1993,6 +2003,8 @@ static void __exit infinipath_cleanup(void)
 	struct ipath_devdata *dd, *tmp;
 	unsigned long flags;
 
+	ipath_diagpkt_remove();
+
 	ipath_exit_ipathfs();
 
 	ipath_driver_remove_group(&ipath_driver.driver);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 0ae2729c7ea27..f8accc79b92f1 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -789,6 +789,9 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *);
 void ipath_device_remove_group(struct device *, struct ipath_devdata *);
 int ipath_expose_reset(struct device *);
 
+int ipath_diagpkt_add(void);
+void ipath_diagpkt_remove(void);
+
 int ipath_init_ipathfs(void);
 void ipath_exit_ipathfs(void);
 int ipathfs_add_device(struct ipath_devdata *);
@@ -813,6 +816,7 @@ extern struct mutex ipath_mutex;
 #define IPATH_DRV_NAME		"ib_ipath"
 #define IPATH_MAJOR		233
 #define IPATH_USER_MINOR_BASE	0
+#define IPATH_DIAGPKT_MINOR	127
 #define IPATH_DIAG_MINOR_BASE	129
 #define IPATH_NMINORS		255
 
-- 
GitLab


From eae33d47a797e159306567643284a98ae7428ec4 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:37 -0700
Subject: [PATCH 0752/1063] IB/ipath: do not allow use of CQ entries with
 invalid counts

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_cq.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3c4c198a4514a..049221bc590e0 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -172,7 +172,7 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
 	struct ipath_cq_wc *wc;
 	struct ib_cq *ret;
 
-	if (entries > ib_ipath_max_cqes) {
+	if (entries < 1 || entries > ib_ipath_max_cqes) {
 		ret = ERR_PTR(-EINVAL);
 		goto done;
 	}
@@ -324,6 +324,11 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	u32 head, tail, n;
 	int ret;
 
+	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-- 
GitLab


From 092260b8f966ebe0742045416082e9a81bd971d1 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:38 -0700
Subject: [PATCH 0753/1063] IB/ipath: account for attached QPs correctly

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index cb35679e4a185..085e28b939ec5 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -217,6 +217,8 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
 	dev->n_mcast_grps_allocated++;
 	spin_unlock(&dev->n_mcast_grps_lock);
 
+	mcast->n_attached++;
+
 	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
 
 	atomic_inc(&mcast->refcount);
-- 
GitLab


From 525d0ca1d452ed336c1d907fb20c104467a8a47b Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:39 -0700
Subject: [PATCH 0754/1063] IB/ipath: support new QLogic product naming scheme

This patch only renames files, fixes product names, and updates
comments.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/Makefile          |  4 +-
 drivers/infiniband/hw/ipath/ipath_driver.c    |  9 +--
 drivers/infiniband/hw/ipath/ipath_file_ops.c  |  6 +-
 .../ipath/{ipath_ht400.c => ipath_iba6110.c}  | 39 ++++++-----
 .../ipath/{ipath_pe800.c => ipath_iba6120.c}  | 68 +++++++++----------
 drivers/infiniband/hw/ipath/ipath_kernel.h    |  8 +--
 drivers/infiniband/hw/ipath/ipath_registers.h |  5 +-
 7 files changed, 68 insertions(+), 71 deletions(-)
 rename drivers/infiniband/hw/ipath/{ipath_ht400.c => ipath_iba6110.c} (98%)
 rename drivers/infiniband/hw/ipath/{ipath_pe800.c => ipath_iba6120.c} (95%)

diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index 690dc713e63ef..5e29cb0095e56 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -10,7 +10,8 @@ ib_ipath-y := \
 	ipath_eeprom.o \
 	ipath_file_ops.o \
 	ipath_fs.o \
-	ipath_ht400.o \
+	ipath_iba6110.o \
+	ipath_iba6120.o \
 	ipath_init_chip.o \
 	ipath_intr.o \
 	ipath_keys.o \
@@ -18,7 +19,6 @@ ib_ipath-y := \
 	ipath_mad.o \
 	ipath_mmap.o \
 	ipath_mr.o \
-	ipath_pe800.o \
 	ipath_qp.o \
 	ipath_rc.o \
 	ipath_ruc.o \
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 8c908b30984e1..3a15efee73875 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -401,10 +401,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	/* setup the chip-specific functions, as early as possible. */
 	switch (ent->device) {
 	case PCI_DEVICE_ID_INFINIPATH_HT:
-		ipath_init_ht400_funcs(dd);
+		ipath_init_iba6110_funcs(dd);
 		break;
 	case PCI_DEVICE_ID_INFINIPATH_PE800:
-		ipath_init_pe800_funcs(dd);
+		ipath_init_iba6120_funcs(dd);
 		break;
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -969,7 +969,8 @@ void ipath_kreceive(struct ipath_devdata *dd)
 		 */
 		if (l == hdrqtail || (i && !(i&0xf))) {
 			u64 lval;
-			if (l == hdrqtail) /* PE-800 interrupt only on last */
+			if (l == hdrqtail)
+				/* request IBA6120 interrupt only on last */
 				lval = dd->ipath_rhdrhead_intr_off | l;
 			else
 				lval = l;
@@ -983,7 +984,7 @@ void ipath_kreceive(struct ipath_devdata *dd)
 	}
 
 	if (!dd->ipath_rhdrhead_intr_off && !reloop) {
-		/* HT-400 workaround; we can have a race clearing chip
+		/* IBA6110 workaround; we can have a race clearing chip
 		 * interrupt with another interrupt about to be delivered,
 		 * and can clear it before it is delivered on the GPIO
 		 * workaround.  By doing the extra check here for the
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 4080fed2dcd91..6ba9a2d1e6ecb 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1110,7 +1110,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 		ret = mmap_rcvegrbufs(vma, pd);
 	else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
 		/*
-		 * The rcvhdrq itself; readonly except on HT-400 (so have
+		 * The rcvhdrq itself; readonly except on HT (so have
 		 * to allow writable mapping), multiple pages, contiguous
 		 * from an i/o perspective.
 		 */
@@ -1298,14 +1298,14 @@ static int find_best_unit(struct file *fp)
 	 * This code is present to allow a knowledgeable person to
 	 * specify the layout of processes to processors before opening
 	 * this driver, and then we'll assign the process to the "closest"
-	 * HT-400 to that processor (we assume reasonable connectivity,
+	 * InfiniPath chip to that processor (we assume reasonable connectivity,
 	 * for now).  This code assumes that if affinity has been set
 	 * before this point, that at most one cpu is set; for now this
 	 * is reasonable.  I check for both cpus_empty() and cpus_full(),
 	 * in case some kernel variant sets none of the bits when no
 	 * affinity is set.  2.6.11 and 12 kernels have all present
 	 * cpus set.  Some day we'll have to fix it up further to handle
-	 * a cpu subset.  This algorithm fails for two HT-400's connected
+	 * a cpu subset.  This algorithm fails for two HT chips connected
 	 * in tunnel fashion.  Eventually this needs real topology
 	 * information.  There may be some issues with dual core numbering
 	 * as well.  This needs more work prior to release.
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
similarity index 98%
rename from drivers/infiniband/hw/ipath/ipath_ht400.c
rename to drivers/infiniband/hw/ipath/ipath_iba6110.c
index 3db015da6e778..5076738aff328 100644
--- a/drivers/infiniband/hw/ipath/ipath_ht400.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -33,7 +33,7 @@
 
 /*
  * This file contains all of the code that is specific to the InfiniPath
- * HT-400 chip.
+ * HT chip.
  */
 
 #include <linux/pci.h>
@@ -43,7 +43,7 @@
 #include "ipath_registers.h"
 
 /*
- * This lists the InfiniPath HT400 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
  * This structure should never be directly accessed.
  *
  * The names are in InterCap form because they're taken straight from
@@ -537,7 +537,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
 		strlcat(msg, "[HT core Misc7]", msgl);
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
@@ -553,7 +553,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), HT-400 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@@ -610,18 +610,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 		break;
 	case 5:
 		/*
-		 * HT-460 original production board; two production levels, with
+		 * original production board; two production levels, with
 		 * different serial number ranges.   See ipath_ht_early_init() for
 		 * case where we enable IPATH_GPIO_INTR for later serial # range.
 		 */
-		n = "InfiniPath_HT-460";
+		n = "InfiniPath_QHT7040";
 		break;
 	case 6:
 		n = "OEM_Board_3";
 		break;
 	case 7:
-		/* HT-460 small form factor production board */
-		n = "InfiniPath_HT-465";
+		/* small form factor production board */
+		n = "InfiniPath_QHT7140";
 		break;
 	case 8:
 		n = "LS/X-1";
@@ -633,7 +633,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 		n = "OEM_Board_2";
 		break;
 	case 11:
-		n = "InfiniPath_HT-470";
+		n = "InfiniPath_HT-470"; /* obsoleted */
 		break;
 	case 12:
 		n = "OEM_Board_4";
@@ -641,7 +641,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 	default:		/* don't know, just print the number */
 		ipath_dev_err(dd, "Don't yet know about board "
 			      "with ID %u\n", boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
 			 boardrev);
 		break;
 	}
@@ -650,11 +650,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 
 	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
 		/*
-		 * This version of the driver only supports the HT-400
-		 * Rev 3.2
+		 * This version of the driver only supports Rev 3.2 and 3.3
 		 */
 		ipath_dev_err(dd,
-			      "Unsupported HT-400 revision %u.%u!\n",
+			      "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 		goto bail;
@@ -738,7 +737,7 @@ static void ipath_check_htlink(struct ipath_devdata *dd)
 
 static int ipath_setup_ht_reset(struct ipath_devdata *dd)
 {
-	ipath_dbg("No reset possible for HT-400\n");
+	ipath_dbg("No reset possible for this InfiniPath hardware\n");
 	return 0;
 }
 
@@ -925,7 +924,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
 
 	/*
 	 * kernels with CONFIG_PCI_MSI set the vector in the irq field of
-	 * struct pci_device, so we use that to program the HT-400 internal
+	 * struct pci_device, so we use that to program the internal
 	 * interrupt register (not config space) with that value. The BIOS
 	 * must still have done the basic MSI setup.
 	 */
@@ -1013,7 +1012,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
  * @dd: the infinipath device
  *
  * Called during driver unload.
- * This is currently a nop for the HT-400, not for all chips
+ * This is currently a nop for the HT chip, not for all chips
  */
 static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
 {
@@ -1470,7 +1469,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
 
 	/*
-	 * For HT-400, we allocate a somewhat overly large eager buffer,
+	 * For HT, we allocate a somewhat overly large eager buffer,
 	 * such that we can guarantee that we can receive the largest
 	 * packet that we can send out.  To truly support a 4KB MTU,
 	 * we need to bump this to a large value.  To date, other than
@@ -1531,7 +1530,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 	if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
 		dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
 		/*
-		 * Later production HT-460 has same changes as HT-465, so
+		 * Later production QHT7040 has same changes as QHT7140, so
 		 * can use GPIO interrupts.  They have serial #'s starting
 		 * with 128, rather than 112.
 		 */
@@ -1560,13 +1559,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
 }
 
 /**
- * ipath_init_ht400_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device
  *
  * This is global, and is called directly at init to set up the
  * chip-specific function pointers for later use.
  */
-void ipath_init_ht400_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_ht_intconfig;
 	dd->ipath_f_bus = ipath_setup_ht_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
similarity index 95%
rename from drivers/infiniband/hw/ipath/ipath_pe800.c
rename to drivers/infiniband/hw/ipath/ipath_iba6120.c
index b83f66d8262cf..f4233baaa3380 100644
--- a/drivers/infiniband/hw/ipath/ipath_pe800.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -32,7 +32,7 @@
  */
 /*
  * This file contains all of the code that is specific to the
- * InfiniPath PE-800 chip.
+ * InfiniPath PCIe chip.
  */
 
 #include <linux/interrupt.h>
@@ -45,9 +45,9 @@
 
 /*
  * This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PE800, the PCI-Express chip.
+ * access functions for the QLogic InfiniPath PCI-Express chip.
  *
- * This lists the InfiniPath PE800 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
  * This structure should never be directly accessed.
  */
 struct _infinipath_do_not_use_kernel_regs {
@@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = {
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
 
-	/* This group is pe-800-specific; and used only in this file */
 	/* The rcvpktled register controls one of the debug port signals, so
 	 * a packet activity LED can be connected to it. */
 	.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
@@ -388,7 +387,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	*msg = '\0';
 
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
@@ -433,7 +432,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), PE-800 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@@ -511,22 +510,25 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
 		n = "InfiniPath_Emulation";
 		break;
 	case 1:
-		n = "InfiniPath_PE-800-Bringup";
+		n = "InfiniPath_QLE7140-Bringup";
 		break;
 	case 2:
-		n = "InfiniPath_PE-880";
+		n = "InfiniPath_QLE7140";
 		break;
 	case 3:
-		n = "InfiniPath_PE-850";
+		n = "InfiniPath_QMI7140";
 		break;
 	case 4:
-		n = "InfiniPath_PE-860";
+		n = "InfiniPath_QEM7140";
+		break;
+	case 5:
+		n = "InfiniPath_QMH7140";
 		break;
 	default:
 		ipath_dev_err(dd,
 			      "Don't yet know about board with ID %u\n",
 			      boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
 			 boardrev);
 		break;
 	}
@@ -534,7 +536,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
 		snprintf(name, namelen, "%s", n);
 
 	if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
-		ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
+		ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 	} else
@@ -705,7 +707,7 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 }
 
-/* this is not yet needed on the PE800, so just return 0. */
+/* this is not yet needed on this chip, so just return 0. */
 static int ipath_pe_intconfig(struct ipath_devdata *dd)
 {
 	return 0;
@@ -759,8 +761,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
  *
  * This is called during driver unload.
  * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT-400. If we do end up needing pci_enable_msi
- * at some point in the future for HT-400, we'll move the call back
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
  * into the main init_one code.
  */
 static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
@@ -780,10 +782,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
  * late in 2.6.16).
  * All that can be done is to edit the kernel source to remove the quirk
  * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
- * even those it uses MSI, and we want to avoid the quirk warning, so
- * So we call enable_msi only for the PE-800.  If we do end up needing
- * pci_enable_msi at some point in the future for HT-400, we'll move the
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning,
+ * so we call enable_msi only for PCIe.  If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
  * call back into the main init_one code.
  * We save the msi lo and hi values, so we can restore them after
  * chip reset (the kernel PCI infrastructure doesn't yet handle that
@@ -971,8 +973,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
 	int ret;
 
 	/* Use ERROR so it shows up in logs, etc. */
-	ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
-		      dd->ipath_unit);
+	ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
 	/* keep chip from being accessed in a few places */
 	dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
 	val = dd->ipath_control | INFINIPATH_C_RESET;
@@ -1078,7 +1079,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
  * @port: the port
  *
  * clear all TID entries for a port, expected and eager.
- * Used from ipath_close().  On PE800, TIDs are only 32 bits,
+ * Used from ipath_close().  On this chip, TIDs are only 32 bits,
  * not 64, but they are still on 64 bit boundaries, so tidbase
  * is declared as u64 * for the pointer math, even though we write 32 bits
  */
@@ -1148,9 +1149,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_flags |= IPATH_4BYTE_TID;
 
 	/*
-	 * For openib, we need to be able to handle an IB header of 96 bytes
-	 * or 24 dwords.  HT-400 has arbitrary sized receive buffers, so we
-	 * made them the same size as the PIO buffers.  The PE-800 does not
+	 * For openfabrics, we need to be able to handle an IB header of
+	 * 24 dwords.  HT chip has arbitrary sized receive buffers, so we
+	 * made them the same size as the PIO buffers.  This chip does not
 	 * handle arbitrary size buffers, so we need the header large enough
 	 * to handle largest IB header, but still have room for a 2KB MTU
 	 * standard IB packet.
@@ -1158,11 +1159,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_rcvhdrentsize = 24;
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
 
-	/* For HT-400, we allocate a somewhat overly large eager buffer,
-	 * such that we can guarantee that we can receive the largest packet
-	 * that we can send out.  To truly support a 4KB MTU, we need to
-	 * bump this to a larger value.  We'll do this when I get around to
-	 * testing 4KB sends on the PE-800, which I have not yet done.
+	/*
+	 * To truly support a 4KB MTU (for usermode), we need to
+	 * bump this to a larger value.  For now, we use them for
+	 * the kernel only.
 	 */
 	dd->ipath_rcvegrbufsize = 2048;
 	/*
@@ -1175,9 +1175,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
 	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
 
 	/*
-	 * For PE-800, we can request a receive interrupt for 1 or
+	 * We can request a receive interrupt for 1 or
 	 * more packets from current offset.  For now, we set this
-	 * up for a single packet, to match the HT-400 behavior.
+	 * up for a single packet.
 	 */
 	dd->ipath_rhdrhead_intr_off = 1ULL<<32;
 
@@ -1216,13 +1216,13 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
 }
 
 /**
- * ipath_init_pe800_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device
  *
  * This is global, and is called directly at init to set up the
  * chip-specific function pointers for later use.
  */
-void ipath_init_pe800_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_pe_intconfig;
 	dd->ipath_f_bus = ipath_setup_pe_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index f8accc79b92f1..2530686f68936 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -236,7 +236,7 @@ struct ipath_devdata {
 	u64 ipath_tidtemplate;
 	/* value to write to free TIDs */
 	u64 ipath_tidinvalid;
-	/* PE-800 rcv interrupt setup */
+	/* IBA6120 rcv interrupt setup */
 	u64 ipath_rhdrhead_intr_off;
 
 	/* size of memory at ipath_kregbase */
@@ -621,10 +621,8 @@ void ipath_free_data(struct ipath_portdata *dd);
 int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
 int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-/* init PE-800-specific func */
-void ipath_init_pe800_funcs(struct ipath_devdata *);
-/* init HT-400-specific func */
-void ipath_init_ht400_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 89df8f5ea9980..f08c86088ca4a 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -36,8 +36,7 @@
 
 /*
  * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for the InfiniPath HT-400
- * chip.
+ * defines the registers, and their contents, for InfiniPath chips.
  */
 
 /*
@@ -286,7 +285,7 @@
 
 #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
 
-/* TID entries (memory), HT400-only */
+/* TID entries (memory), HT-only */
 #define INFINIPATH_RT_VALID 0x8000000000000000ULL
 #define INFINIPATH_RT_ADDR_SHIFT 0
 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
-- 
GitLab


From ff0b8597ec3e7db6b37130a195cb3d673dafea27 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:40 -0700
Subject: [PATCH 0755/1063] IB/ipath: add serial number to hardware freeze
 error message

Also added the word "Hardware" after "Fatal" to make it more obvious
that it's hardware, not software.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_iba6110.c | 5 +++--
 drivers/infiniband/hw/ipath/ipath_iba6120.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 5076738aff328..7028c98e5c4f3 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -461,8 +461,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			 * times.
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index f4233baaa3380..3a7640be25307 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -363,8 +363,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			 * and we get here multiple times
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			/*
-- 
GitLab


From fc8cf8cdfc3ce328c577b18ebcd60a5595f2a283 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:41 -0700
Subject: [PATCH 0756/1063] IB/ipath: be more strict about testing the modify
 QP verb

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c    | 33 +++++++++++++++++++++--
 drivers/infiniband/hw/ipath/ipath_verbs.h |  1 +
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index c0267cf8ca8ce..502d555fdf371 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -455,11 +455,16 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				attr_mask))
 		goto inval;
 
-	if (attr_mask & IB_QP_AV)
+	if (attr_mask & IB_QP_AV) {
 		if (attr->ah_attr.dlid == 0 ||
 		    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
 			goto inval;
 
+		if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+		    (attr->ah_attr.grh.sgid_index > 1))
+			goto inval;
+	}
+
 	if (attr_mask & IB_QP_PKEY_INDEX)
 		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
 			goto inval;
@@ -468,6 +473,27 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		if (attr->min_rnr_timer > 31)
 			goto inval;
 
+	if (attr_mask & IB_QP_PORT)
+		if (attr->port_num == 0 ||
+		    attr->port_num > ibqp->device->phys_port_cnt)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MTU)
+		if (attr->path_mtu > IB_MTU_4096)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		if (attr->max_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE)
+		if (attr->path_mig_state != IB_MIG_MIGRATED)
+			goto inval;
+
 	switch (new_state) {
 	case IB_QPS_RESET:
 		ipath_reset_qp(qp);
@@ -518,6 +544,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		qp->r_min_rnr_timer = attr->min_rnr_timer;
 
+	if (attr_mask & IB_QP_TIMEOUT)
+		qp->timeout = attr->timeout;
+
 	if (attr_mask & IB_QP_QKEY)
 		qp->qkey = attr->qkey;
 
@@ -564,7 +593,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->max_dest_rd_atomic = 1;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
 	attr->port_num = 1;
-	attr->timeout = 0;
+	attr->timeout = qp->timeout;
 	attr->retry_cnt = qp->s_retry_cnt;
 	attr->rnr_retry = qp->s_rnr_retry;
 	attr->alt_port_num = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 9cc0dbfe86029..f2956090d93f7 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -371,6 +371,7 @@ struct ipath_qp {
 	u8 s_retry;		/* requester retry counter */
 	u8 s_rnr_retry;		/* requester RNR retry counter */
 	u8 s_pkey_index;	/* PKEY index to use */
+	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
 	u32 qkey;		/* QKEY for this QP (for UD or RD) */
-- 
GitLab


From ca4ce383acfb05b8035453cdbbfd4f8ae36c7a69 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:42 -0700
Subject: [PATCH 0757/1063] IB/ipath: validate path_mig_state properly

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 502d555fdf371..77391886d2f8d 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -491,7 +491,8 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 			goto inval;
 
 	if (attr_mask & IB_QP_PATH_MIG_STATE)
-		if (attr->path_mig_state != IB_MIG_MIGRATED)
+		if (attr->path_mig_state != IB_MIG_MIGRATED &&
+		    attr->path_mig_state != IB_MIG_REARM)
 			goto inval;
 
 	switch (new_state) {
-- 
GitLab


From 0b81e4f79af8322c7142701982f40d1431dedf19 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:43 -0700
Subject: [PATCH 0758/1063] IB/ipath: put a limit on the number of QPs that can
 be created

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c    | 15 +++++++++++++++
 drivers/infiniband/hw/ipath/ipath_verbs.c |  7 ++++++-
 drivers/infiniband/hw/ipath/ipath_verbs.h |  4 ++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 77391886d2f8d..607ba72af2fa2 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -833,9 +833,21 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		}
 	}
 
+	spin_lock(&dev->n_qps_lock);
+	if (dev->n_qps_allocated == ib_ipath_max_qps) {
+		spin_unlock(&dev->n_qps_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	dev->n_qps_allocated++;
+	spin_unlock(&dev->n_qps_lock);
+
 	ret = &qp->ibqp;
 	goto bail;
 
+bail_ip:
+	kfree(qp->ip);
 bail_rwq:
 	vfree(qp->r_rq.wq);
 bail_qp:
@@ -864,6 +876,9 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
 	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_lock(&dev->n_qps_lock);
+	dev->n_qps_allocated--;
+	spin_unlock(&dev->n_qps_lock);
 
 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index ab0006288b41e..b9be0fd2ed361 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -73,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
 		   S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
 
+unsigned int ib_ipath_max_qps = 16384;
+module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
 unsigned int ib_ipath_max_sges = 0x60;
 module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
@@ -958,7 +962,7 @@ static int ipath_query_device(struct ib_device *ibdev,
 	props->sys_image_guid = dev->sys_image_guid;
 
 	props->max_mr_size = ~0ull;
-	props->max_qp = dev->qp_table.max;
+	props->max_qp = ib_ipath_max_qps;
 	props->max_qp_wr = ib_ipath_max_qp_wrs;
 	props->max_sge = ib_ipath_max_sges;
 	props->max_cq = ib_ipath_max_cqs;
@@ -1420,6 +1424,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	spin_lock_init(&idev->n_pds_lock);
 	spin_lock_init(&idev->n_ahs_lock);
 	spin_lock_init(&idev->n_cqs_lock);
+	spin_lock_init(&idev->n_qps_lock);
 	spin_lock_init(&idev->n_srqs_lock);
 	spin_lock_init(&idev->n_mcast_grps_lock);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index f2956090d93f7..09bbb3f9a2176 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -482,6 +482,8 @@ struct ipath_ibdev {
 	spinlock_t n_ahs_lock;
 	u32 n_cqs_allocated;	/* number of CQs allocated for device */
 	spinlock_t n_cqs_lock;
+	u32 n_qps_allocated;	/* number of QPs allocated for device */
+	spinlock_t n_qps_lock;
 	u32 n_srqs_allocated;	/* number of SRQs allocated for device */
 	spinlock_t n_srqs_lock;
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
@@ -792,6 +794,8 @@ extern unsigned int ib_ipath_max_cqs;
 
 extern unsigned int ib_ipath_max_qp_wrs;
 
+extern unsigned int ib_ipath_max_qps;
+
 extern unsigned int ib_ipath_max_sges;
 
 extern unsigned int ib_ipath_max_mcast_grps;
-- 
GitLab


From a78aa6fb156f9954562c9539aeb25dbec1ffca10 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:44 -0700
Subject: [PATCH 0759/1063] IB/ipath: handle sq_sig_all field correctly

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 607ba72af2fa2..224b0f40767f0 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -606,9 +606,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->recv_cq = qp->ibqp.recv_cq;
 	init_attr->srq = qp->ibqp.srq;
 	init_attr->cap = attr->cap;
-	init_attr->sq_sig_type =
-		(qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
-		? IB_SIGNAL_REQ_WR : 0;
+	if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	else
+		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
 	init_attr->qp_type = qp->ibqp.qp_type;
 	init_attr->port_num = 1;
 	return 0;
@@ -776,8 +777,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		qp->s_wq = swq;
 		qp->s_size = init_attr->cap.max_send_wr + 1;
 		qp->s_max_sge = init_attr->cap.max_send_sge;
-		qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
-			1 << IPATH_S_SIGNAL_REQ_WR : 0;
+		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+			qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+		else
+			qp->s_flags = 0;
 		dev = to_idev(ibpd->device);
 		err = ipath_alloc_qpn(&dev->qp_table, qp,
 				      init_attr->qp_type);
-- 
GitLab


From d821f02a6ebed97e35e0bc7575452cfc6f9073cb Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:45 -0700
Subject: [PATCH 0760/1063] IB/ipath: allow SMA to be disabled

This is useful for testing purposes.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_verbs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index b9be0fd2ed361..fbda7739715f2 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -107,6 +107,10 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
 		   uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
+static unsigned int ib_ipath_disable_sma;
+module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA");
+
 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = 0,
 	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
@@ -354,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
+		if (ib_ipath_disable_sma)
+			break;
+		/* FALLTHROUGH */
 	case IB_QPT_UD:
 		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
 		break;
-- 
GitLab


From e35d710d0c5b74bc9833d6a3791706bd577a3724 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:46 -0700
Subject: [PATCH 0761/1063] IB/ipath: fix return value from ipath_poll

This stops the generic poll code from waiting for a timeout.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_file_ops.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 6ba9a2d1e6ecb..29930e22318e5 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1150,6 +1150,7 @@ static unsigned int ipath_poll(struct file *fp,
 	struct ipath_portdata *pd;
 	u32 head, tail;
 	int bit;
+	unsigned pollflag = 0;
 	struct ipath_devdata *dd;
 
 	pd = port_fp(fp);
@@ -1186,9 +1187,12 @@ static unsigned int ipath_poll(struct file *fp,
 			clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
 			pd->port_rcvwait_to++;
 		}
+		else
+			pollflag = POLLIN | POLLRDNORM;
 	}
 	else {
 		/* it's already happened; don't do wait_event overhead */
+		pollflag = POLLIN | POLLRDNORM;
 		pd->port_rcvnowait++;
 	}
 
@@ -1196,7 +1200,7 @@ static unsigned int ipath_poll(struct file *fp,
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
-	return 0;
+	return pollflag;
 }
 
 static int try_alloc_port(struct ipath_devdata *dd, int port,
-- 
GitLab


From 30fc5c3130bdbc7cc051a2d6054ad38360d408a8 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Fri, 25 Aug 2006 11:24:48 -0700
Subject: [PATCH 0762/1063] IB/ipath: control receive polarity inversion

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c    | 17 +++++++++++
 drivers/infiniband/hw/ipath/ipath_iba6110.c   |  9 ++++++
 drivers/infiniband/hw/ipath/ipath_iba6120.c   |  9 ++++++
 drivers/infiniband/hw/ipath/ipath_kernel.h    |  3 ++
 drivers/infiniband/hw/ipath/ipath_registers.h |  2 ++
 drivers/infiniband/hw/ipath/ipath_sysfs.c     | 29 +++++++++++++++++++
 6 files changed, 69 insertions(+)

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 3a15efee73875..47c9d15557c8a 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -2116,5 +2116,22 @@ int ipath_reset_device(int unit)
 	return ret;
 }
 
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+	u64 val;
+	if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
+		return -1;
+	}
+	if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
+		dd->ipath_rx_pol_inv = new_pol_inv;
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+                         INFINIPATH_XGXS_RX_POL_SHIFT);
+                val |= ((u64)dd->ipath_rx_pol_inv) <<
+                        INFINIPATH_XGXS_RX_POL_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+	}
+	return 0;
+}
 module_init(infinipath_init);
 module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 7028c98e5c4f3..bf2455a6d5623 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -1290,6 +1290,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 3a7640be25307..d86516d23df61 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -654,6 +654,15 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 2530686f68936..a8a56276ff1db 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -503,6 +503,8 @@ struct ipath_devdata {
 	u8 ipath_pci_cacheline;
 	/* LID mask control */
 	u8 ipath_lmc;
+	/* Rx Polarity inversion (compensate for ~tx on partner) */
+	u8 ipath_rx_pol_inv;
 
 	/* local link integrity counter */
 	u32 ipath_lli_counter;
@@ -567,6 +569,7 @@ void ipath_get_faststats(unsigned long);
 int ipath_set_linkstate(struct ipath_devdata *, u8);
 int ipath_set_mtu(struct ipath_devdata *, u16);
 int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 
 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index f08c86088ca4a..6e23b3d632b82 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -282,6 +282,8 @@
 #define INFINIPATH_XGXS_RESET          0x7ULL
 #define INFINIPATH_XGXS_MDIOADDR_MASK  0xfULL
 #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
 
 #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
 
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 8476dd3c7af44..e299148c4b68e 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -561,6 +561,33 @@ static ssize_t store_enabled(struct device *dev,
 	return ret;
 }
 
+static ssize_t store_rx_pol_inv(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf,
+			  size_t count)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	int ret, r;
+	u16 val;
+
+	ret = ipath_parse_ushort(buf, &val);
+	if (ret < 0)
+		goto invalid;
+
+	r = ipath_set_rx_pol_inv(dd, val);
+	if (r < 0) {
+		ret = r;
+		goto bail;
+	}
+
+	goto bail;
+invalid:
+	ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
+bail:
+	return ret;
+}
+
+
 static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
 static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
 
@@ -587,6 +614,7 @@ static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
 static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
+static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
 
 static struct attribute *dev_attributes[] = {
 	&dev_attr_guid.attr,
@@ -601,6 +629,7 @@ static struct attribute *dev_attributes[] = {
 	&dev_attr_boardversion.attr,
 	&dev_attr_unit.attr,
 	&dev_attr_enabled.attr,
+	&dev_attr_rx_pol_inv.attr,
 	NULL
 };
 
-- 
GitLab


From b046a04e162dc7f468700a0817acda0321b2b3ae Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@mellanox.co.il>
Date: Mon, 28 Aug 2006 19:08:53 +0300
Subject: [PATCH 0763/1063] IB/mthca: Fix default static rate returned for
 Tavor in AV

When the default static rate is returned for Tavor, it needs to be
translated to an IB rate value.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_av.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index e215041b2db9c..69599455aca2c 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -90,7 +90,7 @@ static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
 	case MTHCA_RATE_TAVOR_1X:     return IB_RATE_2_5_GBPS;
 	case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
 	case MTHCA_RATE_TAVOR_4X:     return IB_RATE_10_GBPS;
-	default:		      return port_rate;
+	default:		      return mult_to_ib_rate(port_rate);
 	}
 }
 
-- 
GitLab


From f6f76725b5ed8085c602b16bfd309c9957fb84c8 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@mellanox.co.il>
Date: Mon, 28 Aug 2006 19:10:34 +0300
Subject: [PATCH 0764/1063] IB/mthca: Return port number for unconnected QPs in
 query_qp

port_num was not being returned for unconnected QPs.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_qp.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 6d6ba4180a394..4ac25cf067941 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -472,10 +472,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 	if (qp->transport == RC || qp->transport == UC) {
 		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
 		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+		qp_attr->alt_pkey_index =
+			be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+		qp_attr->alt_port_num 	= qp_attr->alt_ah_attr.port_num;
 	}
 
-	qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
-	qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+	qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+	qp_attr->port_num   =
+		(be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
 
 	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
 	qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
@@ -486,11 +490,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 		1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
 	qp_attr->min_rnr_timer 	    =
 		(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
-	qp_attr->port_num 	    = qp_attr->ah_attr.port_num;
 	qp_attr->timeout 	    = context->pri_path.ackto >> 3;
 	qp_attr->retry_cnt 	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
 	qp_attr->rnr_retry 	    = context->pri_path.rnr_retry >> 5;
-	qp_attr->alt_port_num 	    = qp_attr->alt_ah_attr.port_num;
 	qp_attr->alt_timeout 	    = context->alt_path.ackto >> 3;
 	qp_init_attr->cap 	    = qp_attr->cap;
 
-- 
GitLab


From 9e583b85c2a0215dc7f4427361b4f75fcc0316af Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@mellanox.co.il>
Date: Mon, 28 Aug 2006 19:12:39 +0300
Subject: [PATCH 0765/1063] IB/mthca: Return correct number of bits for static
 rate in query_qp

An incorrect number of bits was taken for the static_rate field
(it was masked with 0x7 instead of 0xf).

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_qp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 4ac25cf067941..9324b6204ac57 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -408,7 +408,7 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
 	ib_ah_attr->sl       	  = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
 	ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
 	ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
-						     path->static_rate & 0x7,
+						     path->static_rate & 0xf,
 						     ib_ah_attr->port_num);
 	ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
 	if (ib_ah_attr->ah_flags) {
-- 
GitLab


From c1f250c0b45cdfdd89b21f2b866f317439aa21de Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Mon, 28 Aug 2006 11:55:52 -0700
Subject: [PATCH 0766/1063] IB/cm: Enable atomics along with RDMA reads

Enable atomic operations along with RDMA reads if a local RDMA
read/atomic depth is provided by the user.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0de335b7bfc2f..0df1454819acf 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3125,7 +3125,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
 					   IB_ACCESS_REMOTE_WRITE;
 		if (cm_id_priv->responder_resources)
-			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+						    IB_ACCESS_REMOTE_ATOMIC;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
 		qp_attr->port_num = cm_id_priv->av.port->port_num;
 		ret = 0;
-- 
GitLab


From 76842405fca5f8b8e08d91558ecd3b922265034a Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Mon, 28 Aug 2006 11:57:42 -0700
Subject: [PATCH 0767/1063] IB/cm: Use correct reject code for invalid GID

Set the reject code properly when rejecting a request that contains an
invalid GID.  A suitable GID is returned by the IB CM in the
additional reject information (ARI).  This is a spec compliance issue.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cm.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0df1454819acf..1aad33e035282 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1354,7 +1354,7 @@ static int cm_req_handler(struct cm_work *work)
 							    id.local_id);
 	if (IS_ERR(cm_id_priv->timewait_info)) {
 		ret = PTR_ERR(cm_id_priv->timewait_info);
-		goto error1;
+		goto destroy;
 	}
 	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@@ -1363,7 +1363,8 @@ static int cm_req_handler(struct cm_work *work)
 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 	if (!listen_cm_id_priv) {
 		ret = -EINVAL;
-		goto error2;
+		kfree(cm_id_priv->timewait_info);
+		goto destroy;
 	}
 
 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1373,12 +1374,22 @@ static int cm_req_handler(struct cm_work *work)
 
 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
-	if (ret)
-		goto error3;
+	if (ret) {
+		ib_get_cached_gid(work->port->cm_dev->device,
+				  work->port->port_num, 0, &work->path[0].sgid);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+			       &work->path[0].sgid, sizeof work->path[0].sgid,
+			       NULL, 0);
+		goto rejected;
+	}
 	if (req_msg->alt_local_lid) {
 		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
-		if (ret)
-			goto error3;
+		if (ret) {
+			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+				       &work->path[0].sgid,
+				       sizeof work->path[0].sgid, NULL, 0);
+			goto rejected;
+		}
 	}
 	cm_id_priv->tid = req_msg->hdr.tid;
 	cm_id_priv->timeout_ms = cm_convert_to_ms(
@@ -1400,12 +1411,11 @@ static int cm_req_handler(struct cm_work *work)
 	cm_deref_id(listen_cm_id_priv);
 	return 0;
 
-error3:	atomic_dec(&cm_id_priv->refcount);
+rejected:
+	atomic_dec(&cm_id_priv->refcount);
 	cm_deref_id(listen_cm_id_priv);
-	cm_cleanup_timewait(cm_id_priv->timewait_info);
-error2:	kfree(cm_id_priv->timewait_info);
-	cm_id_priv->timewait_info = NULL;
-error1:	ib_destroy_cm_id(&cm_id_priv->id);
+destroy:
+	ib_destroy_cm_id(cm_id);
 	return ret;
 }
 
-- 
GitLab


From 75ab13443e4575c00788ba9861105745b9dda05c Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Mon, 28 Aug 2006 15:10:32 -0700
Subject: [PATCH 0768/1063] IB/mad: Add support for dual-sided RMPP transfers.

The implementation assumes that any RMPP request that requires a
response uses DS RMPP.  Based on the RMPP start-up scenarios defined
by the spec, this should be a valid assumption.  That is, there is no
start-up scenario defined where an RMPP request is followed by a
non-RMPP response.  By having this assumption we avoid any API
changes.

In order for a node that supports DS RMPP to communicate with one that
does not, RMPP responses assume a new window size of 1 if a DS ACK has
not been received.  (By DS ACK, I'm referring to the turn-around ACK
after the final ACK of the request.)  This is a slight spec deviation,
but is necessary to allow communication with nodes that do not
generate the DS ACK.  It also handles the case when a response is sent
after the request state has been discarded.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/mad_rmpp.c | 90 +++++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index ebcd5b1817706..74fe1af9b18aa 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -60,6 +60,7 @@ struct mad_rmpp_recv {
 	int last_ack;
 	int seg_num;
 	int newwin;
+	int repwin;
 
 	__be64 tid;
 	u32 src_qp;
@@ -170,6 +171,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
 	return msg;
 }
 
+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *recv_wc)
+{
+	struct ib_mad_send_buf *msg;
+	struct ib_rmpp_mad *rmpp_mad;
+	int ret;
+
+	msg = alloc_response_msg(&agent->agent, recv_wc);
+	if (IS_ERR(msg))
+		return;
+
+	rmpp_mad = msg->mad;
+	memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+	rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+	rmpp_mad->rmpp_hdr.seg_num = 0;
+	rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+	}
+}
+
 void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@@ -271,6 +298,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
 	rmpp_recv->newwin = 1;
 	rmpp_recv->seg_num = 1;
 	rmpp_recv->last_ack = 0;
+	rmpp_recv->repwin = 1;
 
 	mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
 	rmpp_recv->tid = mad_hdr->tid;
@@ -591,6 +619,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
 			break;
 }
 
+static void process_ds_ack(struct ib_mad_agent_private *agent,
+			   struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+	struct mad_rmpp_recv *rmpp_recv;
+
+	rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+	if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+		rmpp_recv->repwin = newwin;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			     struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -616,8 +654,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 
 	spin_lock_irqsave(&agent->lock, flags);
 	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
-	if (!mad_send_wr)
-		goto out;	/* Unmatched ACK */
+	if (!mad_send_wr) {
+		if (!seg_num)
+			process_ds_ack(agent, mad_recv_wc, newwin);
+		goto out;	/* Unmatched or DS RMPP ACK */
+	}
+
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+	    (mad_send_wr->timeout)) {
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;		/* Repeated ACK for DS RMPP transaction */
+	}
 
 	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@@ -656,6 +704,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 		if (mad_send_wr->refcount == 1)
 			ib_reset_mad_timeout(mad_send_wr,
 					     mad_send_wr->send_buf.timeout_ms);
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@@ -772,6 +823,39 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
 	return NULL;
 }
 
+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+	struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+	struct mad_rmpp_recv *rmpp_recv;
+	struct ib_ah_attr ah_attr;
+	unsigned long flags;
+	int newwin = 1;
+
+	if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+		goto out;
+
+	spin_lock_irqsave(&agent->lock, flags);
+	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+		if (rmpp_recv->tid != mad_hdr->tid ||
+		    rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+		    rmpp_recv->class_version != mad_hdr->class_version ||
+		    (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+			continue;
+
+		if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+			continue;
+
+		if (rmpp_recv->slid == ah_attr.dlid) {
+			newwin = rmpp_recv->repwin;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&agent->lock, flags);
+out:
+	return newwin;
+}
+
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
@@ -787,7 +871,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 		return IB_RMPP_RESULT_INTERNAL;
 	}
 
-	mad_send_wr->newwin = 1;
+	mad_send_wr->newwin = init_newwin(mad_send_wr);
 
 	/* We need to wait for the final ACK even if there isn't a response */
 	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
-- 
GitLab


From 2b3e258e5dd1938e2708eb5354ad8ba056fe8154 Mon Sep 17 00:00:00 2001
From: James Lentini <jlentini@netapp.com>
Date: Mon, 28 Aug 2006 15:12:04 -0700
Subject: [PATCH 0769/1063] IB/mad: Remove unused includes

The ib_mad module does not use a kthread function, but mad_priv.h
includes <linux/kthread.h>.  mad_rmpp.c does not do any DMA-related
stuff, but includes <linux/dma-mapping.h>.  Remove the unused includes.

Signed-off-by: James Lentini <jlentini@netapp.com>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/mad_priv.h | 1 -
 drivers/infiniband/core/mad_rmpp.c | 2 --
 2 files changed, 3 deletions(-)

diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d147f3bad2ce7..1da9adbccaecd 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -39,7 +39,6 @@
 
 #include <linux/completion.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 74fe1af9b18aa..3ace5f492dc4b 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -33,8 +33,6 @@
  * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
  */
 
-#include <linux/dma-mapping.h>
-
 #include "mad_priv.h"
 #include "mad_rmpp.h"
 
-- 
GitLab


From f06d26537559113207e4b73af6a22eaa5c5e9dc3 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Mon, 28 Aug 2006 15:15:18 -0700
Subject: [PATCH 0770/1063] IB/cm: Randomize starting comm ID

Randomize the starting local comm ID to avoid getting a rejected
connection due to a stale connection after a system reboot or
reloading of the ib_cm.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cm.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1aad33e035282..c8982b02d9b63 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -41,6 +41,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/random.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@@ -73,6 +74,7 @@ static struct ib_cm {
 	struct rb_root remote_id_table;
 	struct rb_root remote_sidr_table;
 	struct idr local_id_table;
+	__be32 random_id_operand;
 	struct workqueue_struct *wq;
 } cm;
 
@@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
-	int ret;
+	int ret, id;
 	static int next_id;
 
 	do {
 		spin_lock_irqsave(&cm.lock, flags);
-		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
-					(__force int *) &cm_id_priv->id.local_id);
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+					next_id++, &id);
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+	cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
 	return ret;
 }
 
@@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id)
 	unsigned long flags;
 
 	spin_lock_irqsave(&cm.lock, flags);
-	idr_remove(&cm.local_id_table, (__force int) local_id);
+	idr_remove(&cm.local_id_table,
+		   (__force int) (local_id ^ cm.random_id_operand));
 	spin_unlock_irqrestore(&cm.lock, flags);
 }
 
@@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
 {
 	struct cm_id_private *cm_id_priv;
 
-	cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
+	cm_id_priv = idr_find(&cm.local_id_table,
+			      (__force int) (local_id ^ cm.random_id_operand));
 	if (cm_id_priv) {
 		if (cm_id_priv->id.remote_id == remote_id)
 			atomic_inc(&cm_id_priv->refcount);
@@ -2082,8 +2088,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 			spin_unlock_irqrestore(&cm.lock, flags);
 			return NULL;
 		}
-		cm_id_priv = idr_find(&cm.local_id_table,
-				      (__force int) timewait_info->work.local_id);
+		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+				      (timewait_info->work.local_id ^
+				       cm.random_id_operand));
 		if (cm_id_priv) {
 			if (cm_id_priv->id.remote_id == remote_id)
 				atomic_inc(&cm_id_priv->refcount);
@@ -3360,6 +3367,7 @@ static int __init ib_cm_init(void)
 	cm.remote_qp_table = RB_ROOT;
 	cm.remote_sidr_table = RB_ROOT;
 	idr_init(&cm.local_id_table);
+	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
 	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
 
 	cm.wq = create_workqueue("ib_cm");
-- 
GitLab


From 3cd965646b7cb75ae84dd0daf6258adf20e4f169 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 22 Sep 2006 15:22:46 -0700
Subject: [PATCH 0771/1063] IB: Whitespace fixes

Remove some trailing whitespace that has snuck in despite the best
efforts of whitespace=error-all.  Also fix a few other whitespace
bogosities.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/Kconfig                     |  2 +-
 drivers/infiniband/core/addr.c                 |  4 ++--
 drivers/infiniband/core/cm.c                   |  2 +-
 drivers/infiniband/core/cma.c                  |  8 ++++----
 drivers/infiniband/core/mad.c                  | 12 ++++++------
 drivers/infiniband/core/mad_rmpp.c             |  2 +-
 drivers/infiniband/core/sa_query.c             |  2 +-
 drivers/infiniband/core/sysfs.c                |  2 +-
 drivers/infiniband/core/ucm.c                  |  6 +++---
 drivers/infiniband/core/user_mad.c             |  2 +-
 drivers/infiniband/core/uverbs_cmd.c           |  2 --
 drivers/infiniband/hw/ipath/ipath_driver.c     |  6 +++---
 drivers/infiniband/hw/mthca/mthca_cq.c         | 10 +++++-----
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |  2 +-
 drivers/infiniband/ulp/srp/ib_srp.c            |  2 +-
 15 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index fd2d528daa3aa..9a329b2c108cc 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD
 	---help---
 	  Userspace InfiniBand Management Datagram (MAD) support.  This
 	  is the kernel side of the userspace MAD support, which allows
-	  userspace processes to send and receive MADs. You will also 
+	  userspace processes to send and receive MADs. You will also
 	  need libibumad from <http://www.openib.org>.
 
 config INFINIBAND_USER_ACCESS
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1205e8027829a..d8e54e002ce3c 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -327,10 +327,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
-static int netevent_callback(struct notifier_block *self, unsigned long event, 
+static int netevent_callback(struct notifier_block *self, unsigned long event,
 	void *ctx)
 {
-	if (event == NETEVENT_NEIGH_UPDATE) {  
+	if (event == NETEVENT_NEIGH_UPDATE) {
 		struct neighbour *neigh = ctx;
 
 		if (neigh->dev->type == ARPHRD_INFINIBAND &&
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c8982b02d9b63..1c145fe92a54c 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -179,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	if (IS_ERR(ah))
 		return PTR_ERR(ah);
 
-	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
+	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
 			       cm_id_priv->av.pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5d625a81193f0..9d58bb59cd45b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -613,7 +613,7 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv)
 	if (id_priv->cma_dev) {
 		switch (id_priv->id.device->node_type) {
 		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
 		default:
@@ -692,13 +692,13 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	if (id_priv->cma_dev) {
 		switch (id->device->node_type) {
 		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
 		default:
 			break;
 		}
-	  	mutex_lock(&lock);
+		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
 		mutex_unlock(&lock);
 	}
@@ -1492,7 +1492,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
 	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
 		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
 			return -EADDRNOTAVAIL;
-		
+
 		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
 		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
 			return -EADDRINUSE;
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1c3cfbbe6a97f..32d3028b274b8 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
 	int i;
 
 	for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+		/* Is there matching OUI for this vendor class ? */
+		if (!memcmp(vendor_class->oui[i], oui, 3))
 			return i;
 
 	return -1;
@@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &mad_agent_priv->send_list, agent_list) {
 		if (mad_send_wr->status == IB_WC_SUCCESS) {
- 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
 		}
 	}
@@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 			}
 		}
 		sg_list.addr = dma_map_single(qp_info->port_priv->
-					      	device->dma_device,
+					        device->dma_device,
 					      &mad_priv->grh,
 					      sizeof *mad_priv -
-					      	sizeof mad_priv->header,
+					        sizeof mad_priv->header,
 					      DMA_FROM_DEVICE);
 		pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
@@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 	struct ib_qp *qp;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
- 	if (!attr) {
+	if (!attr) {
 		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
 		return -ENOMEM;
 	}
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 3ace5f492dc4b..1ef79d015a1e3 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -391,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent)
 static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
 						  int seg_num)
 {
-        struct ib_mad_recv_buf *seg_buf;
+	struct ib_mad_recv_buf *seg_buf;
 	int cur_seg_num;
 
 	list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d6b84226bba7b..df762ba4868f8 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -887,7 +887,7 @@ static void send_handler(struct ib_mad_agent *agent,
 	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);
 
-        ib_free_send_mad(mad_send_wc->send_buf);
+	ib_free_send_mad(mad_send_wc->send_buf);
 	kref_put(&query->sm_ah->ref, free_sm_ah);
 	query->release(query);
 }
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 21f9282c1b25d..fb6660564a309 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -68,7 +68,7 @@ struct port_table_attribute {
 	int			index;
 };
 
-static inline int ibdev_is_alive(const struct ib_device *dev) 
+static inline int ibdev_is_alive(const struct ib_device *dev)
 {
 	return dev->reg_state == IB_DEV_REGISTERED;
 }
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index c1c6fda9452cc..e74c964af7fa4 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
 		info	      = evt->param.apr_rcvd.apr_info;
 		break;
 	case IB_CM_SIDR_REQ_RECEIVED:
-		uvt->resp.u.sidr_req_resp.pkey = 
+		uvt->resp.u.sidr_req_resp.pkey =
 					evt->param.sidr_req_rcvd.pkey;
-		uvt->resp.u.sidr_req_resp.port = 
+		uvt->resp.u.sidr_req_resp.port =
 					evt->param.sidr_req_rcvd.port;
 		uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
 		break;
@@ -1237,7 +1237,7 @@ static struct class ucm_class = {
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
 	struct ib_ucm_device *dev;
-	
+
 	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
 	return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1273f8807e849..8a455aec758f3 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3fcb5d189a234..b72c7f69ca906 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1676,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@@ -1726,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 47c9d15557c8a..2108466c7e337 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -2126,9 +2126,9 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
 		dd->ipath_rx_pol_inv = new_pol_inv;
 		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
 		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                         INFINIPATH_XGXS_RX_POL_SHIFT);
-                val |= ((u64)dd->ipath_rx_pol_inv) <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT;
+			 INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= ((u64)dd->ipath_rx_pol_inv) <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 	}
 	return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 3e27a084257e9..e393681ba7d46 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -544,11 +544,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
 		wq = &(*cur_qp)->rq;
 		wqe = be32_to_cpu(cqe->wqe);
 		wqe_index = wqe >> wq->wqe_shift;
-               /*
-		* WQE addr == base - 1 might be reported in receive completion
-		* with error instead of (rq size - 1) by Sinai FW 1.0.800 and
-		* Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
-		*/
+		/*
+		 * WQE addr == base - 1 might be reported in receive completion
+		 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+		 * Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
+		 */
 		if (unlikely(wqe_index < 0))
 			wqe_index = wq->max - 1;
 		entry->wr_id = (*cur_qp)->wrid[wqe_index];
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ec356ce7cdcde..60b09f5cb3476 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -795,7 +795,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 	}
 
 	if (priv->broadcast) {
- 		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
 		list_add_tail(&priv->broadcast->list, &remove_list);
 		priv->broadcast = NULL;
 	}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 249a98c06aeb0..61c13d1e05063 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -330,7 +330,7 @@ static int srp_send_req(struct srp_target_port *target)
 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					      SRP_BUF_FORMAT_INDIRECT);
 	/*
-	 * In the published SRP specification (draft rev. 16a), the 
+	 * In the published SRP specification (draft rev. 16a), the
 	 * port identifier format is 8 bytes of ID extension followed
 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
 	 * opposite order, so that the GUID comes first.
-- 
GitLab


From 922a8e9fb2e0711212badce47a41137e2ca04cb3 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Thu, 3 Aug 2006 16:02:40 -0500
Subject: [PATCH 0772/1063] RDMA: iWARP Connection Manager.

Add an iWARP Connection Manager (CM), which abstracts connection
management for iWARP devices (RNICs).  It is a logical instance of the
xx_cm where xx is the transport type (ib or iw).  The symbols exported
are used by the transport independent rdma_cm module, and are
available also for transport dependent ULPs.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/iwcm.c | 1019 ++++++++++++++++++++++++++++++++
 drivers/infiniband/core/iwcm.h |   62 ++
 include/rdma/iw_cm.h           |  258 ++++++++
 3 files changed, 1339 insertions(+)
 create mode 100644 drivers/infiniband/core/iwcm.c
 create mode 100644 drivers/infiniband/core/iwcm.h
 create mode 100644 include/rdma/iw_cm.h

diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
new file mode 100644
index 0000000000000..c3fb304a4e866
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.c
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_addr.h>
+
+#include "iwcm.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct workqueue_struct *iwcm_wq;
+struct iwcm_work {
+	struct work_struct work;
+	struct iwcm_id_private *cm_id;
+	struct list_head list;
+	struct iw_cm_event event;
+	struct list_head free_list;
+};
+
+/*
+ * The following services provide a mechanism for pre-allocating iwcm_work
+ * elements.  The design pre-allocates them  based on the cm_id type:
+ *	LISTENING IDS: 	Get enough elements preallocated to handle the
+ *			listen backlog.
+ *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
+ *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
+ *
+ * Allocating them in connect and listen avoids having to deal
+ * with allocation failures on the event upcall from the provider (which
+ * is called in the interrupt context).
+ *
+ * One exception is when creating the cm_id for incoming connection requests.
+ * There are two cases:
+ * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
+ *    the backlog is exceeded, then no more connection request events will
+ *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
+ *    to the provider to reject the connection request.
+ * 2) in the connection request workqueue handler, cm_conn_req_handler().
+ *    If work elements cannot be allocated for the new connect request cm_id,
+ *    then IWCM will call the provider reject method.  This is ok since
+ *    cm_conn_req_handler() runs in the workqueue thread context.
+ */
+
+static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
+{
+	struct iwcm_work *work;
+
+	if (list_empty(&cm_id_priv->work_free_list))
+		return NULL;
+	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
+			  free_list);
+	list_del_init(&work->free_list);
+	return work;
+}
+
+static void put_work(struct iwcm_work *work)
+{
+	list_add(&work->free_list, &work->cm_id->work_free_list);
+}
+
+static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
+{
+	struct iwcm_work *work;
+
+	while ((work = get_work(cm_id_priv)))	/* get_work() unlinks it */
+		kfree(work);
+}
+
+static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
+{
+	struct iwcm_work *work;
+
+	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
+	while (count--) {
+		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
+		if (!work) {
+			dealloc_work_entries(cm_id_priv);
+			return -ENOMEM;
+		}
+		work->cm_id = cm_id_priv;
+		INIT_LIST_HEAD(&work->list);
+		put_work(work);
+	}
+	return 0;
+}
+
+/*
+ * Save private data from incoming connection requests in the
+ * cm_id_priv so the low level driver doesn't have to.  Adjust
+ * the event ptr to point to the local copy.
+ */
+static int copy_private_data(struct iwcm_id_private *cm_id_priv,
+		       struct iw_cm_event *event)
+{
+	void *p;
+
+	p = kmalloc(event->private_data_len, GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+	memcpy(p, event->private_data, event->private_data_len);
+	event->private_data = p;
+	return 0;
+}
+
+/*
+ * Release a reference on cm_id. If the last reference is being removed
+ * and iw_destroy_cm_id is waiting, wake up the waiting thread.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+	int ret = 0;
+
+	BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+		BUG_ON(!list_empty(&cm_id_priv->work_list));
+		if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
+			BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
+			BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
+					&cm_id_priv->flags));
+			ret = 1;
+		}
+		complete(&cm_id_priv->destroy_comp);
+	}
+
+	return ret;
+}
+
+static void add_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	iwcm_deref_id(cm_id_priv);
+}
+
+static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
+
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler,
+				 void *context)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
+	if (!cm_id_priv)
+		return ERR_PTR(-ENOMEM);
+
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	cm_id_priv->id.device = device;
+	cm_id_priv->id.cm_handler = cm_handler;
+	cm_id_priv->id.context = context;
+	cm_id_priv->id.event_handler = cm_event_handler;
+	cm_id_priv->id.add_ref = add_ref;
+	cm_id_priv->id.rem_ref = rem_ref;
+	spin_lock_init(&cm_id_priv->lock);
+	atomic_set(&cm_id_priv->refcount, 1);
+	init_waitqueue_head(&cm_id_priv->connect_wait);
+	init_completion(&cm_id_priv->destroy_comp);
+	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	INIT_LIST_HEAD(&cm_id_priv->work_free_list);
+
+	return &cm_id_priv->id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
+
+
+static int iwcm_modify_qp_err(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	if (!qp)
+		return -EINVAL;
+
+	qp_attr.qp_state = IB_QPS_ERR;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * This is really the RDMAC CLOSING state. It is most similar to the
+ * IB SQD QP state.
+ */
+static int iwcm_modify_qp_sqd(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	BUG_ON(qp == NULL);
+	qp_attr.qp_state = IB_QPS_SQD;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currently being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ *   based on the abrupt flag
+ * - If the connection is already in the CLOSING or IDLE state, the peer is
+ *   disconnecting concurrently with us and we've already seen the
+ *   DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+	struct ib_qp *qp = NULL;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/* Wait if we're currently in a connect or accept downcall */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+
+		/* QP could be <nul> for user-mode client */
+		if (cm_id_priv->qp)
+			qp = cm_id_priv->qp;
+		else
+			ret = -EINVAL;
+		break;
+	case IW_CM_STATE_LISTEN:
+		ret = -EINVAL;
+		break;
+	case IW_CM_STATE_CLOSING:
+		/* remote peer closed first */
+	case IW_CM_STATE_IDLE:
+		/* accept or connect returned !0 */
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called disconnect before/without calling accept after
+		 * connect_request event delivered.
+		 */
+		break;
+	case IW_CM_STATE_CONN_SENT:
+		/* Can only get here if wait above fails */
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (qp) {
+		if (abrupt)
+			ret = iwcm_modify_qp_err(qp);
+		else
+			ret = iwcm_modify_qp_sqd(qp);
+
+		/*
+		 * If both sides are disconnecting the QP could
+		 * already be in ERR or SQD states
+		 */
+		ret = 0;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * CM_ID <-- DESTROYING
+ *
+ * Clean up all resources associated with the connection and release
+ * the initial reference taken by iw_create_cm_id.
+ */
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/*
+	 * Wait if we're currently in a connect or accept downcall. A
+	 * listening endpoint should never block here.
+	 */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_LISTEN:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* destroy the listening endpoint */
+		ret = cm_id->device->iwcm->destroy_listen(cm_id);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* Abrupt close of the connection */
+		(void)iwcm_modify_qp_err(cm_id_priv->qp);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called destroy before/without calling accept after
+		 * receiving connection request event notification.
+		 */
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_DESTROYING:
+	default:
+		BUG();
+		break;
+	}
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	(void)iwcm_deref_id(cm_id_priv);
+}
+
+/*
+ * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+	destroy_cm_id(cm_id);
+
+	wait_for_completion(&cm_id_priv->destroy_comp);
+
+	dealloc_work_entries(cm_id_priv);
+
+	kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, backlog);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+		cm_id_priv->state = IW_CM_STATE_LISTEN;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+		if (ret)
+			cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+		 const void *private_data,
+		 u8 private_data_len)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->reject(cm_id, private_data,
+					  private_data_len);
+
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+		 struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	struct ib_qp *qp = NULL;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	/*
+	 * Get the ib_qp given the QPN, but only in the CONN_RECV state.  A bad
+	 * state or unknown QPN must clear CONNECT_WAIT and wake any waiters.
+	 */
+	if (cm_id_priv->state == IW_CM_STATE_CONN_RECV)
+		qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+	if (ret) {
+		/* An error on accept precludes provider events */
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	int ret = 0;
+	unsigned long flags;
+	struct ib_qp *qp = NULL;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, 4);
+	if (ret)
+		return ret;
+
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	/*
+	 * Get the ib_qp given the QPN, but only from the IDLE state.  A bad
+	 * state or unknown QPN must clear CONNECT_WAIT and wake any waiters,
+	 * since iw_cm_disconnect() and destroy_cm_id() block on that bit.
+	 */
+	if (cm_id_priv->state == IW_CM_STATE_IDLE)
+		qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+	if (ret) {
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection and inherits the client
+ * callback function and other attributes from the listening parent.
+ *
+ * The work item contains a pointer to the listen_cm_id and the event. The
+ * listen_cm_id contains the client cm_handler, context and
+ * device. These are copied when the device is cloned. The event
+ * contains the new four tuple.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
+				struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	struct iw_cm_id *cm_id;
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+
+	/* The provider must never raise a connect request with bad status */
+	BUG_ON(iw_event->status);
+
+	/*
+	 * We could be destroying the listening id. If so, ignore this
+	 * upcall.
+	 */
+	spin_lock_irqsave(&listen_id_priv->lock, flags);
+	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+		goto out;
+	}
+	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+	cm_id = iw_create_cm_id(listen_id_priv->id.device,
+				listen_id_priv->id.cm_handler,
+				listen_id_priv->id.context);
+	/* If the cm_id could not be created, ignore the request */
+	if (IS_ERR(cm_id))
+		goto out;
+
+	cm_id->provider_data = iw_event->provider_data;
+	cm_id->local_addr = iw_event->local_addr;
+	cm_id->remote_addr = iw_event->remote_addr;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+
+	ret = alloc_work_entries(cm_id_priv, 3);
+	if (ret) {
+		iw_cm_reject(cm_id, NULL, 0);
+		iw_destroy_cm_id(cm_id);
+		goto out;
+	}
+
+	/* Call the client CM handler */
+	ret = cm_id->cm_handler(cm_id, iw_event);
+	if (ret) {
+		set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+		destroy_cm_id(cm_id);
+		if (atomic_read(&cm_id_priv->refcount)==0) {
+			dealloc_work_entries(cm_id_priv);
+			kfree(cm_id_priv);
+		}
+	}
+out:
+	/* Every exit frees the private data copied by cm_event_handler() */
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ *
+ * The provider generated an ESTABLISHED event which means that
+ * the MPA negotiation has completed successfully and we are now in MPA
+ * FPDU mode.
+ *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	/*
+	 * We clear the CONNECT_WAIT bit here to allow the callback
+	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+	 * from a callback handler is not allowed.
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the established event to
+ * post its requests to the server. This event will wake up anyone
+ * blocked in iw_cm_disconnect or iw_destroy_cm_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	/*
+	 * Clear the connect wait bit so a callback function calling
+	 * iw_cm_disconnect will not wait and deadlock this thread
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+		cm_id_priv->id.local_addr = iw_event->local_addr;
+		cm_id_priv->id.remote_addr = iw_event->remote_addr;
+		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	} else {
+		/* REJECTED or RESET */
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+
+	/* Wake up waiters on connect complete */
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * If in the ESTABLISHED state, move to CLOSING.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTABLISHED or CLOSING states, the QP will have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP,  move to IDLE, and remove the 'connected' reference.
+ *
+ * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_DESTROYING:
+		break;
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+
+static int process_event(struct iwcm_id_private *cm_id_priv,
+			 struct iw_cm_event *iw_event)
+{
+	int ret = 0;
+
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		cm_conn_req_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		ret = cm_conn_est_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_DISCONNECT:
+		cm_disconnect_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_CLOSE:
+		ret = cm_close_handler(cm_id_priv, iw_event);
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
+/*
+ * Process events on the work_list for the cm_id. Each work element is
+ * copied to the stack and put back on the free list under the lock, so
+ * the event is processed from the local copy: cm_event_handler() may
+ * reuse the element as soon as the lock is dropped. If the callback
+ * requests that the cm_id be deleted, IWCM_F_CALLBACK_DESTROY is set so
+ * that the cm_id is freed here when the last reference is removed,
+ * rather than by an app thread asleep on the destroy_comp list.
+ */
+static void cm_work_handler(void *arg)
+{
+	struct iwcm_work *work = arg, lwork;
+	struct iwcm_id_private *cm_id_priv = work->cm_id;
+	unsigned long flags;
+	int empty;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	empty = list_empty(&cm_id_priv->work_list);
+	while (!empty) {
+		work = list_entry(cm_id_priv->work_list.next,
+				  struct iwcm_work, list);
+		list_del_init(&work->list);
+		empty = list_empty(&cm_id_priv->work_list);
+		lwork = *work;
+		put_work(work);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+		ret = process_event(cm_id_priv, &lwork.event);
+		if (ret) {
+			set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+			destroy_cm_id(&cm_id_priv->id);
+		}
+		BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+		if (iwcm_deref_id(cm_id_priv))
+			return;
+
+		if (atomic_read(&cm_id_priv->refcount)==0 &&
+		    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+			dealloc_work_entries(cm_id_priv);
+			kfree(cm_id_priv);
+			return;
+		}
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * This function is called in interrupt context. Schedule events on
+ * the iwcm_wq thread to allow callback functions to downcall into
+ * the CM and/or block.  Events are queued to a per-CM_ID
+ * work_list. If this is the first event on the work_list, the work
+ * element is also queued on the iwcm_wq thread.
+ *
+ * Each event holds a reference on the cm_id. Until the last posted
+ * event has been delivered and processed, the cm_id cannot be
+ * deleted.
+ *
+ * Returns:
+ * 	      0	- the event was handled.
+ *	-ENOMEM	- the event was not handled due to lack of resources.
+ */
+static int cm_event_handler(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *iw_event)
+{
+	struct iwcm_work *work;
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	work = get_work(cm_id_priv);
+	if (!work) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	INIT_WORK(&work->work, cm_work_handler, work);
+	work->cm_id = cm_id_priv;
+	work->event = *iw_event;
+
+	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
+	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
+	    work->event.private_data_len) {
+		ret = copy_private_data(cm_id_priv, &work->event);
+		if (ret) {
+			put_work(work);
+			goto out;
+		}
+	}
+
+	atomic_inc(&cm_id_priv->refcount);
+	if (list_empty(&cm_id_priv->work_list)) {
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+		queue_work(iwcm_wq, &work->work);
+	} else
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+out:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+					   IB_ACCESS_REMOTE_WRITE|
+					   IB_ACCESS_REMOTE_READ;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		*qp_attr_mask = 0;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
+		       struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask)
+{
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	switch (qp_attr->qp_state) {
+	case IB_QPS_INIT:
+	case IB_QPS_RTR:
+		ret = iwcm_init_qp_init_attr(cm_id_priv,
+					     qp_attr, qp_attr_mask);
+		break;
+	case IB_QPS_RTS:
+		ret = iwcm_init_qp_rts_attr(cm_id_priv,
+					    qp_attr, qp_attr_mask);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_init_qp_attr);
+
+static int __init iw_cm_init(void)
+{
+	iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+	if (!iwcm_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit iw_cm_cleanup(void)
+{
+	destroy_workqueue(iwcm_wq);
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
new file mode 100644
index 0000000000000..3f6cc82564c8b
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {
+	IW_CM_STATE_IDLE,             /* unbound, inactive */
+	IW_CM_STATE_LISTEN,           /* listen waiting for connect */
+	IW_CM_STATE_CONN_RECV,        /* inbound waiting for user accept */
+	IW_CM_STATE_CONN_SENT,        /* outbound waiting for peer accept */
+	IW_CM_STATE_ESTABLISHED,      /* established */
+	IW_CM_STATE_CLOSING,	      /* disconnect */
+	IW_CM_STATE_DESTROYING        /* object being deleted */
+};
+
+struct iwcm_id_private {
+	struct iw_cm_id	id;
+	enum iw_cm_state state;
+	unsigned long flags;
+	struct ib_qp *qp;
+	struct completion destroy_comp;
+	wait_queue_head_t connect_wait;
+	struct list_head work_list;
+	spinlock_t lock;
+	atomic_t refcount;
+	struct list_head work_free_list;
+};
+
+#define IWCM_F_CALLBACK_DESTROY   1
+#define IWCM_F_CONNECT_WAIT       2
+
+#endif /* IWCM_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
new file mode 100644
index 0000000000000..aeefa9b740dc8
--- /dev/null
+++ b/include/rdma/iw_cm.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IW_CM_H
+#define IW_CM_H
+
+#include <linux/in.h>
+#include <rdma/ib_cm.h>
+
+struct iw_cm_id;
+
+enum iw_cm_event_type {
+	IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */
+	IW_CM_EVENT_CONNECT_REPLY,	 /* reply from active connect request */
+	IW_CM_EVENT_ESTABLISHED,	 /* passive side accept successful */
+	IW_CM_EVENT_DISCONNECT,		 /* orderly shutdown */
+	IW_CM_EVENT_CLOSE		 /* close complete */
+};
+
+enum iw_cm_event_status {
+	IW_CM_EVENT_STATUS_OK = 0,	 /* request successful */
+	IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */
+	IW_CM_EVENT_STATUS_REJECTED,	 /* connect request rejected */
+	IW_CM_EVENT_STATUS_TIMEOUT,	 /* the operation timed out */
+	IW_CM_EVENT_STATUS_RESET,	 /* reset from remote peer */
+	IW_CM_EVENT_STATUS_EINVAL,	 /* asynchronous failure for bad parm */
+};
+
+struct iw_cm_event {
+	enum iw_cm_event_type event;
+	enum iw_cm_event_status status;
+	struct sockaddr_in local_addr;
+	struct sockaddr_in remote_addr;
+	void *private_data;
+	u8 private_data_len;
+	void* provider_data;
+};
+
+/**
+ * iw_cm_handler - Function to be called by the IW CM when delivering events
+ * to the client.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *event);
+
+/**
+ * iw_event_handler - Function called by the provider when delivering provider
+ * events to the IW CM.  Returns either 0 indicating the event was processed
+ * or -errno if the event could not be processed.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_event_handler)(struct iw_cm_id *cm_id,
+				 struct iw_cm_event *event);
+
+struct iw_cm_id {
+	iw_cm_handler		cm_handler;      /* client callback function */
+	void		        *context;	 /* client cb context */
+	struct ib_device	*device;
+	struct sockaddr_in      local_addr;
+	struct sockaddr_in	remote_addr;
+	void			*provider_data;	 /* provider private data */
+	iw_event_handler        event_handler;   /* cb for provider
+						    events */
+	/* Used by provider to add and remove refs on IW cm_id */
+	void (*add_ref)(struct iw_cm_id *);
+	void (*rem_ref)(struct iw_cm_id *);
+};
+
+struct iw_cm_conn_param {
+	const void *private_data;
+	u16 private_data_len;
+	u32 ord;
+	u32 ird;
+	u32 qpn;
+};
+
+struct iw_cm_verbs {
+	void		(*add_ref)(struct ib_qp *qp);
+
+	void		(*rem_ref)(struct ib_qp *qp);
+
+	struct ib_qp *	(*get_qp)(struct ib_device *device,
+				  int qpn);
+
+	int		(*connect)(struct iw_cm_id *cm_id,
+				   struct iw_cm_conn_param *conn_param);
+
+	int		(*accept)(struct iw_cm_id *cm_id,
+				  struct iw_cm_conn_param *conn_param);
+
+	int		(*reject)(struct iw_cm_id *cm_id,
+				  const void *pdata, u8 pdata_len);
+
+	int		(*create_listen)(struct iw_cm_id *cm_id,
+					 int backlog);
+
+	int		(*destroy_listen)(struct iw_cm_id *cm_id);
+};
+
+/**
+ * iw_create_cm_id - Create an IW CM identifier.
+ *
+ * @device: The IB device on which to create the IW CM identifier.
+ * @cm_handler: User callback invoked to report events associated with the
+ *   returned IW CM identifier.
+ * @context: User specified context associated with the id.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler, void *context);
+
+/**
+ * iw_destroy_cm_id - Destroy an IW CM identifier.
+ *
+ * @cm_id: The previously created IW CM identifier to destroy.
+ *
+ * The client can assume that no events will be delivered for the CM ID after
+ * this function returns.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id);
+
+/**
+ * iw_cm_unbind_qp - Unbind the specified IW CM identifier and QP
+ *
+ * @cm_id: The IW CM identifier to unbind from the QP.
+ * @qp: The QP
+ *
+ * This is called by the provider when destroying the QP to ensure
+ * that any references held by the IWCM are released. It may also
+ * be called by the IWCM when destroying a CM_ID to ensure that any
+ * references held by the provider are released.
+ */
+void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
+
+/**
+ * iw_cm_get_qp - Return the ib_qp associated with a QPN
+ *
+ * @device: The IB device
+ * @qpn: The queue pair number
+ */
+struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
+
+/**
+ * iw_cm_listen - Listen for incoming connection requests on the
+ * specified IW CM id.
+ *
+ * @cm_id: The IW CM identifier.
+ * @backlog: The maximum number of outstanding un-accepted inbound listen
+ *   requests to queue.
+ *
+ * The source address and port number are specified in the IW CM identifier
+ * structure.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog);
+
+/**
+ * iw_cm_accept - Called to accept an incoming connect request.
+ *
+ * @cm_id: The IW CM identifier associated with the connection request.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * The specified cm_id will have been provided in the event data for a
+ * CONNECT_REQUEST event. Subsequent events related to this connection will be
+ * delivered to the specified IW CM identifier and may occur prior to
+ * the return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_reject - Reject an incoming connection request.
+ *
+ * @cm_id: Connection identifier associated with the request.
+ * @private_data: Pointer to data to deliver to the remote peer as part of the
+ *   reject message.
+ * @private_data_len: The number of bytes in the private_data parameter.
+ *
+ * The client can assume that no events will be delivered to the specified IW
+ * CM identifier following the return of this function. The private_data
+ * buffer is available for reuse when this function returns.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data,
+		 u8 private_data_len);
+
+/**
+ * iw_cm_connect - Called to request a connection to a remote peer.
+ *
+ * @cm_id: The IW CM identifier for the connection.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * Events may be delivered to the specified IW CM identifier prior to the
+ * return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_disconnect - Close the specified connection.
+ *
+ * @cm_id: The IW CM identifier to close.
+ * @abrupt: If 0, the connection will be closed gracefully, otherwise, the
+ *   connection will be reset.
+ *
+ * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is
+ * delivered.
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
+
+/**
+ * iw_cm_init_qp_attr - Called to initialize the attributes of the QP
+ * associated with an IW CM identifier.
+ *
+ * @cm_id: The IW CM identifier associated with the QP
+ * @qp_attr: Pointer to the QP attributes structure.
+ * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are
+ *   valid.
+ */
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+#endif /* IW_CM_H */
-- 
GitLab


From 07ebafbaaa72aa6a35472879008f5a1d1d469a0c Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Thu, 3 Aug 2006 16:02:42 -0500
Subject: [PATCH 0773/1063] RDMA: iWARP Core Changes.

Modifications to the existing rdma header files, core files, drivers,
and ulp files to support iWARP, including:
 - Hook iWARP CM into the build system and use it in rdma_cm.
 - Convert enum ib_node_type to enum rdma_node_type, which includes
   the possibility of RDMA_NODE_RNIC, and update everything for this.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/Makefile             |   4 +-
 drivers/infiniband/core/addr.c               |  18 +-
 drivers/infiniband/core/cache.c              |   5 +-
 drivers/infiniband/core/cm.c                 |   3 +
 drivers/infiniband/core/cma.c                | 356 ++++++++++++++++---
 drivers/infiniband/core/device.c             |   4 +-
 drivers/infiniband/core/mad.c                |   7 +-
 drivers/infiniband/core/sa_query.c           |   5 +-
 drivers/infiniband/core/smi.c                |  16 +-
 drivers/infiniband/core/sysfs.c              |  11 +-
 drivers/infiniband/core/ucm.c                |   3 +-
 drivers/infiniband/core/user_mad.c           |   5 +-
 drivers/infiniband/core/verbs.c              |  17 +
 drivers/infiniband/hw/ehca/ehca_main.c       |   2 +-
 drivers/infiniband/hw/ipath/ipath_verbs.c    |   2 +-
 drivers/infiniband/hw/mthca/mthca_provider.c |   2 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c    |   8 +-
 drivers/infiniband/ulp/srp/ib_srp.c          |   2 +-
 include/rdma/ib_addr.h                       |  17 +-
 include/rdma/ib_verbs.h                      |  25 +-
 20 files changed, 431 insertions(+), 81 deletions(-)

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 68e73ec2d1f87..163d991eb8c96 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,7 +1,7 @@
 infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o
 
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o $(infiniband-y)
+					ib_cm.o iw_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o
 
@@ -14,6 +14,8 @@ ib_sa-y :=			sa_query.o
 
 ib_cm-y :=			cm.o
 
+iw_cm-y :=			iwcm.o
+
 rdma_cm-y :=			cma.o
 
 ib_addr-y :=			addr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index d8e54e002ce3c..9cbf09e2052f8 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -61,12 +61,15 @@ static LIST_HEAD(req_list);
 static DECLARE_WORK(work, process_req, NULL);
 static struct workqueue_struct *addr_wq;
 
-static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-		     unsigned char *dst_dev_addr)
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+		     const unsigned char *dst_dev_addr)
 {
 	switch (dev->type) {
 	case ARPHRD_INFINIBAND:
-		dev_addr->dev_type = IB_NODE_CA;
+		dev_addr->dev_type = RDMA_NODE_IB_CA;
+		break;
+	case ARPHRD_ETHER:
+		dev_addr->dev_type = RDMA_NODE_RNIC;
 		break;
 	default:
 		return -EADDRNOTAVAIL;
@@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 	return 0;
 }
+EXPORT_SYMBOL(rdma_copy_addr);
 
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
@@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 	if (!dev)
 		return -EADDRNOTAVAIL;
 
-	ret = copy_addr(dev_addr, dev, NULL);
+	ret = rdma_copy_addr(dev_addr, dev, NULL);
 	dev_put(dev);
 	return ret;
 }
@@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
 
 	/* If the device does ARP internally, return 'done' */
 	if (rt->idev->dev->flags & IFF_NOARP) {
-		copy_addr(addr, rt->idev->dev, NULL);
+		rdma_copy_addr(addr, rt->idev->dev, NULL);
 		goto put;
 	}
 
@@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
 		src_in->sin_addr.s_addr = rt->rt_src;
 	}
 
-	ret = copy_addr(addr, neigh->dev, neigh->ha);
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
 	neigh_release(neigh);
 put:
@@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
 	if (ZERONET(src_ip)) {
 		src_in->sin_family = dst_in->sin_family;
 		src_in->sin_addr.s_addr = dst_ip;
-		ret = copy_addr(addr, dev, dev->dev_addr);
+		ret = rdma_copy_addr(addr, dev, dev->dev_addr);
 	} else if (LOOPBACK(src_ip)) {
 		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
 		if (!ret)
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 75313ade2e0de..20e9f64e67a6c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -62,12 +62,13 @@ struct ib_update_work {
 
 static inline int start_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
 }
 
 static inline int end_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+		0 : device->phys_port_cnt;
 }
 
 int ib_get_cached_gid(struct ib_device *device,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1c145fe92a54c..e130d2e895152 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3280,6 +3280,9 @@ static void cm_add_one(struct ib_device *device)
 	int ret;
 	u8 i;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
 			 device->phys_port_cnt, GFP_KERNEL);
 	if (!cm_dev)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9d58bb59cd45b..e88a7c652ca01 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/idr.h>
+#include <linux/inetdevice.h>
 
 #include <net/tcp.h>
 
@@ -43,6 +44,7 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/iw_cm.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -124,6 +126,7 @@ struct rdma_id_private {
 	int			query_id;
 	union {
 		struct ib_cm_id	*ib;
+		struct iw_cm_id	*iw;
 	} cm_id;
 
 	u32			seq_num;
@@ -259,14 +262,23 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 	id_priv->cma_dev = NULL;
 }
 
-static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
+	enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
 	struct cma_device *cma_dev;
 	union ib_gid gid;
 	int ret = -ENODEV;
 
-	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid),
-
+	switch (rdma_node_get_transport(dev_type)) {
+	case RDMA_TRANSPORT_IB:
+		ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	case RDMA_TRANSPORT_IWARP:
+		iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	default:
+		return -ENODEV;
+	}
 	mutex_lock(&lock);
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
@@ -280,16 +292,6 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
 	return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
-{
-	switch (id_priv->id.route.addr.dev_addr.dev_type) {
-	case IB_NODE_CA:
-		return cma_acquire_ib_dev(id_priv);
-	default:
-		return -ENODEV;
-	}
-}
-
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 					  IB_QP_PKEY_INDEX | IB_QP_PORT);
 }
 
+static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+	/* Only the two attributes named in the mask below are initialized. */
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 		   struct ib_qp_init_attr *qp_init_attr)
 {
@@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 	if (IS_ERR(qp))
 		return PTR_ERR(qp);
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_init_ib_qp(id_priv, qp);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_init_iw_qp(id_priv, qp);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
 					 qp_attr_mask);
 		if (qp_attr->qp_state == IB_QPS_RTR)
 			qp_attr->rq_psn = id_priv->seq_num;
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+					qp_attr_mask);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv,
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (id_priv->query)
 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 		break;
@@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv)
 	cma_exch(id_priv, CMA_DESTROYING);
 
 	if (id_priv->cma_dev) {
-		switch (id_priv->id.device->node_type) {
-		case IB_NODE_CA:
+		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+		case RDMA_TRANSPORT_IB:
 			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
@@ -690,11 +713,15 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	cma_cancel_operation(id_priv, state);
 
 	if (id_priv->cma_dev) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
 			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
@@ -869,7 +896,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
 	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-	rt->addr.dev_addr.dev_type = IB_NODE_CA;
+	rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = CMA_CONNECT;
@@ -898,7 +925,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	}
 
 	atomic_inc(&conn_id->dev_remove);
-	ret = cma_acquire_ib_dev(conn_id);
+	ret = cma_acquire_dev(conn_id);
 	if (ret) {
 		ret = -ENODEV;
 		cma_release_remove(conn_id);
@@ -982,6 +1009,128 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
 	}
 }
 
+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
+{
+	struct rdma_id_private *id_priv = iw_id->context;
+	enum rdma_cm_event_type event = 0;
+	struct sockaddr_in *sin;
+	int ret = 0;
+
+	atomic_inc(&id_priv->dev_remove);
+	/* Map the IW CM event type onto an RDMA CM event type. */
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CLOSE:
+		event = RDMA_CM_EVENT_DISCONNECTED;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+		*sin = iw_event->local_addr;
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+		*sin = iw_event->remote_addr;
+		if (iw_event->status)
+			event = RDMA_CM_EVENT_REJECTED;
+		else
+			event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	default:
+		BUG_ON(1);	/* no other event type is handled here */
+	}
+
+	ret = cma_notify_user(id_priv, event, iw_event->status,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* Destroy the CM ID by returning a non-zero value. */
+		id_priv->cm_id.iw = NULL;
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return ret;
+	}
+
+	cma_release_remove(id_priv);
+	return ret;
+}
+
+/* IW CM callback installed on listening ids (see cma_iw_listen()). */
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+			       struct iw_cm_event *iw_event)
+{
+	struct rdma_cm_id *new_cm_id;
+	struct rdma_id_private *listen_id, *conn_id;
+	struct sockaddr_in *sin;
+	struct net_device *dev = NULL;
+	int ret;
+
+	listen_id = cm_id->context;
+	atomic_inc(&listen_id->dev_remove);
+	if (!cma_comp(listen_id, CMA_LISTEN)) {
+		ret = -ECONNABORTED;
+		goto out;
+	}
+
+	/* New RDMA id for the new IW CM ID; failure comes back as ERR_PTR */
+	new_cm_id = rdma_create_id(listen_id->id.event_handler,
+				   listen_id->id.context,
+				   RDMA_PS_TCP);
+	if (IS_ERR(new_cm_id)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+	atomic_inc(&conn_id->dev_remove);
+	conn_id->state = CMA_CONNECT;
+
+	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+	if (!dev) {
+		ret = -EADDRNOTAVAIL;
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+	ret = cma_acquire_dev(conn_id);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	conn_id->cm_id.iw = cm_id;
+	cm_id->context = conn_id;
+	cm_id->cm_handler = cma_iw_handler;
+
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+	*sin = iw_event->local_addr;
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+	*sin = iw_event->remote_addr;
+
+	ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* User wants to destroy the CM ID */
+		conn_id->cm_id.iw = NULL;
+		cma_exch(conn_id, CMA_DESTROYING);
+		cma_release_remove(conn_id);
+		rdma_destroy_id(&conn_id->id);
+	}
+
+out:
+	if (dev)
+		dev_put(dev);
+	cma_release_remove(listen_id);
+	return ret;
+}
+
 static int cma_ib_listen(struct rdma_id_private *id_priv)
 {
 	struct ib_cm_compare_data compare_data;
@@ -1011,6 +1160,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
 	return ret;
 }
 
+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+	int ret;
+	struct sockaddr_in *sin;
+
+	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+					    iw_conn_req_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.iw))
+		return PTR_ERR(id_priv->cm_id.iw);
+	/* The IW CM ID listens on this id's source address. */
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	id_priv->cm_id.iw->local_addr = *sin;
+
+	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+	/* On failure, destroy the IW CM ID and clear the stored pointer. */
+	if (ret) {
+		iw_destroy_cm_id(id_priv->cm_id.iw);
+		id_priv->cm_id.iw = NULL;
+	}
+
+	return ret;
+}
+
 static int cma_listen_handler(struct rdma_cm_id *id,
 			      struct rdma_cm_event *event)
 {
@@ -1087,12 +1260,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 
 	id_priv->backlog = backlog;
 	if (id->device) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
 			ret = cma_ib_listen(id_priv);
 			if (ret)
 				goto err;
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			ret = cma_iw_listen(id_priv, backlog);
+			if (ret)
+				goto err;
+			break;
 		default:
 			ret = -ENOSYS;
 			goto err;
@@ -1231,6 +1409,23 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
 
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+	struct cma_work *work;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+	/* timeout_ms is unused: the work item is queued immediately. */
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler, work);
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
@@ -1241,10 +1436,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_resolve_iw_route(id_priv, timeout_ms);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1649,6 +1847,47 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	return ret;
 }
 
+/* Create an IW CM ID for this id and start an active connect on it. */
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+			  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_id *cm_id;
+	struct sockaddr_in* sin;
+	int ret;
+	struct iw_cm_conn_param iw_param;
+
+	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
+	if (IS_ERR(cm_id))
+		return PTR_ERR(cm_id);
+
+	id_priv->cm_id.iw = cm_id;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+	cm_id->local_addr = *sin;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+	cm_id->remote_addr = *sin;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret) {
+		/* Clear the pointer so rdma_destroy_id() can't destroy it again. */
+		iw_destroy_cm_id(cm_id);
+		id_priv->cm_id.iw = NULL;
+		return ret;
+	}
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp)
+		iw_param.qpn = id_priv->qp_num;
+	else
+		iw_param.qpn = conn_param->qp_num;
+
+	return iw_cm_connect(cm_id, &iw_param);
+}
+
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1664,10 +1903,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_connect_ib(id_priv, conn_param);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_connect_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1708,6 +1950,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
 }
 
+/* Accept an iWARP connect request on this id's IW CM ID. */
+static int cma_accept_iw(struct rdma_id_private *id_priv,
+		  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_conn_param iw_param;
+	int ret;
+
+	/* conn_param is dereferenced below, so it must not be NULL. */
+	if (!conn_param)
+		return -EINVAL;
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		return ret;
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
+	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1723,13 +1987,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (conn_param)
 			ret = cma_accept_ib(id_priv, conn_param);
 		else
 			ret = cma_rep_recv(id_priv);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_accept_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1756,12 +2023,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 	if (!cma_comp(id_priv, CMA_CONNECT))
 		return -EINVAL;
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_send_cm_rej(id_priv->cm_id.ib,
 				     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
 				     private_data, private_data_len);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_reject(id_priv->cm_id.iw,
+				   private_data, private_data_len);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1780,17 +2051,20 @@ int rdma_disconnect(struct rdma_cm_id *id)
 	    !cma_comp(id_priv, CMA_DISCONNECT))
 		return -EINVAL;
 
-	ret = cma_modify_qp_err(id);
-	if (ret)
-		goto out;
-
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_modify_qp_err(id);
+		if (ret)
+			goto out;
 		/* Initiate or respond to a disconnect. */
 		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
 			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+		break;
 	default:
+		ret = -EINVAL;
 		break;
 	}
 out:
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b2f3cb91d9bcf..d978fbe975355 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device,
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device,
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 32d3028b274b8..082f03c158f00 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device)
 {
 	int start, end, i;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		start = 0;
 		end   = 0;
 	} else {
@@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device)
 {
 	int i, num_ports, cur_port;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		num_ports = 1;
 		cur_port = 0;
 	} else {
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index df762ba4868f8..ca8760a7d88c6 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -919,7 +919,10 @@ static void ib_sa_add_one(struct ib_device *device)
 	struct ib_sa_device *sa_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 35852e794e265..54b81e17ad50d 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
 		/* C14-9:2 */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->return_path set when received */
@@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 		if (hop_ptr == hop_cnt) {
 			/* smp->return_path set when received */
 			smp->hop_ptr++;
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->hop_ptr--;
@@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 		if (hop_ptr == 1) {
 			smp->hop_ptr--;
 			/* C14-13:3 -- SMPs destined for SM shouldn't be here */
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_slid == IB_LID_PERMISSIVE);
 		}
 
@@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->return_path[hop_ptr] = port_num;
@@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 				smp->return_path[hop_ptr] = port_num;
 			/* smp->hop_ptr updated when sending */
 
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->hop_ptr updated when sending */
@@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 				return 1;
 			}
 			/* smp->hop_ptr updated when sending */
-			return (node_type == IB_NODE_SWITCH);
+			return (node_type == RDMA_NODE_IB_SWITCH);
 		}
 
 		/* C14-13:4 -- hop_ptr = 0 -> give to SM */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index fb6660564a309..709323c14c5d3 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
 		return -ENODEV;
 
 	switch (dev->node_type) {
-	case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
-	case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
-	case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
-	default:             return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
+	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
+	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
 	}
 }
 
@@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device)
 	if (ret)
 		goto err_put;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		ret = add_port(device, 0);
 		if (ret)
 			goto err_put;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index e74c964af7fa4..ad4f4d5c29240 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device)
 {
 	struct ib_ucm_device *ucm_dev;
 
-	if (!device->alloc_ucontext)
+	if (!device->alloc_ucontext ||
+	    rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;
 
 	ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 8a455aec758f3..807fbd6b84140 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device)
 	struct ib_umad_device *umad_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 06f98e9e14f98..8b5dd3649bbf3 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+	switch (node_type) {
+	case RDMA_NODE_IB_CA:
+	case RDMA_NODE_IB_SWITCH:
+	case RDMA_NODE_IB_ROUTER:
+		return RDMA_TRANSPORT_IB;
+	case RDMA_NODE_RNIC:
+		return RDMA_TRANSPORT_IWARP;
+	default:
+		BUG();
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 2a99f2d13cdbf..2380994418a5f 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -269,7 +269,7 @@ int ehca_register_device(struct ehca_shca *shca)
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
 
-	shca->ib_device.node_type           = IB_NODE_CA;
+	shca->ib_device.node_type           = RDMA_NODE_IB_CA;
 	shca->ib_device.phys_port_cnt       = shca->num_ports;
 	shca->ib_device.dma_device          = &shca->ibmebus_dev->ofdev.dev;
 	shca->ib_device.query_device        = ehca_query_device;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index fbda7739715f2..b8381c5e72bd6 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1538,7 +1538,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	dev->node_type = IB_NODE_CA;
+	dev->node_type = RDMA_NODE_IB_CA;
 	dev->phys_port_cnt = 1;
 	dev->dma_device = &dd->pcidev->dev;
 	dev->class_dev.dev = dev->dma_device;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 265b1d1c4a62d..981fe2eebdfa3 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1288,7 +1288,7 @@ int mthca_register_device(struct mthca_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-	dev->ib_dev.node_type            = IB_NODE_CA;
+	dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
 	dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 36d76987a4811..e9a7659eb1d79 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1111,13 +1111,16 @@ static void ipoib_add_one(struct ib_device *device)
 	struct ipoib_dev_priv *priv;
 	int s, e, p;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
 	if (!dev_list)
 		return;
 
 	INIT_LIST_HEAD(dev_list);
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -1141,6 +1144,9 @@ static void ipoib_remove_one(struct ib_device *device)
 	struct ipoib_dev_priv *priv, *tmp;
 	struct list_head *dev_list;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = ib_get_client_data(device, &ipoib_client);
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 61c13d1e05063..feb1fcd0f2fb1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1899,7 +1899,7 @@ static void srp_add_one(struct ib_device *device)
 	if (IS_ERR(srp_dev->fmr_pool))
 		srp_dev->fmr_pool = NULL;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 0ff67398928d3..81b62307621d2 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -40,7 +40,7 @@ struct rdma_dev_addr {
 	unsigned char src_dev_addr[MAX_ADDR_LEN];
 	unsigned char dst_dev_addr[MAX_ADDR_LEN];
 	unsigned char broadcast[MAX_ADDR_LEN];
-	enum ib_node_type dev_type;
+	enum rdma_node_type dev_type;
 };
 
 /**
@@ -72,6 +72,9 @@ int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+	      const unsigned char *dst_dev_addr);
+
 static inline int ip_addr_size(struct sockaddr *addr)
 {
 	return addr->sa_family == AF_INET6 ?
@@ -113,4 +116,16 @@ static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
 	memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
 }
 
+static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
+}
+
+static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
+}
+
 #endif /* IB_ADDR_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 61eed3996117a..8eacc3510993f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -56,12 +56,22 @@ union ib_gid {
 	} global;
 };
 
-enum ib_node_type {
-	IB_NODE_CA 	= 1,
-	IB_NODE_SWITCH,
-	IB_NODE_ROUTER
+enum rdma_node_type {
+	/* IB values map to NodeInfo:NodeType. */
+	RDMA_NODE_IB_CA 	= 1,
+	RDMA_NODE_IB_SWITCH,
+	RDMA_NODE_IB_ROUTER,
+	RDMA_NODE_RNIC
 };
 
+enum rdma_transport_type {
+	RDMA_TRANSPORT_IB,
+	RDMA_TRANSPORT_IWARP
+};
+
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
+
 enum ib_device_cap_flags {
 	IB_DEVICE_RESIZE_MAX_WR		= 1,
 	IB_DEVICE_BAD_PKEY_CNTR		= (1<<1),
@@ -78,6 +88,9 @@ enum ib_device_cap_flags {
 	IB_DEVICE_RC_RNR_NAK_GEN	= (1<<12),
 	IB_DEVICE_SRQ_RESIZE		= (1<<13),
 	IB_DEVICE_N_NOTIFY_CQ		= (1<<14),
+	IB_DEVICE_ZERO_STAG		= (1<<15),
+	IB_DEVICE_SEND_W_INV		= (1<<16),
+	IB_DEVICE_MEM_WINDOW		= (1<<17)
 };
 
 enum ib_atomic_cap {
@@ -835,6 +848,8 @@ struct ib_cache {
 	u8                     *lmc_cache;
 };
 
+struct iw_cm_verbs;
+
 struct ib_device {
 	struct device                *dma_device;
 
@@ -851,6 +866,8 @@ struct ib_device {
 
 	u32                           flags;
 
+	struct iw_cm_verbs	     *iwcm;
+
 	int		           (*query_device)(struct ib_device *device,
 						   struct ib_device_attr *device_attr);
 	int		           (*query_port)(struct ib_device *device,
-- 
GitLab


From f94b533d091a42da92d908eb7b3f9ade1923f90d Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Fri, 22 Sep 2006 15:22:48 -0700
Subject: [PATCH 0774/1063] RDMA/amso1100: Add driver for Ammasso 1100 RNIC

Add a driver for the Ammasso 1100 gigabit ethernet RNIC.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 MAINTAINERS                                  |    8 +
 drivers/infiniband/Kconfig                   |    1 +
 drivers/infiniband/Makefile                  |    1 +
 drivers/infiniband/hw/amso1100/Kbuild        |    8 +
 drivers/infiniband/hw/amso1100/Kconfig       |   15 +
 drivers/infiniband/hw/amso1100/c2.c          | 1255 +++++++++++++++
 drivers/infiniband/hw/amso1100/c2.h          |  551 +++++++
 drivers/infiniband/hw/amso1100/c2_ae.c       |  321 ++++
 drivers/infiniband/hw/amso1100/c2_ae.h       |  108 ++
 drivers/infiniband/hw/amso1100/c2_alloc.c    |  144 ++
 drivers/infiniband/hw/amso1100/c2_cm.c       |  452 ++++++
 drivers/infiniband/hw/amso1100/c2_cq.c       |  433 +++++
 drivers/infiniband/hw/amso1100/c2_intr.c     |  209 +++
 drivers/infiniband/hw/amso1100/c2_mm.c       |  375 +++++
 drivers/infiniband/hw/amso1100/c2_mq.c       |  174 ++
 drivers/infiniband/hw/amso1100/c2_mq.h       |  106 ++
 drivers/infiniband/hw/amso1100/c2_pd.c       |   89 +
 drivers/infiniband/hw/amso1100/c2_provider.c |  869 ++++++++++
 drivers/infiniband/hw/amso1100/c2_provider.h |  181 +++
 drivers/infiniband/hw/amso1100/c2_qp.c       |  975 +++++++++++
 drivers/infiniband/hw/amso1100/c2_rnic.c     |  663 ++++++++
 drivers/infiniband/hw/amso1100/c2_status.h   |  158 ++
 drivers/infiniband/hw/amso1100/c2_user.h     |   82 +
 drivers/infiniband/hw/amso1100/c2_vq.c       |  260 +++
 drivers/infiniband/hw/amso1100/c2_vq.h       |   63 +
 drivers/infiniband/hw/amso1100/c2_wr.h       | 1520 ++++++++++++++++++
 26 files changed, 9021 insertions(+)
 create mode 100644 drivers/infiniband/hw/amso1100/Kbuild
 create mode 100644 drivers/infiniband/hw/amso1100/Kconfig
 create mode 100644 drivers/infiniband/hw/amso1100/c2.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2.h
 create mode 100644 drivers/infiniband/hw/amso1100/c2_ae.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_ae.h
 create mode 100644 drivers/infiniband/hw/amso1100/c2_alloc.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_cm.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_cq.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_intr.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_mm.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_mq.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_mq.h
 create mode 100644 drivers/infiniband/hw/amso1100/c2_pd.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_provider.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_provider.h
 create mode 100644 drivers/infiniband/hw/amso1100/c2_qp.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_rnic.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_status.h
 create mode 100644 drivers/infiniband/hw/amso1100/c2_user.h
 create mode 100644 drivers/infiniband/hw/amso1100/c2_vq.c
 create mode 100644 drivers/infiniband/hw/amso1100/c2_vq.h
 create mode 100644 drivers/infiniband/hw/amso1100/c2_wr.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 830bec779d479..b08c537018de2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -298,6 +298,14 @@ L:	info-linux@geode.amd.com
 W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:	Supported
 
+AMSO1100 RNIC DRIVER
+P:	Tom Tucker
+M:	tom@opengridcomputing.com
+P:	Steve Wise
+M:	swise@opengridcomputing.com
+L:	openib-general@openib.org
+S:	Maintained
+
 AOA (Apple Onboard Audio) ALSA DRIVER
 P:	Johannes Berg
 M:	johannes@sipsolutions.net
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 9a329b2c108cc..9edfacee7d844 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -37,6 +37,7 @@ config INFINIBAND_ADDR_TRANS
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
 source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index 08cff32d900ea..2b5d1098ef45f 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
 obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
 obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
new file mode 100644
index 0000000000000..06964c4af8496
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+	c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
new file mode 100644
index 0000000000000..809cb14ac6dec
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+	tristate "Ammasso 1100 RNIC support"
+	depends on PCI && INET && INFINIBAND
+	---help---
+	  This is a low-level driver for the Ammasso 1100 gigabit
+	  ethernet RDMA adapter (iWARP RNIC).
+
+config INFINIBAND_AMSO1100_DEBUG
+	bool "Verbose debugging output"
+	depends on INFINIBAND_AMSO1100
+	default n
+	---help---
+	  This option causes the amso1100 driver to produce a bunch of
+	  debug messages.  Select this if you are developing the driver
+	  or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
new file mode 100644
index 0000000000000..9e9120f36019d
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -0,0 +1,1255 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_provider.h"
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+
+static int debug = -1;		/* defaults above */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int c2_up(struct net_device *netdev);
+static int c2_down(struct net_device *netdev);
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static void c2_tx_interrupt(struct net_device *netdev);
+static void c2_rx_interrupt(struct net_device *netdev);
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void c2_tx_timeout(struct net_device *netdev);
+static int c2_change_mtu(struct net_device *netdev, int new_mtu);
+static void c2_reset(struct c2_port *c2_port);
+static struct net_device_stats *c2_get_stats(struct net_device *netdev);
+
+static struct pci_device_id c2_pci_table[] = {
+	{ PCI_DEVICE(0x18b8, 0xb001) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, c2_pci_table);
+
+static void c2_print_macaddr(struct net_device *netdev)
+{
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
+		"IRQ %u\n", netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+		netdev->irq);
+}
+
+static void c2_set_rxbufsize(struct c2_port *c2_port)
+{
+	struct net_device *netdev = c2_port->netdev;
+
+	if (netdev->mtu > RX_BUF_SIZE)
+		c2_port->rx_buf_size =
+		    netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
+		    NET_IP_ALIGN;
+	else
+		c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
+}
+
+/*
+ * Allocate TX ring elements (kmalloc, not zeroed) and chain them in a circle,
+ * one per adapter descriptor; elem->mapaddr/maplen are left uninitialised.
+ */
+static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_txp_ring)
+{
+	struct c2_tx_desc *tx_desc;
+	struct c2_txp_desc __iomem *txp_desc;
+	struct c2_element *elem;
+	int i;
+
+	tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL);
+	if (!tx_ring->start)
+		return -ENOMEM;
+
+	elem = tx_ring->start;
+	tx_desc = vaddr;
+	txp_desc = mmio_txp_ring;
+	for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
+		tx_desc->len = 0;
+		tx_desc->status = 0;
+
+		/* Mark the adapter descriptor TXP_HTXD_UNINIT (dummy address) */
+		__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+			     (void __iomem *) txp_desc + C2_TXP_ADDR);
+		__raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
+		__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+			     (void __iomem *) txp_desc + C2_TXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = tx_desc;
+		elem->hw_desc = txp_desc;
+
+		if (i == tx_ring->count - 1) {
+			elem->next = tx_ring->start;	/* close the circle */
+			tx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			tx_desc->next_offset =
+			    base + (i + 1) * sizeof(*tx_desc);
+		}
+	}
+
+	tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
+
+	return 0;
+}
+
+/*
+ * Allocate RX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_rxp_ring)
+{
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_desc __iomem *rxp_desc;
+	struct c2_element *elem;
+	int i;
+
+	rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL);
+	if (!rx_ring->start)
+		return -ENOMEM;
+
+	elem = rx_ring->start;
+	rx_desc = vaddr;
+	rxp_desc = mmio_rxp_ring;
+	for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
+		rx_desc->len = 0;
+		rx_desc->status = 0;
+
+		/* Set RXP_HRXD_UNINIT */
+		__raw_writew(cpu_to_be16(RXP_HRXD_OK),
+		       (void __iomem *) rxp_desc + C2_RXP_STATUS);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     (void __iomem *) rxp_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     (void __iomem *) rxp_desc + C2_RXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = rx_desc;
+		elem->hw_desc = rxp_desc;
+
+		if (i == rx_ring->count - 1) {
+			elem->next = rx_ring->start;
+			rx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			rx_desc->next_offset =
+			    base + (i + 1) * sizeof(*rx_desc);
+		}
+	}
+
+	rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
+
+	return 0;
+}
+
+/* Allocate, DMA-map and post one receive skb for ring element @elem */
+static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	struct c2_rxp_hdr *rxp_hdr;
+
+	skb = dev_alloc_skb(c2_port->rx_buf_size);
+	if (unlikely(!skb)) {
+		pr_debug("%s: out of memory for receive\n",
+			c2_port->netdev->name);
+		return -ENOMEM;	/* only failure path; nothing mapped yet */
+	}
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(skb->data, 0, sizeof(*rxp_hdr));
+
+	skb->dev = c2_port->netdev;
+
+	maplen = c2_port->rx_buf_size;
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen,
+			   PCI_DMA_FROMDEVICE);	/* NOTE(review): result unchecked */
+
+	/* Set the sk_buff RXP_header to RXP_HRXD_READY (CPU write after mapping) */
+	rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+	rxp_hdr->flags = RXP_HRXD_READY;
+
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+	elem->skb = skb;
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+	rx_desc->len = maplen;	/* full mapped length, rxp header included */
+
+	return 0;
+}
+
+/*
+ * Attach a fresh receive buffer to every element of the RX ring.  Returns 0
+ * on success, 1 as soon as one c2_rx_alloc() fails; to_clean is set to start.
+ */
+static int c2_rx_fill(struct c2_port *c2_port)
+{
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	int ret = 0;
+
+	elem = rx_ring->start;
+	do {
+		if (c2_rx_alloc(c2_port, elem)) {
+			ret = 1;
+			break;	/* buffers already attached are not released here */
+		}
+	} while ((elem = elem->next) != rx_ring->start);
+
+	rx_ring->to_clean = rx_ring->start;
+	return ret;
+}
+
+/* Free all buffers in RX ring, assumes receiver stopped */
+static void c2_rx_clean(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+
+	elem = rx_ring->start;
+	do {
+		rx_desc = elem->ht_desc;
+		rx_desc->len = 0;
+
+		__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+		__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+		__raw_writew(0, elem->hw_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     elem->hw_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     elem->hw_desc + C2_RXP_FLAGS);
+
+		if (elem->skb) {
+			pci_unmap_single(c2dev->pcidev, elem->mapaddr,
+					 elem->maplen, PCI_DMA_FROMDEVICE);
+			dev_kfree_skb(elem->skb);
+			elem->skb = NULL;
+		}
+	} while ((elem = elem->next) != rx_ring->start);
+}
+
+static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
+{
+	struct c2_tx_desc *tx_desc = elem->ht_desc;
+
+	tx_desc->len = 0;
+
+	/* NOTE(review): unmapped even when elem->skb is NULL - confirm mapaddr/maplen are valid */
+	pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
+			 PCI_DMA_TODEVICE);
+
+	if (elem->skb) {
+		dev_kfree_skb_any(elem->skb);
+		elem->skb = NULL;	/* slot no longer owns a buffer */
+	}
+
+	return 0;	/* always 0; there is no failure path */
+}
+
+/* Drop queued sends, free all TX buffers; assumes transmitter stopped */
+static void c2_tx_clean(struct c2_port *c2_port)
+{
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+	int retry;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	elem = tx_ring->start;
+
+	do {
+		retry = 0;	/* set when a READY slot had to be cancelled */
+		do {
+			txp_htxd.flags = be16_to_cpu(	/* flags are written BE */
+			    readw(elem->hw_desc + C2_TXP_FLAGS));
+
+			if (txp_htxd.flags == TXP_HTXD_READY) {
+				/* Still queued: mark it DONE and count a drop */
+				retry = 1;
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(0,
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_DONE),
+					     elem->hw_desc + C2_TXP_FLAGS);
+				c2_port->netstats.tx_dropped++;
+				break;	/* next pass resumes at this same slot */
+			} else {
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+					     elem->hw_desc + C2_TXP_FLAGS);
+			}
+
+			c2_tx_free(c2_port->c2dev, elem);
+
+		} while ((elem = elem->next) != tx_ring->start);
+	} while (retry);
+
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;
+	c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
+
+	if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(c2_port->netdev);
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+}
+
+/*
+ * Process transmit descriptors marked 'DONE' by the firmware,
+ * freeing up their unneeded sk_buffs.
+ */
+static void c2_tx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+
+	spin_lock(&c2_port->tx_lock);
+
+	for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
+	     elem = elem->next) {
+		txp_htxd.flags =
+		    be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+
+		if (txp_htxd.flags != TXP_HTXD_DONE)
+			break;
+
+		if (netif_msg_tx_done(c2_port)) {
+			/* PCI reads are expensive in fast path */
+			txp_htxd.len =
+			    be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
+			pr_debug("%s: tx done slot %3Zu status 0x%x len "
+				"%5u bytes\n",
+				netdev->name, elem - tx_ring->start,
+				txp_htxd.flags, txp_htxd.len);
+		}
+
+		c2_tx_free(c2dev, elem);
+		++(c2_port->tx_avail);
+	}
+
+	tx_ring->to_clean = elem;
+
+	if (netif_queue_stopped(netdev)
+	    && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(netdev);
+
+	spin_unlock(&c2_port->tx_lock);
+}
+
+static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+
+	if (rxp_hdr->status != RXP_HRXD_OK ||
+	    rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
+		pr_debug("BAD RXP_HRXD\n");
+		pr_debug("  rx_desc : %p\n", rx_desc);
+		pr_debug("    index : %Zu\n",
+			elem - c2_port->rx_ring.start);
+		pr_debug("    len   : %u\n", rx_desc->len);
+		pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
+			(void *) __pa((unsigned long) rxp_hdr));
+		pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
+		pr_debug("    status: 0x%x\n", rxp_hdr->status);
+		pr_debug("    len   : %u\n", rxp_hdr->len);
+		pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
+	}
+
+	/* Setup the skb for reuse since we're dropping this pkt */
+	elem->skb->tail = elem->skb->data = elem->skb->head;
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
+
+	/* Write the descriptor to the adapter's rx ring */
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+	__raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+	pr_debug("packet dropped\n");
+	c2_port->netstats.rx_dropped++;
+}
+
+static void c2_rx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen, buflen;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+
+	/* Begin where we left off */
+	rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
+
+	for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
+	     elem = elem->next) {
+		rx_desc = elem->ht_desc;
+		mapaddr = elem->mapaddr;
+		maplen = elem->maplen;
+		skb = elem->skb;
+		rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+
+		if (rxp_hdr->flags != RXP_HRXD_DONE)
+			break;
+		buflen = rxp_hdr->len;
+
+		/* Sanity check the RXP header */
+		if (rxp_hdr->status != RXP_HRXD_OK ||
+		    buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/*
+		 * Allocate and map a new skb for replenishing the host
+		 * RX desc
+		 */
+		if (c2_rx_alloc(c2_port, elem)) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/* Unmap the old skb */
+		pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
+				 PCI_DMA_FROMDEVICE);
+
+		prefetch(skb->data);
+
+		/*
+		 * Skip past the leading 8 bytes comprising of the
+		 * "struct c2_rxp_hdr", prepended by the adapter
+		 * to the usual Ethernet header ("struct ethhdr"),
+		 * to the start of the raw Ethernet packet.
+		 *
+		 * Fix up the various fields in the sk_buff before
+		 * passing it up to netif_rx(). The transfer size
+		 * (in bytes) specified by the adapter len field of
+		 * the "struct rxp_hdr_t" does NOT include the
+		 * "sizeof(struct c2_rxp_hdr)".
+		 */
+		skb->data += sizeof(*rxp_hdr);
+		skb->tail = skb->data + buflen;
+		skb->len = buflen;
+		skb->dev = netdev;
+		skb->protocol = eth_type_trans(skb, netdev);
+
+		netif_rx(skb);
+
+		netdev->last_rx = jiffies;
+		c2_port->netstats.rx_packets++;
+		c2_port->netstats.rx_bytes += buflen;
+	}
+
+	/* Save where we left off */
+	rx_ring->to_clean = elem;
+	c2dev->cur_rx = elem - rx_ring->start;
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+}
+
+/*
+ * Handle netisr0 TX & RX interrupts.
+ */
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned int netisr0, dmaisr;
+	int handled = 0;
+	struct c2_dev *c2dev = (struct c2_dev *) dev_id;
+
+	/* Process CCILNET interrupts */
+	netisr0 = readl(c2dev->regs + C2_NISR0);
+	if (netisr0) {
+
+		/*
+		 * There is an issue with the firmware that always
+		 * provides the status of RX for both TX & RX
+		 * interrupts.  So process both queues here.
+		 */
+		c2_rx_interrupt(c2dev->netdev);
+		c2_tx_interrupt(c2dev->netdev);
+
+		/* Clear the interrupt */
+		writel(netisr0, c2dev->regs + C2_NISR0);
+		handled++;
+	}
+
+	/* Process RNIC interrupts */
+	dmaisr = readl(c2dev->regs + C2_DISR);
+	if (dmaisr) {
+		writel(dmaisr, c2dev->regs + C2_DISR);
+		c2_rnic_interrupt(c2dev);
+		handled++;
+	}
+
+	if (handled) {
+		return IRQ_HANDLED;
+	} else {
+		return IRQ_NONE;
+	}
+}
+
+static int c2_up(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_element *elem;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct in_device *in_dev;
+	size_t rx_size, tx_size;
+	int ret, i;
+	unsigned int netimr0;
+
+	if (netif_msg_ifup(c2_port))
+		pr_debug("%s: enabling interface\n", netdev->name);
+
+	/* Set the Rx buffer size based on MTU */
+	c2_set_rxbufsize(c2_port);
+
+	/* Allocate DMA'able memory for Tx/Rx host descriptor rings */
+	rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
+	tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
+
+	c2_port->mem_size = tx_size + rx_size;
+	c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
+					    &c2_port->dma);
+	if (c2_port->mem == NULL) {
+		pr_debug("Unable to allocate memory for "
+			"host descriptor rings\n");
+		return -ENOMEM;
+	}
+
+	memset(c2_port->mem, 0, c2_port->mem_size);
+
+	/* Create the Rx host descriptor ring (first rx_size bytes of mem) */
+	if ((ret =
+	     c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
+			      c2dev->mmio_rxp_ring))) {
+		pr_debug("Unable to create RX ring\n");
+		goto bail0;
+	}
+
+	/* Allocate Rx buffers for the host descriptor ring */
+	if (c2_rx_fill(c2_port)) {
+		pr_debug("Unable to fill RX ring\n");
+		ret = -ENOMEM;	/* ret is still 0 here; do not report success */
+		goto bail1;
+	}
+
+	/* Create the Tx host descriptor ring (placed after the Rx ring) */
+	if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
+				    c2_port->dma + rx_size,
+				    c2dev->mmio_txp_ring))) {
+		pr_debug("Unable to create TX ring\n");
+		goto bail1;
+	}
+
+	/* Set the TX pointer to where we left off */
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;
+	c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
+	    c2_port->tx_ring.start + c2dev->cur_tx;
+
+	/* missing: Initialize MAC */
+
+	BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
+	for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
+	     i++, elem++) {
+		rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+		rxp_hdr->flags = 0;
+		__raw_writew(cpu_to_be16(RXP_HRXD_READY),
+			     elem->hw_desc + C2_RXP_FLAGS);
+	}
+
+	/* Enable network packets */
+	netif_start_queue(netdev);
+
+	/* Enable IRQ */
+	writel(0, c2dev->regs + C2_IDIS);
+	netimr0 = readl(c2dev->regs + C2_NIMR0);
+	netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
+	writel(netimr0, c2dev->regs + C2_NIMR0);
+
+	/* Tell the stack to ignore ARP requests for addresses bound to other
+	 * interfaces, so it never answers for the RDMA interface's address. */
+	in_dev = in_dev_get(netdev);
+	if (in_dev) {	/* NULL when the device has no in_device attached */
+		in_dev->cnf.arp_ignore = 1;
+		in_dev_put(in_dev);
+	}
+
+	return 0;
+
+      bail1:
+	c2_rx_clean(c2_port);
+	kfree(c2_port->rx_ring.start);
+
+      bail0:
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+
+	return ret;
+}
+
+static int c2_down(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+
+	if (netif_msg_ifdown(c2_port))
+		pr_debug("%s: disabling interface\n",
+			netdev->name);
+
+	/* Reap TX descriptors already marked DONE; does not wait for the rest */
+	c2_tx_interrupt(netdev);
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+
+	/* Disable IRQs by clearing the interrupt mask */
+	writel(1, c2dev->regs + C2_IDIS);
+	writel(0, c2dev->regs + C2_NIMR0);
+
+	/* missing: Stop transmitter */
+
+	/* missing: Stop receiver */
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* missing: Turn off LEDs here */
+
+	/* Free all buffers in the host descriptor rings */
+	c2_tx_clean(c2_port);
+	c2_rx_clean(c2_port);
+
+	/* Free the ring element arrays, then the DMA memory behind them */
+	kfree(c2_port->rx_ring.start);
+	kfree(c2_port->tx_ring.start);
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+
+	return 0;
+}
+
+static void c2_reset(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	unsigned int cur_rx = c2dev->cur_rx;
+
+	/* Tell the hardware to quiesce */
+	C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
+
+	/*
+	 * The hardware will reset the C2_PCI_HRX_QUI bit once
+	 * the RXP is quiesced.  Wait 2 seconds for this.
+	 */
+	ssleep(2);
+
+	cur_rx = C2_GET_CUR_RX(c2dev);
+
+	if (cur_rx & C2_PCI_HRX_QUI)
+		pr_debug("c2_reset: failed to quiesce the hardware!\n");
+
+	cur_rx &= ~C2_PCI_HRX_QUI;
+
+	c2dev->cur_rx = cur_rx;
+
+	pr_debug("Current RX: %u\n", c2dev->cur_rx);
+}
+
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
+		netif_stop_queue(netdev);
+		spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+		pr_debug("%s: Tx ring full when queue awake!\n",
+			netdev->name);
+		return NETDEV_TX_BUSY;
+	}
+
+	maplen = skb_headlen(skb);
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
+
+	elem = tx_ring->to_use;
+	elem->skb = skb;
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+
+	/* Tell HW to xmit */
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
+	__raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
+	__raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
+
+	c2_port->netstats.tx_packets++;
+	c2_port->netstats.tx_bytes += maplen;
+
+	/* Loop thru additional data fragments and queue them */
+	if (skb_shinfo(skb)->nr_frags) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			maplen = frag->size;
+			mapaddr =
+			    pci_map_page(c2dev->pcidev, frag->page,
+					 frag->page_offset, maplen,
+					 PCI_DMA_TODEVICE);
+
+			elem = elem->next;
+			elem->skb = NULL;
+			elem->mapaddr = mapaddr;
+			elem->maplen = maplen;
+
+			/* Tell HW to xmit */
+			__raw_writeq(cpu_to_be64(mapaddr),
+				     elem->hw_desc + C2_TXP_ADDR);
+			__raw_writew(cpu_to_be16(maplen),
+				     elem->hw_desc + C2_TXP_LEN);
+			__raw_writew(cpu_to_be16(TXP_HTXD_READY),
+				     elem->hw_desc + C2_TXP_FLAGS);
+
+			c2_port->netstats.tx_packets++;
+			c2_port->netstats.tx_bytes += maplen;
+		}
+	}
+
+	tx_ring->to_use = elem->next;
+	c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
+
+	if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
+		netif_stop_queue(netdev);
+		if (netif_msg_tx_queued(c2_port))
+			pr_debug("%s: transmit queue full\n",
+				netdev->name);
+	}
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+	netdev->trans_start = jiffies;
+
+	return NETDEV_TX_OK;
+}
+
+static struct net_device_stats *c2_get_stats(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+
+	return &c2_port->netstats;
+}
+
+static void c2_tx_timeout(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+
+	if (netif_msg_timer(c2_port))
+		pr_debug("%s: tx timeout\n", netdev->name);
+
+	c2_tx_clean(c2_port);
+}
+
+static int c2_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int ret = 0;
+
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev)) {
+		c2_down(netdev);
+		/* Re-open with the new MTU; report a failed bring-up */
+		ret = c2_up(netdev);
+	}
+
+	return ret;
+}
+
+/* Allocate and set up the net_device; returns NULL on any failure */
+static struct net_device *c2_devinit(struct c2_dev *c2dev,
+				     void __iomem * mmio_addr)
+{
+	struct c2_port *c2_port = NULL;
+	struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
+
+	if (!netdev) {
+		pr_debug("c2_port etherdev alloc failed\n");
+		return NULL;
+	}
+
+	SET_MODULE_OWNER(netdev);
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	netdev->open = c2_up;
+	netdev->stop = c2_down;
+	netdev->hard_start_xmit = c2_xmit_frame;
+	netdev->get_stats = c2_get_stats;
+	netdev->tx_timeout = c2_tx_timeout;
+	netdev->change_mtu = c2_change_mtu;
+	netdev->watchdog_timeo = C2_TX_TIMEOUT;
+	netdev->irq = c2dev->pcidev->irq;
+
+	c2_port = netdev_priv(netdev);
+	c2_port->netdev = netdev;
+	c2_port->c2dev = c2dev;
+	c2_port->msg_enable = netif_msg_init(debug, default_msg);
+	c2_port->tx_ring.count = C2_NUM_TX_DESC;
+	c2_port->rx_ring.count = C2_NUM_RX_DESC;
+
+	spin_lock_init(&c2_port->tx_lock);
+
+	/* Copy our 48-bit ethernet hardware address */
+	memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
+
+	/* Validate the MAC address */
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		pr_debug("Invalid MAC Address\n");
+		c2_print_macaddr(netdev);
+		free_netdev(netdev);
+		return NULL;
+	}
+
+	c2dev->netdev = netdev;
+
+	return netdev;
+}
+
+static int __devinit c2_probe(struct pci_dev *pcidev,
+			      const struct pci_device_id *ent)
+{
+	int ret = 0, i;
+	unsigned long reg0_start, reg0_flags, reg0_len;
+	unsigned long reg2_start, reg2_flags, reg2_len;
+	unsigned long reg4_start, reg4_flags, reg4_len;
+	unsigned kva_map_size;
+	struct net_device *netdev = NULL;
+	struct c2_dev *c2dev = NULL;
+	void __iomem *mmio_regs = NULL;
+
+	printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
+		DRV_VERSION);
+
+	/* Enable PCI device */
+	ret = pci_enable_device(pcidev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
+			pci_name(pcidev));
+		goto bail0;
+	}
+
+	reg0_start = pci_resource_start(pcidev, BAR_0);
+	reg0_len = pci_resource_len(pcidev, BAR_0);
+	reg0_flags = pci_resource_flags(pcidev, BAR_0);
+
+	reg2_start = pci_resource_start(pcidev, BAR_2);
+	reg2_len = pci_resource_len(pcidev, BAR_2);
+	reg2_flags = pci_resource_flags(pcidev, BAR_2);
+
+	reg4_start = pci_resource_start(pcidev, BAR_4);
+	reg4_len = pci_resource_len(pcidev, BAR_4);
+	reg4_flags = pci_resource_flags(pcidev, BAR_4);
+
+	pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
+	pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
+	pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
+
+	/* Make sure PCI base addr are MMIO */
+	if (!(reg0_flags & IORESOURCE_MEM) ||
+	    !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
+		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Check for weird/broken PCI region reporting */
+	if ((reg0_len < C2_REG0_SIZE) ||
+	    (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
+		printk(KERN_ERR PFX "Invalid PCI region sizes\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	ret = pci_request_regions(pcidev, DRV_NAME);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to request regions\n",
+			pci_name(pcidev));
+		goto bail1;
+	}
+
+	if ((sizeof(dma_addr_t) > 4)) {
+		ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "64b DMA configuration failed\n");
+			goto bail2;
+		}
+	} else {
+		ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "32b DMA configuration failed\n");
+			goto bail2;
+		}
+	}
+
+	/* Enables bus-mastering on the device */
+	pci_set_master(pcidev);
+
+	/* Remap the adapter PCI registers in BAR4 */
+	mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				    sizeof(struct c2_adapter_pci_regs));
+	if (mmio_regs == 0UL) {
+		printk(KERN_ERR PFX
+			"Unable to remap adapter PCI registers in BAR4\n");
+		ret = -EIO;
+		goto bail2;
+	}
+
+	/* Validate PCI regs magic */
+	for (i = 0; i < sizeof(c2_magic); i++) {
+		if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
+			printk(KERN_ERR PFX "Downlevel Firmware boot loader "
+				"[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
+			       "utility to update your boot loader\n",
+				i + 1, sizeof(c2_magic),
+				readb(mmio_regs + C2_REGS_MAGIC + i),
+				c2_magic[i]);
+			printk(KERN_ERR PFX "Adapter not claimed\n");
+			iounmap(mmio_regs);
+			ret = -EIO;
+			goto bail2;
+		}
+	}
+
+	/* Validate the adapter version */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+		printk(KERN_ERR PFX "Version mismatch "
+			"[fw=%u, c2=%u], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
+			C2_VERSION);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Validate the adapter IVN */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+		printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
+		       "the OpenIB device support kit. "
+		       "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
+			C2_IVN);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Allocate hardware structure */
+	c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
+	if (!c2dev) {
+		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
+			pci_name(pcidev));
+		ret = -ENOMEM;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	memset(c2dev, 0, sizeof(*c2dev));
+	spin_lock_init(&c2dev->lock);
+	c2dev->pcidev = pcidev;
+	c2dev->cur_tx = 0;
+
+	/* Get the last RX index */
+	c2dev->cur_rx = (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
+			 0xffffc000) / sizeof(struct c2_rxp_desc);
+
+	/* Request an interrupt line for the driver */
+	ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+			pci_name(pcidev), pcidev->irq);
+		iounmap(mmio_regs);
+		goto bail3;
+	}
+
+	/* Set driver specific data */
+	pci_set_drvdata(pcidev, c2dev);
+
+	/* Initialize network device; ret is 0 here, so set an error code */
+	if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+		ret = -ENOMEM;
+		iounmap(mmio_regs);
+		goto bail4;
+	}
+
+	/* Save off the actual size prior to unmapping mmio_regs */
+	kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+
+	/* Unmap the adapter PCI registers in BAR4 */
+	iounmap(mmio_regs);
+
+	/* Register network device */
+	ret = register_netdev(netdev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
+			ret);
+		goto bail5;
+	}
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+
+	/* Remap the adapter HRXDQ PA space to kernel VA space */
+	c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
+					       C2_RXP_HRXDQ_SIZE);
+	if (c2dev->mmio_rxp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
+		ret = -EIO;
+		goto bail6;
+	}
+
+	/* Remap the adapter HTXDQ PA space to kernel VA space */
+	c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
+					       C2_TXP_HTXDQ_SIZE);
+	if (c2dev->mmio_txp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
+		ret = -EIO;
+		goto bail7;
+	}
+
+	/* Save off the current RX index in the last 4 bytes of the TXP Ring */
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
+	c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
+	if (c2dev->regs == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR0\n");
+		ret = -EIO;
+		goto bail8;
+	}
+
+	/* Remap the PCI registers in adapter BAR4 to kernel VA space */
+	c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
+	c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				     kva_map_size);
+	if (c2dev->kva == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR4\n");
+		ret = -EIO;
+		goto bail9;
+	}
+
+	/* Print out the MAC address */
+	c2_print_macaddr(netdev);
+
+	ret = c2_rnic_init(c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
+		goto bail10;
+	}
+
+	c2_register_device(c2dev);
+
+	return 0;
+
+ bail10:
+	iounmap(c2dev->kva);
+
+ bail9:
+	iounmap(c2dev->regs);
+
+ bail8:
+	iounmap(c2dev->mmio_txp_ring);
+
+ bail7:
+	iounmap(c2dev->mmio_rxp_ring);
+
+ bail6:
+	unregister_netdev(netdev);
+
+ bail5:
+	free_netdev(netdev);
+
+ bail4:
+	free_irq(pcidev->irq, c2dev);
+
+ bail3:
+	ib_dealloc_device(&c2dev->ibdev);
+
+ bail2:
+	pci_release_regions(pcidev);
+
+ bail1:
+	pci_disable_device(pcidev);
+
+ bail0:
+	return ret;
+}
+
+static void __devexit c2_remove(struct pci_dev *pcidev)
+{
+	struct c2_dev *c2dev = pci_get_drvdata(pcidev);
+	struct net_device *netdev = c2dev->netdev;
+
+	/* Unregister with OpenIB */
+	c2_unregister_device(c2dev);
+
+	/* Clean up the RNIC resources */
+	c2_rnic_term(c2dev);
+
+	/* Remove network device from the kernel */
+	unregister_netdev(netdev);
+
+	/* Free network device */
+	free_netdev(netdev);
+
+	/* Free the interrupt line */
+	free_irq(pcidev->irq, c2dev);
+
+	/* missing: Turn LEDs off here */
+
+	/* Unmap adapter PA space */
+	iounmap(c2dev->kva);
+	iounmap(c2dev->regs);
+	iounmap(c2dev->mmio_txp_ring);
+	iounmap(c2dev->mmio_rxp_ring);
+
+	/* Free the hardware structure */
+	ib_dealloc_device(&c2dev->ibdev);
+
+	/* Release reserved PCI I/O and memory resources */
+	pci_release_regions(pcidev);
+
+	/* Disable PCI device */
+	pci_disable_device(pcidev);
+
+	/* Clear driver specific data */
+	pci_set_drvdata(pcidev, NULL);
+}
+
+static struct pci_driver c2_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = c2_pci_table,
+	.probe = c2_probe,
+	.remove = __devexit_p(c2_remove),
+};
+
+static int __init c2_init_module(void)
+{
+	return pci_module_init(&c2_pci_driver);
+}
+
+static void __exit c2_exit_module(void)
+{
+	pci_unregister_driver(&c2_pci_driver);
+}
+
+module_init(c2_init_module);
+module_exit(c2_exit_module);
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
new file mode 100644
index 0000000000000..1b17dcdd05051
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <asm/semaphore.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+#define DRV_NAME     "c2"
+#define DRV_VERSION  "1.1"
+#define PFX          DRV_NAME ": "
+
+#define BAR_0                0
+#define BAR_2                2
+#define BAR_4                4
+
+#define RX_BUF_SIZE         (1536 + 8)
+#define ETH_JUMBO_MTU        9000
+#define C2_MAGIC            "CEPHEUS"
+#define C2_VERSION           4
+#define C2_IVN              (18 & 0x7fffffff)
+
+#define C2_REG0_SIZE        (16 * 1024)
+#define C2_REG2_SIZE        (2 * 1024 * 1024)
+#define C2_REG4_SIZE        (256 * 1024 * 1024)
+#define C2_NUM_TX_DESC       341
+#define C2_NUM_RX_DESC       256
+#define C2_PCI_REGS_OFFSET  (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE   (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE   (4096)
+#define C2_TX_TIMEOUT	    (6*HZ)
+
+/* CEPHEUS */
+static const u8 c2_magic[] = {
+	0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+enum adapter_pci_regs {
+	C2_REGS_MAGIC = 0x0000,
+	C2_REGS_VERS = 0x0008,
+	C2_REGS_IVN = 0x000C,
+	C2_REGS_PCI_WINSIZE = 0x0010,
+	C2_REGS_Q0_QSIZE = 0x0014,
+	C2_REGS_Q0_MSGSIZE = 0x0018,
+	C2_REGS_Q0_POOLSTART = 0x001C,
+	C2_REGS_Q0_SHARED = 0x0020,
+	C2_REGS_Q1_QSIZE = 0x0024,
+	C2_REGS_Q1_MSGSIZE = 0x0028,
+	C2_REGS_Q1_SHARED = 0x0030,
+	C2_REGS_Q2_QSIZE = 0x0034,
+	C2_REGS_Q2_MSGSIZE = 0x0038,
+	C2_REGS_Q2_SHARED = 0x0040,
+	C2_REGS_ENADDR = 0x004C,
+	C2_REGS_RDMA_ENADDR = 0x0054,
+	C2_REGS_HRX_CUR = 0x006C,
+};
+
+struct c2_adapter_pci_regs {
+	char reg_magic[8];
+	u32 version;
+	u32 ivn;
+	u32 pci_window_size;
+	u32 q0_q_size;
+	u32 q0_msg_size;
+	u32 q0_pool_start;
+	u32 q0_shared;
+	u32 q1_q_size;
+	u32 q1_msg_size;
+	u32 q1_pool_start;
+	u32 q1_shared;
+	u32 q2_q_size;
+	u32 q2_msg_size;
+	u32 q2_pool_start;
+	u32 q2_shared;
+	u32 log_start;
+	u32 log_size;
+	u8 host_enaddr[8];
+	u8 rdma_enaddr[8];
+	u32 crash_entry;
+	u32 crash_ready[2];
+	u32 fw_txd_cur;
+	u32 fw_hrxd_cur;
+	u32 fw_rxd_cur;
+};
+
+enum pci_regs {
+	C2_HISR = 0x0000,
+	C2_DISR = 0x0004,
+	C2_HIMR = 0x0008,
+	C2_DIMR = 0x000C,
+	C2_NISR0 = 0x0010,
+	C2_NISR1 = 0x0014,
+	C2_NIMR0 = 0x0018,
+	C2_NIMR1 = 0x001C,
+	C2_IDIS = 0x0020,
+};
+
+enum {
+	C2_PCI_HRX_INT = 1 << 8,
+	C2_PCI_HTX_INT = 1 << 17,
+	C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ */
+struct c2_pci_regs {
+	u32 hostisr;
+	u32 dmaisr;
+	u32 hostimr;
+	u32 dmaimr;
+	u32 netisr0;
+	u32 netisr1;
+	u32 netimr0;
+	u32 netimr1;
+	u32 int_disable;
+};
+
+/* TXP flags */
+enum c2_txp_flags {
+	TXP_HTXD_DONE = 0,
+	TXP_HTXD_READY = 1 << 0,
+	TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/* RXP flags */
+enum c2_rxp_flags {
+	RXP_HRXD_UNINIT = 0,
+	RXP_HRXD_READY = 1 << 0,
+	RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status */
+enum c2_rxp_status {
+	RXP_HRXD_ZERO = 0,
+	RXP_HRXD_OK = 1 << 0,
+	RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/* TXP descriptor fields */
+enum txp_desc {
+	C2_TXP_FLAGS = 0x0000,
+	C2_TXP_LEN = 0x0002,
+	C2_TXP_ADDR = 0x0004,
+};
+
+/* RXP descriptor fields */
+enum rxp_desc {
+	C2_RXP_FLAGS = 0x0000,
+	C2_RXP_STATUS = 0x0002,
+	C2_RXP_COUNT = 0x0004,
+	C2_RXP_LEN = 0x0006,
+	C2_RXP_ADDR = 0x0008,
+};
+
+struct c2_txp_desc {
+	u16 flags;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_desc {
+	u16 flags;
+	u16 status;
+	u16 count;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_hdr {
+	u16 flags;
+	u16 status;
+	u16 len;
+	u16 rsvd;
+} __attribute__ ((packed));
+
+struct c2_tx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_rx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_alloc {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_array {
+	struct {
+		void **page;
+		int used;
+	} *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks. Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user mode client.
+ *
+ */
+struct sp_chunk {
+	struct sp_chunk *next;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 head;
+	u16 shared_ptr[0];
+};
+
+struct c2_pd_table {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_qp_table {
+	struct idr idr;
+	spinlock_t lock;
+	int last;
+};
+
+struct c2_element {
+	struct c2_element *next;
+	void *ht_desc;		/* host     descriptor */
+	void __iomem *hw_desc;	/* hardware descriptor */
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+};
+
+struct c2_ring {
+	struct c2_element *to_clean;
+	struct c2_element *to_use;
+	struct c2_element *start;
+	unsigned long count;
+};
+
+struct c2_dev {
+	struct ib_device ibdev;
+	void __iomem *regs;
+	void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+	void __iomem *mmio_rxp_ring;
+	spinlock_t lock;
+	struct pci_dev *pcidev;
+	struct net_device *netdev;
+	struct net_device *pseudo_netdev;
+	unsigned int cur_tx;
+	unsigned int cur_rx;
+	u32 adapter_handle;
+	int device_cap_flags;
+	void __iomem *kva;	/* KVA device memory */
+	unsigned long pa;	/* PA device memory */
+	void **qptr_array;
+
+	kmem_cache_t *host_msg_cache;
+
+	struct list_head cca_link;		/* adapter list */
+	struct list_head eh_wakeup_list;	/* event wakeup list */
+	wait_queue_head_t req_vq_wo;
+
+	/* Cached RNIC properties */
+	struct ib_device_attr props;
+
+	struct c2_pd_table pd_table;
+	struct c2_qp_table qp_table;
+	int ports;		/* num of GigE ports */
+	int devnum;
+	spinlock_t vqlock;	/* sync vbs req MQ */
+
+	/* Verbs Queues */
+	struct c2_mq req_vq;	/* Verbs Request MQ */
+	struct c2_mq rep_vq;	/* Verbs Reply MQ */
+	struct c2_mq aeq;	/* Async Events MQ */
+
+	/* Kernel client MQs */
+	struct sp_chunk *kern_mqsp_pool;
+
+	/* Device updates these values when posting messages to a host
+	 * target queue */
+	u16 req_vq_shared;
+	u16 rep_vq_shared;
+	u16 aeq_shared;
+	u16 irq_claimed;
+
+	/*
+	 * Shared host target pages for user-accessible MQs.
+	 */
+	int hthead;		/* index of first free entry */
+	void *htpages;		/* kernel vaddr */
+	int htlen;		/* length of htpages memory */
+	void *htuva;		/* user mapped vaddr */
+	spinlock_t htlock;	/* serialize allocation */
+
+	u64 adapter_hint_uva;	/* access to the activity FIFO */
+
+	//	spinlock_t aeq_lock;
+	//	spinlock_t rnic_lock;
+
+	u16 *hint_count;
+	dma_addr_t hint_count_dma;
+	u16 hints_read;
+
+	int init;		/* TRUE if it's ready */
+	char ae_cache_name[16];
+	char vq_cache_name[16];
+};
+
+struct c2_port {
+	u32 msg_enable;
+	struct c2_dev *c2dev;
+	struct net_device *netdev;
+
+	spinlock_t tx_lock;
+	u32 tx_avail;
+	struct c2_ring tx_ring;
+	struct c2_ring rx_ring;
+
+	void *mem;		/* PCI memory for host rings */
+	dma_addr_t dma;
+	unsigned long mem_size;
+
+	u32 rx_buf_size;
+
+	struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT	0x100
+#define PCI_BAR0_ADAPTER_HINT	0x2000
+
+/*
+ * CQ flag values; no PCI vendor/device ids are defined below.
+ */
+#define CQ_ARMED 	0x01
+#define CQ_WAIT_FOR_DMA	0x80
+
+/*
+ * The format of a hint is as follows:
+ * Lower 16 bits are the count of hints for the queue.
+ * Next 15 bits are the qp_index
+ * Upper most bit depends on who reads it:
+ *    If read by producer, then it means Full (1) or Not-Full (0)
+ *    If read by consumer, then it means Empty (1) or Not-Empty (0)
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+
+/*
+ * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
+ * struct.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
+#ifndef readq
+static inline u64 readq(const void __iomem * addr)
+{
+	u64 ret = readl(addr + 4);
+	ret <<= 32;
+	ret |= readl(addr);
+
+	return ret;
+}
+#endif
+
+#ifndef writeq
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+	__raw_writel((u32) (val), addr);
+	__raw_writel((u32) (val >> 32), (addr + 4));
+}
+#endif
+
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+	__raw_writel(cpu_to_be32(cur_rx), (c2dev)->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+	be32_to_cpu(readl((c2dev)->mmio_txp_ring + 4092))
+
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{	/* ibdev is the 'ibdev' member of a struct c2_dev; step back to the container */
+	return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+static inline int c2_errno(void *reply)
+{	/* map the result code carried in 'reply' to 0 or a negative errno */
+	switch (c2_wr_get_result(reply)) {
+	case C2_OK:
+		return 0;
+	case CCERR_NO_BUFS:
+	case CCERR_INSUFFICIENT_RESOURCES:
+	case CCERR_ZERO_RDMA_READ_RESOURCES:
+		return -ENOMEM;
+	case CCERR_MR_IN_USE:
+	case CCERR_QP_IN_USE:
+		return -EBUSY;
+	case CCERR_ADDR_IN_USE:
+		return -EADDRINUSE;
+	case CCERR_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	case CCERR_CONN_RESET:
+		return -ECONNRESET;
+	case CCERR_NOT_IMPLEMENTED:
+	case CCERR_INVALID_WQE:
+		return -ENOSYS;
+	case CCERR_QP_NOT_PRIVILEGED:
+		return -EPERM;
+	case CCERR_STACK_ERROR:
+		return -EPROTO;
+	case CCERR_ACCESS_VIOLATION:
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return -EFAULT;
+	case CCERR_STAG_STATE_NOT_INVALID:
+	case CCERR_INVALID_ADDRESS:
+	case CCERR_INVALID_CQ:
+	case CCERR_INVALID_EP:
+	case CCERR_INVALID_MODIFIER:
+	case CCERR_INVALID_MTU:
+	case CCERR_INVALID_PD_ID:
+	case CCERR_INVALID_QP:
+	case CCERR_INVALID_RNIC:
+	case CCERR_INVALID_STAG:
+		return -EINVAL;
+	default:	/* every result code not listed above */
+		return -EAGAIN;
+	}
+}
+
+/* Device */
+extern int c2_register_device(struct c2_dev *c2dev);
+extern void c2_unregister_device(struct c2_dev *c2dev);
+extern int c2_rnic_init(struct c2_dev *c2dev);
+extern void c2_rnic_term(struct c2_dev *c2dev);
+extern void c2_rnic_interrupt(struct c2_dev *c2dev);
+extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+
+/* QPs */
+extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
+		       struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
+extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
+extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
+extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+			struct ib_qp_attr *attr, int attr_mask);
+extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+				 int ord, int ird);
+extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+			struct ib_send_wr **bad_wr);
+extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+			   struct ib_recv_wr **bad_wr);
+extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_set_qp_state(struct c2_qp *, int);
+extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
+
+/* PDs */
+extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
+extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
+extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+
+/* CQs */
+extern int c2_init_cq(struct c2_dev *c2dev, int entries,
+		      struct c2_ucontext *ctx, struct c2_cq *cq);
+extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
+extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
+extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
+extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+
+/* CM */
+extern int c2_llp_connect(struct iw_cm_id *cm_id,
+			  struct iw_cm_conn_param *iw_param);
+extern int c2_llp_accept(struct iw_cm_id *cm_id,
+			 struct iw_cm_conn_param *iw_param);
+extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
+			 u8 pdata_len);
+extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
+extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
+
+/* MM */
+extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 				      int page_size, int pbl_depth, u32 length,
+ 				      u32 off, u64 *va, enum c2_acf acf,
+				      struct c2_mr *mr);
+extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
+
+/* AE */
+extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
+
+/* MQSP Allocator */
+extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+			     struct sp_chunk **root);
+extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
+extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+			  dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(u16 * mqsp);
+#endif
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
new file mode 100644
index 0000000000000..08f46c83a3a43
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_status.h"
+#include "c2_ae.h"
+
+static int c2_convert_cm_status(u32 c2_status)
+{	/* translate an adapter connection status into 0 or a negative errno */
+	switch (c2_status) {
+	case C2_CONN_STATUS_SUCCESS:
+		return 0;
+	case C2_CONN_STATUS_REJECTED:
+		return -ENETRESET;
+	case C2_CONN_STATUS_REFUSED:
+		return -ECONNREFUSED;
+	case C2_CONN_STATUS_TIMEDOUT:
+		return -ETIMEDOUT;
+	case C2_CONN_STATUS_NETUNREACH:
+		return -ENETUNREACH;
+	case C2_CONN_STATUS_HOSTUNREACH:
+		return -EHOSTUNREACH;
+	case C2_CONN_STATUS_INVALID_RNIC:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP_STATE:
+		return -EINVAL;
+	case C2_CONN_STATUS_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	default:	/* unknown status: log it and report a generic I/O error */
+		printk(KERN_ERR PFX
+		       "%s - Unable to convert CM status: %u\n",
+		       __FUNCTION__, c2_status);	/* c2_status is u32, hence %u */
+		return -EIO;
+	}
+}
+
+#ifdef DEBUG
+static const char *to_event_str(int event)
+{	/* debug helper: name of an asynchronous event id, or "<invalid event>" */
+	static const char * const event_str[] = {	/* same order as enum c2_event_id */
+		"CCAE_REMOTE_SHUTDOWN",
+		"CCAE_ACTIVE_CONNECT_RESULTS",
+		"CCAE_CONNECTION_REQUEST",
+		"CCAE_LLP_CLOSE_COMPLETE",
+		"CCAE_TERMINATE_MESSAGE_RECEIVED",
+		"CCAE_LLP_CONNECTION_RESET",
+		"CCAE_LLP_CONNECTION_LOST",
+		"CCAE_LLP_SEGMENT_SIZE_INVALID",
+		"CCAE_LLP_INVALID_CRC",
+		"CCAE_LLP_BAD_FPDU",
+		"CCAE_INVALID_DDP_VERSION",
+		"CCAE_INVALID_RDMA_VERSION",
+		"CCAE_UNEXPECTED_OPCODE",
+		"CCAE_INVALID_DDP_QUEUE_NUMBER",
+		"CCAE_RDMA_READ_NOT_ENABLED",
+		"CCAE_RDMA_WRITE_NOT_ENABLED",
+		"CCAE_RDMA_READ_TOO_SMALL",
+		"CCAE_NO_L_BIT",
+		"CCAE_TAGGED_INVALID_STAG",
+		"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
+		"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_TAGGED_INVALID_PD",
+		"CCAE_WRAP_ERROR",
+		"CCAE_BAD_CLOSE",
+		"CCAE_BAD_LLP_CLOSE",
+		"CCAE_INVALID_MSN_RANGE",
+		"CCAE_INVALID_MSN_GAP",
+		"CCAE_IRRQ_OVERFLOW",
+		"CCAE_IRRQ_MSN_GAP",
+		"CCAE_IRRQ_MSN_RANGE",
+		"CCAE_IRRQ_INVALID_STAG",
+		"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
+		"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_IRRQ_INVALID_PD",
+		"CCAE_IRRQ_WRAP_ERROR",
+		"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
+		"CCAE_CQ_RQ_COMPLETION_ERROR",
+		"CCAE_QP_SRQ_WQE_ERROR",
+		"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
+		"CCAE_CQ_OVERFLOW",
+		"CCAE_CQ_OPERATION_ERROR",
+		"CCAE_SRQ_LIMIT_REACHED",
+		"CCAE_QP_RQ_LIMIT_REACHED",
+		"CCAE_SRQ_CATASTROPHIC_ERROR",
+		"CCAE_RNIC_CATASTROPHIC_ERROR"
+	};
+
+	if (event < CCAE_REMOTE_SHUTDOWN ||
+	    event > CCAE_RNIC_CATASTROPHIC_ERROR)
+		return "<invalid event>";
+
+	event -= CCAE_REMOTE_SHUTDOWN;	/* ids start at CCAE_REMOTE_SHUTDOWN, table at 0 */
+	return event_str[event];
+}
+
+static const char *to_qp_state_str(int state)
+{	/* debug helper: name of a C2_QP_STATE_* value, or "<invalid QP state>" */
+	switch (state) {
+	case C2_QP_STATE_IDLE:
+		return "C2_QP_STATE_IDLE";
+	case C2_QP_STATE_CONNECTING:
+		return "C2_QP_STATE_CONNECTING";
+	case C2_QP_STATE_RTS:
+		return "C2_QP_STATE_RTS";
+	case C2_QP_STATE_CLOSING:
+		return "C2_QP_STATE_CLOSING";
+	case C2_QP_STATE_TERMINATE:
+		return "C2_QP_STATE_TERMINATE";
+	case C2_QP_STATE_ERROR:
+		return "C2_QP_STATE_ERROR";
+	default:
+		return "<invalid QP state>";
+	}
+}
+#endif
+
+void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
+{	/* consume one asynchronous-event message from MQ 'mq_index' and dispatch it */
+	struct c2_mq *mq = c2dev->qptr_array[mq_index];
+	union c2wr *wr;
+	void *resource_user_context;
+	struct iw_cm_event cm_event;
+	struct ib_event ib_event;
+	enum c2_resource_indicator resource_indicator;
+	enum c2_event_id event_id;
+	unsigned long flags;
+	int status;
+
+	/*
+	 * retrieve the message (return if c2_mq_consume() has none)
+	 */
+	wr = c2_mq_consume(mq);
+	if (!wr)
+		return;
+
+	memset(&ib_event, 0, sizeof(ib_event));
+	memset(&cm_event, 0, sizeof(cm_event));
+
+	event_id = c2_wr_get_id(wr);
+	resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
+	resource_user_context =
+	    (void *) (unsigned long) wr->ae.ae_generic.user_context;
+
+	status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
+
+	pr_debug("event received c2_dev=%p, event_id=%d, "
+		"resource_indicator=%d, user_context=%p, status = %d\n",
+		c2dev, event_id, resource_indicator, resource_user_context,
+		status);
+
+	switch (resource_indicator) {	/* selects how user_context is interpreted */
+	case C2_RES_IND_QP:{
+
+		struct c2_qp *qp = (struct c2_qp *)resource_user_context;
+		struct iw_cm_id *cm_id = qp->cm_id;
+		struct c2wr_ae_active_connect_results *res;
+
+		if (!cm_id) {
+			pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
+				qp);
+			goto ignore_it;
+		}
+		pr_debug("%s: event = %s, user_context=%llx, "
+			"resource_type=%x, "
+			"resource=%x, qp_state=%s\n",
+			__FUNCTION__,
+			to_event_str(event_id),
+			be64_to_cpu(wr->ae.ae_generic.user_context),
+			be32_to_cpu(wr->ae.ae_generic.resource_type),
+			be32_to_cpu(wr->ae.ae_generic.resource),
+			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
+
+		c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
+
+		switch (event_id) {
+		case CCAE_ACTIVE_CONNECT_RESULTS:
+			res = &wr->ae.ae_active_connect_results;
+			cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+			cm_event.local_addr.sin_addr.s_addr = res->laddr;
+			cm_event.remote_addr.sin_addr.s_addr = res->raddr;
+			cm_event.local_addr.sin_port = res->lport;
+			cm_event.remote_addr.sin_port =	res->rport;
+			if (status == 0) {
+				cm_event.private_data_len =
+					be32_to_cpu(res->private_data_length);
+				cm_event.private_data = res->private_data;
+			} else {
+				/* connect failed: drop the QP's cm_id reference */
+				spin_lock_irqsave(&qp->lock, flags);
+				if (qp->cm_id) {
+					qp->cm_id->rem_ref(qp->cm_id);
+					qp->cm_id = NULL;
+				}
+				spin_unlock_irqrestore(&qp->lock, flags);
+				cm_event.private_data_len = 0;
+				cm_event.private_data = NULL;
+			}
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		case CCAE_TERMINATE_MESSAGE_RECEIVED:
+		case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
+			ib_event.device = &c2dev->ibdev;
+			ib_event.element.qp = &qp->ibqp;
+			ib_event.event = IB_EVENT_QP_REQ_ERR;
+
+			if (qp->ibqp.event_handler)
+				qp->ibqp.event_handler(&ib_event,
+						       qp->ibqp.
+						       qp_context);
+			break;
+		case CCAE_BAD_CLOSE:
+		case CCAE_LLP_CLOSE_COMPLETE:
+		case CCAE_LLP_CONNECTION_RESET:
+		case CCAE_LLP_CONNECTION_LOST:
+			BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
+
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id) {
+				qp->cm_id->rem_ref(qp->cm_id);
+				qp->cm_id = NULL;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+			cm_event.event = IW_CM_EVENT_CLOSE;
+			cm_event.status = 0;
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		default:
+			BUG_ON(1);	/* NOTE(review): fires before the pr_debug below */
+			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
+				"CM_ID=%p\n",
+				__FUNCTION__, __LINE__,
+				event_id, qp, cm_id);
+			break;
+		}
+		break;
+	}
+
+	case C2_RES_IND_EP:{
+
+		struct c2wr_ae_connection_request *req =
+			&wr->ae.ae_connection_request;
+		struct iw_cm_id *cm_id =
+			(struct iw_cm_id *)resource_user_context;
+
+		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
+		if (event_id != CCAE_CONNECTION_REQUEST) {
+			pr_debug("%s: Invalid event_id: %d\n",
+				__FUNCTION__, event_id);
+			break;
+		}
+		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
+		cm_event.local_addr.sin_addr.s_addr = req->laddr;
+		cm_event.remote_addr.sin_addr.s_addr = req->raddr;
+		cm_event.local_addr.sin_port = req->lport;
+		cm_event.remote_addr.sin_port = req->rport;
+		cm_event.private_data_len =
+			be32_to_cpu(req->private_data_length);
+		cm_event.private_data = req->private_data;
+
+		if (cm_id->event_handler)
+			cm_id->event_handler(cm_id, &cm_event);
+		break;
+	}
+
+	case C2_RES_IND_CQ:{
+		struct c2_cq *cq =
+		    (struct c2_cq *) resource_user_context;
+
+		pr_debug("IB_EVENT_CQ_ERR\n");
+		ib_event.device = &c2dev->ibdev;
+		ib_event.element.cq = &cq->ibcq;
+		ib_event.event = IB_EVENT_CQ_ERR;
+
+		if (cq->ibcq.event_handler)
+			cq->ibcq.event_handler(&ib_event,
+					       cq->ibcq.cq_context);
+		break;	/* do not fall into default: and log a bogus bad indicator */
+	}
+	default:
+		printk(KERN_ERR PFX "Bad resource indicator = %d\n",
+		       resource_indicator);
+		break;
+	}
+
+ ignore_it:
+	c2_mq_free(mq);	/* reached by every path that consumed a message */
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h
new file mode 100644
index 0000000000000..3a065c33b83b9
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_AE_H_
+#define _C2_AE_H_
+
+/*
+ * WARNING: If you change this file, also bump C2_IVN_BASE
+ * in common/include/clustercore/c2_ivn.h.
+ */
+
+/*
+ * Asynchronous Event Identifiers
+ *
+ * These start at 0x80 only so it's obvious from inspection that
+ * they are not work-request statuses.  This isn't critical.
+ *
+ * NOTE: these event id's must fit in eight bits.
+ */
+enum c2_event_id {	/* order must match event_str[] in to_event_str() (c2_ae.c) */
+	CCAE_REMOTE_SHUTDOWN = 0x80,	/* lowest id accepted by to_event_str() */
+	CCAE_ACTIVE_CONNECT_RESULTS,
+	CCAE_CONNECTION_REQUEST,
+	CCAE_LLP_CLOSE_COMPLETE,
+	CCAE_TERMINATE_MESSAGE_RECEIVED,
+	CCAE_LLP_CONNECTION_RESET,
+	CCAE_LLP_CONNECTION_LOST,
+	CCAE_LLP_SEGMENT_SIZE_INVALID,
+	CCAE_LLP_INVALID_CRC,
+	CCAE_LLP_BAD_FPDU,
+	CCAE_INVALID_DDP_VERSION,
+	CCAE_INVALID_RDMA_VERSION,
+	CCAE_UNEXPECTED_OPCODE,
+	CCAE_INVALID_DDP_QUEUE_NUMBER,
+	CCAE_RDMA_READ_NOT_ENABLED,
+	CCAE_RDMA_WRITE_NOT_ENABLED,
+	CCAE_RDMA_READ_TOO_SMALL,
+	CCAE_NO_L_BIT,
+	CCAE_TAGGED_INVALID_STAG,
+	CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
+	CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
+	CCAE_TAGGED_INVALID_PD,
+	CCAE_WRAP_ERROR,
+	CCAE_BAD_CLOSE,
+	CCAE_BAD_LLP_CLOSE,
+	CCAE_INVALID_MSN_RANGE,
+	CCAE_INVALID_MSN_GAP,
+	CCAE_IRRQ_OVERFLOW,
+	CCAE_IRRQ_MSN_GAP,
+	CCAE_IRRQ_MSN_RANGE,
+	CCAE_IRRQ_INVALID_STAG,
+	CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
+	CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
+	CCAE_IRRQ_INVALID_PD,
+	CCAE_IRRQ_WRAP_ERROR,
+	CCAE_CQ_SQ_COMPLETION_OVERFLOW,
+	CCAE_CQ_RQ_COMPLETION_ERROR,
+	CCAE_QP_SRQ_WQE_ERROR,
+	CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
+	CCAE_CQ_OVERFLOW,
+	CCAE_CQ_OPERATION_ERROR,
+	CCAE_SRQ_LIMIT_REACHED,
+	CCAE_QP_RQ_LIMIT_REACHED,
+	CCAE_SRQ_CATASTROPHIC_ERROR,
+	CCAE_RNIC_CATASTROPHIC_ERROR	/* highest id accepted by to_event_str() */
+/* WARNING If you add more id's, make sure their values fit in eight bits. */
+};
+
+/*
+ * Resource Indicators and Identifiers
+ */
+enum c2_resource_indicator {
+	C2_RES_IND_QP = 1,	/* c2_ae_event() casts user_context to struct c2_qp * */
+	C2_RES_IND_EP,		/* c2_ae_event() casts user_context to struct iw_cm_id * */
+	C2_RES_IND_CQ,		/* c2_ae_event() casts user_context to struct c2_cq * */
+	C2_RES_IND_SRQ,		/* c2_ae_event() has no case for this value */
+};
+
+#endif /* _C2_AE_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
new file mode 100644
index 0000000000000..1d2529992c0c9
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "c2.h"
+
+static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
+			       struct sp_chunk **head)
+{	/* allocate and DMA-map one page as an sp_chunk; returns 0 or -ENOMEM */
+	int i;
+	struct sp_chunk *new_head;
+
+	new_head = (struct sp_chunk *) __get_free_page(gfp_mask);
+	if (new_head == NULL)
+		return -ENOMEM;
+
+	new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head,
+					    PAGE_SIZE, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);	/* NOTE(review): mapping result is not checked */
+
+	new_head->next = NULL;
+	new_head->head = 0;	/* free list starts at slot 0 */
+
+	/* build the free list: slot i holds the index of the next free slot */
+	for (i = 0;
+	     i < (PAGE_SIZE - sizeof(struct sp_chunk) -
+		  sizeof(u16)) / sizeof(u16) - 1;
+	     i++) {
+		new_head->shared_ptr[i] = i + 1;
+	}
+	/* terminate list: 0xFFFF is the end marker c2_alloc_mqsp() tests for */
+	new_head->shared_ptr[i] = 0xFFFF;
+
+	*head = new_head;	/* the new chunk is returned through *head */
+	return 0;
+}
+
+int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+		      struct sp_chunk **root)
+{	/* the pool starts as a single chunk stored in *root */
+	return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
+}
+
+void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
+{	/* unmap and free every chunk on the list starting at 'root' */
+	struct sp_chunk *next;
+
+	while (root) {
+		next = root->next;	/* read the link before the page is freed */
+		dma_unmap_single(c2dev->ibdev.dma_device,
+				 pci_unmap_addr(root, mapping), PAGE_SIZE,
+				 DMA_FROM_DEVICE);
+		free_page((unsigned long) root);	/* address came from __get_free_page() */
+		root = next;
+	}
+}
+
+u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+		   dma_addr_t *dma_addr, gfp_t gfp_mask)
+{	/* pop one free u16 slot; *dma_addr gets its DMA address; NULL on failure */
+	u16 mqsp;
+
+	while (head) {
+		mqsp = head->head;
+		if (mqsp != 0xFFFF) {
+			/* this chunk has a free slot: unlink it from the list */
+			head->head = head->shared_ptr[mqsp];
+			break;
+		} else if (head->next == NULL) {
+			/* last chunk is full: grow the pool by one chunk */
+			if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
+			    0) {
+				head = head->next;
+				mqsp = head->head;
+				head->head = head->shared_ptr[mqsp];
+				break;
+			} else
+				return NULL;
+		} else
+			head = head->next;
+	}
+	if (head) {
+		*dma_addr = head->dma_addr +
+			    ((unsigned long) &(head->shared_ptr[mqsp]) -
+			     (unsigned long) head);
+		pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
+			 &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
+		return &(head->shared_ptr[mqsp]);
+	}
+	return NULL;
+}
+
+void c2_free_mqsp(u16 * mqsp)
+{	/* push the slot at 'mqsp' back onto its chunk's free list */
+	struct sp_chunk *head;
+	u16 idx;
+
+	/* The chunk containing this ptr begins at the page boundary */
+	head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
+
+	/* Link head to new mqsp */
+	*mqsp = head->head;
+
+	/* Compute the shared_ptr index: (byte offset in page - offset of shared_ptr[0]) / 2 */
+	idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
+	idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
+
+	/* Point this index at the head (presumably the same slot as *mqsp above -- confirm) */
+	head->shared_ptr[idx] = head->head;
+
+	/* Point head at this index */
+	head->head = idx;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
new file mode 100644
index 0000000000000..485254efdd1e5
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_vq.h"
+#include <rdma/iw_cm.h>
+
+int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{	/* post a CCWR_QP_CONNECT for QP iw_param->qpn; returns 0 or a negative errno */
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct ib_qp *ibqp;
+	struct c2_qp *qp;
+	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
+	struct c2_vq_req *vq_req;
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Associate QP <--> CM_ID */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	/*
+	 * reject private data longer than C2_MAX_PRIVATE_DATA_SIZE
+	 */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail0;
+	}
+	/*
+	 * Set the rdma read limits
+	 */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Create and send a WR_QP_CONNECT...
+	 */
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	vq_req = vq_req_alloc(c2dev);	/* NOTE(review): never attached to the WR (hdr.context = 0), only freed below */
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	c2_wr_set_id(wr, CCWR_QP_CONNECT);
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->qp_handle = qp->adapter_handle;
+
+	wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
+	wr->remote_port = cm_id->remote_addr.sin_port;
+
+	/*
+	 * Move any private data from the caller's buf into
+	 * the WR.
+	 */
+	if (iw_param->private_data) {
+		wr->private_data_length =
+			cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0], iw_param->private_data,
+		       iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/*
+	 * Send WR to adapter.  NOTE: There is no synch reply from
+	 * the adapter.
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	vq_req_free(c2dev, vq_req);
+
+ bail1:
+	kfree(wr);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, drop the CM_ID reference taken above and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
+{	/* create a listening endpoint on the adapter; returns 0 or a negative errno */
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_create_req wr;
+	struct c2wr_ep_listen_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
+	wr.local_port = cm_id->local_addr.sin_port;
+	wr.backlog = cpu_to_be32(backlog);
+	wr.user_context = (u64) (unsigned long) cm_id;
+
+	/*
+	 * Reference the request struct.  Dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply =
+	    (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;	/* no reply buffer to free, so skip bail1 */
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+	/*
+	 * Keep the adapter handle. Used in subsequent destroy
+	 */
+	cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
+
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+
+int c2_llp_service_destroy(struct iw_cm_id *cm_id)
+{	/* destroy the listening endpoint whose handle is in cm_id->provider_data */
+
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_destroy_req wr;
+	struct c2wr_ep_listen_destroy_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;	/* bail1 is the next statement either way */
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;	/* 0 on success, otherwise the negative errno set above */
+}
+
+/* Accept the pending connection request on cm_id using QP iw_param->qpn. */
+/* Returns 0 or a negative errno; each bail label undoes only what was set up. */
+int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct c2_qp *qp;
+	struct ib_qp *ibqp;
+	struct c2wr_cr_accept_req *wr;	/* variable length WR */
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_accept_rep *reply;	/* VQ Reply msg ptr. */
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Set the RDMA read limits */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/* Allocate verbs request. */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail0;	/* nothing to free yet */
+	}
+	vq_req->qp = qp;
+	vq_req->cm_id = cm_id;
+	vq_req->event = IW_CM_EVENT_ESTABLISHED;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail1;	/* only vq_req exists; no CM_ID reference taken yet */
+	}
+
+	/* Build the WR */
+	c2_wr_set_id(wr, CCWR_CR_ACCEPT);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
+	wr->qp_handle = qp->adapter_handle;
+
+	/* Replace the cr_handle with the QP after accept */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	/* Validate private_data length */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail2;
+	}
+
+	if (iw_param->private_data) {
+		wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0],
+		       iw_param->private_data, iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/* Reference the request struct.  Dereferenced in the int handler. */
+	vq_req_get(c2dev, vq_req);
+
+	/* Send WR to adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	/* Wait for reply from adapter */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	/* Check that reply is present */
+	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+	if (!err)
+		c2_set_qp_state(qp, C2_QP_STATE_RTS);
+ bail2:
+	if (err) {
+		/*
+		 * If we fail after the association above, drop the CM_ID
+		 * reference and disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	kfree(wr);
+ bail1:
+	vq_req_free(c2dev, vq_req);
+ bail0:
+	return err;
+}
+
+int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{	/* send CCWR_CR_REJECT for cm_id; NOTE: pdata and pdata_len are not used below */
+	struct c2_dev *c2dev;
+	struct c2wr_cr_reject_req wr;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_reject_rep *reply;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_CR_REJECT);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_cr_reject_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	err = c2_errno(reply);	/* 0 or a negative errno from the reply's result code */
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
new file mode 100644
index 0000000000000..9d7bcc5ade93d
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
+
+static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
+{
+	struct c2_cq *cq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+	cq = c2dev->qptr_array[cqn];	/* lookup and refcount bump share c2dev->lock */
+	if (!cq) {			/* slot is empty: nothing to reference */
+		spin_unlock_irqrestore(&c2dev->lock, flags);
+		return NULL;
+	}
+	atomic_inc(&cq->refcount);	/* released again by c2_cq_put() */
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+	return cq;
+}
+
+static void c2_cq_put(struct c2_cq *cq)
+{
+	if (atomic_dec_and_test(&cq->refcount))
+		wake_up(&cq->wait);	/* count hit zero: wake sleepers on cq->wait */
+}
+
+void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_cq *cq;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq) {
+		printk("discarding events on destroyed CQN=%d\n", mq_index);
+		return;
+	}
+
+	(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+	c2_cq_put(cq);
+}
+
+void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
+{
+	struct c2_cq *cq;
+	struct c2_mq *q;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq)
+		return;
+
+	spin_lock_irq(&cq->lock);
+	q = &cq->mq;
+	if (q && !c2_mq_empty(q)) {
+		u16 priv = q->priv;
+		struct c2wr_ce *msg;
+
+		while (priv != be16_to_cpu(*q->shared)) {
+			msg = (struct c2wr_ce *)
+				(q->msg_pool.host + priv * q->msg_size);
+			if (msg->qp_user_context == (u64) (unsigned long) qp) {
+				msg->qp_user_context = (u64) 0;
+			}
+			priv = (priv + 1) % q->q_size;
+		}
+	}
+	spin_unlock_irq(&cq->lock);
+	c2_cq_put(cq);
+}
+
+static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
+{
+	switch (status) {
+	case C2_OK:
+		return IB_WC_SUCCESS;
+	case CCERR_FLUSHED:
+		return IB_WC_WR_FLUSH_ERR;
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return IB_WC_LOC_PROT_ERR;
+	case CCERR_ACCESS_VIOLATION:
+		return IB_WC_LOC_ACCESS_ERR;
+	case CCERR_TOTAL_LENGTH_TOO_BIG:
+		return IB_WC_LOC_LEN_ERR;
+	case CCERR_INVALID_WINDOW:
+		return IB_WC_MW_BIND_ERR;
+	default:
+		return IB_WC_GENERAL_ERR;
+	}
+}
+
+
+static inline int c2_poll_one(struct c2_dev *c2dev,
+			      struct c2_cq *cq, struct ib_wc *entry)
+{
+	struct c2wr_ce *ce;
+	struct c2_qp *qp;
+	int is_recv = 0;
+
+	ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+	if (!ce) {
+		return -EAGAIN;
+	}
+
+	/*
+	 * if the qp returned is null then this qp has already
+	 * been freed and we are unable to process the completion.
+	 * try pulling the next message
+	 */
+	while ((qp =
+		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
+		c2_mq_free(&cq->mq);
+		ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+		if (!ce)
+			return -EAGAIN;
+	}
+
+	entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
+	entry->wr_id = ce->hdr.context;
+	entry->qp_num = ce->handle;
+	entry->wc_flags = 0;
+	entry->slid = 0;
+	entry->sl = 0;
+	entry->src_qp = 0;
+	entry->dlid_path_bits = 0;
+	entry->pkey_index = 0;
+
+	switch (c2_wr_get_id(ce)) {
+	case C2_WR_TYPE_SEND:
+		entry->opcode = IB_WC_SEND;
+		break;
+	case C2_WR_TYPE_RDMA_WRITE:
+		entry->opcode = IB_WC_RDMA_WRITE;
+		break;
+	case C2_WR_TYPE_RDMA_READ:
+		entry->opcode = IB_WC_RDMA_READ;
+		break;
+	case C2_WR_TYPE_BIND_MW:
+		entry->opcode = IB_WC_BIND_MW;
+		break;
+	case C2_WR_TYPE_RECV:
+		entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
+		entry->opcode = IB_WC_RECV;
+		is_recv = 1;
+		break;
+	default:
+		break;
+	}
+
+	/* consume the WQEs */
+	if (is_recv)
+		c2_mq_lconsume(&qp->rq_mq, 1);
+	else
+		c2_mq_lconsume(&qp->sq_mq,
+			       be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
+
+	/* free the message */
+	c2_mq_free(&cq->mq);
+
+	return 0;
+}
+
+int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct c2_dev *c2dev = to_c2dev(ibcq->device);
+	struct c2_cq *cq = to_c2cq(ibcq);
+	unsigned long flags;
+	int npolled, err;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	for (npolled = 0; npolled < num_entries; ++npolled) {
+
+		err = c2_poll_one(c2dev, cq, entry + npolled);
+		if (err)
+			break;
+	}
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return npolled;
+}
+
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+	struct c2_mq_shared __iomem *shared;
+	struct c2_cq *cq;
+
+	cq = to_c2cq(ibcq);
+	shared = cq->mq.peer;
+
+	if (notify == IB_CQ_NEXT_COMP)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
+	else if (notify == IB_CQ_SOLICITED)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
+	else
+		return -EINVAL;
+
+	writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
+
+	/*
+	 * Now read back shared->armed to make the PCI
+	 * write synchronous.  This is necessary for
+	 * correct cq notification semantics.
+	 */
+	readb(&shared->armed);
+
+	return 0;
+}
+
+static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+{
+
+	dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping),
+			 mq->q_size * mq->msg_size, DMA_FROM_DEVICE);
+	free_pages((unsigned long) mq->msg_pool.host,
+		   get_order(mq->q_size * mq->msg_size));
+}
+
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
+			   int msg_size)
+{
+	unsigned long pool_start;
+
+	pool_start = __get_free_pages(GFP_KERNEL,
+				      get_order(q_size * msg_size));
+	if (!pool_start)
+		return -ENOMEM;
+
+	c2_mq_rep_init(mq,
+		       0,		/* index (currently unknown) */
+		       q_size,
+		       msg_size,
+		       (u8 *) pool_start,
+		       NULL,	/* peer (currently unknown) */
+		       C2_MQ_HOST_TARGET);
+
+	mq->host_dma = dma_map_single(c2dev->ibdev.dma_device,
+				      (void *)pool_start,
+				      q_size * msg_size, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(mq, mapping, mq->host_dma);
+
+	return 0;
+}
+
+int c2_init_cq(struct c2_dev *c2dev, int entries,
+	       struct c2_ucontext *ctx, struct c2_cq *cq)
+{
+	struct c2wr_cq_create_req wr;
+	struct c2wr_cq_create_rep *reply;
+	unsigned long peer_pa;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	might_sleep();
+
+	cq->ibcq.cqe = entries - 1;
+	cq->is_kernel = !ctx;
+
+	/* Allocate a shared pointer */
+	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+				      &cq->mq.shared_dma, GFP_KERNEL);
+	if (!cq->mq.shared)
+		return -ENOMEM;
+
+	/* Allocate pages for the message pool */
+	err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
+	if (err)
+		goto bail0;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.msg_size = cpu_to_be32(cq->mq.msg_size);
+	wr.depth = cpu_to_be32(cq->mq.q_size);
+	wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
+	wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
+	wr.user_context = (u64) (unsigned long) (cq);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail3;
+
+	cq->adapter_handle = reply->cq_handle;
+	cq->mq.index = be32_to_cpu(reply->mq_index);
+
+	peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
+	cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
+	if (!cq->mq.peer) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	spin_lock_init(&cq->lock);
+	atomic_set(&cq->refcount, 1);
+	init_waitqueue_head(&cq->wait);
+
+	/*
+	 * Use the MQ index allocated by the adapter to
+	 * store the CQ in the qptr_array
+	 */
+	cq->cqn = cq->mq.index;
+	c2dev->qptr_array[cq->cqn] = cq;
+
+	return 0;
+
+      bail3:
+	vq_repbuf_free(c2dev, reply);
+      bail2:
+	vq_req_free(c2dev, vq_req);
+      bail1:
+	c2_free_cq_buf(c2dev, &cq->mq);
+      bail0:
+	c2_free_mqsp(cq->mq.shared);
+
+	return err;
+}
+
+/*
+ * Unpublish the CQ, wait for users to drop it, destroy it on the adapter.
+ */
+void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
+{
+	int err;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cq_destroy_req wr;
+	struct c2wr_cq_destroy_rep *reply;
+
+	might_sleep();
+
+	/* Clear CQ from the qptr array */
+	spin_lock_irq(&c2dev->lock);
+	c2dev->qptr_array[cq->mq.index] = NULL;
+	atomic_dec(&cq->refcount);
+	spin_unlock_irq(&c2dev->lock);
+
+	wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		goto bail0;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.cq_handle = cq->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	/* reply_msg is 0 when the interrupt handler had no reply buffer */
+	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+	if (reply)
+		vq_repbuf_free(c2dev, reply);
+      bail1:
+	vq_req_free(c2dev, vq_req);
+      bail0:
+	if (cq->is_kernel)
+		c2_free_cq_buf(c2dev, &cq->mq);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
new file mode 100644
index 0000000000000..0d0bc33ca30a6
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_vq.h"
+
+static void handle_mq(struct c2_dev *c2dev, u32 index);
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
+
+/*
+ * Handle RNIC interrupts
+ */
+void c2_rnic_interrupt(struct c2_dev *c2dev)
+{
+	unsigned int mq_index;
+
+	while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
+		mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
+		if (mq_index & 0x80000000) {
+			break;
+		}
+
+		c2dev->hints_read++;
+		handle_mq(c2dev, mq_index);
+	}
+
+}
+
+/*
+ * Top level MQ handler
+ */
+static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
+{
+	if (c2dev->qptr_array[mq_index] == NULL) {
+		pr_debug("handle_mq: stray activity for mq_index=%u\n",
+			mq_index);
+		return;
+	}
+
+	switch (mq_index) {
+	case (0):
+		/*
+		 * An index of 0 in the activity queue
+		 * indicates the req vq now has messages
+		 * available...
+		 *
+		 * Wake up any waiters waiting on req VQ
+		 * message availability.
+		 */
+		wake_up(&c2dev->req_vq_wo);
+		break;
+	case (1):
+		/* Index 1 is the verbs reply queue. */
+		handle_vq(c2dev, mq_index);
+		break;
+	case (2):
+		/* We have to purge the VQ in case there are pending
+		 * accept reply requests that would result in the
+		 * generation of an ESTABLISHED event. If we don't
+		 * generate these first, a CLOSE event could end up
+		 * being delivered before the ESTABLISHED event.
+		 */
+		handle_vq(c2dev, 1);
+
+		c2_ae_event(c2dev, mq_index);
+		break;
+	default:
+		/* Every other index is a CQ.
+		 * There is no event synchronization between CQ events
+		 * and AE or CM events. In fact, CQE could be
+		 * delivered for all of the I/O up to and including the
+		 * FLUSH for a peer disconnect prior to the ESTABLISHED
+		 * event being delivered to the app. The reason for this
+		 * is that CM events are delivered on a thread, while AE
+		 * and CQ events are delivered on interrupt context.
+		 */
+		c2_cq_event(c2dev, mq_index);
+		break;
+	}
+}
+
+/*
+ * Handles verbs WR replies.
+ */
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
+{
+	void *adapter_msg, *reply_msg;
+	struct c2wr_hdr *host_msg;
+	struct c2wr_hdr tmp;
+	struct c2_mq *reply_vq;
+	struct c2_vq_req *req;
+	struct iw_cm_event cm_event;
+	int err;
+
+	reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
+
+	/*
+	 * get next msg from mq_index into adapter_msg.
+	 * don't free it yet.
+	 */
+	adapter_msg = c2_mq_consume(reply_vq);
+	if (adapter_msg == NULL)
+		return;
+
+	host_msg = vq_repbuf_alloc(c2dev);
+
+	/*
+	 * If we can't get a host buffer, then we'll still
+	 * wakeup the waiter, we just won't give him the msg.
+	 * It is assumed the waiter will deal with this...
+	 */
+	if (!host_msg) {
+		pr_debug("handle_vq: no repbufs!\n");
+
+		/*
+		 * just copy the WR header into a local variable.
+		 * this allows us to still demux on the context
+		 */
+		host_msg = &tmp;
+		memcpy(host_msg, adapter_msg, sizeof(tmp));
+		reply_msg = NULL;
+	} else {
+		memcpy(host_msg, adapter_msg, reply_vq->msg_size);
+		reply_msg = host_msg;
+	}
+
+	/*
+	 * consume the msg from the MQ
+	 */
+	c2_mq_free(reply_vq);
+
+	/*
+	 * wakeup the waiter.
+	 */
+	req = (struct c2_vq_req *) (unsigned long) host_msg->context;
+	if (req == NULL) {
+		/*
+		 * We should never get here, as the adapter should
+		 * never send us a reply that we're not expecting.
+		 * Only free a real reply buffer, never the on-stack tmp.
+		 */
+		if (reply_msg)
+			vq_repbuf_free(c2dev, reply_msg);
+		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
+		return;
+	}
+
+	/* No reply buffer means there is no result to decode; the
+	 * waiter is handed a NULL reply_msg below.
+	 */
+	err = reply_msg ? c2_errno(reply_msg) : -ENOMEM;
+	if (!err) switch (req->event) {
+	case IW_CM_EVENT_ESTABLISHED:
+		/* Move the QP to RTS before reporting the event */
+		c2_set_qp_state(req->qp, C2_QP_STATE_RTS);
+		/* fall through */
+	case IW_CM_EVENT_CLOSE:
+		cm_event.event = req->event;
+		cm_event.status = 0;
+		cm_event.local_addr = req->cm_id->local_addr;
+		cm_event.remote_addr = req->cm_id->remote_addr;
+		cm_event.private_data = NULL;
+		cm_event.private_data_len = 0;
+		req->cm_id->event_handler(req->cm_id, &cm_event);
+		break;
+	default:
+		break;
+	}
+
+	req->reply_msg = (u64) (unsigned long) (reply_msg);
+	atomic_set(&req->reply_ready, 1);
+	wake_up(&req->wait_object);
+
+	/*
+	 * If the request was cancelled, then this put will
+	 * free the vq_req memory...and reply_msg!!!
+	 */
+	vq_req_put(c2dev, req);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
new file mode 100644
index 0000000000000..1e4f46493fcb3
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_vq.h"
+
+#define PBL_VIRT 1
+#define PBL_PHYS 2
+
+/*
+ * Send all the PBL messages to convey the remainder of the PBL
+ * Wait for the adapter's reply on the last one.
+ * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
+ *
+ * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
+ *	  Reply buffer _is_ freed by this function.
+ */
+static int
+send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
+		  unsigned long va, u32 pbl_depth,
+		  struct c2_vq_req *vq_req, int pbl_type)
+{
+	u32 pbe_count;		/* amt that fits in a PBL msg */
+	u32 count;		/* amt in this PBL MSG. */
+	struct c2wr_nsmr_pbl_req *wr;	/* PBL WR ptr */
+	struct c2wr_nsmr_pbl_rep *reply;	/* reply ptr */
+	int err, pbl_virt, pbl_index, i;
+
+	switch (pbl_type) {
+	case PBL_VIRT:
+		pbl_virt = 1;
+		break;
+	case PBL_PHYS:
+		pbl_virt = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr)
+		return -ENOMEM;
+	c2_wr_set_id(wr, CCWR_NSMR_PBL);
+
+	/*
+	 * Only the last PBL message will generate a reply from the verbs,
+	 * so we set the context to 0 indicating there is no kernel verbs
+	 * handler blocked awaiting this reply.
+	 */
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->stag_index = stag_index;	/* already swapped */
+	wr->flags = 0;
+	pbl_index = 0;
+	while (pbl_depth) {
+		count = min(pbe_count, pbl_depth);
+		wr->addrs_length = cpu_to_be32(count);
+
+		/*
+		 *  If this is the last message, then reference the
+		 *  vq request struct cuz we're gonna wait for a reply.
+		 *  also mark this PBL msg as the last one.
+		 */
+		if (count == pbl_depth) {
+			/*
+			 * reference the request struct.  dereferenced in the
+			 * int handler.
+			 */
+			vq_req_get(c2dev, vq_req);
+			wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
+
+			/*
+			 * This is the last PBL message.
+			 * Set the context to our VQ Request Object so we can
+			 * wait for the reply.
+			 */
+			wr->hdr.context = (unsigned long) vq_req;
+		}
+
+		/*
+		 * If pbl_virt is set then va is a virtual address
+		 * that describes a virtually contiguous memory
+		 * allocation. The wr needs the start of each virtual page
+		 * to be converted to the corresponding physical address
+		 * of the page. If pbl_virt is not set then va is an array
+		 * of physical addresses and there is no conversion to do.
+		 * Just fill in the wr with what is in the array.
+		 * NOTE(review): the pbl_virt branch only advances va and
+		 * never stores into wr->paddrs[i] -- confirm this is intended.
+		 */
+		for (i = 0; i < count; i++) {
+			if (pbl_virt) {
+				va += PAGE_SIZE;
+			} else {
+				wr->paddrs[i] =
+				    cpu_to_be64(((u64 *)va)[pbl_index + i]);
+			}
+		}
+
+		/*
+		 * Send WR to adapter
+		 */
+		err = vq_send_wr(c2dev, (union c2wr *) wr);
+		if (err) {
+			/* only the last message took a reference */
+			if (count == pbl_depth)
+				vq_req_put(c2dev, vq_req);
+			goto bail0;
+		}
+		pbl_depth -= count;
+		pbl_index += count;
+	}
+
+	/*
+	 *  Now wait for the reply...
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	kfree(wr);
+	return err;
+}
+
+#define C2_PBL_MAX_DEPTH 131072
+/*
+ * Register a memory region described by a physical page list and store
+ * the adapter's stag index in mr->ibmr.lkey/rkey.  Page entries that do
+ * not fit in the register WR are sent with send_pbl_messages().
+ * Returns 0 on success or a negative errno.
+ */
+int
+c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+			   int page_size, int pbl_depth, u32 length,
+			   u32 offset, u64 *va, enum c2_acf acf,
+			   struct c2_mr *mr)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_nsmr_register_req *wr;
+	struct c2wr_nsmr_register_rep *reply;
+	u16 flags;
+	int i, pbe_count, count;
+	int err;
+
+	if (!va || !length || !addr_list || !pbl_depth)
+		return -EINVAL;
+
+	/*
+	 * Verify PBL depth is within rnic max
+	 */
+	if (pbl_depth > C2_PBL_MAX_DEPTH)
+		return -EINVAL;
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	/*
+	 * build the WR
+	 */
+	c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+
+	flags = (acf | MEM_VA_BASED | MEM_REMOTE);
+
+	/*
+	 * compute how many pbes can fit in the message
+	 */
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
+
+	if (pbl_depth <= pbe_count)
+		flags |= MEM_PBL_COMPLETE;
+	wr->flags = cpu_to_be16(flags);
+	wr->stag_key = 0;	//stag_key;
+	wr->va = cpu_to_be64(*va);
+	wr->pd_id = mr->pd->pd_id;
+	wr->pbe_size = cpu_to_be32(page_size);
+	wr->length = cpu_to_be32(length);
+	wr->pbl_depth = cpu_to_be32(pbl_depth);
+	wr->fbo = cpu_to_be32(offset);
+	count = min(pbl_depth, pbe_count);
+	wr->addrs_length = cpu_to_be32(count);
+
+	/*
+	 * fill out the PBL for this message
+	 */
+	for (i = 0; i < count; i++)
+		wr->paddrs[i] = cpu_to_be64(addr_list[i]);
+
+	/*
+	 * reference the request struct
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * send the WR to the adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	/*
+	 * wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	/*
+	 * process reply
+	 */
+	reply =
+	    (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	if ((err = c2_errno(reply)))
+		goto bail2;
+	//*p_pb_entries = be32_to_cpu(reply->pbl_depth);
+	mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
+	vq_repbuf_free(c2dev, reply);
+
+	/*
+	 * if there are still more PBEs we need to send them to
+	 * the adapter and wait for a reply on the final one.
+	 * reuse vq_req for this purpose.
+	 */
+	pbl_depth -= count;
+	if (pbl_depth) {
+
+		vq_req->reply_msg = (unsigned long) NULL;
+		atomic_set(&vq_req->reply_ready, 0);
+		err = send_pbl_messages(c2dev,
+					cpu_to_be32(mr->ibmr.lkey),
+					(unsigned long) &addr_list[i],
+					pbl_depth, vq_req, PBL_PHYS);
+		if (err)
+			goto bail1;
+	}
+
+	vq_req_free(c2dev, vq_req);
+	kfree(wr);
+
+	return err;
+
+      bail2:
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
+{
+	struct c2_vq_req *vq_req;	/* verbs request object */
+	struct c2wr_stag_dealloc_req wr;	/* work request */
+	struct c2wr_stag_dealloc_rep *reply;	/* WR reply  */
+	int err;
+
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.stag_index = cpu_to_be32(stag_index);
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c
new file mode 100644
index 0000000000000..b88a755921026
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_mq.h"
+
+void *c2_mq_alloc(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (c2_mq_full(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m =
+		    (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
+		m->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_produce(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (!c2_mq_full(q)) {
+		q->priv = (q->priv + 1) % q->q_size;
+		q->hint_count++;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+void *c2_mq_consume(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (c2_mq_empty(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m = (struct c2wr_hdr *)
+		    (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_free(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (!c2_mq_empty(q)) {
+
+#ifdef CCMSGMAGIC
+		{
+			struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
+			    (q->msg_pool.adapter + q->priv * q->msg_size);
+			__raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
+		}
+#endif
+		q->priv = (q->priv + 1) % q->q_size;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+
+void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	while (wqe_count--) {
+		BUG_ON(c2_mq_empty(q));
+		*q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
+	}
+}
+
+#if 0
+u32 c2_mq_count(struct c2_mq *q)
+{
+	s32 count;
+
+	if (q->type == C2_MQ_HOST_TARGET)
+		count = be16_to_cpu(*q->shared) - q->priv;
+	else
+		count = q->priv - be16_to_cpu(*q->shared);
+
+	if (count < 0)
+		count += q->q_size;
+
+	return (u32) count;
+}
+#endif  /*  0  */
+
+void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.adapter = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
+void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.host = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
new file mode 100644
index 0000000000000..9185bbb216583
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _C2_MQ_H_
+#define _C2_MQ_H_
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include "c2_wr.h"
+
+enum c2_shared_regs {
+	/* NOTE(review): presumably offsets of shared adapter registers; confirm. */
+	C2_SHARED_ARMED = 0x10,
+	C2_SHARED_NOTIFY = 0x18,
+	C2_SHARED_SHARED = 0x40,
+};
+/* Per-queue shared state block; the trailing pad rounds the layout up to 64 bytes. */
+struct c2_mq_shared {
+	u16 unused1;
+	u8 armed;
+	u8 notification_type;
+	u32 unused2;
+	u16 shared;
+	/* Pad to 64 bytes. */
+	u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
+};
+/* Queue kind stored in c2_mq.type and checked by the MQ helpers. */
+enum c2_mq_type {
+	C2_MQ_HOST_TARGET = 1,
+	C2_MQ_ADAPTER_TARGET = 2,
+};
+
+/*
+ * struct c2_mq is for kernel-mode MQs like the VQs and the AEQ.
+ * c2_user_mq_t (which is the same format) is for user-mode MQs...
+ */
+#define C2_MQ_MAGIC 0x4d512020	/* 'MQ  ' */
+struct c2_mq {
+	u32 magic;		/* C2_MQ_MAGIC once initialised */
+	union {
+		u8 *host;
+		u8 __iomem *adapter;
+	} msg_pool;
+	dma_addr_t host_dma;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 hint_count;
+	u16 priv;
+	struct c2_mq_shared __iomem *peer;
+	u16 *shared;		/* index word, read with be16_to_cpu() */
+	dma_addr_t shared_dma;
+	u32 q_size;
+	u32 msg_size;
+	u32 index;
+	enum c2_mq_type type;
+};
+/* True when q->priv equals the index stored (big-endian) at *q->shared. */
+static __inline__ int c2_mq_empty(struct c2_mq *q)
+{
+	return q->priv == be16_to_cpu(*q->shared);
+}
+/* True when q->priv sits one slot behind *q->shared, modulo q_size. */
+static __inline__ int c2_mq_full(struct c2_mq *q)
+{
+	return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
+}
+/* MQ operations whose definitions live outside this header. */
+extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
+extern void *c2_mq_alloc(struct c2_mq *q);
+extern void c2_mq_produce(struct c2_mq *q);
+extern void *c2_mq_consume(struct c2_mq *q);
+extern void c2_mq_free(struct c2_mq *q);
+extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		       u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
+extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+			   u8 *pool_start, u16 __iomem *peer, u32 type);
+
+#endif				/* _C2_MQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
new file mode 100644
index 0000000000000..00c709926c8dc
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "c2.h"
+#include "c2_provider.h"
+/* Claim a free PD id from pd_table (the search wraps); -ENOMEM if none is free. */
+int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
+{
+	u32 obj;
+	int ret = 0;
+
+	spin_lock(&c2dev->pd_table.lock);
+	obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
+				 c2dev->pd_table.last);
+	if (obj >= c2dev->pd_table.max)
+		obj = find_first_zero_bit(c2dev->pd_table.table,
+					  c2dev->pd_table.max);
+	if (obj < c2dev->pd_table.max) {
+		pd->pd_id = obj;
+		__set_bit(obj, c2dev->pd_table.table);
+		c2dev->pd_table.last = obj+1;
+		if (c2dev->pd_table.last >= c2dev->pd_table.max)
+			c2dev->pd_table.last = 0;
+	} else
+		ret = -ENOMEM;
+	spin_unlock(&c2dev->pd_table.lock);
+	return ret;
+}
+/* Clear pd->pd_id in the PD bitmap while holding pd_table.lock. */
+void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
+{
+	spin_lock(&c2dev->pd_table.lock);
+	__clear_bit(pd->pd_id, c2dev->pd_table.table);
+	spin_unlock(&c2dev->pd_table.lock);
+}
+
+int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+{
+	/* Empty PD bitmap of props.max_pd bits, search cursor at 0; 0 or -ENOMEM. */
+	c2dev->pd_table.last = 0;
+	c2dev->pd_table.max = c2dev->props.max_pd;
+	spin_lock_init(&c2dev->pd_table.lock);
+	c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
+					sizeof(long), GFP_KERNEL);
+	if (!c2dev->pd_table.table)
+		return -ENOMEM;
+	bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
+	return 0;
+}
+/* Free the PD bitmap allocated by c2_init_pd_table(). */
+void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+{
+	kfree(c2dev->pd_table.table);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
new file mode 100644
index 0000000000000..8fddc8cccdf3d
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -0,0 +1,869 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_arp.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include "c2.h"
+#include "c2_provider.h"
+#include "c2_user.h"
+/* Copy the device attributes cached in c2dev->props into *props. */
+static int c2_query_device(struct ib_device *ibdev,
+			   struct ib_device_attr *props)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	*props = c2dev->props;
+	return 0;
+}
+/* Fill *props with fixed port attributes; ibdev and port are not consulted. */
+static int c2_query_port(struct ib_device *ibdev,
+			 u8 port, struct ib_port_attr *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	props->max_mtu = IB_MTU_4096;
+	props->lid = 0;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+	props->state = IB_PORT_ACTIVE;
+	props->phys_state = 0;
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+	props->active_width = 1;
+	props->active_speed = 1;
+
+	return 0;
+}
+/* Stub: logs the call and reports success without changing anything. */
+static int c2_modify_port(struct ib_device *ibdev,
+			  u8 port, int port_modify_mask,
+			  struct ib_port_modify *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+/* Always reports a P_Key of 0, whatever port and index are given. */
+static int c2_query_pkey(struct ib_device *ibdev,
+			 u8 port, u16 index, u16 * pkey)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	*pkey = 0;
+	return 0;
+}
+/* GID = first 6 bytes of the pseudo netdev's dev_addr; the rest is zeroed. */
+static int c2_query_gid(struct ib_device *ibdev, u8 port,
+			int index, union ib_gid *gid)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+	memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
+
+	return 0;
+}
+
+/* Allocate the user context data structure, which keeps track of all
+ * objects associated with a particular user-mode client; ERR_PTR on OOM.
+ */
+static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
+					     struct ib_udata *udata)
+{
+	struct c2_ucontext *context;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	return &context->ibucontext;
+}
+/* Free the context. NOTE(review): kfree() gets the embedded ib_ucontext pointer. */
+static int c2_dealloc_ucontext(struct ib_ucontext *context)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	kfree(context);
+	return 0;
+}
+/* mmap is not implemented: always -ENOSYS. */
+static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+/* Allocate a PD; for user contexts its pd_id is copied out through udata. */
+static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
+				 struct ib_ucontext *context,
+				 struct ib_udata *udata)
+{
+	struct c2_pd *pd;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
+	if (err) {
+		kfree(pd);
+		return ERR_PTR(err);
+	}
+
+	if (context) {
+		if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
+			c2_pd_free(to_c2dev(ibdev), pd);
+			kfree(pd);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+
+	return &pd->ibpd;
+}
+/* Give the PD id back via c2_pd_free(), then free the PD memory. */
+static int c2_dealloc_pd(struct ib_pd *pd)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
+	kfree(pd);
+
+	return 0;
+}
+/* Address handles are not supported: always ERR_PTR(-ENOSYS). */
+static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return ERR_PTR(-ENOSYS);
+}
+/* Address handles are not supported: always -ENOSYS. */
+static int c2_ah_destroy(struct ib_ah *ah)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+/* Take a reference on the QP; a NULL ibqp is a BUG. */
+static void c2_add_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	atomic_inc(&qp->refcount);
+}
+/* Drop a QP reference; when the count reaches zero, wake waiters on qp->wait. */
+static void c2_rem_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+/* Look up a QP by number; NULL when c2_find_qpn() finds nothing. */
+struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
+{
+	struct c2_qp *qp = c2_find_qpn(to_c2dev(device), qpn);
+
+	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
+		__FUNCTION__, qp, qpn, device,
+		qp ? atomic_read(&qp->refcount) : 0);
+
+	if (!qp)
+		return NULL;
+	return &qp->ibqp;
+}
+
+/*
+ * Create a queue pair.  Only IB_QPT_RC is supported; any other type is
+ * rejected with -EINVAL before anything is allocated.  The QP is
+ * zero-allocated, its lock initialised, and the rest of the setup is
+ * delegated to c2_alloc_qp().  On failure the QP memory is released and
+ * the error is returned as an ERR_PTR.
+ *
+ * udata is not used; userspace-specific (pd->uobject) handling is still
+ * only a placeholder.
+ */
+static struct ib_qp *c2_create_qp(struct ib_pd *pd,
+				  struct ib_qp_init_attr *init_attr,
+				  struct ib_udata *udata)
+{
+	struct c2_qp *qp;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	if (init_attr->qp_type != IB_QPT_RC) {
+		pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
+			init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+	spin_lock_init(&qp->lock);
+
+	/* userspace specific (pd->uobject) setup would go here */
+
+	err = c2_alloc_qp(to_c2dev(pd->device), to_c2pd(pd), init_attr, qp);
+	if (err) {
+		/* ...and its userspace specific unwinding here */
+		kfree(qp);
+		return ERR_PTR(err);
+	}
+
+	return &qp->ibqp;
+}
+/* Tear the QP down with c2_free_qp(), then release its memory. */
+static int c2_destroy_qp(struct ib_qp *ib_qp)
+{
+	struct c2_qp *qp = to_c2qp(ib_qp);
+
+	pr_debug("%s:%u qp=%p,qp->state=%d\n",
+		__FUNCTION__, __LINE__,ib_qp,qp->state);
+	c2_free_qp(to_c2dev(ib_qp->device), qp);
+	kfree(qp);
+	return 0;
+}
+/* Allocate a CQ and set it up with c2_init_cq(); context and udata are unused. */
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
+				  struct ib_ucontext *context,
+				  struct ib_udata *udata)
+{
+	struct c2_cq *cq;
+	int err;
+
+	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq) {
+		pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
+	if (err) {
+		pr_debug("%s: error initializing CQ\n", __FUNCTION__);
+		kfree(cq);
+		return ERR_PTR(err);
+	}
+
+	return &cq->ibcq;
+}
+/* Tear the CQ down with c2_free_cq(), then release its memory. */
+static int c2_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct c2_cq *cq = to_c2cq(ib_cq);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	c2_free_cq(to_c2dev(ib_cq->device), cq);
+	kfree(cq);
+
+	return 0;
+}
+/* Map IB access flags to C2_ACF_*; local read and window bind are always set. */
+static inline u32 c2_convert_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
+	    (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
+	    (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
+	    C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
+}
+
+/*
+ * Register physical buffers as one MR.  Returns the ib_mr, or an ERR_PTR:
+ * -EINVAL (unaligned/empty buffer), -ENOMEM, or the registration error.
+ */
+static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
+				    struct ib_phys_buf *buffer_list,
+				    int num_phys_buf, int acc, u64 * iova_start)
+{
+	struct c2_mr *mr;
+	u64 *page_list;
+	u32 total_len = 0;
+	int err, i, j, k, naddrs, page_shift, pbl_depth = 0;
+
+	page_shift = PAGE_SHIFT;
+	/*
+	 * If there is only 1 buffer we assume this could
+	 * be a map of all phy mem...use a 32k page_shift.
+	 */
+	if (num_phys_buf == 1)
+		page_shift += 3;
+
+	for (i = 0; i < num_phys_buf; i++) {
+		if (buffer_list[i].addr & ~PAGE_MASK) {
+			pr_debug("Unaligned Memory Buffer: 0x%x\n",
+				(unsigned int) buffer_list[i].addr);
+			return ERR_PTR(-EINVAL);
+		}
+
+		if (!buffer_list[i].size) {
+			pr_debug("Invalid Buffer Size\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		total_len += buffer_list[i].size;
+		pbl_depth += ALIGN(buffer_list[i].size,
+				   (1 << page_shift)) >> page_shift;
+	}
+
+	page_list = vmalloc(sizeof(u64) * pbl_depth);
+	if (!page_list) {
+		pr_debug("couldn't vmalloc page_list of size %zd\n",
+			(sizeof(u64) * pbl_depth));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0, j = 0; i < num_phys_buf; i++) {
+		naddrs = ALIGN(buffer_list[i].size,
+			       (1 << page_shift)) >> page_shift;
+		/* Widen k first: an int k << page_shift overflows on big MRs. */
+		for (k = 0; k < naddrs; k++)
+			page_list[j++] = buffer_list[i].addr +
+					 ((u64) k << page_shift);
+	}
+
+	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		vfree(page_list);	/* do not leak the page list on OOM */
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mr->pd = to_c2pd(ib_pd);
+	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
+		"*iova_start %llx, first pa %llx, last pa %llx\n",
+		__FUNCTION__, page_shift, pbl_depth, total_len,
+		*iova_start, page_list[0], page_list[pbl_depth-1]);
+	err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+					 (1 << page_shift), pbl_depth,
+					 total_len, 0, iova_start,
+					 c2_convert_access(acc), mr);
+	vfree(page_list);
+	if (err) {
+		kfree(mr);
+		return ERR_PTR(err);
+	}
+
+	return &mr->ibmr;
+}
+/* DMA MR: registers one physical buffer at address 0 of 0xffffffff bytes. */
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct ib_phys_buf bl;
+	u64 kva = 0;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* AMSO1100 limit */
+	bl.size = 0xffffffff;
+	bl.addr = 0;
+	return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+/*
+ * Register a user memory region.  The DMA address of every page-sized
+ * piece of each mapped scatterlist entry is collected into a flat array,
+ * which c2_nsmr_register_phys_kern() then registers for this PD.
+ * Returns the ib_mr on success or an ERR_PTR; udata is unused.
+ */
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+				    int acc, struct ib_udata *udata)
+{
+	struct c2_mr *c2mr;
+	struct ib_umem_chunk *chunk;
+	u64 *pages;
+	u64 kva;
+	int shift, nentries;
+	int seg, pg, npg, filled;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	shift = ffs(region->page_size) - 1;
+
+	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
+	if (!c2mr)
+		return ERR_PTR(-ENOMEM);
+	c2mr->pd = to_c2pd(pd);
+
+	/* Size the array from the chunks' entry counts. */
+	nentries = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list)
+		nentries += chunk->nents;
+
+	pages = kmalloc(nentries * sizeof(u64), GFP_KERNEL);
+	if (!pages) {
+		err = -ENOMEM;
+		goto free_mr;
+	}
+
+	/* Expand each mapped entry into page_size-spaced DMA addresses. */
+	filled = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list) {
+		for (seg = 0; seg < chunk->nmap; ++seg) {
+			npg = sg_dma_len(&chunk->page_list[seg]) >> shift;
+			for (pg = 0; pg < npg; ++pg)
+				pages[filled++] =
+					sg_dma_address(&chunk->page_list[seg]) +
+					(region->page_size * pg);
+		}
+	}
+
+	kva = (u64)region->virt_base;
+	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), pages,
+					 region->page_size, filled,
+					 region->length, region->offset, &kva,
+					 c2_convert_access(acc), c2mr);
+	kfree(pages);
+	if (err)
+		goto free_mr;
+
+	return &c2mr->ibmr;
+
+free_mr:
+	kfree(c2mr);
+	return ERR_PTR(err);
+}
+/* Deallocate the MR's STag (lkey); the c2_mr is freed only if that succeeds. */
+static int c2_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct c2_mr *mr = to_c2mr(ib_mr);
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
+	if (err)
+		pr_debug("c2_stag_dealloc failed: %d\n", err);
+	else
+		kfree(mr);
+
+	return err;
+}
+/* sysfs hw_rev: props.hw_ver printed in hex. */
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x\n", dev->props.hw_ver);
+}
+/* sysfs fw_ver: props.fw_ver as hex <bits 32 and up>.<bits 16-31>.<bits 0-15>. */
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x.%x.%x\n",
+		       (int) (dev->props.fw_ver >> 32),
+		       (int) (dev->props.fw_ver >> 16) & 0xffff,
+		       (int) (dev->props.fw_ver & 0xffff));
+}
+/* sysfs hca_type: a fixed string; cdev is not consulted. */
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "AMSO1100\n");
+}
+/* sysfs board_id: a fixed string, printed with a 32-character precision cap. */
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
+}
+/* Read-only (S_IRUGO) class-device attributes and the table that lists them. */
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *c2_class_attributes[] = {
+	&class_device_attr_hw_rev,
+	&class_device_attr_fw_ver,
+	&class_device_attr_hca_type,
+	&class_device_attr_board_id
+};
+
+/*
+ * Thin wrapper around c2_qp_modify(); udata is not consulted.
+ */
+static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+			int attr_mask, struct ib_udata *udata)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+
+	return c2_qp_modify(c2dev, qp, attr, attr_mask);
+}
+/* Multicast attach is not supported: always -ENOSYS. */
+static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+/* Multicast detach is not supported: always -ENOSYS. */
+static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+/* MAD processing is not supported: always -ENOSYS, out_mad is left untouched. */
+static int c2_process_mad(struct ib_device *ibdev,
+			  int mad_flags,
+			  u8 port_num,
+			  struct ib_wc *in_wc,
+			  struct ib_grh *in_grh,
+			  struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+/* Connect hook: logs the call and delegates to c2_llp_connect(). */
+static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Request a connection */
+	return c2_llp_connect(cm_id, iw_param);
+}
+/* Accept hook: logs the call and delegates to c2_llp_accept(). */
+static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Accept the new connection */
+	return c2_llp_accept(cm_id, iw_param);
+}
+
+/*
+ * Reject hook: passes the private data straight to c2_llp_reject().
+ */
+static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	return c2_llp_reject(cm_id, pdata, pdata_len);
+}
+/* Listen hook: delegates to c2_llp_service_create() and logs its result. */
+static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	err = c2_llp_service_create(cm_id, backlog);
+	pr_debug("%s:%u err=%d\n",
+		__FUNCTION__, __LINE__,
+		err);
+	return err;
+}
+
+/*
+ * Listen teardown hook: delegates to c2_llp_service_destroy().
+ */
+static int c2_service_destroy(struct iw_cm_id *cm_id)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	return c2_llp_service_destroy(cm_id);
+}
+/* Pass each interface address (and mask) of the netdev to c2_add_addr(). */
+static int c2_pseudo_up(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("adding...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+/* Pass each interface address (and mask) of the netdev to c2_del_addr(). */
+static int c2_pseudo_down(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("deleting...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+/* Nothing is ever transmitted: free the skb and report NETDEV_TX_OK. */
+static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+/* Accept any MTU in [ETH_ZLEN, ETH_JUMBO_MTU]; other values give -EINVAL. */
+static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int in_range = new_mtu >= ETH_ZLEN && new_mtu <= ETH_JUMBO_MTU;
+
+	if (!in_range)
+		return -EINVAL;
+
+	/* TODO: Tell rnic about new rdma interface mtu */
+	netdev->mtu = new_mtu;
+	return 0;
+}
+/* alloc_netdev() setup callback: install the pseudo device's ops and defaults. */
+static void setup(struct net_device *netdev)
+{
+	SET_MODULE_OWNER(netdev);
+	netdev->open = c2_pseudo_up;
+	netdev->stop = c2_pseudo_down;
+	netdev->hard_start_xmit = c2_pseudo_xmit_frame;
+	netdev->get_stats = NULL;
+	netdev->tx_timeout = NULL;
+	netdev->set_mac_address = NULL;
+	netdev->change_mtu = c2_pseudo_change_mtu;
+	netdev->watchdog_timeo = 0;
+	netdev->type = ARPHRD_ETHER;
+	netdev->mtu = 1500;
+	netdev->hard_header_len = ETH_HLEN;
+	netdev->addr_len = ETH_ALEN;
+	netdev->tx_queue_len = 0;
+	netdev->flags |= IFF_NOARP;
+	return;
+}
+/* Allocate the "iw" pseudo netdev (unregistered); returns NULL if that fails. */
+static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
+{
+	char name[IFNAMSIZ];
+	struct net_device *netdev;
+
+	/* change ethxxx to iwxxx */
+	strcpy(name, "iw");
+	strcat(name, &c2dev->netdev->name[3]);
+	netdev = alloc_netdev(sizeof(*netdev), name, setup);
+	if (!netdev) {
+		printk(KERN_ERR PFX "%s -  etherdev alloc failed\n",
+			__FUNCTION__);
+		return NULL;
+	}
+
+	netdev->priv = c2dev;
+
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
+
+	/* Print out the MAC address */
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
+		netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#if 0
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+#endif
+	return netdev;
+}
+/* Register the pseudo netdev and the IB device; every failure unwinds fully. */
+int c2_register_device(struct c2_dev *dev)
+{
+	int ret, i;
+
+	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
+	if (!dev->pseudo_netdev)
+		return -ENOMEM;	/* dev_addr is dereferenced further down */
+	ret = register_netdev(dev->pseudo_netdev);
+	if (ret) {
+		printk(KERN_ERR PFX
+			"Unable to register netdev, ret = %d\n", ret);
+		goto out_free_netdev;
+	}
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
+	dev->ibdev.owner = THIS_MODULE;
+	dev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+
+	dev->ibdev.node_type = RDMA_NODE_RNIC;
+	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.dma_device = &dev->pcidev->dev;
+	dev->ibdev.class_dev.dev = &dev->pcidev->dev;
+	dev->ibdev.query_device = c2_query_device;
+	dev->ibdev.query_port = c2_query_port;
+	dev->ibdev.modify_port = c2_modify_port;
+	dev->ibdev.query_pkey = c2_query_pkey;
+	dev->ibdev.query_gid = c2_query_gid;
+	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
+	dev->ibdev.mmap = c2_mmap_uar;
+	dev->ibdev.alloc_pd = c2_alloc_pd;
+	dev->ibdev.dealloc_pd = c2_dealloc_pd;
+	dev->ibdev.create_ah = c2_ah_create;
+	dev->ibdev.destroy_ah = c2_ah_destroy;
+	dev->ibdev.create_qp = c2_create_qp;
+	dev->ibdev.modify_qp = c2_modify_qp;
+	dev->ibdev.destroy_qp = c2_destroy_qp;
+	dev->ibdev.create_cq = c2_create_cq;
+	dev->ibdev.destroy_cq = c2_destroy_cq;
+	dev->ibdev.poll_cq = c2_poll_cq;
+	dev->ibdev.get_dma_mr = c2_get_dma_mr;
+	dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
+	dev->ibdev.reg_user_mr = c2_reg_user_mr;
+	dev->ibdev.dereg_mr = c2_dereg_mr;
+	dev->ibdev.alloc_fmr = NULL;
+	dev->ibdev.unmap_fmr = NULL;
+	dev->ibdev.dealloc_fmr = NULL;
+	dev->ibdev.map_phys_fmr = NULL;
+	dev->ibdev.attach_mcast = c2_multicast_attach;
+	dev->ibdev.detach_mcast = c2_multicast_detach;
+	dev->ibdev.process_mad = c2_process_mad;
+	dev->ibdev.req_notify_cq = c2_arm_cq;
+	dev->ibdev.post_send = c2_post_send;
+	dev->ibdev.post_recv = c2_post_receive;
+	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+	if (!dev->ibdev.iwcm) {
+		ret = -ENOMEM;
+		goto out_unregister_netdev;
+	}
+	dev->ibdev.iwcm->add_ref = c2_add_ref;
+	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
+	dev->ibdev.iwcm->get_qp = c2_get_qp;
+	dev->ibdev.iwcm->connect = c2_connect;
+	dev->ibdev.iwcm->accept = c2_accept;
+	dev->ibdev.iwcm->reject = c2_reject;
+	dev->ibdev.iwcm->create_listen = c2_service_create;
+	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
+
+	ret = ib_register_device(&dev->ibdev);
+	if (ret)
+		goto out_free_iwcm;
+	for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) {
+		ret = class_device_create_file(&dev->ibdev.class_dev,
+					       c2_class_attributes[i]);
+		if (ret)
+			goto out_unregister_ibdev;
+	}
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+
+out_unregister_ibdev:
+	ib_unregister_device(&dev->ibdev);
+out_free_iwcm:
+	kfree(dev->ibdev.iwcm);
+out_unregister_netdev:
+	unregister_netdev(dev->pseudo_netdev);
+out_free_netdev:
+	free_netdev(dev->pseudo_netdev);
+	return ret;
+}
+/* Undo c2_register_device(). NOTE(review): ibdev.iwcm is not freed here. */
+void c2_unregister_device(struct c2_dev *dev)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	unregister_netdev(dev->pseudo_netdev);
+	free_netdev(dev->pseudo_netdev);
+	ib_unregister_device(&dev->ibdev);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
new file mode 100644
index 0000000000000..fc906223220fc
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_PROVIDER_H
+#define C2_PROVIDER_H
+#include <linux/inetdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#include "c2_mq.h"
+#include <rdma/iw_cm.h>
+
+/*
+ * Single-bit flags occupying bits 10 through 14.
+ * NOTE(review): by their names these look like memory access rights
+ * (local/remote read/write, atomic) -- confirm against the users.
+ */
+#define C2_MPT_FLAG_ATOMIC        (1 << 14)
+#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
+#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
+#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
+#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
+
+/* A buffer pointer together with its PCI unmap-address bookkeeping field. */
+struct c2_buf_list {
+	void *buf;
+	 DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/*
+ * The user context keeps track of objects allocated for a
+ * particular user-mode client.  At present it only embeds the core
+ * ib_ucontext; to_c2ucontext() converts back from that member.
+ */
+struct c2_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+struct c2_mtt;
+
+/*
+ * Protection domain: the core ib_pd plus a driver-side PD id.
+ *
+ * All objects associated with a PD are kept in the
+ * associated user context if present.
+ */
+struct c2_pd {
+	struct ib_pd ibpd;
+	u32 pd_id;	/* copied into the QP-create request by c2_alloc_qp() */
+};
+
+/* Memory region: the core ib_mr plus a pointer to a c2_pd. */
+struct c2_mr {
+	struct ib_mr ibmr;
+	struct c2_pd *pd;	/* presumably the PD that owns this MR -- confirm */
+};
+
+struct c2_av;
+
+/*
+ * Address-handle type tags.
+ * NOTE(review): no user of this enum is visible in this header; by the
+ * names they describe where an AH is stored -- confirm before relying on it.
+ */
+enum c2_ah_type {
+	C2_AH_ON_HCA,
+	C2_AH_PCI_POOL,
+	C2_AH_KMALLOC
+};
+
+/* Address handle: wraps the core ib_ah with no extra driver state. */
+struct c2_ah {
+	struct ib_ah ibah;
+};
+
+/* Driver-side completion queue built around the core ib_cq. */
+struct c2_cq {
+	struct ib_cq ibcq;
+	spinlock_t lock;	/* held by c2_free_qp() while the QP number is removed */
+	atomic_t refcount;
+	int cqn;		/* CQ number; passed to c2_cq_clean() */
+	int is_kernel;
+	wait_queue_head_t wait;
+
+	u32 adapter_handle;	/* placed in QP-create requests as the SQ/RQ CQ handle */
+	struct c2_mq mq;
+};
+
+/* Work queue placeholder: currently holds only a spinlock. */
+struct c2_wq {
+	spinlock_t lock;
+};
+struct iw_cm_id;
+/* Driver-side queue pair built around the core ib_qp. */
+struct c2_qp {
+	struct ib_qp ibqp;
+	struct iw_cm_id *cm_id;	/* connection id; released via rem_ref on close/destroy */
+	spinlock_t lock;	/* taken around cm_id/state inspection in c2_qp.c */
+	atomic_t refcount;	/* set to 1 at allocation; c2_free_qp() waits for 0 */
+	wait_queue_head_t wait;	/* c2_free_qp() sleeps here until refcount drops */
+	int qpn;		/* id obtained from the device's QP idr table */
+
+	u32 adapter_handle;	/* QP handle returned by the adapter on create */
+	u32 send_sgl_depth;	/* SGE limits recorded from the creation attributes */
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u8 state;		/* holds an IB_QPS_* value */
+
+	struct c2_mq sq_mq;	/* send message queue */
+	struct c2_mq rq_mq;	/* receive message queue */
+};
+
+/*
+ * Local/remote address and port pairs.
+ * NOTE(review): byte order and the exact meaning of "cr" are not
+ * established in this header -- confirm against the users.
+ */
+struct c2_cr_query_attrs {
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+};
+
+/* Recover the enclosing c2_pd from its embedded ib_pd member. */
+static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
+{
+	struct c2_pd *c2pd = container_of(ibpd, struct c2_pd, ibpd);
+
+	return c2pd;
+}
+
+/* Recover the enclosing c2_ucontext from its embedded ib_ucontext member. */
+static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
+{
+	struct c2_ucontext *c2uctx =
+	    container_of(ibucontext, struct c2_ucontext, ibucontext);
+
+	return c2uctx;
+}
+
+/* Recover the enclosing c2_mr from its embedded ib_mr member. */
+static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
+{
+	struct c2_mr *c2mr = container_of(ibmr, struct c2_mr, ibmr);
+
+	return c2mr;
+}
+
+
+/* Recover the enclosing c2_ah from its embedded ib_ah member. */
+static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
+{
+	struct c2_ah *c2ah = container_of(ibah, struct c2_ah, ibah);
+
+	return c2ah;
+}
+
+/* Recover the enclosing c2_cq from its embedded ib_cq member. */
+static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
+{
+	struct c2_cq *c2cq = container_of(ibcq, struct c2_cq, ibcq);
+
+	return c2cq;
+}
+
+/* Recover the enclosing c2_qp from its embedded ib_qp member. */
+static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
+{
+	struct c2_qp *c2qp = container_of(ibqp, struct c2_qp, ibqp);
+
+	return c2qp;
+}
+
+/*
+ * Report whether @addr matches the ifa_address of one of the interface
+ * addresses that for_ifa() visits on @netdev.
+ *
+ * Returns 1 on a match, 0 otherwise (including when @netdev has no
+ * in_device).  The in_device reference taken here is always dropped.
+ */
+static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
+{
+	struct in_device *in_dev = in_dev_get(netdev);
+	int found = 0;
+
+	if (in_dev == NULL)
+		return 0;
+
+	for_ifa(in_dev) {
+		if (ifa->ifa_address != addr)
+			continue;
+		found = 1;
+		break;
+	}
+	endfor_ifa(in_dev);
+
+	in_dev_put(in_dev);
+	return found;
+}
+#endif				/* C2_PROVIDER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
new file mode 100644
index 0000000000000..12261132b0778
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -0,0 +1,975 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+/* ORD/IRD values placed in the QP-create request by c2_alloc_qp(). */
+#define C2_MAX_ORD_PER_QP 128
+#define C2_MAX_IRD_PER_QP 128
+
+/*
+ * Hint word layout: queue index shifted into bits 16 and up, hint count
+ * in the low 16 bits.  C2_HINT_GET_INDEX() masks with 0x7FFF0000, so it
+ * never returns bit 31 (the bit c2_activity() polls before writing).
+ * Every macro argument is parenthesized so that expressions containing
+ * lower-precedence operators expand correctly.
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+/*
+ * Table indexed by IB work-request opcode giving the adapter WR type.
+ * Entries marked NO_SUPPORT hold -1 converted to the u8 element type
+ * (255 if u8 is an unsigned 8-bit type -- confirm).
+ * NOTE(review): no reference to c2_opcode[] is visible in c2_qp.c;
+ * c2_post_send() switches on the opcode directly.
+ */
+#define NO_SUPPORT -1
+static const u8 c2_opcode[] = {
+	[IB_WR_SEND] = C2_WR_TYPE_SEND,
+	[IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
+	[IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
+};
+
+/*
+ * Translate an IB QP state into the adapter's C2_QP_STATE_* value.
+ * IB states with no mapping below yield -1.
+ */
+static int to_c2_state(enum ib_qp_state ib_state)
+{
+	int c2_state = -1;
+
+	switch (ib_state) {
+	case IB_QPS_RESET:
+		c2_state = C2_QP_STATE_IDLE;
+		break;
+	case IB_QPS_RTS:
+		c2_state = C2_QP_STATE_RTS;
+		break;
+	case IB_QPS_SQD:
+	case IB_QPS_SQE:
+		/* both send-queue states map onto CLOSING */
+		c2_state = C2_QP_STATE_CLOSING;
+		break;
+	case IB_QPS_ERR:
+		c2_state = C2_QP_STATE_ERROR;
+		break;
+	default:
+		break;
+	}
+
+	return c2_state;
+}
+
+/*
+ * Translate an adapter C2_QP_STATE_* value into an IB QP state.
+ * Unknown adapter states yield -1.
+ */
+static int to_ib_state(enum c2_qp_state c2_state)
+{
+	int ib_state;
+
+	switch (c2_state) {
+	case C2_QP_STATE_IDLE:
+		ib_state = IB_QPS_RESET;
+		break;
+	case C2_QP_STATE_CONNECTING:
+		ib_state = IB_QPS_RTR;
+		break;
+	case C2_QP_STATE_RTS:
+		ib_state = IB_QPS_RTS;
+		break;
+	case C2_QP_STATE_CLOSING:
+		ib_state = IB_QPS_SQD;
+		break;
+	case C2_QP_STATE_ERROR:
+		ib_state = IB_QPS_ERR;
+		break;
+	case C2_QP_STATE_TERMINATE:
+		ib_state = IB_QPS_SQE;
+		break;
+	default:
+		ib_state = -1;
+		break;
+	}
+
+	return ib_state;
+}
+
+/*
+ * Return the printable name of an IB QP state, or a fixed placeholder
+ * string when @ib_state lies outside IB_QPS_RESET..IB_QPS_ERR.
+ */
+static const char *to_ib_state_str(int ib_state)
+{
+	static const char *names[] = {
+		"IB_QPS_RESET",
+		"IB_QPS_INIT",
+		"IB_QPS_RTR",
+		"IB_QPS_RTS",
+		"IB_QPS_SQD",
+		"IB_QPS_SQE",
+		"IB_QPS_ERR"
+	};
+
+	if (ib_state >= IB_QPS_RESET && ib_state <= IB_QPS_ERR)
+		return names[ib_state - IB_QPS_RESET];
+
+	return "<invalid IB QP state>";
+}
+
+/*
+ * Record a new state on @qp.  @c2_state is an adapter state; it is
+ * converted with to_ib_state() and the resulting value is what gets
+ * stored in qp->state.
+ */
+void c2_set_qp_state(struct c2_qp *qp, int c2_state)
+{
+	int ib_state;
+
+	ib_state = to_ib_state(c2_state);
+	pr_debug("%s: qp[%p] state modify %s --> %s\n",
+		 __FUNCTION__, qp,
+		 to_ib_state_str(qp->state), to_ib_state_str(ib_state));
+	qp->state = ib_state;
+}
+
+/*
+ * Value written (after cpu_to_be32) into QP-modify request fields that
+ * the caller is not setting; by its name it asks the adapter to leave
+ * that attribute unchanged -- confirm against the adapter interface.
+ */
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/*
+ * c2_qp_modify - ask the adapter to move a QP to a new state.
+ *
+ * @c2dev:     device the QP belongs to
+ * @qp:        QP to modify
+ * @attr:      requested attributes; only qp_state / cur_qp_state are used
+ * @attr_mask: IB_QP_STATE selects attr->qp_state, otherwise
+ *             IB_QP_CUR_STATE selects attr->cur_qp_state; if neither bit
+ *             is set nothing is sent and 0 is returned
+ *
+ * Builds a CCWR_QP_MODIFY request (ORD/IRD/queue depths are marked as
+ * "no change"), sends it and waits for the reply.  On success qp->state
+ * is updated to the requested IB state.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated or no
+ * reply buffer came back, -EINVAL for an unacceptable state, or the error
+ * from sending / waiting / the adapter reply.
+ *
+ * Every exit after vq_req_alloc() goes through bail0 so the request is
+ * always released (the invalid-state checks used to return directly and
+ * leaked it).
+ */
+int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+		 struct ib_qp_attr *attr, int attr_mask)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	unsigned long flags;
+	u8 next_state;
+	int err;
+
+	pr_debug("%s:%d qp=%p, %s --> %s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state),
+		to_ib_state_str(attr->qp_state));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	if (attr_mask & IB_QP_STATE) {
+		/* Ensure the state is valid */
+		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
+			/* vq_req is already allocated: free it via bail0 */
+			err = -EINVAL;
+			goto bail0;
+		}
+
+		wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
+
+		if (attr->qp_state == IB_QPS_ERR) {
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id && qp->state == IB_QPS_RTS) {
+				pr_debug("Generating CLOSE event for QP-->ERR, "
+					"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+				/* Generate a CLOSE event */
+				vq_req->cm_id = qp->cm_id;
+				vq_req->event = IW_CM_EVENT_CLOSE;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+		}
+		next_state =  attr->qp_state;
+
+	} else if (attr_mask & IB_QP_CUR_STATE) {
+
+		if (attr->cur_qp_state != IB_QPS_RTR &&
+		    attr->cur_qp_state != IB_QPS_RTS &&
+		    attr->cur_qp_state != IB_QPS_SQD &&
+		    attr->cur_qp_state != IB_QPS_SQE) {
+			/* vq_req is already allocated: free it via bail0 */
+			err = -EINVAL;
+			goto bail0;
+		}
+
+		wr.next_qp_state =
+		    cpu_to_be32(to_c2_state(attr->cur_qp_state));
+
+		next_state = attr->cur_qp_state;
+
+	} else {
+		err = 0;
+		goto bail0;
+	}
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		/* the send failed, so drop the reference taken above */
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	if (!err)
+		qp->state = next_state;
+#ifdef DEBUG
+	else
+		pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
+#endif
+	/*
+	 * If we're going to error and generating the event here, then
+	 * we need to remove the reference because there will be no
+	 * close event generated by the adapter
+	*/
+	spin_lock_irqsave(&qp->lock, flags);
+	if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+
+	pr_debug("%s:%d qp=%p, cur_state=%s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state));
+	return err;
+}
+
+/*
+ * c2_qp_set_read_limits - send a QP-modify request that sets only the
+ * ORD and IRD values; queue depths and state are marked "no change".
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated or no
+ * reply buffer came back, otherwise the error from sending, waiting, or
+ * the adapter reply.
+ */
+int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+			  int ord, int ird)
+{
+	struct c2wr_qp_modify_req modify_wr;
+	struct c2wr_qp_modify_rep *rep;
+	struct c2_vq_req *req;
+	int rc;
+
+	req = vq_req_alloc(c2dev);
+	if (req == NULL)
+		return -ENOMEM;
+
+	c2_wr_set_id(&modify_wr, CCWR_QP_MODIFY);
+	modify_wr.hdr.context = (unsigned long) req;
+	modify_wr.rnic_handle = c2dev->adapter_handle;
+	modify_wr.qp_handle = qp->adapter_handle;
+	modify_wr.ord = cpu_to_be32(ord);
+	modify_wr.ird = cpu_to_be32(ird);
+	modify_wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	modify_wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	modify_wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	/* take a reference on the request before handing it to the adapter */
+	vq_req_get(c2dev, req);
+
+	rc = vq_send_wr(c2dev, (union c2wr *) &modify_wr);
+	if (rc)
+		vq_req_put(c2dev, req);	/* send failed: drop that reference */
+	else
+		rc = vq_wait_for_reply(c2dev, req);
+
+	if (!rc) {
+		rep = (struct c2wr_qp_modify_rep *) (unsigned long)
+			req->reply_msg;
+		if (rep) {
+			rc = c2_errno(rep);
+			vq_repbuf_free(c2dev, rep);
+		} else {
+			rc = -ENOMEM;
+		}
+	}
+
+	vq_req_free(c2dev, req);
+	return rc;
+}
+
+/*
+ * destroy_qp - ask the adapter to destroy a QP.
+ *
+ * Sends a CCWR_QP_DESTROY request and waits for the reply.  If the QP has
+ * a cm_id and is in RTS when the request is built, a CLOSE event is
+ * attached to the request.  Once a reply has arrived, any cm_id still
+ * attached to the QP has its reference dropped and is cleared.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated or no
+ * reply buffer came back, otherwise the error from sending or waiting.
+ */
+static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_qp_destroy_req wr;
+	struct c2wr_qp_destroy_rep *reply;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * Allocate a verb request message
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Initialize the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_QP_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id && qp->state == IB_QPS_RTS) {
+		pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
+			"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+		/* Generate a CLOSE event */
+		vq_req->qp = qp;
+		vq_req->cm_id = qp->cm_id;
+		vq_req->event = IW_CM_EVENT_CLOSE;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		/* the send failed, so drop the reference taken above */
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	/* release the connection id reference still held by the QP, if any */
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Allocate an id for @qp in the device's QP idr table and store it in
+ * qp->qpn.  The search starts above qp_table.last, which is advanced on
+ * every attempt.  When the idr reports -EAGAIN, idr_pre_get() is called
+ * and the allocation is retried for as long as idr_pre_get() succeeds.
+ *
+ * Returns the result of the final idr_get_new_above() call.
+ */
+static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	int rc;
+
+	for (;;) {
+		spin_lock_irq(&c2dev->qp_table.lock);
+		rc = idr_get_new_above(&c2dev->qp_table.idr, qp,
+				       c2dev->qp_table.last++, &qp->qpn);
+		spin_unlock_irq(&c2dev->qp_table.lock);
+
+		if (rc != -EAGAIN)
+			break;
+		if (!idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL))
+			break;
+	}
+
+	return rc;
+}
+
+/*
+ * Remove @qpn from the device's QP idr table.
+ *
+ * The interrupt state is saved and restored rather than unconditionally
+ * re-enabled: c2_free_qp() calls this while holding a CQ lock taken with
+ * spin_lock_irq(), and a plain spin_unlock_irq() here would turn
+ * interrupts back on while that lock is still held.
+ */
+static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->qp_table.lock, flags);
+	idr_remove(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
+}
+
+/*
+ * Look up the QP registered under @qpn in the device's QP idr table.
+ * Returns whatever idr_find() yields for that id; no reference is taken
+ * on the QP here.
+ */
+struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
+{
+	spinlock_t *table_lock = &c2dev->qp_table.lock;
+	unsigned long irq_flags;
+	struct c2_qp *found;
+
+	spin_lock_irqsave(table_lock, irq_flags);
+	found = idr_find(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irqrestore(table_lock, irq_flags);
+
+	return found;
+}
+
+/*
+ * c2_alloc_qp - create a QP on the adapter and set up its host state.
+ *
+ * @c2dev:    device to create the QP on
+ * @pd:       protection domain whose pd_id is placed in the request
+ * @qp_attrs: creation attributes (CQs, queue depths, SGE limits)
+ * @qp:       caller-provided QP structure to fill in
+ *
+ * Steps: allocate a QP number, allocate the SQ/RQ shared pointers, send a
+ * CCWR_QP_CREATE request and wait for the reply, then ioremap the SQ and
+ * RQ message queues described by the reply and initialize both MQs.
+ *
+ * Returns 0 on success or a negative errno.  On failure the bailN labels
+ * undo the completed steps in reverse order.
+ */
+int c2_alloc_qp(struct c2_dev *c2dev,
+		struct c2_pd *pd,
+		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
+{
+	struct c2wr_qp_create_req wr;
+	struct c2wr_qp_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
+	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
+	unsigned long peer_pa;
+	u32 q_size, msg_size, mmap_size;
+	void __iomem *mmap;
+	int err;
+
+	err = c2_alloc_qpn(c2dev, qp);
+	if (err)
+		return err;
+	qp->ibqp.qp_num = qp->qpn;
+	qp->ibqp.qp_type = IB_QPT_RC;
+
+	/* Allocate the SQ and RQ shared pointers */
+	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->sq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->sq_mq.shared) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->rq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->rq_mq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	/* Allocate the verbs request */
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	/* Initialize the work request */
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_QP_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.sq_cq_handle = send_cq->adapter_handle;
+	wr.rq_cq_handle = recv_cq->adapter_handle;
+	/* each depth requested from the adapter is the caller's maximum plus one */
+	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
+	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
+	wr.srq_handle = 0;
+	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
+			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
+	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
+	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
+	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
+	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
+	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
+	wr.pd_id = pd->pd_id;
+	wr.user_context = (unsigned long) qp;
+
+	/* reference the request struct before handing it to the adapter */
+	vq_req_get(c2dev, vq_req);
+
+	/* Send the WR to the adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail3;
+	}
+
+	/* Wait for the verb reply  */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail3;
+	}
+
+	/* Process the reply */
+	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	if ((err = c2_wr_get_result(reply)) != 0) {
+		goto bail4;
+	}
+
+	/* Fill in the kernel QP struct */
+	atomic_set(&qp->refcount, 1);
+	qp->adapter_handle = reply->qp_handle;
+	qp->state = IB_QPS_RESET;
+	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+
+	/* Initialize the SQ MQ */
+	q_size = be32_to_cpu(reply->sq_depth);
+	msg_size = be32_to_cpu(reply->sq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail5;
+	}
+
+	c2_mq_req_init(&qp->sq_mq,
+		       be32_to_cpu(reply->sq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the RQ mq */
+	q_size = be32_to_cpu(reply->rq_depth);
+	msg_size = be32_to_cpu(reply->rq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail6;
+	}
+
+	c2_mq_req_init(&qp->rq_mq,
+		       be32_to_cpu(reply->rq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+	/* error unwind: each label undoes one completed step, newest first */
+      bail6:
+	iounmap(qp->sq_mq.peer);
+      bail5:
+	destroy_qp(c2dev, qp);
+      bail4:
+	vq_repbuf_free(c2dev, reply);
+      bail3:
+	vq_req_free(c2dev, vq_req);
+      bail2:
+	c2_free_mqsp(qp->rq_mq.shared);
+      bail1:
+	c2_free_mqsp(qp->sq_mq.shared);
+      bail0:
+	c2_free_qpn(c2dev, qp->qpn);
+	return err;
+}
+
+/*
+ * c2_free_qp - tear down a QP created by c2_alloc_qp().
+ *
+ * Removes the QP number from the lookup table, destroys the QP on the
+ * adapter, cleans its entries out of the CQ(s), releases the MQ mappings
+ * and shared pointers, then drops the initial reference and waits until
+ * the reference count reaches zero.
+ */
+void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_cq *send_cq = to_c2cq(qp->ibqp.send_cq);
+	struct c2_cq *recv_cq = to_c2cq(qp->ibqp.recv_cq);
+	int distinct_cqs = (send_cq != recv_cq);
+
+	/*
+	 * The CQ lock(s) are held across the table removal so that the CQ
+	 * polling code can do its QP lookup without taking a lock.
+	 */
+	spin_lock_irq(&send_cq->lock);
+	if (distinct_cqs)
+		spin_lock(&recv_cq->lock);
+
+	c2_free_qpn(c2dev, qp->qpn);
+
+	if (distinct_cqs)
+		spin_unlock(&recv_cq->lock);
+	spin_unlock_irq(&send_cq->lock);
+
+	/* Destroy the QP in the rnic... */
+	destroy_qp(c2dev, qp);
+
+	/* Mark any unreaped CQEs as null and void. */
+	c2_cq_clean(c2dev, qp, send_cq->cqn);
+	if (distinct_cqs)
+		c2_cq_clean(c2dev, qp, recv_cq->cqn);
+
+	/* Unmap both MQs, then hand the shared pointers back to the pool. */
+	iounmap(qp->sq_mq.peer);
+	iounmap(qp->rq_mq.peer);
+	c2_free_mqsp(qp->sq_mq.shared);
+	c2_free_mqsp(qp->rq_mq.shared);
+
+	/* Drop the reference set at allocation and wait for the rest. */
+	atomic_dec(&qp->refcount);
+	wait_event(qp->wait, !atomic_read(&qp->refcount));
+}
+
+/*
+ * Function: move_sgl
+ *
+ * Description:
+ * Copy the consumer's SGL into a work request message, converting each
+ * entry to big-endian.  Entries with a zero length are skipped.  Fails
+ * if the running total length would wrap past 2^32-1.
+ *
+ * IN:
+ * dst		- ptr to the work request message SGL memory.
+ * src		- ptr to the consumer's SGL memory.
+ * count	- number of entries in src.
+ *
+ * OUT:
+ * p_len	- total length of the copied entries.
+ * actual_count	- number of entries actually copied (non-zero length).
+ *
+ * Return:
+ * 0 on success, -EINVAL on length overflow (OUT values are then not
+ * written).
+ */
+static int
+move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
+	 u8 * actual_count)
+{
+	u32 total = 0;		/* running byte total */
+	u8 copied = 0;		/* entries written to dst so far */
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct ib_sge *sge = &src[i];
+
+		/*
+		 * Unsigned wrap check: if adding this entry makes the sum
+		 * smaller than the current total, it overflowed 32 bits.
+		 */
+		if (total + sge->length < total)
+			return -EINVAL;
+
+		/*
+		 * Bug: 1456 (as well as 1498 & 1643)
+		 * Skip over any sge's supplied with len=0
+		 */
+		if (!sge->length)
+			continue;
+
+		total += sge->length;
+		dst->stag = cpu_to_be32(sge->lkey);
+		dst->to = cpu_to_be64(sge->addr);
+		dst->length = cpu_to_be32(sge->length);
+		dst++;
+		copied++;
+	}
+
+	if (!copied) {
+		/*
+		 * Bug: 1476 (as well as 1498, 1456 and 1643)
+		 * Nothing was copied: zero the first SGL slot so the
+		 * firmware does not have to special-case an empty list.
+		 * Setting length=0 should be sufficient.
+		 */
+		dst->stag = 0;
+		dst->to = 0;
+		dst->length = 0;
+	}
+
+	*p_len = total;
+	*actual_count = copied;
+	return 0;
+}
+
+/*
+ * Function: c2_activity (private function)
+ *
+ * Description:
+ * Post an mq index to the host->adapter activity fifo by writing a hint
+ * word to the adapter hint register.
+ *
+ * IN:
+ * c2dev	- ptr to c2dev structure
+ * mq_index	- mq index to post
+ * shared	- value most recently written to shared
+ *
+ * OUT:
+ *
+ * Return:
+ * none
+ */
+static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
+{
+	/*
+	 * Poll the hint register until its top bit (FIFO full) clears,
+	 * yielding via schedule_timeout(0) between reads.  This isn't
+	 * perfect -- there is no synchronization among the clients of the
+	 * register, but in practice it prevents multiple CPUs from
+	 * hammering the bus with PCI RETRY.  Note that when this does
+	 * happen, the card cannot get on the bus and the card and system
+	 * hang in a deadlock -- thus the need for this code. [TOT]
+	 */
+	for (;;) {
+		if (!(readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000))
+			break;
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(0);
+	}
+
+	__raw_writel(C2_HINT_MAKE(mq_index, shared),
+		     c2dev->regs + PCI_BAR0_ADAPTER_HINT);
+}
+
+/*
+ * Function: qp_wr_post
+ *
+ * Description:
+ * Allocate a message slot from the MQ, copy the host-built WR into it
+ * and produce (post) the message.
+ *
+ * IN:
+ * q		- ptr to user MQ.
+ * wr		- ptr to host-copy of the WR.
+ * qp		- ptr to user qp
+ * size		- Number of bytes to post.  Assumed to be divisible by 4.
+ *
+ * OUT: none
+ *
+ * Return:
+ * 0 on success, -EINVAL if no message slot could be allocated.
+ */
+static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
+{
+	union c2wr *slot = c2_mq_alloc(q);
+
+	if (!slot)
+		return -EINVAL;
+#ifdef CCMSGMAGIC
+	((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+
+	/*
+	 * The WR header fields match those of the CQE, so mark the
+	 * result as pending here and save the adapter from doing it.
+	 */
+	c2_wr_set_result(wr, CCERR_PENDING);
+
+	/* Copy the finished WR into the queue slot, then post it. */
+	memcpy(slot, wr, size);
+	c2_mq_produce(q);
+
+	return 0;
+}
+
+
+/*
+ * c2_post_send - post a chain of send work requests to a QP.
+ *
+ * @ibqp:   QP to post to
+ * @ib_wr:  head of the work-request chain (linked through ->next)
+ * @bad_wr: on failure, set to the work request that was not posted
+ *
+ * Supports IB_WR_SEND, IB_WR_RDMA_WRITE and IB_WR_RDMA_READ; any other
+ * opcode fails with -EINVAL.  Each request is built in a host-side WR,
+ * copied into the send MQ, and announced to the adapter through the
+ * activity FIFO.  Processing stops at the first failing request.
+ *
+ * Returns 0 if every request was posted, otherwise a negative errno.
+ * *bad_wr is written on every failure path, including the up-front QP
+ * state check (which used to return without setting it).
+ */
+int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+		 struct ib_send_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+
+	u32 flags;
+	u32 tot_len;
+	u8 actual_sge_count;
+	u32 msg_size;
+
+	/* Posting is refused once the QP state is beyond RTS. */
+	if (qp->state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	while (ib_wr) {
+
+		flags = 0;
+		wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+			flags |= SQ_SIGNALED;
+		}
+
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED) {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+				msg_size = sizeof(struct c2wr_send_req);
+			} else {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+				msg_size = sizeof(struct c2wr_send_req);
+			}
+
+			wr.sqwr.send.remote_stag = 0;
+			msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
+			if (ib_wr->num_sge > qp->send_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_WRITE:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
+			msg_size = sizeof(struct c2wr_rdma_write_req) +
+			    (sizeof(struct c2_data_addr) * ib_wr->num_sge);
+			if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			wr.sqwr.rdma_write.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_write.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			err = move_sgl((struct c2_data_addr *)
+				       & (wr.sqwr.rdma_write.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_READ:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
+			msg_size = sizeof(struct c2wr_rdma_read_req);
+
+			/* iWARP only supports 1 sge for RDMA reads */
+			if (ib_wr->num_sge > 1) {
+				err = -EINVAL;
+				break;
+			}
+
+			/*
+			 * Move the local and remote stag/to/len into the WR.
+			 */
+			wr.sqwr.rdma_read.local_stag =
+			    cpu_to_be32(ib_wr->sg_list->lkey);
+			wr.sqwr.rdma_read.local_to =
+			    cpu_to_be64(ib_wr->sg_list->addr);
+			wr.sqwr.rdma_read.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_read.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			wr.sqwr.rdma_read.length =
+			    cpu_to_be32(ib_wr->sg_list->length);
+			break;
+		default:
+			/* error */
+			msg_size = 0;
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Store flags
+		 */
+		c2_wr_set_flags(&wr, flags);
+
+		/*
+		 * Post the puppy!
+		 */
+		err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO.
+		 */
+		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+      out:
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+/*
+ * c2_post_receive - post a chain of receive work requests to a QP.
+ *
+ * @ibqp:   QP to post to
+ * @ib_wr:  head of the work-request chain (linked through ->next)
+ * @bad_wr: on failure, set to the work request that was not posted
+ *
+ * Each request is built in a host-side WR, copied into the receive MQ,
+ * and announced to the adapter through the activity FIFO.  Processing
+ * stops at the first failing request.
+ *
+ * Returns 0 if every request was posted, otherwise a negative errno.
+ * *bad_wr is written on every failure path, including the up-front QP
+ * state check (which used to return without setting it).
+ */
+int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+		    struct ib_recv_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+
+	/* Posting is refused once the QP state is beyond RTS. */
+	if (qp->state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Try and post each work request
+	 */
+	while (ib_wr) {
+		u32 tot_len;
+		u8 actual_sge_count;
+
+		if (ib_wr->num_sge > qp->recv_sgl_depth) {
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Create local host-copy of the WR
+		 */
+		wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		c2_wr_set_id(&wr, CCWR_RECV);
+		c2_wr_set_flags(&wr, 0);
+
+		/* sge_count is limited to eight bits. */
+		BUG_ON(ib_wr->num_sge >= 256);
+		err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
+			       ib_wr->sg_list,
+			       ib_wr->num_sge, &tot_len, &actual_sge_count);
+		c2_wr_set_sge_count(&wr, actual_sge_count);
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		/* receive WRs are always posted at the queue's full message size */
+		err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO
+		 */
+		c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+      out:
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+/* Prepare the per-device QP lookup table: its idr and the lock guarding it. */
+void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+{
+	idr_init(&c2dev->qp_table.idr);
+	spin_lock_init(&c2dev->qp_table.lock);
+}
+
+/* Release the idr backing the per-device QP lookup table. */
+void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+{
+	idr_destroy(&c2dev->qp_table.idr);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
new file mode 100644
index 0000000000000..1c3c9d65ecea6
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -0,0 +1,663 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+
+#include <linux/route.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_vq.h"
+
+/* Device capability constants */
+#define C2_MIN_PAGESIZE  1024	/* page_size_cap is ~(this - 1) */
+
+#define C2_MAX_MRS       32768
+#define C2_MAX_QPS       16000
+#define C2_MAX_WQE_SZ    256
+#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)	/* 128 KiB / 256 = 512 */
+#define C2_MAX_SGES      4	/* reported as props->max_sge */
+#define C2_MAX_SGE_RD    1	/* reported as props->max_sge_rd */
+#define C2_MAX_CQS       32768	/* also the number of qptr_array slots */
+#define C2_MAX_CQES      4096
+#define C2_MAX_PDS       16384
+
+/*
+ * Send the adapter INIT message to the amso1100.
+ *
+ * The request carries the DMA addresses of the hint count, of the
+ * Q0/Q1/Q2 shared pointers and of the Q1/Q2 host message pools.
+ * Returns the result of vq_send_wr().
+ */
+static int c2_adapter_init(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req init_wr;
+
+	memset(&init_wr, 0, sizeof(init_wr));
+	c2_wr_set_id(&init_wr, CCWR_INIT);
+	init_wr.hdr.context = 0;
+	init_wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
+	init_wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
+	init_wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
+	init_wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
+	init_wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
+	init_wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
+
+	return vq_send_wr(c2dev, (union c2wr *) &init_wr);
+}
+
+/*
+ * Send the adapter TERM message to the amso1100 and clear c2dev->init
+ */
+static void c2_adapter_term(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;	/* the TERM request reuses this layout */
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_TERM);
+	wr.hdr.context = 0;
+
+	/* Post the term message; the send result is not checked */
+	vq_send_wr(c2dev, (union c2wr *) & wr);
+	c2dev->init = 0;
+
+	return;
+}
+
+/*
+ * Query the adapter and fill in *props.  Returns 0 or an error code.
+ */
+static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_query_req wr;
+	struct c2wr_rnic_query_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+	err = vq_send_wr(c2dev, (union c2wr *) &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {		/* no reply buffer: nothing to parse or free */
+		err = -ENOMEM;
+		goto bail1;
+	}
+	err = c2_errno(reply);
+	if (err)
+		goto bail2;
+	/* fw_ver layout: major in bits 63-32, minor in 31-16, patch in 15-0 */
+	props->fw_ver =
+		((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
+		((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
+		(be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
+	memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
+	props->max_mr_size         = 0xFFFFFFFF;
+	props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
+	props->vendor_id           = be32_to_cpu(reply->vendor_id);
+	props->vendor_part_id      = be32_to_cpu(reply->part_number);
+	props->hw_ver              = be32_to_cpu(reply->hw_version);
+	props->max_qp              = be32_to_cpu(reply->max_qps);
+	props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
+	props->device_cap_flags    = c2dev->device_cap_flags;
+	props->max_sge             = C2_MAX_SGES;
+	props->max_sge_rd          = C2_MAX_SGE_RD;
+	props->max_cq              = be32_to_cpu(reply->max_cqs);
+	props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
+	props->max_mr              = be32_to_cpu(reply->max_mrs);
+	props->max_pd              = be32_to_cpu(reply->max_pds);
+	props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
+	props->max_ee_rd_atom      = 0;
+	props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
+	props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
+	props->max_ee_init_rd_atom = 0;
+	props->atomic_cap          = IB_ATOMIC_NONE;
+	props->max_ee              = 0;
+	props->max_rdd             = 0;
+	props->max_mw              = be32_to_cpu(reply->max_mws);
+	props->max_raw_ipv6_qp     = 0;
+	props->max_raw_ethy_qp     = 0;
+	props->max_mcast_grp       = 0;
+	props->max_mcast_qp_attach = 0;
+	props->max_total_mcast_qp_attach = 0;
+	props->max_ah              = 0;
+	props->max_fmr             = 0;
+	props->max_map_per_fmr     = 0;
+	props->max_srq             = 0;
+	props->max_srq_wr          = 0;
+	props->max_srq_sge         = 0;
+	props->max_pkeys           = 0;
+	props->local_ca_ack_delay  = 0;
+
+ bail2:
+	vq_repbuf_free(c2dev, reply);
+
+ bail1:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Add an IP address to the RNIC interface: send a CCWR_RNIC_SETCONFIG
+ * request (option C2_CFG_ADD_ADDR) and wait for the adapter's reply.
+ */
+int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2wr_rnic_setconfig_req *req;
+	struct c2wr_rnic_setconfig_rep *rep;
+	struct c2_vq_req *vq_req;
+	struct c2_netaddr addr;
+	int rc;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL)
+		return -ENOMEM;
+
+	req = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (req == NULL) {
+		rc = -ENOMEM;
+		goto free_vq_req;
+	}
+
+	/* The address payload travels in the request's data area */
+	addr.ip_addr = inaddr;
+	addr.netmask = inmask;
+	addr.mtu = 0;
+
+	c2_wr_set_id(req, CCWR_RNIC_SETCONFIG);
+	req->hdr.context = (unsigned long) vq_req;
+	req->rnic_handle = c2dev->adapter_handle;
+	req->option = cpu_to_be32(C2_CFG_ADD_ADDR);
+	memcpy(req->data, &addr, sizeof(addr));
+
+	/* Take the second reference before the request is posted */
+	vq_req_get(c2dev, vq_req);
+
+	rc = vq_send_wr(c2dev, (union c2wr *) req);
+	if (rc) {
+		vq_req_put(c2dev, vq_req);
+		goto free_req;
+	}
+
+	rc = vq_wait_for_reply(c2dev, vq_req);
+	if (rc)
+		goto free_req;
+
+	rep = (void *) (unsigned long) vq_req->reply_msg;
+	if (rep == NULL) {
+		rc = -ENOMEM;
+		goto free_req;
+	}
+
+	rc = c2_errno(rep);
+	vq_repbuf_free(c2dev, rep);
+
+free_req:
+	kfree(req);
+free_vq_req:
+	vq_req_free(c2dev, vq_req);
+	return rc;
+}
+
+/*
+ * Delete an IP address from the RNIC interface: send a CCWR_RNIC_SETCONFIG
+ * request (option C2_CFG_DEL_ADDR) and wait for the adapter's reply.
+ */
+int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2wr_rnic_setconfig_req *req;
+	struct c2wr_rnic_setconfig_rep *rep;
+	struct c2_vq_req *vq_req;
+	struct c2_netaddr addr;
+	int rc;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL)
+		return -ENOMEM;
+
+	req = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (req == NULL) {
+		rc = -ENOMEM;
+		goto free_vq_req;
+	}
+
+	/* The address payload travels in the request's data area */
+	addr.ip_addr = inaddr;
+	addr.netmask = inmask;
+	addr.mtu = 0;
+
+	c2_wr_set_id(req, CCWR_RNIC_SETCONFIG);
+	req->hdr.context = (unsigned long) vq_req;
+	req->rnic_handle = c2dev->adapter_handle;
+	req->option = cpu_to_be32(C2_CFG_DEL_ADDR);
+	memcpy(req->data, &addr, sizeof(addr));
+
+	/* Take the second reference before the request is posted */
+	vq_req_get(c2dev, vq_req);
+
+	rc = vq_send_wr(c2dev, (union c2wr *) req);
+	if (rc) {
+		vq_req_put(c2dev, vq_req);
+		goto free_req;
+	}
+
+	rc = vq_wait_for_reply(c2dev, vq_req);
+	if (rc)
+		goto free_req;
+
+	rep = (void *) (unsigned long) vq_req->reply_msg;
+	if (rep == NULL) {
+		rc = -ENOMEM;
+		goto free_req;
+	}
+
+	rc = c2_errno(rep);
+	vq_repbuf_free(c2dev, rep);
+
+free_req:
+	kfree(req);
+free_vq_req:
+	vq_req_free(c2dev, vq_req);
+	return rc;
+}
+
+/*
+ * Open a single RNIC instance to use with all
+ * low level openib calls.
+ *
+ * Sends CCWR_RNIC_OPEN and, when the adapter reports success, stores the
+ * returned handle in c2dev->adapter_handle.  Returns 0 or an error code.
+ */
+static int c2_rnic_open(struct c2_dev *c2dev)
+{
+	struct c2wr_rnic_open_rep *rep;
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	int rc;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
+	wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
+	wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
+	wr.rnic_open.req.port_num = cpu_to_be16(0);
+	wr.rnic_open.req.user_context = (unsigned long) c2dev;
+
+	/* Take the second reference before the request is posted */
+	vq_req_get(c2dev, vq_req);
+
+	rc = vq_send_wr(c2dev, &wr);
+	if (rc) {
+		vq_req_put(c2dev, vq_req);
+		goto out_req;
+	}
+
+	rc = vq_wait_for_reply(c2dev, vq_req);
+	if (rc)
+		goto out_req;
+
+	rep = (void *) (unsigned long) vq_req->reply_msg;
+	if (!rep) {
+		rc = -ENOMEM;
+		goto out_req;
+	}
+
+	/* Only a successful open yields a handle worth keeping */
+	rc = c2_errno(rep);
+	if (rc == 0)
+		c2dev->adapter_handle = rep->rnic_handle;
+
+	vq_repbuf_free(c2dev, rep);
+out_req:
+	vq_req_free(c2dev, vq_req);
+	return rc;
+}
+
+/*
+ * Close the RNIC instance.
+ *
+ * Sends CCWR_RNIC_CLOSE for c2dev->adapter_handle and clears the handle
+ * when the adapter reports success.  Returns 0 or an error code.
+ */
+static int c2_rnic_close(struct c2_dev *c2dev)
+{
+	struct c2wr_rnic_close_rep *rep;
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	int rc;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
+	wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
+	wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
+
+	/* Take the second reference before the request is posted */
+	vq_req_get(c2dev, vq_req);
+
+	rc = vq_send_wr(c2dev, &wr);
+	if (rc) {
+		vq_req_put(c2dev, vq_req);
+		goto out_req;
+	}
+
+	rc = vq_wait_for_reply(c2dev, vq_req);
+	if (rc)
+		goto out_req;
+
+	rep = (void *) (unsigned long) vq_req->reply_msg;
+	if (!rep) {
+		rc = -ENOMEM;
+		goto out_req;
+	}
+
+	/* The cached handle is dropped only after a successful close */
+	rc = c2_errno(rep);
+	if (rc == 0)
+		c2dev->adapter_handle = 0;
+
+	vq_repbuf_free(c2dev, rep);
+out_req:
+	vq_req_free(c2dev, vq_req);
+	return rc;
+}
+
+/*
+ * Called by c2_probe to initialize the RNIC. This principally
+ * involves initializing the various limits and resource pools that
+ * comprise the RNIC instance.  Returns 0, or an error code after
+ * unwinding through the bail labels at the end of the function.
+ */
+int c2_rnic_init(struct c2_dev *c2dev)
+{
+	int err;
+	u32 qsize, msgsize;
+	void *q1_pages;
+	void *q2_pages;
+	void __iomem *mmio_regs;
+
+	/* Device capabilities */
+	c2dev->device_cap_flags =
+	    (IB_DEVICE_RESIZE_MAX_WR |
+	     IB_DEVICE_CURR_QP_STATE_MOD |
+	     IB_DEVICE_SYS_IMAGE_GUID |
+	     IB_DEVICE_ZERO_STAG |
+	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+	/* Allocate the qptr_array */
+	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+	if (!c2dev->qptr_array)
+		return -ENOMEM;
+
+	/* Initialize the qptr_array */
+	memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
+	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
+	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
+
+	/* Initialize data structures */
+	init_waitqueue_head(&c2dev->req_vq_wo);
+	spin_lock_init(&c2dev->vqlock);
+	spin_lock_init(&c2dev->lock);
+
+	/* Allocate MQ shared pointer pool for kernel clients. User
+	 * mode client pools are hung off the user context
+	 */
+	err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
+	if (err)
+		goto bail0;
+
+	/* Allocate shared pointers for Q0, Q1, and Q2 from
+	 * the shared pointer pool.
+	 */
+
+	c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->hint_count_dma,
+					     GFP_KERNEL);
+	c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->req_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->rep_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					  &c2dev->aeq.shared_dma, GFP_KERNEL);
+	if (!c2dev->hint_count || !c2dev->req_vq.shared ||
+	    !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	mmio_regs = c2dev->kva;
+	/* Initialize the Verbs Request Queue */
+	c2_mq_req_init(&c2dev->req_vq, 0,
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the Verbs Reply Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+	q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q1_pages) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+						(void *)q1_pages, qsize * msgsize,
+						DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+	pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
+		 (u64)c2dev->rep_vq.host_dma);
+	c2_mq_rep_init(&c2dev->rep_vq,
+		   1,
+		   qsize,
+		   msgsize,
+		   q1_pages,
+		   mmio_regs +
+		   be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
+		   C2_MQ_HOST_TARGET);
+
+	/* Initialize the Asynchronous Event Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+	q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q2_pages) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+	c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+					     (void *)q2_pages, qsize * msgsize,
+					     DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q2_pages,
+		 (u64)c2dev->aeq.host_dma);
+	c2_mq_rep_init(&c2dev->aeq,
+		       2,
+		       qsize,
+		       msgsize,
+		       q2_pages,
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
+		       C2_MQ_HOST_TARGET);
+
+	/* Initialize the verbs request allocator */
+	err = vq_init(c2dev);
+	if (err)
+		goto bail3;
+
+	/* Enable interrupts on the adapter */
+	writel(0, c2dev->regs + C2_IDIS);
+
+	/* create the WR init message */
+	err = c2_adapter_init(c2dev);
+	if (err)
+		goto bail4;
+	c2dev->init++;
+
+	/* open an adapter instance */
+	err = c2_rnic_open(c2dev);
+	if (err)
+		goto bail4;
+
+	/* Cache the adapter limits; keep the error so the caller sees it */
+	err = c2_rnic_query(c2dev, &c2dev->props);
+	if (err)
+		goto bail5;
+
+	/* Initialize the PD pool */
+	err = c2_init_pd_table(c2dev);
+	if (err)
+		goto bail5;
+
+	/* Initialize the QP pool */
+	c2_init_qp_table(c2dev);
+	return 0;
+
+      bail5:
+	c2_rnic_close(c2dev);
+      bail4:
+	vq_term(c2dev);
+      bail3:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+			 DMA_FROM_DEVICE);
+	kfree(q2_pages);
+      bail2:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+			 DMA_FROM_DEVICE);
+	kfree(q1_pages);
+      bail1:
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+      bail0:
+	vfree(c2dev->qptr_array);
+
+	return err;
+}
+
+/*
+ * Called by c2_remove to clean up the RNIC resources set up by c2_rnic_init.
+ */
+void c2_rnic_term(struct c2_dev *c2dev)
+{
+
+	/* Close the open adapter instance */
+	c2_rnic_close(c2dev);
+
+	/* Send the TERM message to the adapter */
+	c2_adapter_term(c2dev);
+
+	/* Disable interrupts on the adapter */
+	writel(1, c2dev->regs + C2_IDIS);
+
+	/* Free the QP pool */
+	c2_cleanup_qp_table(c2dev);
+
+	/* Free the PD pool */
+	c2_cleanup_pd_table(c2dev);
+
+	/* Free the verbs request allocator */
+	vq_term(c2dev);
+
+	/* Unmap and free the asynchronous event queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->aeq.msg_pool.host);
+
+	/* Unmap and free the verbs reply queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->rep_vq.msg_pool.host);
+
+	/* Free the MQ shared pointer pool */
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+
+	/* Free the qptr_array */
+	vfree(c2dev->qptr_array);
+
+	return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h
new file mode 100644
index 0000000000000..6ee4aa92d8753
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_status.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef	_C2_STATUS_H_
+#define _C2_STATUS_H_
+
+/*
+ * Verbs Status Codes
+ */
+enum c2_status {
+	C2_OK = 0,		/* This must be zero */
+	CCERR_INSUFFICIENT_RESOURCES = 1,
+	CCERR_INVALID_MODIFIER = 2,
+	CCERR_INVALID_MODE = 3,
+	CCERR_IN_USE = 4,
+	CCERR_INVALID_RNIC = 5,
+	CCERR_INTERRUPTED_OPERATION = 6,
+	CCERR_INVALID_EH = 7,
+	CCERR_INVALID_CQ = 8,
+	CCERR_CQ_EMPTY = 9,
+	CCERR_NOT_IMPLEMENTED = 10,
+	CCERR_CQ_DEPTH_TOO_SMALL = 11,
+	CCERR_PD_IN_USE = 12,
+	CCERR_INVALID_PD = 13,
+	CCERR_INVALID_SRQ = 14,
+	CCERR_INVALID_ADDRESS = 15,
+	CCERR_INVALID_NETMASK = 16,
+	CCERR_INVALID_QP = 17,
+	CCERR_INVALID_QP_STATE = 18,
+	CCERR_TOO_MANY_WRS_POSTED = 19,
+	CCERR_INVALID_WR_TYPE = 20,
+	CCERR_INVALID_SGL_LENGTH = 21,
+	CCERR_INVALID_SQ_DEPTH = 22,
+	CCERR_INVALID_RQ_DEPTH = 23,
+	CCERR_INVALID_ORD = 24,
+	CCERR_INVALID_IRD = 25,
+	CCERR_QP_ATTR_CANNOT_CHANGE = 26,
+	CCERR_INVALID_STAG = 27,
+	CCERR_QP_IN_USE = 28,
+	CCERR_OUTSTANDING_WRS = 29,
+	CCERR_STAG_IN_USE = 30,
+	CCERR_INVALID_STAG_INDEX = 31,
+	CCERR_INVALID_SGL_FORMAT = 32,
+	CCERR_ADAPTER_TIMEOUT = 33,
+	CCERR_INVALID_CQ_DEPTH = 34,
+	CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
+	CCERR_INVALID_EP = 36,
+	CCERR_MR_IN_USE = CCERR_STAG_IN_USE,	/* alias of 30; 37 is unassigned */
+	CCERR_FLUSHED = 38,
+	CCERR_INVALID_WQE = 39,
+	CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
+	CCERR_REMOTE_TERMINATION_ERROR = 41,
+	CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
+	CCERR_ACCESS_VIOLATION = 43,
+	CCERR_INVALID_PD_ID = 44,
+	CCERR_WRAP_ERROR = 45,
+	CCERR_INV_STAG_ACCESS_ERROR = 46,
+	CCERR_ZERO_RDMA_READ_RESOURCES = 47,
+	CCERR_QP_NOT_PRIVILEGED = 48,
+	CCERR_STAG_STATE_NOT_INVALID = 49,
+	CCERR_INVALID_PAGE_SIZE = 50,
+	CCERR_INVALID_BUFFER_SIZE = 51,
+	CCERR_INVALID_PBE = 52,
+	CCERR_INVALID_FBO = 53,
+	CCERR_INVALID_LENGTH = 54,
+	CCERR_INVALID_ACCESS_RIGHTS = 55,
+	CCERR_PBL_TOO_BIG = 56,
+	CCERR_INVALID_VA = 57,
+	CCERR_INVALID_REGION = 58,
+	CCERR_INVALID_WINDOW = 59,
+	CCERR_TOTAL_LENGTH_TOO_BIG = 60,
+	CCERR_INVALID_QP_ID = 61,
+	CCERR_ADDR_IN_USE = 62,
+	CCERR_ADDR_NOT_AVAIL = 63,
+	CCERR_NET_DOWN = 64,
+	CCERR_NET_UNREACHABLE = 65,
+	CCERR_CONN_ABORTED = 66,
+	CCERR_CONN_RESET = 67,
+	CCERR_NO_BUFS = 68,
+	CCERR_CONN_TIMEDOUT = 69,
+	CCERR_CONN_REFUSED = 70,
+	CCERR_HOST_UNREACHABLE = 71,
+	CCERR_INVALID_SEND_SGL_DEPTH = 72,
+	CCERR_INVALID_RECV_SGL_DEPTH = 73,
+	CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
+	CCERR_INSUFFICIENT_PRIVILEGES = 75,
+	CCERR_STACK_ERROR = 76,
+	CCERR_INVALID_VERSION = 77,
+	CCERR_INVALID_MTU = 78,
+	CCERR_INVALID_IMAGE = 79,
+	CCERR_PENDING = 98,	/* not an error; used internally by adapter */
+	CCERR_DEFER = 99,	/* not an error; used internally by adapter */
+	CCERR_FAILED_WRITE = 100,
+	CCERR_FAILED_ERASE = 101,
+	CCERR_FAILED_VERIFICATION = 102,
+	CCERR_NOT_FOUND = 103,
+
+};
+
+/*
+ * CCAE_ACTIVE_CONNECT_RESULTS status result codes (aliases of c2_status).
+ */
+enum c2_connect_status {
+	C2_CONN_STATUS_SUCCESS = C2_OK,
+	C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
+	C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
+	C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
+	C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
+	C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
+	C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
+	C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
+	C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
+	C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,	/* shares the reset code */
+	C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
+};
+
+/*
+ * Flash programming status codes; every non-zero value is a single bit.
+ */
+enum c2_flash_status {
+	C2_FLASH_STATUS_SUCCESS = 0x0000,
+	C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
+	C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
+	C2_FLASH_STATUS_ECLBS = 0x0400,
+	C2_FLASH_STATUS_PSLBS = 0x0800,
+	C2_FLASH_STATUS_VPENS = 0x1000,
+};
+
+#endif				/* _C2_STATUS_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h
new file mode 100644
index 0000000000000..7e9e7ad654670
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_user.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_USER_H
+#define C2_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct c2_alloc_ucontext_resp {	/* two __u32: 8 bytes, no padding */
+	__u32 qp_tab_size;
+	__u32 uarc_size;
+};
+
+struct c2_alloc_pd_resp {
+	__u32 pdn;
+	__u32 reserved;		/* explicit pad; brings the struct to 8 bytes */
+};
+
+struct c2_create_cq {		/* 32 bytes in total */
+	__u32 lkey;
+	__u32 pdn;
+	__u64 arm_db_page;	/* at offset 8, after the two __u32 fields */
+	__u64 set_db_page;
+	__u32 arm_db_index;
+	__u32 set_db_index;
+};
+
+struct c2_create_cq_resp {
+	__u32 cqn;
+	__u32 reserved;		/* explicit pad; brings the struct to 8 bytes */
+};
+
+struct c2_create_qp {		/* 32 bytes in total */
+	__u32 lkey;
+	__u32 reserved;		/* explicit pad so sq_db_page sits at offset 8 */
+	__u64 sq_db_page;
+	__u64 rq_db_page;
+	__u32 sq_db_index;
+	__u32 rq_db_index;
+};
+
+#endif				/* C2_USER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
new file mode 100644
index 0000000000000..40caeb5f41b46
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "c2_vq.h"
+#include "c2_provider.h"
+
+/*
+ * Verbs Request Objects:
+ *
+ * VQ Request Objects are allocated by the kernel verbs handlers.
+ * They contain a wait object, a refcnt, an atomic bool indicating that the
+ * adapter has replied, and a copy of the verb reply work request.
+ * A pointer to the VQ Request Object is passed down in the context
+ * field of the work request message, and reflected back by the adapter
+ * in the verbs reply message.  The function handle_vq() in the interrupt
+ * path will use this pointer to:
+ * 	1) append a copy of the verbs reply message
+ * 	2) mark that the reply is ready
+ * 	3) wake up the kernel verbs handler blocked awaiting the reply.
+ *
+ *
+ * The kernel verbs handlers do a "get" to put a 2nd reference on the
+ * VQ Request object.  If the kernel verbs handler exits before the adapter
+ * can respond, this extra reference will keep the VQ Request object around
+ * until the adapter's reply can be processed.  The reason we need this is
+ * because a pointer to this object is stuffed into the context field of
+ * the verbs work request message, and reflected back in the reply message.
+ * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
+ * kernel verb handler that is blocked awaiting the verb reply.
+ * So handle_vq() will do a "put" on the object when it's done accessing it.
+ * NOTE:  If we guarantee that the kernel verb handler will never bail before
+ *        getting the reply, then we don't need these refcnts.
+ *
+ *
+ * VQ Request objects are freed by the kernel verbs handlers only
+ * after the verb has been processed, or when the adapter fails and
+ * does not reply.
+ *
+ *
+ * Verbs Reply Buffers:
+ *
+ * VQ Reply bufs are local host memory copies of an outstanding Verb
+ * Request reply message.  They are always allocated by the kernel
+ * verbs handlers, and _may_ be
+ * freed by either the kernel verbs handler -or- the interrupt handler.  The
+ * kernel verbs handler _must_ free the repbuf, then free the vq request object
+ * in that order.
+ */
+
+/*
+ * vq_init - create the per-device slab cache for verbs reply buffers
+ * (rep_vq.msg_size bytes each).  Returns 0 or -ENOMEM.
+ */
+int vq_init(struct c2_dev *c2dev)
+{
+	sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
+		(char) ('0' + c2dev->devnum));
+	c2dev->host_msg_cache = kmem_cache_create(c2dev->vq_cache_name,
+			c2dev->rep_vq.msg_size, 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	return c2dev->host_msg_cache ? 0 : -ENOMEM;
+}
+
+void vq_term(struct c2_dev *c2dev)
+{
+	kmem_cache_destroy(c2dev->host_msg_cache);	/* cache made by vq_init() */
+}
+
+/* vq_req_alloc - allocate a VQ Request Object and initialize it.
+ * Returns NULL when the allocation fails; otherwise the object starts
+ * with refcnt 1, no reply buffer and reply_ready cleared.
+ */
+struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *req = kmalloc(sizeof(*req), GFP_KERNEL);
+
+	if (!req)
+		return NULL;
+	atomic_set(&req->refcnt, 1);
+	atomic_set(&req->reply_ready, 0);
+	init_waitqueue_head(&req->wait_object);
+	req->reply_msg = 0;
+	req->event = 0;
+	req->cm_id = NULL;
+	req->qp = NULL;
+	return req;
+}
+
+
+/* vq_req_free - drop the verbs handler's reference on a VQ Request Object.
+ * The handler must already have freed the VQ Reply Buffer if it existed.
+ */
+void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	r->reply_msg = 0;
+	if (!atomic_dec_and_test(&r->refcnt))
+		return;
+	kfree(r);
+}
+
+/* vq_req_get - take an additional reference on a VQ Request Object.
+ * Done only in the kernel verbs handlers, before posting the request.
+ */
+void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	atomic_inc(&r->refcnt);
+}
+
+
+/* vq_req_put - drop a reference and potentially free a VQ Request Object.
+ *
+ * handle_vq() calls this on the interrupt path when it is done
+ * processing a verb reply message; the kernel verbs handlers also
+ * call it to give back their extra reference when vq_send_wr() fails.
+ *
+ * If this drops the last reference (for instance because the verbs
+ * handler has already bailed), the VQ Request object is freed _and_
+ * so is the VQ Reply Buffer if one is still attached.
+ */
+void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	if (!atomic_dec_and_test(&r->refcnt))
+		return;
+
+	if (r->reply_msg != 0)
+		vq_repbuf_free(c2dev, (void *) (unsigned long) r->reply_msg);
+	kfree(r);
+}
+
+
+/*
+ * vq_repbuf_alloc - allocate a VQ Reply Buffer from host_msg_cache.
+ */
+void *vq_repbuf_alloc(struct c2_dev *c2dev)
+{
+	return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC);
+}
+
+/*
+ * vq_send_wr - post a verbs request message to the Verbs Request Queue.
+ * If a message is not available in the MQ, then block until one is available.
+ * Returns 0 once posted, or -EINTR if a signal is pending while waiting.
+ * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
+ * When the adapter drains the Verbs Request Queue, it inserts MQ index 0
+ * in to the adapter->host activity fifo and interrupts the host.
+ */
+int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
+{
+	void *msg;
+	wait_queue_t __wait;
+
+	/*
+	 * grab adapter vq lock
+	 */
+	spin_lock(&c2dev->vqlock);
+
+	/*
+	 * allocate msg
+	 */
+	msg = c2_mq_alloc(&c2dev->req_vq);
+
+	/*
+	 * If we cannot get a msg, then we'll wait.  The lock is dropped
+	 * while sleeping.  When messages are available, the int handler
+	 * will wake_up() any waiters.
+	 */
+	while (msg == NULL) {
+		pr_debug("%s:%d no available msg in VQ, waiting...\n",
+		       __FUNCTION__, __LINE__);
+		init_waitqueue_entry(&__wait, current);
+		add_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_unlock(&c2dev->vqlock);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!c2_mq_full(&c2dev->req_vq)) {
+				break;
+			}
+			if (!signal_pending(current)) {
+				schedule_timeout(1 * HZ);	/* 1 second... */
+				continue;
+			}
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+			return -EINTR;	/* signal pending; vqlock is not held */
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_lock(&c2dev->vqlock);
+		msg = c2_mq_alloc(&c2dev->req_vq);
+	}
+
+	/*
+	 * copy wr into adapter msg.  NOTE(review): msg_size bytes are read
+	 * from wr -- confirm every caller's request buffer is that large
+	 */
+	memcpy(msg, wr, c2dev->req_vq.msg_size);
+
+	/*
+	 * post msg
+	 */
+	c2_mq_produce(&c2dev->req_vq);
+
+	/*
+	 * release adapter vq lock
+	 */
+	spin_unlock(&c2dev->vqlock);
+	return 0;
+}
+
+
+/*
+ * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
+ * Returns 0 once reply_ready is set, or -ETIMEDOUT after 60 seconds.
+ */
+int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
+{
+	long left;
+
+	left = wait_event_timeout(req->wait_object,
+				  atomic_read(&req->reply_ready), 60*HZ);
+	return left ? 0 : -ETIMEDOUT;
+}
+
+/*
+ * vq_repbuf_free - Free a Verbs Reply Buffer back to host_msg_cache.
+ */
+void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
+{
+	kmem_cache_free(c2dev->host_msg_cache, reply);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h
new file mode 100644
index 0000000000000..33805627a6074
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_VQ_H_
+#define _C2_VQ_H_
+#include <linux/sched.h>
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_provider.h"
+
+struct c2_vq_req {
+	u64 reply_msg;		/* ptr to reply msg, carried as a u64 */
+	wait_queue_head_t wait_object;	/* vq_wait_for_reply() sleeps here */
+	atomic_t reply_ready;	/* nonzero once the reply is ready */
+	atomic_t refcnt;	/* used to cancel WRs... */
+	int event;
+	struct iw_cm_id *cm_id;
+	struct c2_qp *qp;
+};
+
+extern int vq_init(struct c2_dev *c2dev);
+extern void vq_term(struct c2_dev *c2dev);
+
+extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
+extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
+
+extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
+extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
+
+extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
+#endif				/* _C2_VQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
new file mode 100644
index 0000000000000..3ec6c43bb0efb
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_WR_H_
+#define _C2_WR_H_
+
+#ifdef CCDEBUG
+#define CCWR_MAGIC		0xb07700b0
+#endif
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/* Maximum allowed size in bytes of private_data exchange
+ * on connect.
+ */
+#define C2_MAX_PRIVATE_DATA_SIZE 200
+
+/*
+ * These types are shared among the adapter, host, and CCIL consumer.
+ */
+enum c2_cq_notification_type {
+	C2_CQ_NOTIFICATION_TYPE_NONE = 1,
+	C2_CQ_NOTIFICATION_TYPE_NEXT,
+	C2_CQ_NOTIFICATION_TYPE_NEXT_SE
+};
+
+enum c2_setconfig_cmd {
+	C2_CFG_ADD_ADDR = 1,
+	C2_CFG_DEL_ADDR = 2,
+	C2_CFG_ADD_ROUTE = 3,
+	C2_CFG_DEL_ROUTE = 4
+};
+
+enum c2_getconfig_cmd {
+	C2_GETCONFIG_ROUTES = 1,
+	C2_GETCONFIG_ADDRS
+};
+
+/*
+ *  CCIL Work Request Identifiers
+ */
+enum c2wr_ids {
+	CCWR_RNIC_OPEN = 1,
+	CCWR_RNIC_QUERY,
+	CCWR_RNIC_SETCONFIG,
+	CCWR_RNIC_GETCONFIG,
+	CCWR_RNIC_CLOSE,
+	CCWR_CQ_CREATE,
+	CCWR_CQ_QUERY,
+	CCWR_CQ_MODIFY,
+	CCWR_CQ_DESTROY,
+	CCWR_QP_CONNECT,
+	CCWR_PD_ALLOC,
+	CCWR_PD_DEALLOC,
+	CCWR_SRQ_CREATE,
+	CCWR_SRQ_QUERY,
+	CCWR_SRQ_MODIFY,
+	CCWR_SRQ_DESTROY,
+	CCWR_QP_CREATE,
+	CCWR_QP_QUERY,
+	CCWR_QP_MODIFY,
+	CCWR_QP_DESTROY,
+	CCWR_NSMR_STAG_ALLOC,
+	CCWR_NSMR_REGISTER,
+	CCWR_NSMR_PBL,
+	CCWR_STAG_DEALLOC,
+	CCWR_NSMR_REREGISTER,
+	CCWR_SMR_REGISTER,
+	CCWR_MR_QUERY,
+	CCWR_MW_ALLOC,
+	CCWR_MW_QUERY,
+	CCWR_EP_CREATE,
+	CCWR_EP_GETOPT,
+	CCWR_EP_SETOPT,
+	CCWR_EP_DESTROY,
+	CCWR_EP_BIND,
+	CCWR_EP_CONNECT,
+	CCWR_EP_LISTEN,
+	CCWR_EP_SHUTDOWN,
+	CCWR_EP_LISTEN_CREATE,
+	CCWR_EP_LISTEN_DESTROY,
+	CCWR_EP_QUERY,
+	CCWR_CR_ACCEPT,
+	CCWR_CR_REJECT,
+	CCWR_CONSOLE,
+	CCWR_TERM,
+	CCWR_FLASH_INIT,
+	CCWR_FLASH,
+	CCWR_BUF_ALLOC,
+	CCWR_BUF_FREE,
+	CCWR_FLASH_WRITE,
+	CCWR_INIT,		/* WARNING: Don't move this ever again! */
+
+
+
+	/* Add new IDs here */
+
+
+
+	/*
+	 * WARNING: CCWR_LAST must always be the last verbs id defined!
+	 *          All the preceding IDs are fixed, and must not change.
+	 *          You can add new IDs, but must not remove or reorder
+	 *          any IDs. If you do, YOU will ruin any hope of
+	 *          compatibility between versions.
+	 */
+	CCWR_LAST,
+
+	/*
+	 * Start over at 1 so that arrays indexed by user wr ids begin at 1
+	 * (the values below reuse the verbs ids' numbers).  This is OK since
+	 * the verbs and user wr ids are always used on disjoint sets of queues.
+	 */
+	/*
+	 * The order of the CCWR_SEND_XX verbs must
+	 * match the order of the RDMA_OPs
+	 */
+	CCWR_SEND = 1,
+	CCWR_SEND_INV,
+	CCWR_SEND_SE,
+	CCWR_SEND_SE_INV,
+	CCWR_RDMA_WRITE,
+	CCWR_RDMA_READ,
+	CCWR_RDMA_READ_INV,
+	CCWR_MW_BIND,
+	CCWR_NSMR_FASTREG,
+	CCWR_STAG_INVALIDATE,
+	CCWR_RECV,
+	CCWR_NOP,
+	CCWR_UNIMPL,
+/* WARNING: CCWR_UNIMPL must always be the last user wr id defined! */
+};
+#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   ((x) + 2)	/* (x) guards precedence */
+
+/*
+ * SQ/RQ Work Request Types.  Each one aliases a CCWR_* user wr id.
+ */
+enum c2_wr_type {
+	C2_WR_TYPE_SEND = CCWR_SEND,
+	C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
+	C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
+	C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
+	C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
+	C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
+	C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
+	C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
+	C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
+	C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
+	C2_WR_TYPE_RECV = CCWR_RECV,
+	C2_WR_TYPE_NOP = CCWR_NOP,
+};
+
+struct c2_netaddr {
+	u32 ip_addr;
+	u32 netmask;
+	u32 mtu;
+};
+
+struct c2_route {
+	u32 ip_addr;		/* 0 indicates the default route */
+	u32 netmask;		/* netmask associated with dst */
+	u32 flags;
+	union {
+		u32 ipaddr;	/* address of the nexthop interface */
+		u8 enaddr[6];
+	} nexthop;
+};
+
+/*
+ * A Scatter Gather Entry.
+ */
+struct c2_data_addr {
+	u32 stag;
+	u32 length;
+	u64 to;
+};
+
+/*
+ * MR and MW flags used by the consumer, RI, and RNIC.
+ */
+enum c2_mm_flags {
+	MEM_REMOTE = 0x0001,	/* allow mw binds with remote access. */
+	MEM_VA_BASED = 0x0002,	/* Not Zero-based */
+	MEM_PBL_COMPLETE = 0x0004,	/* PBL array is complete in this msg */
+	MEM_LOCAL_READ = 0x0008,	/* allow local reads */
+	MEM_LOCAL_WRITE = 0x0010,	/* allow local writes */
+	MEM_REMOTE_READ = 0x0020,	/* allow remote reads */
+	MEM_REMOTE_WRITE = 0x0040,	/* allow remote writes */
+	MEM_WINDOW_BIND = 0x0080,	/* binds allowed */
+	MEM_SHARED = 0x0100,	/* set if MR is shared */
+	MEM_STAG_VALID = 0x0200	/* set if STAG is in valid state */
+};
+
+/*
+ * CCIL API ACF flags defined in terms of the low level mem flags.
+ * This minimizes translation needed in the user API
+ */
+enum c2_acf {
+	C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
+	C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
+	C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
+	C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
+	C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
+};
+
+/*
+ * Image types of objects written to flash
+ */
+#define C2_FLASH_IMG_BITFILE 1
+#define C2_FLASH_IMG_OPTION_ROM 2
+#define C2_FLASH_IMG_VPD 3
+
+/*
+ *  To fix bug 1815 we define the maximum allowable size of the
+ *  terminate message (per the IETF spec; refer to the IETF
+ *  protocol specification, section 12.1.6, page 64).
+ *  The message is prefixed by 20 bytes of DDP info.
+ *
+ *  Then the message has 6 bytes for the terminate control
+ *  and DDP segment length info plus a DDP header (either
+ *  14 or 18 bytes) plus 28 bytes for the RDMA header.
+ *  Thus the max size is:
+ *  20 + (6 + 18 + 28) = 72
+ */
+#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
+
+/*
+ * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
+ */
+#define WR_BUILD_STR_LEN 64
+
+/*
+ * WARNING:  All of these structs need to align any 64-bit types on
+ * 64-bit boundaries!  64-bit types include u64.
+ */
+
+/*
+ * Clustercore Work Request Header.  Be sensitive to field layout
+ * and alignment.
+ */
+struct c2wr_hdr {
+	/* wqe_count is part of the cqe.  It is put here so the
+	 * adapter can write to it while the wr is pending without
+	 * clobbering part of the wr.  This word need not be dma'd
+	 * from the host to adapter by libccil, but we copy it anyway
+	 * to make the memcpy to the adapter better aligned.
+	 */
+	u32 wqe_count;
+
+	/* Put these fields next so that later 32- and 64-bit
+	 * quantities are naturally aligned.
+	 */
+	u8 id;
+	u8 result;		/* adapter -> host */
+	u8 sge_count;		/* host -> adapter */
+	u8 flags;		/* host -> adapter */
+
+	u64 context;
+#ifdef CCMSGMAGIC
+	u32 magic;
+	u32 pad;
+#endif
+} __attribute__((packed));
+
+/*
+ *------------------------ RNIC ------------------------
+ */
+
+/*
+ * WR_RNIC_OPEN
+ */
+
+/*
+ * Flags for the RNIC WRs
+ */
+enum c2_rnic_flags {
+	RNIC_IRD_STATIC = 0x0001,
+	RNIC_ORD_STATIC = 0x0002,
+	RNIC_QP_STATIC = 0x0004,
+	RNIC_SRQ_SUPPORTED = 0x0008,
+	RNIC_PBL_BLOCK_MODE = 0x0010,
+	RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
+	RNIC_CQ_OVF_DETECTED = 0x0040,
+	RNIC_PRIV_MODE = 0x0080
+};
+
+struct c2wr_rnic_open_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u16 flags;		/* See enum c2_rnic_flags */
+	u16 port_num;
+} __attribute__((packed));
+
+struct c2wr_rnic_open_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+union c2wr_rnic_open {
+	struct c2wr_rnic_open_req req;
+	struct c2wr_rnic_open_rep rep;
+} __attribute__((packed));
+
+struct c2wr_rnic_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_QUERY
+ */
+struct c2wr_rnic_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 vendor_id;
+	u32 part_number;
+	u32 hw_version;
+	u32 fw_ver_major;
+	u32 fw_ver_minor;
+	u32 fw_ver_patch;
+	char fw_ver_build_str[WR_BUILD_STR_LEN];
+	u32 max_qps;
+	u32 max_qp_depth;
+	u32 max_srq_depth;
+	u32 max_send_sgl_depth;
+	u32 max_rdma_sgl_depth;
+	u32 max_cqs;
+	u32 max_cq_depth;
+	u32 max_cq_event_handlers;
+	u32 max_mrs;
+	u32 max_pbl_depth;
+	u32 max_pds;
+	u32 max_global_ird;
+	u32 max_global_ord;
+	u32 max_qp_ird;
+	u32 max_qp_ord;
+	u32 flags;
+	u32 max_mws;
+	u32 pbe_range_low;
+	u32 pbe_range_high;
+	u32 max_srqs;
+	u32 page_size;
+} __attribute__((packed));
+
+union c2wr_rnic_query {
+	struct c2wr_rnic_query_req req;
+	struct c2wr_rnic_query_rep rep;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_GETCONFIG
+ */
+
+struct c2wr_rnic_getconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* see enum c2_getconfig_cmd */
+	u64 reply_buf;
+	u32 reply_buf_len;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_getconfig_rep {
+	struct c2wr_hdr hdr;
+	u32 option;		/* see enum c2_getconfig_cmd */
+	u32 count_len;		/* length of the number of addresses configured */
+} __attribute__((packed)) ;
+
+union c2wr_rnic_getconfig {
+	struct c2wr_rnic_getconfig_req req;
+	struct c2wr_rnic_getconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_SETCONFIG
+ */
+struct c2wr_rnic_setconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* See enum c2_setconfig_cmd */
+	/* variable data and pad. See struct c2_netaddr and struct c2_route */
+	u8 data[0];
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_setconfig_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_setconfig {
+	struct c2wr_rnic_setconfig_req req;
+	struct c2wr_rnic_setconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_CLOSE
+ */
+struct c2wr_rnic_close_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_close_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_close {
+	struct c2wr_rnic_close_req req;
+	struct c2wr_rnic_close_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ CQ ------------------------
+ */
+struct c2wr_cq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u64 msg_pool;
+	u32 rnic_handle;
+	u32 msg_size;
+	u32 depth;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 mq_index;
+	u32 adapter_shared;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_cq_create {
+	struct c2wr_cq_create_req req;
+	struct c2wr_cq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+	u32 new_depth;
+	u64 new_msg_pool;	/* NOTE(review): not 64-bit aligned here */
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_modify {
+	struct c2wr_cq_modify_req req;
+	struct c2wr_cq_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_destroy {
+	struct c2wr_cq_destroy_req req;
+	struct c2wr_cq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ PD ------------------------
+ */
+struct c2wr_pd_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_alloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_alloc {
+	struct c2wr_pd_alloc_req req;
+	struct c2wr_pd_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_dealloc {
+	struct c2wr_pd_dealloc_req req;
+	struct c2wr_pd_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ SRQ ------------------------
+ */
+struct c2wr_srq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 srq_depth;
+	u32 srq_limit;
+	u32 sgl_depth;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 srq_depth;
+	u32 sgl_depth;
+	u32 msg_size;
+	u32 mq_index;
+	u32 mq_start;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_srq_create {
+	struct c2wr_srq_create_req req;
+	struct c2wr_srq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_srq_destroy {
+	struct c2wr_srq_destroy_req req;
+	struct c2wr_srq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ QP ------------------------
+ */
+enum c2wr_qp_flags {
+	QP_RDMA_READ = 0x00000001,	/* RDMA read enabled? */
+	QP_RDMA_WRITE = 0x00000002,	/* RDMA write enabled? */
+	QP_MW_BIND = 0x00000004,	/* MWs enabled */
+	QP_ZERO_STAG = 0x00000008,	/* enabled? */
+	QP_REMOTE_TERMINATION = 0x00000010,	/* remote end terminated */
+	QP_RDMA_READ_RESPONSE = 0x00000020	/* Remote RDMA read  */
+	    /* enabled? */
+};
+
+struct c2wr_qp_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_sq_ht;
+	u64 shared_rq_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_cq_handle;
+	u32 rq_cq_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 srq_handle;
+	u32 srq_limit;
+	u32 flags;		/* see enum c2wr_qp_flags */
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_create_rep {
+	struct c2wr_hdr hdr;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+union c2wr_qp_create {
+	struct c2wr_qp_create_req req;
+	struct c2wr_qp_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u16 qp_state;
+	u16 flags;		/* see enum c2wr_qp_flags */
+	u32 qp_id;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+	u32 terminate_msg_length;	/* 0 if not present */
+	u8 data[0];
+	/* Terminate Message in-line here. */
+} __attribute__((packed)) ;
+
+union c2wr_qp_query {
+	struct c2wr_qp_query_req req;
+	struct c2wr_qp_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_req {
+	struct c2wr_hdr hdr;
+	u64 stream_msg;
+	u32 stream_msg_length;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 next_qp_state;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 llp_ep_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_rep {
+	struct c2wr_hdr hdr;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+} __attribute__((packed)) ;
+
+union c2wr_qp_modify {
+	struct c2wr_qp_modify_req req;
+	struct c2wr_qp_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_qp_destroy {
+	struct c2wr_qp_destroy_req req;
+	struct c2wr_qp_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
+ * only be posted when a QP is in IDLE state.  After the connect request is
+ * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
+ * There is no synchronous reply from the adapter to this WR.  The results
+ * of the connection are passed back in the async event
+ * CCAE_ACTIVE_CONNECT_RESULTS; see struct c2wr_ae_active_connect_results.
+ */
+struct c2wr_qp_connect_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 remote_addr;
+	u16 remote_port;
+	u16 pad;
+	u32 private_data_length;
+	u8 private_data[0];	/* Private data in-line. */
+} __attribute__((packed)) ;
+
+struct c2wr_qp_connect {
+	struct c2wr_qp_connect_req req;
+	/* no synchronous reply.         */
+} __attribute__((packed)) ;
+
+
+/*
+ *------------------------ MM ------------------------
+ */
+
+struct c2wr_nsmr_stag_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pbl_depth;
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_stag_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_stag_alloc {
+	struct c2wr_nsmr_stag_alloc_req req;
+	struct c2wr_nsmr_stag_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_register {
+	struct c2wr_nsmr_register_req req;
+	struct c2wr_nsmr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 flags;
+	u32 stag_index;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_pbl {
+	struct c2wr_nsmr_pbl_req req;
+	struct c2wr_nsmr_pbl_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+	u32 pbl_depth;
+} __attribute__((packed)) ;
+
+union c2wr_mr_query {
+	struct c2wr_mr_query_req req;
+	struct c2wr_mr_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+union c2wr_mw_query {
+	struct c2wr_mw_query_req req;
+	struct c2wr_mw_query_rep rep;
+} __attribute__((packed)) ;
+
+
+struct c2wr_stag_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_stag_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_stag_dealloc {
+	struct c2wr_stag_dealloc_req req;
+	struct c2wr_stag_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	u32 pad1;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_reregister {
+	struct c2wr_nsmr_reregister_req req;
+	struct c2wr_nsmr_reregister_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_smr_register {
+	struct c2wr_smr_register_req req;
+	struct c2wr_smr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_mw_alloc {
+	struct c2wr_mw_alloc_req req;
+	struct c2wr_mw_alloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ WRs -----------------------
+ */
+
+struct c2wr_user_hdr {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+} __attribute__((packed)) ;
+
+enum c2_qp_state {
+	C2_QP_STATE_IDLE = 0x01,
+	C2_QP_STATE_CONNECTING = 0x02,
+	C2_QP_STATE_RTS = 0x04,
+	C2_QP_STATE_CLOSING = 0x08,
+	C2_QP_STATE_TERMINATE = 0x10,
+	C2_QP_STATE_ERROR = 0x20,
+};
+
+/* Completion queue entry. */
+struct c2wr_ce {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+	u64 qp_user_context;	/* c2_user_qp_t * */
+	u32 qp_state;		/* Current QP State */
+	u32 handle;		/* QPID or EP Handle */
+	u32 bytes_rcvd;		/* valid for RECV WCs */
+	u32 stag;
+} __attribute__((packed)) ;
+
+
+/*
+ * Flags used for all post-sq WRs.  These must fit in the flags
+ * field of the struct c2wr_hdr (eight bits).
+ */
+enum {
+	SQ_SIGNALED = 0x01,
+	SQ_READ_FENCE = 0x02,
+	SQ_FENCE = 0x04,
+};
+
+/*
+ * Common header for all post-sq WRs.  At present it wraps only the
+ * standard user header; no further sq-specific fields are defined.
+ */
+struct c2_sq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * Same as above but for post-rq WRs.
+ */
+struct c2_rq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * use the same struct for all sends.
+ */
+struct c2wr_send_req {
+	struct c2_sq_hdr sq_hdr;
+	u32 sge_len;
+	u32 remote_stag;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_send {
+	struct c2wr_send_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_write_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 remote_to;
+	u32 remote_stag;
+	u32 sge_len;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_rdma_write {
+	struct c2wr_rdma_write_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_read_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 local_to;
+	u64 remote_to;
+	u32 local_stag;
+	u32 remote_stag;
+	u32 length;
+} __attribute__((packed));
+
+union c2wr_rdma_read {
+	struct c2wr_rdma_read_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_mw_bind_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 mw_stag_index;
+	u32 mr_stag_index;
+	u32 length;
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_mw_bind {
+	struct c2wr_mw_bind_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_nsmr_fastreg_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed));
+
+union c2wr_nsmr_fastreg {
+	struct c2wr_nsmr_fastreg_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_stag_invalidate_req {
+	struct c2_sq_hdr sq_hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+} __attribute__((packed));
+
+union c2wr_stag_invalidate {
+	struct c2wr_stag_invalidate_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+union c2wr_sqwr {
+	struct c2_sq_hdr sq_hdr;
+	struct c2wr_send_req send;
+	struct c2wr_send_req send_se;
+	struct c2wr_send_req send_inv;
+	struct c2wr_send_req send_se_inv;
+	struct c2wr_rdma_write_req rdma_write;
+	struct c2wr_rdma_read_req rdma_read;
+	struct c2wr_mw_bind_req mw_bind;
+	struct c2wr_nsmr_fastreg_req nsmr_fastreg;
+	struct c2wr_stag_invalidate_req stag_inv;
+} __attribute__((packed));
+
+
+/*
+ * RQ WRs
+ */
+struct c2wr_rqwr {
+	struct c2_rq_hdr rq_hdr;
+	u8 data[0];		/* array of SGEs */
+} __attribute__((packed));
+
+union c2wr_recv {
+	struct c2wr_rqwr req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+/*
+ * All AEs start with this header.  Most AEs only need to convey the
+ * information in the header.  Some, like LLP connection events, need
+ * more info.  The union c2wr_ae has all the possible AEs.
+ *
+ * hdr.context is the user_context from the rnic_open WR.  NULL if this
+ * is not affiliated with an rnic.
+ *
+ * hdr.id is the AE identifier (e.g. CCAE_REMOTE_SHUTDOWN,
+ * CCAE_LLP_CLOSE_COMPLETE)
+ *
+ * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
+ *
+ * user_context is the context passed down when the host created the resource.
+ */
+struct c2wr_ae_hdr {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* user context for this res. */
+	u32 resource_type;	/* see enum c2_resource_indicator */
+	u32 resource;		/* handle for resource */
+	u32 qp_state;		/* current QP State */
+} __attribute__((packed));
+
+/*
+ * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
+ * the adapter moves the QP into RTS state
+ */
+struct c2wr_ae_active_connect_results {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+/*
+ * When connections are established by the stack (and the private data
+ * MPA frame is received), the adapter will generate an event to the host.
+ * The details of the connection, any private data, and the new connection
+ * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
+ * AE queue:
+ */
+struct c2wr_ae_connection_request {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 cr_handle;		/* connreq handle (sock ptr) */
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+union c2wr_ae {
+	struct c2wr_ae_hdr ae_generic;
+	struct c2wr_ae_active_connect_results ae_active_connect_results;
+	struct c2wr_ae_connection_request ae_connection_request;
+} __attribute__((packed));
+
+struct c2wr_init_req {
+	struct c2wr_hdr hdr;
+	u64 hint_count;
+	u64 q0_host_shared;
+	u64 q1_host_shared;
+	u64 q1_host_msg_pool;
+	u64 q2_host_shared;
+	u64 q2_host_msg_pool;
+} __attribute__((packed));
+
+struct c2wr_init_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_init {
+	struct c2wr_init_req req;
+	struct c2wr_init_rep rep;
+} __attribute__((packed));
+
+/*
+ * For upgrading flash.
+ */
+
+struct c2wr_flash_init_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+struct c2wr_flash_init_rep {
+	struct c2wr_hdr hdr;
+	u32 adapter_flash_buf_offset;
+	u32 adapter_flash_len;
+} __attribute__((packed));
+
+union c2wr_flash_init {
+	struct c2wr_flash_init_req req;
+	struct c2wr_flash_init_rep rep;
+} __attribute__((packed));
+
+struct c2wr_flash_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 len;
+} __attribute__((packed));
+
+struct c2wr_flash_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash {
+	struct c2wr_flash_req req;
+	struct c2wr_flash_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 size;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 offset;		/* 0 if mem not available */
+	u32 size;		/* 0 if mem not available */
+} __attribute__((packed));
+
+union c2wr_buf_alloc {
+	struct c2wr_buf_alloc_req req;
+	struct c2wr_buf_alloc_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_free_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;		/* Must match value from alloc */
+	u32 size;		/* Must match value from alloc */
+} __attribute__((packed));
+
+struct c2wr_buf_free_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_buf_free {
+	struct c2wr_buf_free_req req;
+	struct c2wr_ce rep;	/* NOTE(review): not c2wr_buf_free_rep -- intended? */
+} __attribute__((packed));
+
+struct c2wr_flash_write_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;
+	u32 size;
+	u32 type;
+	u32 flags;
+} __attribute__((packed));
+
+struct c2wr_flash_write_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash_write {
+	struct c2wr_flash_write_req req;
+	struct c2wr_flash_write_rep rep;
+} __attribute__((packed));
+
+/*
+ * Messages for LLP connection setup.
+ */
+
+/*
+ * Listen Request.  This allocates a listening endpoint to allow passive
+ * connection setup.  Newly established LLP connections are passed up
+ * via an AE.  See c2wr_ae_connection_request_t
+ */
+struct c2wr_ep_listen_create_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* returned in AEs. */
+	u32 rnic_handle;
+	u32 local_addr;		/* local addr, or 0  */
+	u16 local_port;		/* 0 means "pick one" */
+	u16 pad;
+	u32 backlog;		/* tradional tcp listen bl */
+} __attribute__((packed));
+
+struct c2wr_ep_listen_create_rep {
+	struct c2wr_hdr hdr;
+	u32 ep_handle;		/* handle to new listening ep */
+	u16 local_port;		/* resulting port... */
+	u16 pad;
+} __attribute__((packed));
+
+union c2wr_ep_listen_create {
+	struct c2wr_ep_listen_create_req req;
+	struct c2wr_ep_listen_create_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_ep_listen_destroy {
+	struct c2wr_ep_listen_destroy_req req;
+	struct c2wr_ep_listen_destroy_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_query_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+} __attribute__((packed));
+
+union c2wr_ep_query {
+	struct c2wr_ep_query_req req;
+	struct c2wr_ep_query_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * The host passes this down to indicate acceptance of a pending iWARP
+ * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
+ * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
+ */
+struct c2wr_cr_accept_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;		/* QP to bind to this LLP conn */
+	u32 ep_handle;		/* LLP  handle to accept */
+	u32 private_data_length;
+	u8 private_data[0];	/* data in-line in msg. */
+} __attribute__((packed));
+
+/*
+ * adapter sends reply when private data is successfully submitted to
+ * the LLP.
+ */
+struct c2wr_cr_accept_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_accept {
+	struct c2wr_cr_accept_req req;
+	struct c2wr_cr_accept_rep rep;
+} __attribute__((packed));
+
+/*
+ * The host sends this down if a given iWARP connection request was
+ * rejected by the consumer.  The cr_handle was obtained from a
+ * previous c2wr_ae_connection_request_t AE sent by the adapter.
+ */
+struct  c2wr_cr_reject_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;		/* LLP handle to reject */
+} __attribute__((packed));
+
+/*
+ * Dunno if this is needed, but we'll add it for now.  The adapter will
+ * send the reject_reply after the LLP endpoint has been destroyed.
+ */
+struct  c2wr_cr_reject_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_reject {
+	struct c2wr_cr_reject_req req;
+	struct c2wr_cr_reject_rep rep;
+} __attribute__((packed));
+
+/*
+ * console command.  Used to implement a debug console over the verbs
+ * request and reply queues.
+ */
+
+/*
+ * Console request message.  It contains:
+ *	- message hdr with id = CCWR_CONSOLE
+ *	- the physaddr/len of host memory to be used for the reply.
+ *	- the command string.  eg:  "netstat -s" or "zoneinfo"
+ */
+struct c2wr_console_req {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u64 reply_buf;		/* pinned host buf for reply */
+	u32 reply_buf_len;	/* length of reply buffer */
+	u8 command[0];		/* NUL terminated ascii string */
+	/* containing the command req */
+} __attribute__((packed));
+
+/*
+ * flags used in the console reply.
+ */
+enum c2_console_flags {
+	CONS_REPLY_TRUNCATED = 0x00000001	/* reply was truncated */
+} __attribute__((packed));
+
+/*
+ * Console reply message.
+ * hdr.result contains the c2_status_t error if the reply was _not_ generated,
+ * or C2_OK if the reply was generated.
+ */
+struct c2wr_console_rep {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_console {
+	struct c2wr_console_req req;
+	struct c2wr_console_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * Giant union with all WRs.  Makes life easier...
+ */
+union c2wr {
+	struct c2wr_hdr hdr;
+	struct c2wr_user_hdr user_hdr;
+	union c2wr_rnic_open rnic_open;
+	union c2wr_rnic_query rnic_query;
+	union c2wr_rnic_getconfig rnic_getconfig;
+	union c2wr_rnic_setconfig rnic_setconfig;
+	union c2wr_rnic_close rnic_close;
+	union c2wr_cq_create cq_create;
+	union c2wr_cq_modify cq_modify;
+	union c2wr_cq_destroy cq_destroy;
+	union c2wr_pd_alloc pd_alloc;
+	union c2wr_pd_dealloc pd_dealloc;
+	union c2wr_srq_create srq_create;
+	union c2wr_srq_destroy srq_destroy;
+	union c2wr_qp_create qp_create;
+	union c2wr_qp_query qp_query;
+	union c2wr_qp_modify qp_modify;
+	union c2wr_qp_destroy qp_destroy;
+	struct c2wr_qp_connect qp_connect;
+	union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
+	union c2wr_nsmr_register nsmr_register;
+	union c2wr_nsmr_pbl nsmr_pbl;
+	union c2wr_mr_query mr_query;
+	union c2wr_mw_query mw_query;
+	union c2wr_stag_dealloc stag_dealloc;
+	union c2wr_sqwr sqwr;
+	struct c2wr_rqwr rqwr;
+	struct c2wr_ce ce;
+	union c2wr_ae ae;
+	union c2wr_init init;
+	union c2wr_ep_listen_create ep_listen_create;
+	union c2wr_ep_listen_destroy ep_listen_destroy;
+	union c2wr_cr_accept cr_accept;
+	union c2wr_cr_reject cr_reject;
+	union c2wr_console console;
+	union c2wr_flash_init flash_init;
+	union c2wr_flash flash;
+	union c2wr_buf_alloc buf_alloc;
+	union c2wr_buf_free buf_free;
+	union c2wr_flash_write flash_write;
+} __attribute__((packed));
+
+
+/*
+ * Accessors for the wr fields that are packed together tightly to
+ * reduce the wr message size.  The wr arguments are void* so that
+ * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
+ * in the struct c2wr union can be passed in.
+ */
+static __inline__ u8 c2_wr_get_id(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->id;
+}
+static __inline__ void c2_wr_set_id(void *wr, u8 id)
+{
+	((struct c2wr_hdr *) wr)->id = id;
+}
+static __inline__ u8 c2_wr_get_result(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->result;
+}
+static __inline__ void c2_wr_set_result(void *wr, u8 result)
+{
+	((struct c2wr_hdr *) wr)->result = result;
+}
+static __inline__ u8 c2_wr_get_flags(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->flags;
+}
+static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
+{
+	((struct c2wr_hdr *) wr)->flags = flags;
+}
+static __inline__ u8 c2_wr_get_sge_count(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->sge_count;
+}
+static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
+{
+	((struct c2wr_hdr *) wr)->sge_count = sge_count;
+}
+static __inline__ u32 c2_wr_get_wqe_count(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->wqe_count;
+}
+static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
+{
+	((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
+}
+
+#endif				/* _C2_WR_H_ */
-- 
GitLab


From 61a73c708f37295892176bc911b178278df6a091 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Fri, 1 Sep 2006 15:33:55 -0700
Subject: [PATCH 0775/1063] RDMA/cma: Protect against adding device during
 destruction

Closes a window where address resolution can attach an rdma_cm_id to a
device during destruction of the rdma_cm_id.  This can result in the
rdma_cm_id remaining in the device list after its memory has been
freed.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cma.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e88a7c652ca01..488fa1d642a79 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -279,7 +279,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 	default:
 		return -ENODEV;
 	}
-	mutex_lock(&lock);
+
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
@@ -288,7 +288,6 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 			break;
 		}
 	}
-	mutex_unlock(&lock);
 	return ret;
 }
 
@@ -712,7 +711,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	state = cma_exch(id_priv, CMA_DESTROYING);
 	cma_cancel_operation(id_priv, state);
 
+	mutex_lock(&lock);
 	if (id_priv->cma_dev) {
+		mutex_unlock(&lock);
 		switch (rdma_node_get_transport(id->device->node_type)) {
 		case RDMA_TRANSPORT_IB:
 			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
@@ -727,8 +728,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 		}
 		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
-		mutex_unlock(&lock);
 	}
+	mutex_unlock(&lock);
 
 	cma_release_port(id_priv);
 	cma_deref_id(id_priv);
@@ -925,7 +926,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	}
 
 	atomic_inc(&conn_id->dev_remove);
+	mutex_lock(&lock);
 	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
 	if (ret) {
 		ret = -ENODEV;
 		cma_release_remove(conn_id);
@@ -1097,7 +1100,9 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		goto out;
 	}
 
+	mutex_lock(&lock);
 	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
 	if (ret) {
 		cma_release_remove(conn_id);
 		rdma_destroy_id(new_cm_id);
@@ -1507,16 +1512,26 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	enum rdma_cm_event_type event;
 
 	atomic_inc(&id_priv->dev_remove);
-	if (!id_priv->cma_dev && !status)
+
+	/*
+	 * Grab mutex to block rdma_destroy_id() from removing the device while
+	 * we're trying to acquire it.
+	 */
+	mutex_lock(&lock);
+	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+		mutex_unlock(&lock);
+		goto out;
+	}
+
+	if (!status && !id_priv->cma_dev)
 		status = cma_acquire_dev(id_priv);
+	mutex_unlock(&lock);
 
 	if (status) {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
+		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
 			goto out;
 		event = RDMA_CM_EVENT_ADDR_ERROR;
 	} else {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
-			goto out;
 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
 		       ip_addr_size(src_addr));
 		event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1740,8 +1755,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
 	if (!cma_any_addr(addr)) {
 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
-		if (!ret)
+		if (!ret) {
+			mutex_lock(&lock);
 			ret = cma_acquire_dev(id_priv);
+			mutex_unlock(&lock);
+		}
 		if (ret)
 			goto err;
 	}
-- 
GitLab


From 777a71dd4d901f055967ddbd038d2a74ffce0eb8 Mon Sep 17 00:00:00 2001
From: Erez Zilber <erezz@voltaire.com>
Date: Mon, 11 Sep 2006 12:19:17 +0300
Subject: [PATCH 0776/1063] IB/iser: fix a check of SG alignment for RDMA

dma mapping may include a "compaction" of the sg associated with a scsi command.
Hence, the size of the maximal prefix of the SG which is aligned for rdma must be
compared against the length of the dma mapped sg (mem->dma_nents) and not against
the size of it before it was mapped (mem->size).

Signed-off-by: Erez Zilber <erezz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/iser/iser_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 31950a522a1c8..53af9567632ea 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -378,7 +378,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 	regd_buf = &iser_ctask->rdma_regd[cmd_dir];
 
 	aligned_len = iser_data_buf_aligned_len(mem);
-	if (aligned_len != mem->size) {
+	if (aligned_len != mem->dma_nents) {
 		iser_err("rdma alignment violation %d/%d aligned\n",
 			 aligned_len, mem->size);
 		iser_data_buf_dump(mem);
-- 
GitLab


From 8072ec2f8f6790df91e85d833e672c9c30a7ab3c Mon Sep 17 00:00:00 2001
From: Erez Zilber <erezz@voltaire.com>
Date: Mon, 11 Sep 2006 12:20:54 +0300
Subject: [PATCH 0777/1063] IB/iser: Limit the max size of a scsi command

Currently, the data length of a command coming down from scsi-ml
is limited only by the size of its sg list (sg_tablesize). The
max data length may be different for different page size values.
By setting max_sectors, we limit the data length to
max_sectors*512 bytes.

Signed-off-by: Erez Zilber <erezz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 1437d7ee3b190..e9cf1a9f1e1cd 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -555,6 +555,7 @@ static struct scsi_host_template iscsi_iser_sht = {
 	.queuecommand           = iscsi_queuecommand,
 	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
 	.sg_tablesize           = ISCSI_ISER_SG_TABLESIZE,
+	.max_sectors		= 1024,
 	.cmd_per_lun            = ISCSI_MAX_CMD_PER_LUN,
 	.eh_abort_handler       = iscsi_eh_abort,
 	.eh_host_reset_handler	= iscsi_eh_host_reset,
-- 
GitLab


From 8dfa0876d3dde5f9c1818a4c35caaabc3ddba78b Mon Sep 17 00:00:00 2001
From: Erez Zilber <erezz@voltaire.com>
Date: Mon, 11 Sep 2006 12:22:30 +0300
Subject: [PATCH 0778/1063] IB/iser: make FMR "page size" be 4K and not
 PAGE_SIZE

As iser is able to use at most one rdma operation for the
execution of a scsi command, and registration of the sg
associated with a scsi command has its restrictions, the code
checks if an sg is "aligned for rdma".

Alignment for rdma is measured in "fmr page" units whose
possible resolutions are different between HCAs and can be
smaller than, equal to, or bigger than the system page size.

When the system page size is bigger than 4KB (eg the default
with ia64 kernels) there is a bigger chance that an sg would be
aligned for rdma if the fmr page size is 4KB.

Change the code to create FMR whose pages are of size 4KB
and to take that into account when processing the sg.

Signed-off-by: Erez Zilber <erezz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.h  |  6 ++++-
 drivers/infiniband/ulp/iser/iser_memory.c | 31 +++++++++++++++--------
 drivers/infiniband/ulp/iser/iser_verbs.c  |  4 +--
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 3350ba690cfe1..0ba02abb04140 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -82,8 +82,12 @@
 		       __func__ , ## arg);		\
 	} while (0)
 
+#define SHIFT_4K	12
+#define SIZE_4K	(1UL << SHIFT_4K)
+#define MASK_4K	(~(SIZE_4K-1))
+
 					/* support upto 512KB in one RDMA */
-#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> PAGE_SHIFT)
+#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
 #define ISCSI_ISER_MAX_LUN		256
 #define ISCSI_ISER_MAX_CMD_LEN		16
 
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 53af9567632ea..bcef0d31f7561 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -42,6 +42,7 @@
 #include "iscsi_iser.h"
 
 #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+
 /**
  * Decrements the reference count for the
  * registered buffer & releases it
@@ -239,7 +240,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 	int i;
 
 	/* compute the offset of first element */
-	page_vec->offset = (u64) sg[0].offset;
+	page_vec->offset = (u64) sg[0].offset & ~MASK_4K;
 
 	for (i = 0; i < data->dma_nents; i++) {
 		total_sz += sg_dma_len(&sg[i]);
@@ -247,21 +248,30 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 		first_addr = sg_dma_address(&sg[i]);
 		last_addr  = first_addr + sg_dma_len(&sg[i]);
 
-		start_aligned = !(first_addr & ~PAGE_MASK);
-		end_aligned   = !(last_addr  & ~PAGE_MASK);
+		start_aligned = !(first_addr & ~MASK_4K);
+		end_aligned   = !(last_addr  & ~MASK_4K);
 
 		/* continue to collect page fragments till aligned or SG ends */
 		while (!end_aligned && (i + 1 < data->dma_nents)) {
 			i++;
 			total_sz += sg_dma_len(&sg[i]);
 			last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]);
-			end_aligned = !(last_addr  & ~PAGE_MASK);
+			end_aligned = !(last_addr  & ~MASK_4K);
 		}
 
-		first_addr = first_addr & PAGE_MASK;
-
-		for (page = first_addr; page < last_addr; page += PAGE_SIZE)
-			page_vec->pages[cur_page++] = page;
+		/* handle the 1st page in the 1st DMA element */
+		if (cur_page == 0) {
+			page = first_addr & MASK_4K;
+			page_vec->pages[cur_page] = page;
+			cur_page++;
+			page += SIZE_4K;
+		} else
+			page = first_addr;
+
+		for (; page < last_addr; page += SIZE_4K) {
+			page_vec->pages[cur_page] = page;
+			cur_page++;
+		}
 
 	}
 	page_vec->data_size = total_sz;
@@ -269,8 +279,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 	return cur_page;
 }
 
-#define MASK_4K			((1UL << 12) - 1) /* 0xFFF */
-#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & MASK_4K) == 0)
+#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
 
 /**
  * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -352,7 +361,7 @@ static void iser_page_vec_build(struct iser_data_buf *data,
 
 	page_vec->length = page_vec_len;
 
-	if (page_vec_len * PAGE_SIZE < page_vec->data_size) {
+	if (page_vec_len * SIZE_4K < page_vec->data_size) {
 		iser_err("page_vec too short to hold this SG\n");
 		iser_data_buf_dump(data);
 		iser_dump_page_vec(page_vec);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 72febf1f8ff8d..9b27a7c26aa87 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -150,7 +150,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 	}
 	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
 
-	params.page_shift        = PAGE_SHIFT;
+	params.page_shift        = SHIFT_4K;
 	/* when the first/last SG element are not start/end *
 	 * page aligned, the map whould be of N+1 pages     */
 	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
@@ -604,7 +604,7 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
 
 	mem_reg->lkey  = mem->fmr->lkey;
 	mem_reg->rkey  = mem->fmr->rkey;
-	mem_reg->len   = page_vec->length * PAGE_SIZE;
+	mem_reg->len   = page_vec->length * SIZE_4K;
 	mem_reg->va    = io_addr;
 	mem_reg->mem_h = (void *)mem;
 
-- 
GitLab


From e981f1d4b8288072ba7cf6b7141cd4aefb404383 Mon Sep 17 00:00:00 2001
From: Erez Zilber <erezz@voltaire.com>
Date: Mon, 11 Sep 2006 12:24:00 +0300
Subject: [PATCH 0779/1063] IB/iser: fix some debug prints

Fix and add some debug prints related to iser's
handling of memory for rdma.

Signed-off-by: Erez Zilber <erezz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/iser/iser_memory.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index bcef0d31f7561..8fea0bce5042c 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -329,9 +329,9 @@ static void iser_data_buf_dump(struct iser_data_buf *data)
 	struct scatterlist *sg = (struct scatterlist *)data->buf;
 	int i;
 
-	for (i = 0; i < data->size; i++)
+	for (i = 0; i < data->dma_nents; i++)
 		iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
-			 "off:%d sz:%d dma_len:%d\n",
+			 "off:0x%x sz:0x%x dma_len:0x%x\n",
 			 i, (unsigned long)sg_dma_address(&sg[i]),
 			 sg[i].page, sg[i].offset,
 			 sg[i].length,sg_dma_len(&sg[i]));
@@ -383,6 +383,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 	struct iser_regd_buf *regd_buf;
 	int aligned_len;
 	int err;
+	int i;
 
 	regd_buf = &iser_ctask->rdma_regd[cmd_dir];
 
@@ -400,8 +401,18 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 
 	iser_page_vec_build(mem, ib_conn->page_vec);
 	err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
-	if (err)
+	if (err) {
+		iser_data_buf_dump(mem);
+		iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
+			 ntoh24(iser_ctask->desc.iscsi_header.dlength));
+		iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
+			 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
+			 ib_conn->page_vec->offset);
+		for (i=0 ; i<ib_conn->page_vec->length ; i++) {
+			iser_err("page_vec[%d] = 0x%lx\n", i, ib_conn->page_vec->pages[i]);
+		}
 		return err;
+	}
 
 	/* take a reference on this regd buf such that it will not be released *
 	 * (eg in send dto completion) before we get the scsi response         */
-- 
GitLab


From d81110285f7f6c07a0ce8f99a5ff158a647cd649 Mon Sep 17 00:00:00 2001
From: Erez Zilber <erezz@voltaire.com>
Date: Mon, 11 Sep 2006 12:26:33 +0300
Subject: [PATCH 0780/1063] IB/iser: Do not use FMR for a single dma entry sg

Fast Memory Registration (fmr) is used to register for rdma an sg whose
elements are not linearly sequential after dma mapping.

The IB verbs layer provides an "all dma memory MR (memory region)" which
can be used for RDMA-ing a dma linearly sequential buffer.

Change the code to use the dma mr instead of doing fmr when dma mapping
produces a single dma entry sg.

Signed-off-by: Erez Zilber <erezz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.h  |  1 +
 drivers/infiniband/ulp/iser/iser_memory.c | 48 ++++++++++++++++-------
 drivers/infiniband/ulp/iser/iser_verbs.c  |  6 ++-
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 0ba02abb04140..7e1a411db2a30 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -175,6 +175,7 @@ struct iser_mem_reg {
 	u64  va;
 	u64  len;
 	void *mem_h;
+	int  is_fmr;
 };
 
 struct iser_regd_buf {
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 8fea0bce5042c..d0b03f4265811 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -56,7 +56,7 @@ int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
 	if ((atomic_read(&regd_buf->ref_count) == 0) ||
 	    atomic_dec_and_test(&regd_buf->ref_count)) {
 		/* if we used the dma mr, unreg is just NOP */
-		if (regd_buf->reg.rkey != 0)
+		if (regd_buf->reg.is_fmr)
 			iser_unreg_mem(&regd_buf->reg);
 
 		if (regd_buf->dma_addr) {
@@ -91,9 +91,9 @@ void iser_reg_single(struct iser_device *device,
 	BUG_ON(dma_mapping_error(dma_addr));
 
 	regd_buf->reg.lkey = device->mr->lkey;
-	regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */
 	regd_buf->reg.len  = regd_buf->data_size;
 	regd_buf->reg.va   = dma_addr;
+	regd_buf->reg.is_fmr = 0;
 
 	regd_buf->dma_addr  = dma_addr;
 	regd_buf->direction = direction;
@@ -379,11 +379,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 		      enum   iser_data_dir        cmd_dir)
 {
 	struct iser_conn     *ib_conn = iser_ctask->iser_conn->ib_conn;
+	struct iser_device   *device = ib_conn->device;
 	struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
 	struct iser_regd_buf *regd_buf;
 	int aligned_len;
 	int err;
 	int i;
+	struct scatterlist *sg;
 
 	regd_buf = &iser_ctask->rdma_regd[cmd_dir];
 
@@ -399,19 +401,37 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 		mem = &iser_ctask->data_copy[cmd_dir];
 	}
 
-	iser_page_vec_build(mem, ib_conn->page_vec);
-	err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
-	if (err) {
-		iser_data_buf_dump(mem);
-		iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
-			 ntoh24(iser_ctask->desc.iscsi_header.dlength));
-		iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
-			 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
-			 ib_conn->page_vec->offset);
-		for (i=0 ; i<ib_conn->page_vec->length ; i++) {
-			iser_err("page_vec[%d] = 0x%lx\n", i, ib_conn->page_vec->pages[i]);
+	/* if there a single dma entry, FMR is not needed */
+	if (mem->dma_nents == 1) {
+		sg = (struct scatterlist *)mem->buf;
+
+		regd_buf->reg.lkey = device->mr->lkey;
+		regd_buf->reg.rkey = device->mr->rkey;
+		regd_buf->reg.len  = sg_dma_len(&sg[0]);
+		regd_buf->reg.va   = sg_dma_address(&sg[0]);
+		regd_buf->reg.is_fmr = 0;
+
+		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
+			 "va: 0x%08lX sz: %ld]\n",
+			 (unsigned int)regd_buf->reg.lkey,
+			 (unsigned int)regd_buf->reg.rkey,
+			 (unsigned long)regd_buf->reg.va,
+			 (unsigned long)regd_buf->reg.len);
+	} else { /* use FMR for multiple dma entries */
+		iser_page_vec_build(mem, ib_conn->page_vec);
+		err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
+		if (err) {
+			iser_data_buf_dump(mem);
+			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
+				 ntoh24(iser_ctask->desc.iscsi_header.dlength));
+			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
+				 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
+				 ib_conn->page_vec->offset);
+			for (i=0 ; i<ib_conn->page_vec->length ; i++)
+				iser_err("page_vec[%d] = 0x%llx\n", i,
+					 (unsigned long long) ib_conn->page_vec->pages[i]);
+			return err;
 		}
-		return err;
 	}
 
 	/* take a reference on this regd buf such that it will not be released *
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 9b27a7c26aa87..ecdca7fc1e4cf 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -88,8 +88,9 @@ static int iser_create_device_ib_res(struct iser_device *device)
 		     iser_cq_tasklet_fn,
 		     (unsigned long)device);
 
-	device->mr = ib_get_dma_mr(device->pd,
-				   IB_ACCESS_LOCAL_WRITE);
+	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
+				   IB_ACCESS_REMOTE_WRITE |
+				   IB_ACCESS_REMOTE_READ);
 	if (IS_ERR(device->mr))
 		goto dma_mr_err;
 
@@ -606,6 +607,7 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
 	mem_reg->rkey  = mem->fmr->rkey;
 	mem_reg->len   = page_vec->length * SIZE_4K;
 	mem_reg->va    = io_addr;
+	mem_reg->is_fmr = 1;
 	mem_reg->mem_h = (void *)mem;
 
 	mem_reg->va   += page_vec->offset;
-- 
GitLab


From 2439a6e65ff09729c3b4215f134dc5cd4e8a30c0 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 22 Sep 2006 15:22:52 -0700
Subject: [PATCH 0781/1063] IPoIB: Refactor completion handling

Split up ipoib_ib_handle_wc() into ipoib_ib_handle_rx_wc() and
ipoib_ib_handle_tx_wc() to make the code easier to read.  This will
also help implement NAPI in the future.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 188 +++++++++++++-----------
 1 file changed, 100 insertions(+), 88 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5033666b14817..722177ea069b7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -169,117 +169,129 @@ static int ipoib_ib_post_receives(struct net_device *dev)
 	return 0;
 }
 
-static void ipoib_ib_handle_wc(struct net_device *dev,
-			       struct ib_wc *wc)
+static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	unsigned int wr_id = wc->wr_id;
+	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
+	struct sk_buff *skb;
+	dma_addr_t addr;
 
-	ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
+	ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
 		       wr_id, wc->opcode, wc->status);
 
-	if (wr_id & IPOIB_OP_RECV) {
-		wr_id &= ~IPOIB_OP_RECV;
-
-		if (wr_id < ipoib_recvq_size) {
-			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
-			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
-
-			if (unlikely(wc->status != IB_WC_SUCCESS)) {
-				if (wc->status != IB_WC_WR_FLUSH_ERR)
-					ipoib_warn(priv, "failed recv event "
-						   "(status=%d, wrid=%d vend_err %x)\n",
-						   wc->status, wr_id, wc->vendor_err);
-				dma_unmap_single(priv->ca->dma_device, addr,
-						 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-				dev_kfree_skb_any(skb);
-				priv->rx_ring[wr_id].skb = NULL;
-				return;
-			}
+	if (unlikely(wr_id >= ipoib_recvq_size)) {
+		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_recvq_size);
+		return;
+	}
 
-			/*
-			 * If we can't allocate a new RX buffer, dump
-			 * this packet and reuse the old buffer.
-			 */
-			if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-				++priv->stats.rx_dropped;
-				goto repost;
-			}
+	skb  = priv->rx_ring[wr_id].skb;
+	addr = priv->rx_ring[wr_id].mapping;
 
-			ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
-				       wc->byte_len, wc->slid);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			ipoib_warn(priv, "failed recv event "
+				   "(status=%d, wrid=%d vend_err %x)\n",
+				   wc->status, wr_id, wc->vendor_err);
+		dma_unmap_single(priv->ca->dma_device, addr,
+				 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+		dev_kfree_skb_any(skb);
+		priv->rx_ring[wr_id].skb = NULL;
+		return;
+	}
 
-			dma_unmap_single(priv->ca->dma_device, addr,
-					 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+	/*
+	 * If we can't allocate a new RX buffer, dump
+	 * this packet and reuse the old buffer.
+	 */
+	if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
 
-			skb_put(skb, wc->byte_len);
-			skb_pull(skb, IB_GRH_BYTES);
+	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+		       wc->byte_len, wc->slid);
 
-			if (wc->slid != priv->local_lid ||
-			    wc->src_qp != priv->qp->qp_num) {
-				skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-				skb->mac.raw = skb->data;
-				skb_pull(skb, IPOIB_ENCAP_LEN);
+	dma_unmap_single(priv->ca->dma_device, addr,
+			 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 
-				dev->last_rx = jiffies;
-				++priv->stats.rx_packets;
-				priv->stats.rx_bytes += skb->len;
+	skb_put(skb, wc->byte_len);
+	skb_pull(skb, IB_GRH_BYTES);
 
-				skb->dev = dev;
-				/* XXX get correct PACKET_ type here */
-				skb->pkt_type = PACKET_HOST;
-				netif_rx_ni(skb);
-			} else {
-				ipoib_dbg_data(priv, "dropping loopback packet\n");
-				dev_kfree_skb_any(skb);
-			}
+	if (wc->slid != priv->local_lid ||
+	    wc->src_qp != priv->qp->qp_num) {
+		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+		skb->mac.raw = skb->data;
+		skb_pull(skb, IPOIB_ENCAP_LEN);
 
-		repost:
-			if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
-				ipoib_warn(priv, "ipoib_ib_post_receive failed "
-					   "for buf %d\n", wr_id);
-		} else
-			ipoib_warn(priv, "completion event with wrid %d\n",
-				   wr_id);
+		dev->last_rx = jiffies;
+		++priv->stats.rx_packets;
+		priv->stats.rx_bytes += skb->len;
 
+		skb->dev = dev;
+		/* XXX get correct PACKET_ type here */
+		skb->pkt_type = PACKET_HOST;
+		netif_rx_ni(skb);
 	} else {
-		struct ipoib_tx_buf *tx_req;
-		unsigned long flags;
+		ipoib_dbg_data(priv, "dropping loopback packet\n");
+		dev_kfree_skb_any(skb);
+	}
 
-		if (wr_id >= ipoib_sendq_size) {
-			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, ipoib_sendq_size);
-			return;
-		}
+repost:
+	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+		ipoib_warn(priv, "ipoib_ib_post_receive failed "
+			   "for buf %d\n", wr_id);
+}
 
-		ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id;
+	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
 
-		tx_req = &priv->tx_ring[wr_id];
+	ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
 
-		dma_unmap_single(priv->ca->dma_device,
-				 pci_unmap_addr(tx_req, mapping),
-				 tx_req->skb->len,
-				 DMA_TO_DEVICE);
+	if (unlikely(wr_id >= ipoib_sendq_size)) {
+		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_sendq_size);
+		return;
+	}
 
-		++priv->stats.tx_packets;
-		priv->stats.tx_bytes += tx_req->skb->len;
+	tx_req = &priv->tx_ring[wr_id];
 
-		dev_kfree_skb_any(tx_req->skb);
+	dma_unmap_single(priv->ca->dma_device,
+			 pci_unmap_addr(tx_req, mapping),
+			 tx_req->skb->len,
+			 DMA_TO_DEVICE);
 
-		spin_lock_irqsave(&priv->tx_lock, flags);
-		++priv->tx_tail;
-		if (netif_queue_stopped(dev) &&
-		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
-		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
-			netif_wake_queue(dev);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
+	++priv->stats.tx_packets;
+	priv->stats.tx_bytes += tx_req->skb->len;
 
-		if (wc->status != IB_WC_SUCCESS &&
-		    wc->status != IB_WC_WR_FLUSH_ERR)
-			ipoib_warn(priv, "failed send event "
-				   "(status=%d, wrid=%d vend_err %x)\n",
-				   wc->status, wr_id, wc->vendor_err);
-	}
+	dev_kfree_skb_any(tx_req->skb);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	++priv->tx_tail;
+	if (netif_queue_stopped(dev) &&
+	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
+	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
+		netif_wake_queue(dev);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	if (wc->status != IB_WC_SUCCESS &&
+	    wc->status != IB_WC_WR_FLUSH_ERR)
+		ipoib_warn(priv, "failed send event "
+			   "(status=%d, wrid=%d vend_err %x)\n",
+			   wc->status, wr_id, wc->vendor_err);
+}
+
+static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	if (wc->wr_id & IPOIB_OP_RECV)
+		ipoib_ib_handle_rx_wc(dev, wc);
+	else
+		ipoib_ib_handle_tx_wc(dev, wc);
 }
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
-- 
GitLab


From c1a0b23bf477c2e1068905f4e2b5c3cee139e853 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Mon, 21 Aug 2006 16:40:12 -0700
Subject: [PATCH 0782/1063] IB/sa: Require SA registration

Require users to register with SA module, to prevent the sa_query
module text from going away while an SA query callback is still
running.  Update all in-tree users for the new interface.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cma.c                 |  7 ++-
 drivers/infiniband/core/sa_query.c            | 60 ++++++++++++++++---
 drivers/infiniband/ulp/ipoib/ipoib.h          |  2 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c     | 12 +++-
 .../infiniband/ulp/ipoib/ipoib_multicast.c    | 12 ++--
 drivers/infiniband/ulp/srp/ib_srp.c           |  9 ++-
 include/rdma/ib_sa.h                          | 41 ++++++++++---
 7 files changed, 116 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 488fa1d642a79..1178bd434d1b1 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -62,6 +62,7 @@ static struct ib_client cma_client = {
 	.remove = cma_remove_one
 };
 
+static struct ib_sa_client sa_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
@@ -1323,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
 	path_rec.numb_path = 1;
 
-	id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
 				id_priv->id.port_num, &path_rec,
 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
@@ -2199,12 +2200,15 @@ static int cma_init(void)
 	if (!cma_wq)
 		return -ENOMEM;
 
+	ib_sa_register_client(&sa_client);
+
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
 	return 0;
 
 err:
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	return ret;
 }
@@ -2212,6 +2216,7 @@ static int cma_init(void)
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	idr_destroy(&sdp_ps);
 	idr_destroy(&tcp_ps);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index ca8760a7d88c6..1706d3c7e95ea 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -75,6 +76,7 @@ struct ib_sa_device {
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
+	struct ib_sa_client    *client;
 	struct ib_sa_port      *port;
 	struct ib_mad_send_buf *mad_buf;
 	struct ib_sa_sm_ah     *sm_ah;
@@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
 	}
 }
 
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+	atomic_set(&client->users, 1);
+	init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+	atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+	if (atomic_dec_and_test(&client->users))
+		complete(&client->comp);
+}
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+	ib_sa_client_put(client);
+	wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
 /**
  * ib_sa_cancel_query - try to cancel an SA query
  * @id:ID of query to cancel
@@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:Path Record to send in query
@@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a query ID that can be used to cancel
  * the query.
  */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
  * @device:device to send request on
  * @port_num: port number to send request on
  * @method:SA method - should be get, set, or delete
@@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a request ID that can be used to cancel
  * the query.
  */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			    struct ib_device *device, u8 port_num, u8 method,
 			    struct ib_sa_service_rec *rec,
 			    ib_sa_comp_mask comp_mask,
 			    int timeout_ms, gfp_t gfp_mask,
@@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
@@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -889,6 +930,7 @@ static void send_handler(struct ib_mad_agent *agent,
 
 	ib_free_send_mad(mad_send_wc->send_buf);
 	kref_put(&query->sm_ah->ref, free_sm_ah);
+	ib_sa_client_put(query->client);
 	query->release(query);
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 474aa214ab57e..0b8a79d53a00d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,8 @@ static inline void ipoib_unregister_debugfs(void) { }
 extern int ipoib_sendq_size;
 extern int ipoib_recvq_size;
 
+extern struct ib_sa_client ipoib_sa_client;
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e9a7659eb1d79..ae3a4982cddbc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -82,6 +82,8 @@ static const u8 ipv4_bcast_addr[] = {
 
 struct workqueue_struct *ipoib_workqueue;
 
+struct ib_sa_client ipoib_sa_client;
+
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 
@@ -463,7 +465,7 @@ static int path_rec_start(struct net_device *dev,
 	init_completion(&path->done);
 
 	path->query_id =
-		ib_sa_path_rec_get(priv->ca, priv->port,
+		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
 				   &path->pathrec,
 				   IB_SA_PATH_REC_DGID		|
 				   IB_SA_PATH_REC_SGID		|
@@ -1191,13 +1193,16 @@ static int __init ipoib_init_module(void)
 		goto err_fs;
 	}
 
+	ib_sa_register_client(&ipoib_sa_client);
+
 	ret = ib_register_client(&ipoib_client);
 	if (ret)
-		goto err_wq;
+		goto err_sa;
 
 	return 0;
 
-err_wq:
+err_sa:
+	ib_sa_unregister_client(&ipoib_sa_client);
 	destroy_workqueue(ipoib_workqueue);
 
 err_fs:
@@ -1209,6 +1214,7 @@ static int __init ipoib_init_module(void)
 static void __exit ipoib_cleanup_module(void)
 {
 	ib_unregister_client(&ipoib_client);
+	ib_sa_unregister_client(&ipoib_sa_client);
 	ipoib_unregister_debugfs();
 	destroy_workqueue(ipoib_workqueue);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 60b09f5cb3476..fb3e4875a46d6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -361,7 +361,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 
 	init_completion(&mcast->done);
 
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
+	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
 				     IB_SA_MCMEMBER_REC_MGID		|
 				     IB_SA_MCMEMBER_REC_PORT_GID	|
 				     IB_SA_MCMEMBER_REC_PKEY		|
@@ -485,9 +485,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 
 	init_completion(&mcast->done);
 
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
-				     mcast->backoff * 1000, GFP_ATOMIC,
-				     ipoib_mcast_join_complete,
+	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
+				     &rec, comp_mask, mcast->backoff * 1000,
+				     GFP_ATOMIC, ipoib_mcast_join_complete,
 				     mcast, &mcast->query);
 
 	if (ret < 0) {
@@ -528,7 +528,7 @@ void ipoib_mcast_join_task(void *dev_ptr)
 			priv->local_rate = attr.active_speed *
 				ib_width_enum_to_int(attr.active_width);
 		} else
-			ipoib_warn(priv, "ib_query_port failed\n");
+		ipoib_warn(priv, "ib_query_port failed\n");
 	}
 
 	if (!priv->broadcast) {
@@ -681,7 +681,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 	 * Just make one shot at leaving and don't wait for a reply;
 	 * if we fail, too bad.
 	 */
-	ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec,
+	ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
 					IB_SA_MCMEMBER_REC_MGID		|
 					IB_SA_MCMEMBER_REC_PORT_GID	|
 					IB_SA_MCMEMBER_REC_PKEY		|
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index feb1fcd0f2fb1..44b9e5be66879 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -96,6 +96,8 @@ static struct ib_client srp_client = {
 	.remove = srp_remove_one
 };
 
+static struct ib_sa_client srp_sa_client;
+
 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
 {
 	return (struct srp_target_port *) host->hostdata;
@@ -267,7 +269,8 @@ static int srp_lookup_path(struct srp_target_port *target)
 
 	init_completion(&target->done);
 
-	target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev,
+	target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
+						   target->srp_host->dev->dev,
 						   target->srp_host->port,
 						   &target->path,
 						   IB_SA_PATH_REC_DGID		|
@@ -1998,9 +2001,12 @@ static int __init srp_init_module(void)
 		return ret;
 	}
 
+	ib_sa_register_client(&srp_sa_client);
+
 	ret = ib_register_client(&srp_client);
 	if (ret) {
 		printk(KERN_ERR PFX "couldn't register IB client\n");
+		ib_sa_unregister_client(&srp_sa_client);
 		class_unregister(&srp_class);
 		return ret;
 	}
@@ -2011,6 +2017,7 @@ static int __init srp_init_module(void)
 static void __exit srp_cleanup_module(void)
 {
 	ib_unregister_client(&srp_client);
+	ib_sa_unregister_client(&srp_sa_client);
 	class_unregister(&srp_class);
 }
 
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index c99e4420fd7ec..58bb5f716fe36 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -36,8 +37,11 @@
 #ifndef IB_SA_H
 #define IB_SA_H
 
+#include <linux/completion.h>
 #include <linux/compiler.h>
 
+#include <asm/atomic.h>
+
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_mad.h>
 
@@ -250,11 +254,28 @@ struct ib_sa_service_rec {
 	u64		data64[2];
 };
 
+struct ib_sa_client {
+	atomic_t users;
+	struct completion comp;
+};
+
+/**
+ * ib_sa_register_client - Register an SA client.
+ */
+void ib_sa_register_client(struct ib_sa_client *client);
+
+/**
+ * ib_sa_unregister_client - Deregister an SA client.
+ * @client: Client object to deregister.
+ */
+void ib_sa_unregister_client(struct ib_sa_client *client);
+
 struct ib_sa_query;
 
 void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -264,7 +285,8 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 		       void *context,
 		       struct ib_sa_query **query);
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
@@ -275,7 +297,8 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 			     void *context,
 			     struct ib_sa_query **query);
 
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			 struct ib_device *device, u8 port_num,
 			 u8 method,
 			 struct ib_sa_service_rec *rec,
 			 ib_sa_comp_mask comp_mask,
@@ -288,6 +311,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
 
 /**
  * ib_sa_mcmember_rec_set - Start an MCMember set query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:MCMember Record to send in query
@@ -311,7 +335,8 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
  * cancel the query.
  */
 static inline int
-ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
+ib_sa_mcmember_rec_set(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_mcmember_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -321,7 +346,7 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
 		       void *context,
 		       struct ib_sa_query **query)
 {
-	return ib_sa_mcmember_rec_query(device, port_num,
+	return ib_sa_mcmember_rec_query(client, device, port_num,
 					IB_MGMT_METHOD_SET,
 					rec, comp_mask,
 					timeout_ms, gfp_mask, callback,
@@ -330,6 +355,7 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
 
 /**
  * ib_sa_mcmember_rec_delete - Start an MCMember delete query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:MCMember Record to send in query
@@ -353,7 +379,8 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
  * cancel the query.
  */
 static inline int
-ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
+ib_sa_mcmember_rec_delete(struct ib_sa_client *client,
+			  struct ib_device *device, u8 port_num,
 			  struct ib_sa_mcmember_rec *rec,
 			  ib_sa_comp_mask comp_mask,
 			  int timeout_ms, gfp_t gfp_mask,
@@ -363,7 +390,7 @@ ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
 			  void *context,
 			  struct ib_sa_query **query)
 {
-	return ib_sa_mcmember_rec_query(device, port_num,
+	return ib_sa_mcmember_rec_query(client, device, port_num,
 					IB_SA_METHOD_DELETE,
 					rec, comp_mask,
 					timeout_ms, gfp_mask, callback,
-- 
GitLab


From a70d059009f4a207e2a9c794f40fc8c870096d54 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Mon, 28 Aug 2006 16:32:50 +0300
Subject: [PATCH 0783/1063] IB/cm: Do not track remote QPN in timewait state

Do not track remote QPN in TimeWait state, since QP is not connected.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index e130d2e895152..f35fcc4c06389 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -685,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 {
 	int wait_time;
 
+	cm_cleanup_timewait(cm_id_priv->timewait_info);
+
 	/*
 	 * The cm_id could be destroyed by the user before we exit timewait.
 	 * To protect against this, we search for the cm_id after exiting
-- 
GitLab


From 07eeec0627e93a1a753c4df004a97a4d0a7b9ceb Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@voltaire.com>
Date: Tue, 12 Sep 2006 09:03:33 -0700
Subject: [PATCH 0784/1063] RDMA/cma: Document rdma_destroy_id() function

Clarify that rdma_destroy_id cancels outstanding asynchronous operations on the
associated id.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/rdma/rdma_cm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 402c63d7226ba..1566be568ab6e 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -117,6 +117,14 @@ struct rdma_cm_id {
 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps);
 
+/**
+  * rdma_destroy_id - Destroys an RDMA identifier.
+  *
+  * @id: RDMA identifier.
+  *
+  * Note: calling this function has the effect of canceling in-flight
+  * asynchronous operations associated with the id.
+  */
 void rdma_destroy_id(struct rdma_cm_id *id);
 
 /**
-- 
GitLab


From b3b30f5e8a0c50db3d76b6f7c7cc50245aeb57fd Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@mellanox.co.il>
Date: Tue, 15 Aug 2006 21:11:18 +0300
Subject: [PATCH 0785/1063] IB/mthca: Recover from catastrophic errors

Trigger device remove and then add when a catastrophic error is
detected in hardware.  This, in turn, will cause a device reset, which
we hope will recover from the catastrophic condition.

Since this might interfere with debugging the root cause, add a
module option to suppress this behaviour.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_catas.c | 62 ++++++++++++++++
 drivers/infiniband/hw/mthca/mthca_dev.h   |  7 ++
 drivers/infiniband/hw/mthca/mthca_main.c  | 88 +++++++++++++++++------
 3 files changed, 136 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index c3bec7490f52e..cd044ea2dfa42 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -34,6 +34,7 @@
 
 #include <linux/jiffies.h>
 #include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include "mthca_dev.h"
 
@@ -48,9 +49,41 @@ enum {
 
 static DEFINE_SPINLOCK(catas_lock);
 
+static LIST_HEAD(catas_list);
+static struct workqueue_struct *catas_wq;
+static struct work_struct catas_work;
+
+static int catas_reset_disable;
+module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
+
+static void catas_reset(void *work_ptr)
+{
+	struct mthca_dev *dev, *tmpdev;
+	LIST_HEAD(tlist);
+	int ret;
+
+	mutex_lock(&mthca_device_mutex);
+
+	spin_lock_irq(&catas_lock);
+	list_splice_init(&catas_list, &tlist);
+	spin_unlock_irq(&catas_lock);
+
+	list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+		ret = __mthca_restart_one(dev->pdev);
+		if (ret)
+			mthca_err(dev, "Reset failed (%d)\n", ret);
+		else
+			mthca_dbg(dev, "Reset succeeded\n");
+	}
+
+	mutex_unlock(&mthca_device_mutex);
+}
+
 static void handle_catas(struct mthca_dev *dev)
 {
 	struct ib_event event;
+	unsigned long flags;
 	const char *type;
 	int i;
 
@@ -82,6 +115,14 @@ static void handle_catas(struct mthca_dev *dev)
 	for (i = 0; i < dev->catas_err.size; ++i)
 		mthca_err(dev, "  buf[%02x]: %08x\n",
 			  i, swab32(readl(dev->catas_err.map + i)));
+
+	if (catas_reset_disable)
+		return;
+
+	spin_lock_irqsave(&catas_lock, flags);
+	list_add(&dev->catas_err.list, &catas_list);
+	queue_work(catas_wq, &catas_work);
+	spin_unlock_irqrestore(&catas_lock, flags);
 }
 
 static void poll_catas(unsigned long dev_ptr)
@@ -135,6 +176,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
 	dev->catas_err.timer.data     = (unsigned long) dev;
 	dev->catas_err.timer.function = poll_catas;
 	dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+	INIT_LIST_HEAD(&dev->catas_err.list);
 	add_timer(&dev->catas_err.timer);
 }
 
@@ -153,4 +195,24 @@ void mthca_stop_catas_poll(struct mthca_dev *dev)
 				    dev->catas_err.addr),
 				   dev->catas_err.size * 4);
 	}
+
+	spin_lock_irq(&catas_lock);
+	list_del(&dev->catas_err.list);
+	spin_unlock_irq(&catas_lock);
+}
+
+int __init mthca_catas_init(void)
+{
+	INIT_WORK(&catas_work, catas_reset, NULL);
+
+	catas_wq = create_singlethread_workqueue("mthca_catas");
+	if (!catas_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mthca_catas_cleanup(void)
+{
+	destroy_workqueue(catas_wq);
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 33bd0b8bfd130..fe5cecf70fedd 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -45,6 +45,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/timer.h>
 #include <linux/mutex.h>
+#include <linux/list.h>
 
 #include <asm/semaphore.h>
 
@@ -283,8 +284,11 @@ struct mthca_catas_err {
 	unsigned long		stop;
 	u32			size;
 	struct timer_list	timer;
+	struct list_head	list;
 };
 
+extern struct mutex mthca_device_mutex;
+
 struct mthca_dev {
 	struct ib_device  ib_dev;
 	struct pci_dev   *pdev;
@@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthca_dev *dev);
 
 void mthca_start_catas_poll(struct mthca_dev *dev);
 void mthca_stop_catas_poll(struct mthca_dev *dev);
+int __mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
 
 int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
 void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 7b82c1907f042..47ea021483684 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -80,6 +80,8 @@ static int tune_pci = 0;
 module_param(tune_pci, int, 0444);
 MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
 
+struct mutex mthca_device_mutex;
+
 static const char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -978,28 +980,15 @@ static struct {
 					MTHCA_FLAG_SINAI_OPT }
 };
 
-static int __devinit mthca_init_one(struct pci_dev *pdev,
-				    const struct pci_device_id *id)
+static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 {
-	static int mthca_version_printed = 0;
 	int ddr_hidden = 0;
 	int err;
 	struct mthca_dev *mdev;
 
-	if (!mthca_version_printed) {
-		printk(KERN_INFO "%s", mthca_version);
-		++mthca_version_printed;
-	}
-
 	printk(KERN_INFO PFX "Initializing %s\n",
 	       pci_name(pdev));
 
-	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
-		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
-		       pci_name(pdev), id->driver_data);
-		return -ENODEV;
-	}
-
 	err = pci_enable_device(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot enable PCI device, "
@@ -1065,7 +1054,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 
 	mdev->pdev = pdev;
 
-	mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
+	mdev->mthca_flags = mthca_hca_table[hca_type].flags;
 	if (ddr_hidden)
 		mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
 
@@ -1099,13 +1088,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 	if (err)
 		goto err_cmd;
 
-	if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
+	if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
 		mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
 			   (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
 			   (int) (mdev->fw_ver & 0xffff),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff,
-			   (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff));
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 32),
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
+			   (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
 		mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
 	}
 
@@ -1122,6 +1111,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 		goto err_unregister;
 
 	pci_set_drvdata(pdev, mdev);
+	mdev->hca_type = hca_type;
 
 	return 0;
 
@@ -1166,7 +1156,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 	return err;
 }
 
-static void __devexit mthca_remove_one(struct pci_dev *pdev)
+static void __mthca_remove_one(struct pci_dev *pdev)
 {
 	struct mthca_dev *mdev = pci_get_drvdata(pdev);
 	u8 status;
@@ -1211,6 +1201,51 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
 	}
 }
 
+int __mthca_restart_one(struct pci_dev *pdev)
+{
+	struct mthca_dev *mdev;
+
+	mdev = pci_get_drvdata(pdev);
+	if (!mdev)
+		return -ENODEV;
+	__mthca_remove_one(pdev);
+	return __mthca_init_one(pdev, mdev->hca_type);
+}
+
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	static int mthca_version_printed = 0;
+	int ret;
+
+	mutex_lock(&mthca_device_mutex);
+
+	if (!mthca_version_printed) {
+		printk(KERN_INFO "%s", mthca_version);
+		++mthca_version_printed;
+	}
+
+	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
+		       pci_name(pdev), id->driver_data);
+		mutex_unlock(&mthca_device_mutex);
+		return -ENODEV;
+	}
+
+	ret = __mthca_init_one(pdev, id->driver_data);
+
+	mutex_unlock(&mthca_device_mutex);
+
+	return ret;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+	mutex_lock(&mthca_device_mutex);
+	__mthca_remove_one(pdev);
+	mutex_unlock(&mthca_device_mutex);
+}
+
 static struct pci_device_id mthca_pci_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
 	  .driver_data = TAVOR },
@@ -1248,13 +1283,24 @@ static int __init mthca_init(void)
 {
 	int ret;
 
+	mutex_init(&mthca_device_mutex);
+	ret = mthca_catas_init();
+	if (ret)
+		return ret;
+
 	ret = pci_register_driver(&mthca_driver);
-	return ret < 0 ? ret : 0;
+	if (ret < 0) {
+		mthca_catas_cleanup();
+		return ret;
+	}
+
+	return 0;
 }
 
 static void __exit mthca_cleanup(void)
 {
 	pci_unregister_driver(&mthca_driver);
+	mthca_catas_cleanup();
 }
 
 module_init(mthca_init);
-- 
GitLab


From 951f7fc1372da3d826b1d975b3cc5e3db92af5d0 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@voltaire.com>
Date: Fri, 22 Sep 2006 15:22:54 -0700
Subject: [PATCH 0786/1063] RDMA/cma: Document rdma_accept() error handling

Document that, on error, rdma_accept() sends a reject message to the
remote side and moves the QP to the error state.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/rdma/rdma_cm.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 1566be568ab6e..deb5a0a4cee5c 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -245,6 +245,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog);
  * Typically, this routine is only called by the listener to accept a connection
  * request.  It must also be called on the active side of a connection if the
  * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
  */
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
 
-- 
GitLab


From d35cc330a2058a32410ef42784b8d3b942f37b8b Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 22 Sep 2006 15:22:55 -0700
Subject: [PATCH 0787/1063] IB/mthca: Simplify calls to mthca_cq_clean()

If a QP has separate send and receive CQs, then the send CQ will never
have receive completions from that QP in it.  So when cleaning the
send CQ, there's no need to pass in an SRQ pointer, even if the QP is
attached to an SRQ.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_qp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 9324b6204ac57..5e5c58b9920b5 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -845,11 +845,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 	 * entries and reinitialize the QP.
 	 */
 	if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+		mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+			mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
 
 		mthca_wq_reset(&qp->sq);
 		qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
-- 
GitLab


From 5755d6dad95808a24a65dd9e61e23c305f9b077c Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 22 Sep 2006 15:22:55 -0700
Subject: [PATCH 0788/1063] IB/iser: INFINIBAND_ISER depends on INET

iSER won't build without CONFIG_INET enabled, so make Kconfig reflect that.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/iser/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index fead87d1eff95..365a1b5f19e04 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_ISER
 	tristate "ISCSI RDMA Protocol"
-	depends on INFINIBAND && SCSI
+	depends on INFINIBAND && SCSI && INET
 	select SCSI_ISCSI_ATTRS
 	---help---
 	  Support for the ISCSI RDMA Protocol over InfiniBand.  This
-- 
GitLab


From aec79fcc3ea3b536a2788b4e22b7ebabbb176485 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Mon, 18 Sep 2006 22:17:08 +0300
Subject: [PATCH 0789/1063] IB/sa: fix ib_sa_selector names

Relevant SA queries are actually "greater than" / "less than", not
"greater than or equal" / "less than or equal" as the names imply.
(See IB spec 1.2 Vol 1, 15.2.5.16 PATHRECORD/Table 205 PathRecord)

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/rdma/ib_sa.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 58bb5f716fe36..97715b0c20b69 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -83,8 +83,8 @@ enum {
 };
 
 enum ib_sa_selector {
-	IB_SA_GTE  = 0,
-	IB_SA_LTE  = 1,
+	IB_SA_GT   = 0,
+	IB_SA_LT   = 1,
 	IB_SA_EQ   = 2,
 	/*
 	 * The meaning of "best" depends on the attribute: for
-- 
GitLab


From d0df6d6d4539241179a1ef5394787825bf05bbce Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 22 Sep 2006 15:22:56 -0700
Subject: [PATCH 0790/1063] IPoIB: Create MCGs with all attributes required by
 RFC

RFC 4391 ("Transmission of IP over InfiniBand (IPoIB)") says:

  If the IB multicast group does not already exist, one must be
  created first with the IPoIB link MTU.  The MGID MUST use the same
  P_Key, Q_Key, SL, MTU, and HopLimit as those used in the
  broadcast-GID.  The rest of attributes SHOULD follow the values used
  in the broadcast-GID as well.

However, the current IPoIB driver is only setting the attributes
required by the InfiniBand spec to create a multicast group, so in
particular the MTU and HopLimit are not being set.  Add these
attributes when creating MCGs, and also set the Rate attribute, since
IPoIB pays attention to that attribute as well.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 .../infiniband/ulp/ipoib/ipoib_multicast.c    | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fb3e4875a46d6..3faa1820f0e98 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -472,15 +472,25 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 
 	if (create) {
 		comp_mask |=
-			IB_SA_MCMEMBER_REC_QKEY		|
-			IB_SA_MCMEMBER_REC_SL		|
-			IB_SA_MCMEMBER_REC_FLOW_LABEL	|
-			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+			IB_SA_MCMEMBER_REC_QKEY			|
+			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
+			IB_SA_MCMEMBER_REC_MTU			|
+			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
+			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
+			IB_SA_MCMEMBER_REC_RATE			|
+			IB_SA_MCMEMBER_REC_SL			|
+			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
+			IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
 		rec.qkey	  = priv->broadcast->mcmember.qkey;
+		rec.mtu_selector  = IB_SA_EQ;
+		rec.mtu		  = priv->broadcast->mcmember.mtu;
+		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+		rec.rate_selector = IB_SA_EQ;
+		rec.rate	  = priv->broadcast->mcmember.rate;
 		rec.sl		  = priv->broadcast->mcmember.sl;
 		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
-		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
 	init_completion(&mcast->done);
-- 
GitLab


From 5ccd025553d73e523212ee0860b7f4a75e886bfa Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Fri, 22 Sep 2006 15:22:56 -0700
Subject: [PATCH 0791/1063] IPoIB: Rejoin all multicast groups after a port
 event

When ipoib_ib_dev_flush() is called because of a port event, the
driver needs to rejoin all multicast groups, since the flush will call
ipoib_mcast_dev_flush() (via ipoib_ib_dev_down()).  Otherwise no
(non-broadcast) multicast groups will be rejoined until the networking
core calls ->set_multicast_list again, and so multicast reception will
be broken for potentially a long time.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 722177ea069b7..240befdf90dc9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -631,8 +631,10 @@ void ipoib_ib_dev_flush(void *_dev)
 	 * The device could have been brought down between the start and when
 	 * we get here, don't bring it back up if it's not configured up
 	 */
-	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
 		ipoib_ib_dev_up(dev);
+		ipoib_mcast_restart_task(dev);
+	}
 
 	mutex_lock(&priv->vlan_mutex);
 
-- 
GitLab


From 507c33504686e733a14ef0b2dc9db0c20fae4653 Mon Sep 17 00:00:00 2001
From: Dotan Barak <dotanb@dev.mellanox.co.il>
Date: Thu, 21 Sep 2006 18:26:43 +0300
Subject: [PATCH 0792/1063] IPoIB: Remove unused include of vmalloc.h

IPoIB doesn't use anything from <linux/vmalloc.h>, so don't include it.

Signed-off-by: Dotan Barak <dotanb@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ae3a4982cddbc..867d62742054f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,7 +40,6 @@
 
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
-- 
GitLab


From a8bfca024326560d86c6323b0504288ca55a75fc Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Fri, 22 Sep 2006 15:22:58 -0700
Subject: [PATCH 0793/1063] IPoIB: Add some likely/unlikely annotations in hot
 path

Signed-off-by: Eli Cohen <eli@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c   | 2 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 240befdf90dc9..f426a69d9a436 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -332,7 +332,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	struct ipoib_tx_buf *tx_req;
 	dma_addr_t addr;
 
-	if (skb->len > dev->mtu + INFINIBAND_ALEN) {
+	if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
 			   skb->len, dev->mtu + INFINIBAND_ALEN);
 		++priv->stats.tx_dropped;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 867d62742054f..1eaf00e9862c5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -620,7 +620,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
-	if (!spin_trylock_irqsave(&priv->tx_lock, flags))
+	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
 		return NETDEV_TX_LOCKED;
 
 	/*
@@ -633,7 +633,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_BUSY;
 	}
 
-	if (skb->dst && skb->dst->neighbour) {
+	if (likely(skb->dst && skb->dst->neighbour)) {
 		if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
 			ipoib_path_lookup(skb, dev);
 			goto out;
-- 
GitLab


From 9cd330d36b32ed48d49561b165842db20bd153cc Mon Sep 17 00:00:00 2001
From: Krishna Kumar <krkumar2@in.ibm.com>
Date: Fri, 22 Sep 2006 15:22:58 -0700
Subject: [PATCH 0794/1063] IB: Fix typo in kerneldoc for ib_set_client_data()

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d978fbe975355..63d2a39fb82c8 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
 EXPORT_SYMBOL(ib_get_client_data);
 
 /**
- * ib_set_client_data - Get IB client context
+ * ib_set_client_data - Set IB client context
  * @device:Device to set context for
  * @client:Client to set context for
  * @data:Context to set
-- 
GitLab


From ddad65df0048e210c93640b59b3bad12701febb6 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 22 Sep 2006 19:15:23 -0400
Subject: [PATCH 0795/1063] [CPUFREQ] Fix some more CPU hotplug locking.

Lukewarm IQ detected in hotplug locking
BUG: warning at kernel/cpu.c:38/lock_cpu_hotplug()
[<b0134a42>] lock_cpu_hotplug+0x42/0x65
[<b02f8af1>] cpufreq_update_policy+0x25/0xad
[<b0358756>] kprobe_flush_task+0x18/0x40
[<b0355aab>] schedule+0x63f/0x68b
[<b01377c2>] __link_module+0x0/0x1f
[<b0119e7d>] __cond_resched+0x16/0x34
[<b03560bf>] cond_resched+0x26/0x31
[<b0355b0e>] wait_for_completion+0x17/0xb1
[<f965c547>] cpufreq_stat_cpu_callback+0x13/0x20 [cpufreq_stats]
[<f9670074>] cpufreq_stats_init+0x74/0x8b [cpufreq_stats]
[<b0137872>] sys_init_module+0x91/0x174
[<b0102c81>] sysenter_past_esp+0x56/0x79

As there are other places that call cpufreq_update_policy without
the hotplug lock, it seems better to keep the hotplug locking
at the lower level for the time being until this is revamped.

Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/cpufreq/cpufreq_stats.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 25eee5394201d..c2ecc599dc5f3 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -350,12 +350,10 @@ __init cpufreq_stats_init(void)
 	}
 
 	register_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
-	lock_cpu_hotplug();
 	for_each_online_cpu(cpu) {
 		cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE,
 			(void *)(long)cpu);
 	}
-	unlock_cpu_hotplug();
 	return 0;
 }
 static void
-- 
GitLab


From 24669f7d00d387799fc6a39452ab22d7f078f043 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 12 Sep 2006 18:55:53 -0700
Subject: [PATCH 0796/1063] [CPUFREQ] sw_any_bug_dmi_table can be used on
 resume, so it isn't initdata

sw_any_bug_dmi_table can be used on resume, so it isn't initdata.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index dba6bb28d2981..7a9325349e949 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -393,7 +393,7 @@ static int __init sw_any_bug_found(struct dmi_system_id *d)
 }
 
 
-static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = {
+static struct dmi_system_id sw_any_bug_dmi_table[] = {
 	{
 		.callback = sw_any_bug_found,
 		.ident = "Supermicro Server X6DLP",
-- 
GitLab


From a83fbf635992442edf6aa3252e4008d4a08edf12 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 00:10:18 +0100
Subject: [PATCH 0797/1063] [PATCH] fix missing ifdefs in syscall classes
 hookup for generic targets

Several targets have no *at() syscall family, and m32r calls its only chown
variant chown32(), with __NR_chown being undefined.  creat(2) is also absent
in some targets.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-generic/audit_change_attr.h | 4 ++++
 include/asm-generic/audit_dir_write.h   | 4 ++++
 lib/audit.c                             | 2 ++
 3 files changed, 10 insertions(+)

diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h
index cb05bf69745a5..50764550a60c3 100644
--- a/include/asm-generic/audit_change_attr.h
+++ b/include/asm-generic/audit_change_attr.h
@@ -1,16 +1,20 @@
 __NR_chmod,
 __NR_fchmod,
+#ifdef __NR_chown
 __NR_chown,
 __NR_fchown,
 __NR_lchown,
+#endif
 __NR_setxattr,
 __NR_lsetxattr,
 __NR_fsetxattr,
 __NR_removexattr,
 __NR_lremovexattr,
 __NR_fremovexattr,
+#ifdef __NR_fchownat
 __NR_fchownat,
 __NR_fchmodat,
+#endif
 #ifdef __NR_chown32
 __NR_chown32,
 __NR_fchown32,
diff --git a/include/asm-generic/audit_dir_write.h b/include/asm-generic/audit_dir_write.h
index 161a7a58fbab5..6621bd82cbe82 100644
--- a/include/asm-generic/audit_dir_write.h
+++ b/include/asm-generic/audit_dir_write.h
@@ -1,14 +1,18 @@
 __NR_rename,
 __NR_mkdir,
 __NR_rmdir,
+#ifdef __NR_creat
 __NR_creat,
+#endif
 __NR_link,
 __NR_unlink,
 __NR_symlink,
 __NR_mknod,
+#ifdef __NR_mkdirat
 __NR_mkdirat,
 __NR_mknodat,
 __NR_unlinkat,
 __NR_renameat,
 __NR_linkat,
 __NR_symlinkat,
+#endif
diff --git a/lib/audit.c b/lib/audit.c
index 8c21625ef9382..3b1289fadf06b 100644
--- a/lib/audit.c
+++ b/lib/audit.c
@@ -28,8 +28,10 @@ int audit_classify_syscall(int abi, unsigned syscall)
 	switch(syscall) {
 	case __NR_open:
 		return 2;
+#ifdef __NR_openat
 	case __NR_openat:
 		return 3;
+#endif
 #ifdef __NR_socketcall
 	case __NR_socketcall:
 		return 4;
-- 
GitLab


From cc9bd99e9adfa4f44ea050a63fb41a3f764acf84 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:18:41 +0100
Subject: [PATCH 0798/1063] [PATCH] fix ancient breakage in ebus_init()

Back when pci_dev had base_address[], loop of form
	base = &...->base_address[0];
	for (.....) {
		...
		*base++ = addr;
	}
was fine, but when that array got spread in ->resource[...].start
replacing the initialization with
	base = &...->resource[0].start;
was not a sufficient modification.  IOW this code got broken for cases
when there had been more than one resource to fill.  All the way back in
2.3.41-pre3...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc/kernel/ebus.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
index 81c0cbd96ff01..75ac24d229b1d 100644
--- a/arch/sparc/kernel/ebus.c
+++ b/arch/sparc/kernel/ebus.c
@@ -277,7 +277,7 @@ void __init ebus_init(void)
 	struct pci_dev *pdev;
 	struct pcidev_cookie *cookie;
 	struct device_node *dp;
-	unsigned long addr, *base;
+	struct resource *p;
 	unsigned short pci_command;
 	int len, reg, nreg;
 	int num_ebus = 0;
@@ -321,13 +321,12 @@ void __init ebus_init(void)
 		}
 		nreg = len / sizeof(struct linux_prom_pci_registers);
 
-		base = &ebus->self->resource[0].start;
+		p = &ebus->self->resource[0];
 		for (reg = 0; reg < nreg; reg++) {
 			if (!(regs[reg].which_io & 0x03000000))
 				continue;
 
-			addr = regs[reg].phys_lo;
-			*base++ = addr;
+			(p++)->start = regs[reg].phys_lo;
 		}
 
 		ebus->ofdev.node = dp;
-- 
GitLab


From 634965f5cfda1763f51e7916cfa49265b70e2a8d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:20:31 +0100
Subject: [PATCH 0799/1063] [PATCH] memcpy_fromio() missing in istallion

memcpy() from iomem is a bad thing...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/istallion.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 84dfc4278139e..8c09997cc3d63 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -3488,7 +3488,7 @@ static int stli_initecp(stlibrd_t *brdp)
  */
 	EBRDENABLE(brdp);
 	sigsp = (cdkecpsig_t __iomem *) EBRDGETMEMPTR(brdp, CDK_SIGADDR);
-	memcpy(&sig, sigsp, sizeof(cdkecpsig_t));
+	memcpy_fromio(&sig, sigsp, sizeof(cdkecpsig_t));
 	EBRDDISABLE(brdp);
 
 	if (sig.magic != cpu_to_le32(ECP_MAGIC))
-- 
GitLab


From 55ae922323c90fdcb733c13ccf0da2ee72763913 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:24:25 +0100
Subject: [PATCH 0800/1063] [PATCH] aoa is pmac-only

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 sound/aoa/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/aoa/Kconfig b/sound/aoa/Kconfig
index 2f4334d19ccd2..5d5813cec4c85 100644
--- a/sound/aoa/Kconfig
+++ b/sound/aoa/Kconfig
@@ -1,5 +1,5 @@
 menu "Apple Onboard Audio driver"
-	depends on SND!=n && PPC
+	depends on SND!=n && PPC_PMAC
 
 config SND_AOA
 	tristate "Apple Onboard Audio driver"
-- 
GitLab


From 00ddaf20b0049c65ddd0c2b1cbed16c7a433e47c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:22:46 +0100
Subject: [PATCH 0801/1063] [PATCH] sanitize frv archclean

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/frv/Makefile      | 5 +----
 arch/frv/boot/Makefile | 3 ++-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/frv/Makefile b/arch/frv/Makefile
index d163747d17c0e..038e3a8457e0b 100644
--- a/arch/frv/Makefile
+++ b/arch/frv/Makefile
@@ -108,11 +108,8 @@ Image: vmlinux
 bootstrap:
 	$(Q)$(MAKEBOOT) bootstrap
 
-archmrproper:
-	$(Q)$(MAKE) $(build)=arch/frv/boot mrproper
-
 archclean:
-	$(Q)$(MAKE) $(build)=arch/frv/boot clean
+	$(Q)$(MAKE) $(clean)=arch/frv/boot
 
 archdep: scripts/mkdep symlinks
 	$(Q)$(MAKE) $(build)=arch/frv/boot dep
diff --git a/arch/frv/boot/Makefile b/arch/frv/boot/Makefile
index 5dfc93fd945a9..dc6f03824423c 100644
--- a/arch/frv/boot/Makefile
+++ b/arch/frv/boot/Makefile
@@ -8,6 +8,8 @@
 # Copyright (C) 1995-2000 Russell King
 #
 
+targets := Image zImage bootpImage
+
 SYSTEM	=$(TOPDIR)/$(LINUX)
 
 ZTEXTADDR	 = 0x02080000
@@ -66,7 +68,6 @@ zinstall: $(CONFIGURE) zImage
 # miscellany
 #
 mrproper clean:
-	$(RM) Image zImage bootpImage
 #	@$(MAKE) -C compressed clean
 #	@$(MAKE) -C bootp clean
 
-- 
GitLab


From a07562e03a3f4a1276931e3fb3cb532622a6c616 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:25:18 +0100
Subject: [PATCH 0802/1063] [PATCH] asm/backlight.h is ppc-only

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/macintosh/adbhid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index c69d23bb255e5..efd51e01c06eb 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -45,8 +45,8 @@
 #include <linux/pmu.h>
 
 #include <asm/machdep.h>
-#include <asm/backlight.h>
 #ifdef CONFIG_PPC_PMAC
+#include <asm/backlight.h>
 #include <asm/pmac_feature.h>
 #endif
 
-- 
GitLab


From 5932ef077716e3e798eaba6738ef874849f62a17 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:26:02 +0100
Subject: [PATCH 0803/1063] [PATCH] sun4: fix sbus_setup_iommu()

iommu_init() and iounit_init() are never called for sun4, but that's not
enough - these calls should be ifdefed out since the functions in question
simply do not exist for CONFIG_SUN4 kernel.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc/kernel/ioport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 8654b446ac9ed..d33f8a07ccaca 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -508,6 +508,7 @@ void __init sbus_arch_bus_ranges_init(struct device_node *pn, struct sbus_bus *s
 
 void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp)
 {
+#ifndef CONFIG_SUN4
 	struct device_node *parent = dp->parent;
 
 	if (sparc_cpu_model != sun4d &&
@@ -524,6 +525,7 @@ void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp)
 
 		iounit_init(dp->node, parent->node, sbus);
 	}
+#endif
 }
 
 void __init sbus_setup_arch_props(struct sbus_bus *sbus, struct device_node *dp)
-- 
GitLab


From 956295d50dc5462722f029de64d44a7ecba54e69 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:27:30 +0100
Subject: [PATCH 0804/1063] [PATCH] fix the survivors of fbcon_vbl_handler()
 renaming

In

|Author: James Simmons <jsimmons@kozmo.(none)>
|Date:   Thu Mar 13 22:37:08 2003 -0800
|
|    [FBCON] Cursor handling clean up. I nuked several static variables.

we have

-static void fbcon_vbl_handler(int irq, void *dummy, struct pt_regs *fp)
+static void fb_vbl_handler(int irq, void *dev_id, struct pt_regs *fp)

and 3 years later a couple of instances missed back then still remain
there.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/video/console/fbcon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 390439b3d899e..1b4f75d1f8a94 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -3197,11 +3197,11 @@ static void fbcon_exit(void)
 		return;
 
 #ifdef CONFIG_ATARI
-	free_irq(IRQ_AUTO_4, fbcon_vbl_handler);
+	free_irq(IRQ_AUTO_4, fb_vbl_handler);
 #endif
 #ifdef CONFIG_MAC
 	if (MACH_IS_MAC && vbl_detected)
-		free_irq(IRQ_MAC_VBL, fbcon_vbl_handler);
+		free_irq(IRQ_MAC_VBL, fb_vbl_handler);
 #endif
 
 	kfree((void *)softback_buf);
-- 
GitLab


From c03efdb202a4882f426ce49766859af4058c9b8a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 01:29:34 +0100
Subject: [PATCH 0805/1063] [PATCH] fallout from hid-core patch

missing le16_to_cpu()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/usb/input/hid-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c
index a2c56b2de5898..3305fb6079eb1 100644
--- a/drivers/usb/input/hid-core.c
+++ b/drivers/usb/input/hid-core.c
@@ -1818,7 +1818,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 	int n, len, insize = 0;
 
         /* Ignore all Wacom devices */
-        if (dev->descriptor.idVendor == USB_VENDOR_ID_WACOM)
+        if (le16_to_cpu(dev->descriptor.idVendor) == USB_VENDOR_ID_WACOM)
                 return NULL;
 
 	for (n = 0; hid_blacklist[n].idVendor; n++)
-- 
GitLab


From 1c3c07e9f6cc50dab2aeb8051325e317d4f6c70e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 25 Jul 2006 11:28:18 -0400
Subject: [PATCH 0806/1063] NFS: Add a new ACCESS rpc call cache to the linux
 nfs client

The current access cache only allows one entry at a time to be cached for each
inode. Add a per-inode red-black tree in order to allow more than one to
be cached at a time.

Should significantly cut down the time spent in path traversal for shared
directories such as ${PATH}, /usr/share, etc.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c           | 133 +++++++++++++++++++++++++++++++++++------
 fs/nfs/inode.c         |  13 ++--
 include/linux/nfs_fs.h |   5 +-
 3 files changed, 124 insertions(+), 27 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7ffb4deb3e5f..094afded2b115 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1638,35 +1638,134 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
-int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static void nfs_access_free_entry(struct nfs_access_entry *entry)
+{
+	put_rpccred(entry->cred);
+	kfree(entry);
+}
+
+static void __nfs_access_zap_cache(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	struct rb_root *root_node = &nfsi->access_cache;
+	struct rb_node *n, *dispose = NULL;
+	struct nfs_access_entry *entry;
+
+	/* Unhook entries from the cache */
+	while ((n = rb_first(root_node)) != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+		rb_erase(n, root_node);
+		n->rb_left = dispose;
+		dispose = n;
+	}
+	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
 
-	if (cache->cred != cred
-			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-			|| (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
-		return -ENOENT;
-	memcpy(res, cache, sizeof(*res));
-	return 0;
+	/* Now kill them all! */
+	while (dispose != NULL) {
+		n = dispose;
+		dispose = n->rb_left;
+		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+	}
 }
 
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_zap_cache(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	spin_lock(&inode->i_lock);
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+}
 
-	if (cache->cred != set->cred) {
-		if (cache->cred)
-			put_rpccred(cache->cred);
-		cache->cred = get_rpccred(set->cred);
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+{
+	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
+	struct nfs_access_entry *entry;
+
+	while (n != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+
+		if (cred < entry->cred)
+			n = n->rb_left;
+		else if (cred > entry->cred)
+			n = n->rb_right;
+		else
+			return entry;
 	}
-	/* FIXME: replace current access_cache BKL reliance with inode->i_lock */
+	return NULL;
+}
+
+int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_access_entry *cache;
+	int err = -ENOENT;
+
 	spin_lock(&inode->i_lock);
-	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+		goto out_zap;
+	cache = nfs_access_search_rbtree(inode, cred);
+	if (cache == NULL)
+		goto out;
+	if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+		goto out_stale;
+	res->jiffies = cache->jiffies;
+	res->cred = cache->cred;
+	res->mask = cache->mask;
+	err = 0;
+out:
+	spin_unlock(&inode->i_lock);
+	return err;
+out_stale:
+	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(cache);
+	return -ENOENT;
+out_zap:
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+	return -ENOENT;
+}
+
+static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct rb_root *root_node = &NFS_I(inode)->access_cache;
+	struct rb_node **p = &root_node->rb_node;
+	struct rb_node *parent = NULL;
+	struct nfs_access_entry *entry;
+
+	spin_lock(&inode->i_lock);
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+
+		if (set->cred < entry->cred)
+			p = &parent->rb_left;
+		else if (set->cred > entry->cred)
+			p = &parent->rb_right;
+		else
+			goto found;
+	}
+	rb_link_node(&set->rb_node, parent, p);
+	rb_insert_color(&set->rb_node, root_node);
 	spin_unlock(&inode->i_lock);
+	return;
+found:
+	rb_replace_node(parent, &set->rb_node, root_node);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(entry);
+}
+
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+	if (cache == NULL)
+		return;
+	RB_CLEAR_NODE(&cache->rb_node);
 	cache->jiffies = set->jiffies;
+	cache->cred = get_rpccred(set->cred);
 	cache->mask = set->mask;
+
+	nfs_access_add_rbtree(inode, cache);
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d349fb2245da4..b94ab060bb1ee 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync)
 
 void nfs_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct rpc_cred *cred;
-
 	/*
 	 * The following should never happen...
 	 */
 	BUG_ON(nfs_have_writebacks(inode));
-	BUG_ON (!list_empty(&nfsi->open_files));
+	BUG_ON(!list_empty(&NFS_I(inode)->open_files));
+	BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
 	nfs_zap_acl_cache(inode);
-	cred = nfsi->cache_access.cred;
-	if (cred)
-		put_rpccred(cred);
-	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+	nfs_access_zap_cache(inode);
 }
 
 /**
@@ -290,7 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		nfsi->cache_access.cred = NULL;
+		nfsi->access_cache = RB_ROOT;
 
 		unlock_new_inode(inode);
 	} else
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6c2066caeaab1..cc013ed2e52ef 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -42,6 +42,7 @@
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
 
@@ -69,6 +70,7 @@
  * NFSv3/v4 Access mode cache entry
  */
 struct nfs_access_entry {
+	struct rb_node		rb_node;
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
 	int			mask;
@@ -145,7 +147,7 @@ struct nfs_inode {
 	 */
 	atomic_t		data_updates;
 
-	struct nfs_access_entry	cache_access;
+	struct rb_root		access_cache;
 #ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl	*acl_access;
 	struct posix_acl	*acl_default;
@@ -297,6 +299,7 @@ extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int nfs_permission(struct inode *, int, struct nameidata *);
 extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
+extern void nfs_access_zap_cache(struct inode *inode);
 extern int nfs_open(struct inode *, struct file *);
 extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
-- 
GitLab


From cfcea3e8c66c2dcde98d5c2693d4bff50b5cac97 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 25 Jul 2006 11:28:18 -0400
Subject: [PATCH 0807/1063] NFS: Add a global LRU list for the ACCESS cache

...in order to allow the addition of a memory shrinker.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c           | 35 ++++++++++++++++++++++++++++++++++-
 fs/nfs/inode.c         |  2 ++
 include/linux/nfs_fs.h |  4 ++++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 094afded2b115..bf4f5ffda703c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1638,10 +1638,17 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
+static DEFINE_SPINLOCK(nfs_access_lru_lock);
+static LIST_HEAD(nfs_access_lru_list);
+static atomic_long_t nfs_access_nr_entries;
+
 static void nfs_access_free_entry(struct nfs_access_entry *entry)
 {
 	put_rpccred(entry->cred);
 	kfree(entry);
+	smp_mb__before_atomic_dec();
+	atomic_long_dec(&nfs_access_nr_entries);
+	smp_mb__after_atomic_dec();
 }
 
 static void __nfs_access_zap_cache(struct inode *inode)
@@ -1655,6 +1662,7 @@ static void __nfs_access_zap_cache(struct inode *inode)
 	while ((n = rb_first(root_node)) != NULL) {
 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
 		rb_erase(n, root_node);
+		list_del(&entry->lru);
 		n->rb_left = dispose;
 		dispose = n;
 	}
@@ -1671,6 +1679,13 @@ static void __nfs_access_zap_cache(struct inode *inode)
 
 void nfs_access_zap_cache(struct inode *inode)
 {
+	/* Remove from global LRU init */
+	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+		spin_unlock(&nfs_access_lru_lock);
+	}
+
 	spin_lock(&inode->i_lock);
 	/* This will release the spinlock */
 	__nfs_access_zap_cache(inode);
@@ -1711,12 +1726,14 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
+	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
 	err = 0;
 out:
 	spin_unlock(&inode->i_lock);
 	return err;
 out_stale:
 	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	list_del(&cache->lru);
 	spin_unlock(&inode->i_lock);
 	nfs_access_free_entry(cache);
 	return -ENOENT;
@@ -1728,7 +1745,8 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
 
 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
 {
-	struct rb_root *root_node = &NFS_I(inode)->access_cache;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct rb_root *root_node = &nfsi->access_cache;
 	struct rb_node **p = &root_node->rb_node;
 	struct rb_node *parent = NULL;
 	struct nfs_access_entry *entry;
@@ -1747,10 +1765,13 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *
 	}
 	rb_link_node(&set->rb_node, parent, p);
 	rb_insert_color(&set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
 	spin_unlock(&inode->i_lock);
 	return;
 found:
 	rb_replace_node(parent, &set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+	list_del(&entry->lru);
 	spin_unlock(&inode->i_lock);
 	nfs_access_free_entry(entry);
 }
@@ -1766,6 +1787,18 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 	cache->mask = set->mask;
 
 	nfs_access_add_rbtree(inode, cache);
+
+	/* Update accounting */
+	smp_mb__before_atomic_inc();
+	atomic_long_inc(&nfs_access_nr_entries);
+	smp_mb__after_atomic_inc();
+
+	/* Add inode to global LRU list */
+	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+		spin_unlock(&nfs_access_lru_lock);
+	}
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b94ab060bb1ee..6ed018c9aad2e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1104,6 +1104,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		INIT_LIST_HEAD(&nfsi->dirty);
 		INIT_LIST_HEAD(&nfsi->commit);
 		INIT_LIST_HEAD(&nfsi->open_files);
+		INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+		INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 		INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 		atomic_set(&nfsi->data_updates, 0);
 		nfsi->ndirty = 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index cc013ed2e52ef..a36e01cd6321e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -71,6 +71,7 @@
  */
 struct nfs_access_entry {
 	struct rb_node		rb_node;
+	struct list_head	lru;
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
 	int			mask;
@@ -148,6 +149,8 @@ struct nfs_inode {
 	atomic_t		data_updates;
 
 	struct rb_root		access_cache;
+	struct list_head	access_cache_entry_lru;
+	struct list_head	access_cache_inode_lru;
 #ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl	*acl_access;
 	struct posix_acl	*acl_default;
@@ -201,6 +204,7 @@ struct nfs_inode {
 #define NFS_INO_REVALIDATING	(0)		/* revalidating attrs */
 #define NFS_INO_ADVISE_RDPLUS	(1)		/* advise readdirplus */
 #define NFS_INO_STALE		(2)		/* possible stale inode */
+#define NFS_INO_ACL_LRU_SET	(3)		/* Inode is on the LRU list */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
-- 
GitLab


From 979df72e6f963b42ee484f2eca049c3344da0ba7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 25 Jul 2006 11:28:19 -0400
Subject: [PATCH 0808/1063] NFS: Add an ACCESS cache memory shrinker

A pinned inode may in theory end up filling memory with cached ACCESS
calls. This patch ensures that the VM may shrink away the cache in these
particular cases.
The shrinker works by iterating through the list of inodes on the global
nfs_access_lru_list, and removing the least recently used access
cache entry until it is done (or until the entire cache is empty).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      | 44 ++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/internal.h |  3 +++
 fs/nfs/super.c    |  5 +++++
 3 files changed, 52 insertions(+)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bf4f5ffda703c..067d144d141b0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1651,6 +1651,50 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
 	smp_mb__after_atomic_dec();
 }
 
+int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+	LIST_HEAD(head);
+	struct nfs_inode *nfsi;
+	struct nfs_access_entry *cache;
+
+	spin_lock(&nfs_access_lru_lock);
+restart:
+	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+		struct inode *inode;
+
+		if (nr_to_scan-- == 0)
+			break;
+		inode = igrab(&nfsi->vfs_inode);
+		if (inode == NULL)
+			continue;
+		spin_lock(&inode->i_lock);
+		if (list_empty(&nfsi->access_cache_entry_lru))
+			goto remove_lru_entry;
+		cache = list_entry(nfsi->access_cache_entry_lru.next,
+				struct nfs_access_entry, lru);
+		list_move(&cache->lru, &head);
+		rb_erase(&cache->rb_node, &nfsi->access_cache);
+		if (!list_empty(&nfsi->access_cache_entry_lru))
+			list_move_tail(&nfsi->access_cache_inode_lru,
+					&nfs_access_lru_list);
+		else {
+remove_lru_entry:
+			list_del_init(&nfsi->access_cache_inode_lru);
+			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+		}
+		spin_unlock(&inode->i_lock);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&nfs_access_lru_lock);
+	while (!list_empty(&head)) {
+		cache = list_entry(head.next, struct nfs_access_entry, lru);
+		list_del(&cache->lru);
+		nfs_access_free_entry(cache);
+	}
+	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+}
+
 static void __nfs_access_zap_cache(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4f4e5def0fcc..660e9ff5341cb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -66,6 +66,9 @@ extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
 				  struct page *page);
 #endif
 
+/* dir.c */
+extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
 /* inode.c */
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e8a9bee74d9d2..06c321beacfe6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -221,6 +221,8 @@ module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
 		 &nfs_idmap_cache_timeout, 0644);
 #endif
 
+static struct shrinker *acl_shrinker;
+
 /*
  * Register the NFS filesystems
  */
@@ -240,6 +242,7 @@ int __init register_nfs_fs(void)
 	if (ret < 0)
 		goto error_2;
 #endif
+	acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
 	return 0;
 
 #ifdef CONFIG_NFS_V4
@@ -257,6 +260,8 @@ int __init register_nfs_fs(void)
  */
 void __exit unregister_nfs_fs(void)
 {
+	if (acl_shrinker != NULL)
+		remove_shrinker(acl_shrinker);
 #ifdef CONFIG_NFS_V4
 	unregister_filesystem(&nfs4_fs_type);
 	nfs_unregister_sysctl();
-- 
GitLab


From 770bfad846ab6628444428467b11fa6773ae9ea1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:07 -0400
Subject: [PATCH 0809/1063] NFS: Add dentry materialisation op

The attached patch adds a new directory cache management function that prepares
a disconnected anonymous dentry to be connected into the dentry tree. The
anonymous dentry is transferred the name and parentage from another dentry.

The following changes were made in [try #2]:

 (*) d_materialise_dentry() now switches the parentage of the two nodes around
     correctly when one or other of them is self-referential.

The following changes were made in [try #7]:

 (*) d_instantiate_unique() has had the interior part split out as function
     __d_instantiate_unique(). Callers of this latter function must be holding
     the appropriate locks.

 (*) _d_rehash() has been added as a wrapper around __d_rehash() to call it
     with the most obvious hash list (the one from the name). d_rehash() now
     calls _d_rehash().

 (*) d_materialise_dentry() is now __d_materialise_dentry() and is static.

 (*) d_materialise_unique() added to perform the combination of d_find_alias(),
     d_materialise_dentry() and d_add_unique() that the NFS client was doing
     twice, all within a single dcache_lock critical section. This reduces the
     number of times two different spinlocks were being accessed.

The following further changes were made:

 (*) Add the dentries onto their parents' d_subdirs lists.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/dcache.c            | 164 +++++++++++++++++++++++++++++++++++++----
 include/linux/dcache.h |   1 +
 2 files changed, 151 insertions(+), 14 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 1b4a3a34ec57f..17b392a2049eb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
  * (or otherwise set) by the caller to indicate that it is now
  * in use by the dcache.
  */
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+					     struct inode *inode)
 {
 	struct dentry *alias;
 	int len = entry->d_name.len;
 	const char *name = entry->d_name.name;
 	unsigned int hash = entry->d_name.hash;
 
-	BUG_ON(!list_empty(&entry->d_alias));
-	spin_lock(&dcache_lock);
-	if (!inode)
-		goto do_negative;
+	if (!inode) {
+		entry->d_inode = NULL;
+		return NULL;
+	}
+
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
 		struct qstr *qstr = &alias->d_name;
 
@@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 		if (memcmp(qstr->name, name, len))
 			continue;
 		dget_locked(alias);
-		spin_unlock(&dcache_lock);
-		BUG_ON(!d_unhashed(alias));
-		iput(inode);
 		return alias;
 	}
+
 	list_add(&entry->d_alias, &inode->i_dentry);
-do_negative:
 	entry->d_inode = inode;
 	fsnotify_d_instantiate(entry, inode);
-	spin_unlock(&dcache_lock);
-	security_d_instantiate(entry, inode);
 	return NULL;
 }
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *result;
+
+	BUG_ON(!list_empty(&entry->d_alias));
+
+	spin_lock(&dcache_lock);
+	result = __d_instantiate_unique(entry, inode);
+	spin_unlock(&dcache_lock);
+
+	if (!result) {
+		security_d_instantiate(entry, inode);
+		return NULL;
+	}
+
+	BUG_ON(!d_unhashed(result));
+	iput(inode);
+	return result;
+}
+
 EXPORT_SYMBOL(d_instantiate_unique);
 
 /**
@@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
  	hlist_add_head_rcu(&entry->d_hash, list);
 }
 
+static void _d_rehash(struct dentry * entry)
+{
+	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
+
 /**
  * d_rehash	- add an entry back to the hash
  * @entry: dentry to add to the hash
@@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
  
 void d_rehash(struct dentry * entry)
 {
-	struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
 	spin_lock(&dcache_lock);
 	spin_lock(&entry->d_lock);
-	__d_rehash(entry, list);
+	_d_rehash(entry);
 	spin_unlock(&entry->d_lock);
 	spin_unlock(&dcache_lock);
 }
@@ -1386,6 +1407,120 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	spin_unlock(&dcache_lock);
 }
 
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+	struct dentry *dparent, *aparent;
+
+	switch_names(dentry, anon);
+	do_switch(dentry->d_name.len, anon->d_name.len);
+	do_switch(dentry->d_name.hash, anon->d_name.hash);
+
+	dparent = dentry->d_parent;
+	aparent = anon->d_parent;
+
+	dentry->d_parent = (aparent == anon) ? dentry : aparent;
+	list_del(&dentry->d_u.d_child);
+	if (!IS_ROOT(dentry))
+		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&dentry->d_u.d_child);
+
+	anon->d_parent = (dparent == dentry) ? anon : dparent;
+	list_del(&anon->d_u.d_child);
+	if (!IS_ROOT(anon))
+		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&anon->d_u.d_child);
+
+	anon->d_flags &= ~DCACHE_DISCONNECTED;
+}
+
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces an dentry into the tree, substituting an extant disconnected
+ * root directory alias in its place if there is one
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+	struct dentry *alias, *actual;
+
+	BUG_ON(!d_unhashed(dentry));
+
+	spin_lock(&dcache_lock);
+
+	if (!inode) {
+		actual = dentry;
+		dentry->d_inode = NULL;
+		goto found_lock;
+	}
+
+	/* See if a disconnected directory already exists as an anonymous root
+	 * that we should splice into the tree instead */
+	if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
+		spin_lock(&alias->d_lock);
+
+		/* Is this a mountpoint that we could splice into our tree? */
+		if (IS_ROOT(alias))
+			goto connect_mountpoint;
+
+		if (alias->d_name.len == dentry->d_name.len &&
+		    alias->d_parent == dentry->d_parent &&
+		    memcmp(alias->d_name.name,
+			   dentry->d_name.name,
+			   dentry->d_name.len) == 0)
+			goto replace_with_alias;
+
+		spin_unlock(&alias->d_lock);
+
+		/* Doh! Seem to be aliasing directories for some reason... */
+		dput(alias);
+	}
+
+	/* Add a unique reference */
+	actual = __d_instantiate_unique(dentry, inode);
+	if (!actual)
+		actual = dentry;
+	else if (unlikely(!d_unhashed(actual)))
+		goto shouldnt_be_hashed;
+
+found_lock:
+	spin_lock(&actual->d_lock);
+found:
+	_d_rehash(actual);
+	spin_unlock(&actual->d_lock);
+	spin_unlock(&dcache_lock);
+
+	if (actual == dentry) {
+		security_d_instantiate(dentry, inode);
+		return NULL;
+	}
+
+	iput(inode);
+	return actual;
+
+	/* Convert the anonymous/root alias into an ordinary dentry */
+connect_mountpoint:
+	__d_materialise_dentry(dentry, alias);
+
+	/* Replace the candidate dentry with the alias in the tree */
+replace_with_alias:
+	__d_drop(alias);
+	actual = alias;
+	goto found;
+
+shouldnt_be_hashed:
+	spin_unlock(&dcache_lock);
+	BUG();
+	goto shouldnt_be_hashed;
+}
+
 /**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
@@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate);
 EXPORT_SYMBOL(d_invalidate);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
 EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 471781ffeab11..44605be594090 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -221,6 +221,7 @@ static inline int dname_external(struct dentry *dentry)
  */
 extern void d_instantiate(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
+extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
 extern void d_delete(struct dentry *);
 
 /* allocate/de-allocate */
-- 
GitLab


From 7d4e2747a0412583526a162fbbd6edeeafcceb08 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:07 -0400
Subject: [PATCH 0810/1063] NFS: Fix up split of fs/nfs/inode.c

Fix ups for the splitting of the superblock stuff out of fs/nfs/inode.c,
including:

 (*) Move the callback tcpport module param into callback.c.

 (*) Move the idmap cache timeout module param into idmap.c.

 (*) Changes to internal.h:

     (*) namespace-nfs4.c was renamed to nfs4namespace.c.

     (*) nfs_stat_to_errno() is in nfs2xdr.c, not nfs4xdr.c.

     (*) nfs4xdr.c is contingent on CONFIG_NFS_V4.

     (*) nfs4_path() is only used if CONFIG_NFS_V4 is set.

Plus also:

 (*) The sec_flavours[] table should really be const.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 15 +++++++++++++++
 fs/nfs/idmap.c    | 14 ++++++++++++++
 fs/nfs/internal.h | 12 ++++++------
 fs/nfs/super.c    | 40 ++++------------------------------------
 4 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fe0a6b8ac149c..d6c4bae14bb9c 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -36,6 +36,21 @@ static struct svc_program nfs4_callback_program;
 
 unsigned int nfs_callback_set_tcpport;
 unsigned short nfs_callback_tcpport;
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+
+static int param_set_port(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+		return -EINVAL;
+	*((int *)kp->arg) = num;
+	return 0;
+}
+
+module_param_call(callback_tcpport, param_set_port, param_get_int,
+		 &nfs_callback_set_tcpport, 0644);
 
 /*
  * This is the callback kernel thread.
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 07a5dd57646e3..873deb96a6cdc 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,20 @@
 /* Default cache timeout is 10 minutes */
 unsigned int nfs_idmap_cache_timeout = 600 * HZ;
 
+static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	int jif = num * HZ;
+	if (endp == val || *endp || num < 0 || jif < num)
+		return -EINVAL;
+	*((int *)kp->arg) = jif;
+	return 0;
+}
+
+module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
+		 &nfs_idmap_cache_timeout, 0644);
+
 struct idmap_hashent {
 	unsigned long ih_expires;
 	__u32 ih_id;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 660e9ff5341cb..4802157963f8d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -15,7 +15,7 @@ struct nfs_clone_mount {
 	rpc_authflavor_t authflavor;
 };
 
-/* namespace-nfs4.c */
+/* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
 #else
@@ -46,6 +46,7 @@ extern void nfs_destroy_directcache(void);
 #endif
 
 /* nfs2xdr.c */
+extern int nfs_stat_to_errno(int);
 extern struct rpc_procinfo nfs_procedures[];
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
@@ -54,8 +55,9 @@ extern struct rpc_procinfo nfs3_procedures[];
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
 /* nfs4xdr.c */
-extern int nfs_stat_to_errno(int);
+#ifdef CONFIG_NFS_V4
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+#endif
 
 /* nfs4proc.c */
 #ifdef CONFIG_NFS_V4
@@ -97,15 +99,13 @@ extern char *nfs_path(const char *base, const struct dentry *dentry,
 /*
  * Determine the mount path as a string
  */
+#ifdef CONFIG_NFS_V4
 static inline char *
 nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
 {
-#ifdef CONFIG_NFS_V4
 	return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
-#else
-	return NULL;
-#endif
 }
+#endif
 
 /*
  * Determine the device name as a string
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 06c321beacfe6..63497345806b3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -187,40 +187,6 @@ static struct super_operations nfs4_sops = {
 };
 #endif
 
-#ifdef CONFIG_NFS_V4
-static const int nfs_set_port_min = 0;
-static const int nfs_set_port_max = 65535;
-
-static int param_set_port(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
-		return -EINVAL;
-	*((int *)kp->arg) = num;
-	return 0;
-}
-
-module_param_call(callback_tcpport, param_set_port, param_get_int,
-		 &nfs_callback_set_tcpport, 0644);
-#endif
-
-#ifdef CONFIG_NFS_V4
-static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	int jif = num * HZ;
-	if (endp == val || *endp || num < 0 || jif < num)
-		return -EINVAL;
-	*((int *)kp->arg) = jif;
-	return 0;
-}
-
-module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
-		 &nfs_idmap_cache_timeout, 0644);
-#endif
-
 static struct shrinker *acl_shrinker;
 
 /*
@@ -328,9 +294,12 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 }
 
+/*
+ * Map the security flavour number to a name
+ */
 static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 {
-	static struct {
+	static const struct {
 		rpc_authflavor_t flavour;
 		const char *str;
 	} sec_flavours[] = {
@@ -1368,7 +1337,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	}
 
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
-
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
-- 
GitLab


From 0a8ea4372b2868842986118ca90912f3382e6c5a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:08 -0400
Subject: [PATCH 0811/1063] NFS: Disambiguate nfs_stat_to_errno()

Rename the NFS4 version of nfs_stat_to_errno() so that it doesn't conflict with
the common one used by NFS2 and NFS3.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 730ec8fb31c68..1dee6ef7e5a97 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -58,7 +58,7 @@
 /* Mapping from NFS error code to "errno" error code. */
 #define errno_NFSERR_IO		EIO
 
-static int nfs_stat_to_errno(int);
+static int nfs4_stat_to_errno(int);
 
 /* NFSv4 COMPOUND tags are only wanted for debugging purposes */
 #ifdef DEBUG
@@ -2127,7 +2127,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 	}
 	READ32(nfserr);
 	if (nfserr != NFS_OK)
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 	return 0;
 }
 
@@ -3598,7 +3598,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
 		READ_BUF(len);
 		return -NFSERR_CLID_INUSE;
 	} else
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 
 	return 0;
 }
@@ -4256,7 +4256,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4346,7 +4346,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
 	if (!status)
 		status = decode_setclientid(&xdr, clp);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4368,7 +4368,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4521,7 +4521,7 @@ static struct {
  * This one is used jointly by NFSv2 and NFSv3.
  */
 static int
-nfs_stat_to_errno(int stat)
+nfs4_stat_to_errno(int stat)
 {
 	int i;
 	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
-- 
GitLab


From 5ae1fbce142b67bf59e15fb1af96e88a96abde7b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:08 -0400
Subject: [PATCH 0812/1063] NFS: Fix NFS4 callback up/down prototypes

Make the nfs_callback_up()/down() prototypes just do nothing if NFS4 is not
enabled.  Also make the down function void type since we can't really do
anything if it fails.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 5 +----
 fs/nfs/callback.h | 7 ++++++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index d6c4bae14bb9c..b1f7dc4153928 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -149,10 +149,8 @@ int nfs_callback_up(void)
 /*
  * Kill the server process if it is not already up.
  */
-int nfs_callback_down(void)
+void nfs_callback_down(void)
 {
-	int ret = 0;
-
 	lock_kernel();
 	mutex_lock(&nfs_callback_mutex);
 	nfs_callback_info.users--;
@@ -164,7 +162,6 @@ int nfs_callback_down(void)
 	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
 	mutex_unlock(&nfs_callback_mutex);
 	unlock_kernel();
-	return ret;
 }
 
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b252e7fe53a5c..5676163d26e81 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -62,8 +62,13 @@ struct cb_recallargs {
 extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 
+#ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(void);
-extern int nfs_callback_down(void);
+extern void nfs_callback_down(void);
+#else
+#define nfs_callback_up()	(0)
+#define nfs_callback_down()	do {} while(0)
+#endif
 
 extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
-- 
GitLab


From adfa6f980bd46974e6b32b22dd0c45e3f52063f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:08 -0400
Subject: [PATCH 0813/1063] NFS: Rename struct nfs4_client to struct nfs_client

Rename struct nfs4_client to struct nfs_client so that it can become the basis
for a general client record for NFS2 and NFS3 in addition to NFS4.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c         |  2 +-
 fs/nfs/callback_proc.c    |  4 ++--
 fs/nfs/delegation.c       | 24 +++++++++----------
 fs/nfs/delegation.h       | 10 ++++----
 fs/nfs/idmap.c            | 12 +++++-----
 fs/nfs/nfs4_fs.h          | 30 +++++++++++------------
 fs/nfs/nfs4proc.c         | 32 ++++++++++++-------------
 fs/nfs/nfs4renewd.c       |  8 +++----
 fs/nfs/nfs4state.c        | 50 +++++++++++++++++++--------------------
 fs/nfs/nfs4xdr.c          | 18 +++++++-------
 fs/nfs/super.c            |  4 ++--
 include/linux/nfs_fs_sb.h |  2 +-
 include/linux/nfs_idmap.h | 14 +++++------
 13 files changed, 105 insertions(+), 105 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index b1f7dc4153928..1b596b6d9dc21 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -167,7 +167,7 @@ void nfs_callback_down(void)
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
 	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 
 	/* Don't talk to strangers */
 	clp = nfs4_find_client(addr);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7719483ecdfc0..55d6e2ec157fc 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -15,7 +15,7 @@
  
 unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
@@ -56,7 +56,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres
 
 unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct inode *inode;
 	unsigned res;
 	
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 9540a316c05e6..5a1105c258bdd 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int status = 0;
@@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode)
  */
 int __nfs_inode_return_delegation(struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
@@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
  */
 void nfs_return_all_delegations(struct super_block *sb)
 {
-	struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_client *clp = NFS_SB(sb)->nfs4_state;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -232,7 +232,7 @@ void nfs_return_all_delegations(struct super_block *sb)
 
 int nfs_do_expire_all_delegations(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -258,7 +258,7 @@ int nfs_do_expire_all_delegations(void *ptr)
 	module_put_and_exit(0);
 }
 
-void nfs_expire_all_delegations(struct nfs4_client *clp)
+void nfs_expire_all_delegations(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
@@ -276,7 +276,7 @@ void nfs_expire_all_delegations(struct nfs4_client *clp)
 /*
  * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
  */
-void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+void nfs_handle_cb_pathdown(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	struct inode *inode;
@@ -299,7 +299,7 @@ void nfs_handle_cb_pathdown(struct nfs4_client *clp)
 
 struct recall_threadargs {
 	struct inode *inode;
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	const nfs4_stateid *stateid;
 
 	struct completion started;
@@ -310,7 +310,7 @@ static int recall_thread(void *data)
 {
 	struct recall_threadargs *args = (struct recall_threadargs *)data;
 	struct inode *inode = igrab(args->inode);
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
@@ -371,7 +371,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
 /*
  * Retrieve the inode associated with a delegation
  */
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
@@ -389,7 +389,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf
 /*
  * Mark all delegations as needing to be reclaimed
  */
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	spin_lock(&clp->cl_lock);
@@ -401,7 +401,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
 /*
  * Reap all unclaimed delegations after reboot recovery is done
  */
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation, *n;
 	LIST_HEAD(head);
@@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3858694652fa0..2cfd4b24c7feb 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
 int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 void nfs_return_all_delegations(struct super_block *sb);
-void nfs_expire_all_delegations(struct nfs4_client *clp);
-void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+void nfs_expire_all_delegations(struct nfs_client *clp);
+void nfs_handle_cb_pathdown(struct nfs_client *clp);
 
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+void nfs_delegation_mark_reclaim(struct nfs_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 873deb96a6cdc..d05148ec9414c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -109,7 +109,7 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
 };
 
 void
-nfs_idmap_new(struct nfs4_client *clp)
+nfs_idmap_new(struct nfs_client *clp)
 {
 	struct idmap *idmap;
 
@@ -138,7 +138,7 @@ nfs_idmap_new(struct nfs4_client *clp)
 }
 
 void
-nfs_idmap_delete(struct nfs4_client *clp)
+nfs_idmap_delete(struct nfs_client *clp)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
@@ -491,27 +491,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
 	return (hash);
 }
 
-int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
 }
 
-int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
 }
 
-int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
 }
-int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9a102860df374..4e334cb484986 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,9 +43,9 @@ enum nfs4_client_state {
 };
 
 /*
- * The nfs4_client identifies our client state to the server.
+ * The nfs_client identifies our client state to the server.
  */
-struct nfs4_client {
+struct nfs_client {
 	struct list_head	cl_servers;	/* Global list of servers */
 	struct in_addr		cl_addr;	/* Server identifier */
 	u64			cl_clientid;	/* constant */
@@ -127,7 +127,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
 struct nfs4_state_owner {
 	spinlock_t	     so_lock;
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
-	struct nfs4_client   *so_client;
+	struct nfs_client    *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
 	atomic_t	     so_count;
 
@@ -210,10 +210,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
 
 /* nfs4proc.c */
 extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
+extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -231,19 +231,19 @@ extern const u32 nfs4_fsinfo_bitmap[2];
 extern const u32 nfs4_fs_locations_bitmap[2];
 
 /* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(void *);
 
 /* nfs4state.c */
 extern void init_nfsv4_state(struct nfs_server *);
 extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+extern struct nfs_client *nfs4_get_client(struct in_addr *);
+extern void nfs4_put_client(struct nfs_client *clp);
+extern struct nfs_client *nfs4_find_client(struct in_addr *);
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
+extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
@@ -252,7 +252,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b14145b7b87f3..168f3ffb059f4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp);
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
 
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	spin_lock(&clp->cl_lock);
 	if (time_before(clp->cl_last_renewal,timestamp))
 		clp->cl_last_renewal = timestamp;
@@ -792,7 +792,7 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
 
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 
 	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
 		nfs4_schedule_state_recovery(clp);
@@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 {
 	struct nfs_delegation *delegation;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs4_state_owner *sp = NULL;
 	struct nfs4_state *state = NULL;
@@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs4_opendata *opendata;
 	int                     status;
 
@@ -2521,7 +2521,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
  */
 static void nfs4_renew_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
 	unsigned long timestamp = (unsigned long)data;
 
 	if (task->tk_status < 0) {
@@ -2543,7 +2543,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
 	.rpc_call_done = nfs4_renew_done,
 };
 
-int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2555,7 +2555,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
 			&nfs4_renew_ops, (void *)jiffies);
 }
 
-int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 
 	if (!clp || task->tk_status >= 0)
 		return 0;
@@ -2828,7 +2828,7 @@ static int nfs4_wait_bit_interruptible(void *word)
 	return 0;
 }
 
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
 {
 	sigset_t oldset;
 	int res;
@@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
  */
 int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	int ret = errorcode;
 
 	exception->retry = 0;
@@ -2898,7 +2898,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 	return nfs4_map_errors(ret);
 }
 
-int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
+int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
 {
 	nfs4_verifier sc_verifier;
 	struct nfs4_setclientid setclientid = {
@@ -2945,7 +2945,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 	return status;
 }
 
-static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
@@ -2969,7 +2969,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr
 	return status;
 }
 
-int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	long timeout;
 	int err;
@@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
 		.fl = request,
@@ -3513,7 +3513,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 	unsigned char fl_flags = request->fl_flags;
 	int status;
 
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 5d764d8e6d8ac..208764069f616 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -61,7 +61,7 @@
 void
 nfs4_renew_state(void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)data;
+	struct nfs_client *clp = (struct nfs_client *)data;
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
@@ -108,7 +108,7 @@ nfs4_renew_state(void *data)
 
 /* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_schedule_state_renewal(struct nfs4_client *clp)
+nfs4_schedule_state_renewal(struct nfs_client *clp)
 {
 	long timeout;
 
@@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp)
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 
 	if (!clp)
 		return;
@@ -140,7 +140,7 @@ nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 
 /* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_kill_renewd(struct nfs4_client *clp)
+nfs4_kill_renewd(struct nfs_client *clp)
 {
 	down_read(&clp->cl_sem);
 	if (!list_empty(&clp->cl_superblocks)) {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 090a36b07a223..c0b6439f1f713 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -83,10 +83,10 @@ destroy_nfsv4_state(struct nfs_server *server)
  * Since these are allocated/deallocated very rarely, we don't
  * bother putting them in a slab cache...
  */
-static struct nfs4_client *
+static struct nfs_client *
 nfs4_alloc_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 
 	if (nfs_callback_up() < 0)
 		return NULL;
@@ -111,7 +111,7 @@ nfs4_alloc_client(struct in_addr *addr)
 }
 
 static void
-nfs4_free_client(struct nfs4_client *clp)
+nfs4_free_client(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 
@@ -130,9 +130,9 @@ nfs4_free_client(struct nfs4_client *clp)
 	nfs_callback_down();
 }
 
-static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
+static struct nfs_client *__nfs4_find_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
 		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
 			atomic_inc(&clp->cl_count);
@@ -142,19 +142,19 @@ static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
 	return NULL;
 }
 
-struct nfs4_client *nfs4_find_client(struct in_addr *addr)
+struct nfs_client *nfs4_find_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	spin_lock(&state_spinlock);
 	clp = __nfs4_find_client(addr);
 	spin_unlock(&state_spinlock);
 	return clp;
 }
 
-struct nfs4_client *
+struct nfs_client *
 nfs4_get_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp, *new = NULL;
+	struct nfs_client *clp, *new = NULL;
 
 	spin_lock(&state_spinlock);
 	for (;;) {
@@ -180,7 +180,7 @@ nfs4_get_client(struct in_addr *addr)
 }
 
 void
-nfs4_put_client(struct nfs4_client *clp)
+nfs4_put_client(struct nfs_client *clp)
 {
 	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
 		return;
@@ -192,7 +192,7 @@ nfs4_put_client(struct nfs4_client *clp)
 	nfs4_free_client(clp);
 }
 
-static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
+static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
 			nfs_callback_tcpport, cred);
@@ -204,13 +204,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
 }
 
 u32
-nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+nfs4_alloc_lockowner_id(struct nfs_client *clp)
 {
 	return clp->cl_lockowner_id ++;
 }
 
 static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp = NULL;
 
@@ -224,7 +224,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
 	return sp;
 }
 
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct rpc_cred *cred = NULL;
@@ -238,7 +238,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
 	return cred;
 }
 
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 
@@ -251,7 +251,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
 }
 
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp, *res = NULL;
 
@@ -294,7 +294,7 @@ nfs4_alloc_state_owner(void)
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	spin_lock(&clp->cl_lock);
 	list_del_init(&sp->so_list);
 	spin_unlock(&clp->cl_lock);
@@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
  */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs4_state_owner *sp, *new;
 
 	get_rpccred(cred);
@@ -337,7 +337,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	struct rpc_cred *cred = sp->so_cred;
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -540,7 +540,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 
 	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 	if (lsp == NULL)
@@ -752,7 +752,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 
 static int reclaimer(void *);
 
-static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
+static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
 {
 	smp_mb__before_clear_bit();
 	clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
@@ -764,7 +764,7 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
 /*
  * State recovery routine
  */
-static void nfs4_recover_state(struct nfs4_client *clp)
+static void nfs4_recover_state(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
@@ -782,7 +782,7 @@ static void nfs4_recover_state(struct nfs4_client *clp)
 /*
  * Schedule a state recovery attempt
  */
-void nfs4_schedule_state_recovery(struct nfs4_client *clp)
+void nfs4_schedule_state_recovery(struct nfs_client *clp)
 {
 	if (!clp)
 		return;
@@ -879,7 +879,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
 	return status;
 }
 
-static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct nfs4_state *state;
@@ -903,7 +903,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
 
 static int reclaimer(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1dee6ef7e5a97..04748ab9ed555 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
 	return 0;
 }
 
-static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
+static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
 {
 	uint32_t *p;
 
@@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
 	return 0;
 }
 
-static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
+static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
 {
         uint32_t *p;
 
@@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2132,7 +2132,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 }
 
 /* Dummy routine */
-static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
+static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
 {
 	uint32_t *p;
 	unsigned int strlen;
@@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
 	return 0;
 }
 
-static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
 {
 	uint32_t len, *p;
 
@@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
 	return 0;
 }
 
-static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
 {
 	uint32_t len, *p;
 
@@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
 	return 0;
 }
 
-static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
 {
 	uint32_t *p;
 	uint32_t opnum;
@@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
  * a SETCLIENTID request
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
-		struct nfs4_client *clp)
+		struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 63497345806b3..d03ede5b1aca7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1099,7 +1099,7 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
 static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct rpc_xprt *xprt = NULL;
 	struct rpc_clnt *clnt = NULL;
 	int err = -EIO;
@@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry)
 static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
 {
 	const struct dentry *dentry = data->dentry;
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct super_block *sb;
 
 	server->fsid = data->fattr->fsid;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6b4a13c79474c..4db90df2aed05 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -43,7 +43,7 @@ struct nfs_server {
 	 */
 	char			ip_addr[16];
 	char *			mnt_path;
-	struct nfs4_client *	nfs4_state;	/* all NFSv4 state starts here */
+	struct nfs_client *	nfs4_state;	/* all NFSv4 state starts here */
 	struct list_head	nfs4_siblings;	/* List of other nfs_server structs
 						 * that share the same clientid
 						 */
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
index 102e560942960..678fe68982efe 100644
--- a/include/linux/nfs_idmap.h
+++ b/include/linux/nfs_idmap.h
@@ -62,15 +62,15 @@ struct idmap_msg {
 #ifdef __KERNEL__
 
 /* Forward declaration to make this header independent of others */
-struct nfs4_client;
+struct nfs_client;
 
-void nfs_idmap_new(struct nfs4_client *);
-void nfs_idmap_delete(struct nfs4_client *);
+void nfs_idmap_new(struct nfs_client *);
+void nfs_idmap_delete(struct nfs_client *);
 
-int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *);
-int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *);
-int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *);
-int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *);
+int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *);
+int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *);
+int nfs_map_uid_to_name(struct nfs_client *, __u32, char *);
+int nfs_map_gid_to_group(struct nfs_client *, __u32, char *);
 
 extern unsigned int nfs_idmap_cache_timeout;
 #endif /* __KERNEL__ */
-- 
GitLab


From 7539bbab8062aadc1db95a22b377146843cfa88f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:09 -0400
Subject: [PATCH 0814/1063] NFS: Rename nfs_server::nfs4_state

Rename nfs_server::nfs4_state to nfs_client as it will be used to represent the
client state for NFS2 and NFS3 also.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c       | 12 ++++++------
 fs/nfs/nfs4proc.c         | 26 +++++++++++++-------------
 fs/nfs/nfs4renewd.c       |  2 +-
 fs/nfs/nfs4state.c        | 10 +++++-----
 fs/nfs/nfs4xdr.c          | 10 +++++-----
 fs/nfs/super.c            |  6 +++---
 include/linux/nfs_fs_sb.h |  2 +-
 7 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5a1105c258bdd..cfe239736ac03 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 			case -NFS4ERR_EXPIRED:
 				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
 			case -NFS4ERR_STALE_CLIENTID:
-				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state);
+				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
 				goto out_err;
 		}
 	}
@@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int status = 0;
@@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode)
  */
 int __nfs_inode_return_delegation(struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
@@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
  */
 void nfs_return_all_delegations(struct super_block *sb)
 {
-	struct nfs_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_client *clp = NFS_SB(sb)->nfs_client;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -310,7 +310,7 @@ static int recall_thread(void *data)
 {
 	struct recall_threadargs *args = (struct recall_threadargs *)data;
 	struct inode *inode = igrab(args->inode);
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
@@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 168f3ffb059f4..b46597fc81e14 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	spin_lock(&clp->cl_lock);
 	if (time_before(clp->cl_last_renewal,timestamp))
 		clp->cl_last_renewal = timestamp;
@@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags,
-	p->o_arg.clientid = server->nfs4_state->cl_clientid;
+	p->o_arg.clientid = server->nfs_client->cl_clientid;
 	p->o_arg.id = sp->so_id;
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
@@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
 				/* Don't recall a delegation if it was lost */
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 				return err;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
@@ -792,7 +792,7 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
 
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
 		nfs4_schedule_state_recovery(clp);
@@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 {
 	struct nfs_delegation *delegation;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs4_state_owner *sp = NULL;
 	struct nfs4_state *state = NULL;
@@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
 	int                     status;
 
@@ -1133,7 +1133,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(server->nfs4_state);
+			nfs4_schedule_state_recovery(server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!clp || task->tk_status >= 0)
 		return 0;
@@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
  */
 int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	int ret = errorcode;
 
 	exception->retry = 0;
@@ -3077,7 +3077,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 		switch (err) {
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 			case 0:
 				return 0;
 		}
@@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
 		.fl = request,
@@ -3231,7 +3231,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(calldata->server->nfs4_state);
+			nfs4_schedule_state_recovery(calldata->server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
@@ -3343,7 +3343,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	if (p->arg.lock_seqid == NULL)
 		goto out_free;
 	p->arg.lock_stateid = &lsp->ls_stateid;
-	p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid;
+	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 208764069f616..ff947ecb8b815 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!clp)
 		return;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c0b6439f1f713..fa51a7d4c0222 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -61,7 +61,7 @@ static LIST_HEAD(nfs4_clientid_list);
 void
 init_nfsv4_state(struct nfs_server *server)
 {
-	server->nfs4_state = NULL;
+	server->nfs_client = NULL;
 	INIT_LIST_HEAD(&server->nfs4_siblings);
 }
 
@@ -70,9 +70,9 @@ destroy_nfsv4_state(struct nfs_server *server)
 {
 	kfree(server->mnt_path);
 	server->mnt_path = NULL;
-	if (server->nfs4_state) {
-		nfs4_put_client(server->nfs4_state);
-		server->nfs4_state = NULL;
+	if (server->nfs_client) {
+		nfs4_put_client(server->nfs_client);
+		server->nfs_client = NULL;
 	}
 }
 
@@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
  */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
 	get_rpccred(cred);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 04748ab9ed555..99926067eca45 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	if (iap->ia_valid & ATTR_MODE)
 		len += 4;
 	if (iap->ia_valid & ATTR_UID) {
-		owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
+		owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
 		if (owner_namelen < 0) {
 			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
 			       iap->ia_uid);
@@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
 	}
 	if (iap->ia_valid & ATTR_GID) {
-		owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
+		owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
 		if (owner_grouplen < 0) {
 			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
 			       iap->ia_gid);
@@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
 	fattr->mode |= fmode;
 	if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0)
+	if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0)
+	if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
 		goto xdr_error;
 	if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
 		goto xdr_error;
@@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 			if (decode_space_limit(xdr, &res->maxsize) < 0)
 				return -EIO;
 	}
-	return decode_ace(xdr, NULL, res->server->nfs4_state);
+	return decode_ace(xdr, NULL, res->server->nfs_client);
 }
 
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d03ede5b1aca7..ab4c78ee840c1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1141,7 +1141,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	clnt = rpc_clone_client(clp->cl_rpcclient);
 	if (!IS_ERR(clnt))
-		server->nfs4_state = clp;
+		server->nfs_client = clp;
 	up_write(&clp->cl_sem);
 	clp = NULL;
 
@@ -1151,7 +1151,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		return clnt;
 	}
 
-	if (server->nfs4_state->cl_idmap == NULL) {
+	if (server->nfs_client->cl_idmap == NULL) {
 		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry)
 static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
 {
 	const struct dentry *dentry = data->dentry;
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct super_block *sb;
 
 	server->fsid = data->fattr->fsid;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4db90df2aed05..fc20d6b934fb6 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -43,7 +43,7 @@ struct nfs_server {
 	 */
 	char			ip_addr[16];
 	char *			mnt_path;
-	struct nfs_client *	nfs4_state;	/* all NFSv4 state starts here */
+	struct nfs_client *	nfs_client;	/* all NFSv4 state starts here */
 	struct list_head	nfs4_siblings;	/* List of other nfs_server structs
 						 * that share the same clientid
 						 */
-- 
GitLab


From b7162792b5c0e0f6e91b8997f8e6bbc76ec5420a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:09 -0400
Subject: [PATCH 0815/1063] NFS: Return an error when starting the idmapping
 pipe

Return an error when starting the idmapping pipe so that we can detect it
failing.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c            | 12 ++++++++----
 fs/nfs/super.c            |  3 ++-
 include/linux/nfs_idmap.h |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index d05148ec9414c..231c20ffc0ff7 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -108,15 +108,17 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
         .destroy_msg    = idmap_pipe_destroy_msg,
 };
 
-void
+int
 nfs_idmap_new(struct nfs_client *clp)
 {
 	struct idmap *idmap;
+	int error;
 
 	if (clp->cl_idmap != NULL)
-		return;
+		return 0;
+
         if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
-                return;
+                return -ENOMEM;
 
 	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
 	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
@@ -124,8 +126,9 @@ nfs_idmap_new(struct nfs_client *clp)
         idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
 	    idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
+		error = PTR_ERR(idmap->idmap_dentry);
 		kfree(idmap);
-		return;
+		return error;
 	}
 
         mutex_init(&idmap->idmap_lock);
@@ -135,6 +138,7 @@ nfs_idmap_new(struct nfs_client *clp)
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
 
 	clp->cl_idmap = idmap;
+	return 0;
 }
 
 void
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ab4c78ee840c1..3ee85c4e65d86 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1136,7 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		clnt->cl_softrtry = 1;
 		clp->cl_rpcclient = clnt;
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		nfs_idmap_new(clp);
+		if (nfs_idmap_new(clp) < 0)
+			goto out_fail;
 	}
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	clnt = rpc_clone_client(clp->cl_rpcclient);
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
index 678fe68982efe..15a9f3b7289ab 100644
--- a/include/linux/nfs_idmap.h
+++ b/include/linux/nfs_idmap.h
@@ -64,7 +64,7 @@ struct idmap_msg {
 /* Forward declaration to make this header independent of others */
 struct nfs_client;
 
-void nfs_idmap_new(struct nfs_client *);
+int nfs_idmap_new(struct nfs_client *);
 void nfs_idmap_delete(struct nfs_client *);
 
 int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *);
-- 
GitLab


From 2b3de4411b3ccaeb00018c99d1bbe7203554cf7f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:09 -0400
Subject: [PATCH 0816/1063] NFS: Add a lookupfh NFS RPC op

Add a lookup filehandle NFS RPC op so that a file handle can be looked up
without requiring dentries and inodes and other VFS stuff when doing an NFS4
pathwalk during mounting.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 47 +++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_xdr.h |  3 +++
 2 files changed, 50 insertions(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b46597fc81e14..de2006f754ef9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1583,6 +1583,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	return status;
 }
 
+static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+		struct qstr *name, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	int		       status;	/* result of the synchronous RPC call below */
+	struct nfs4_lookup_arg args = {
+		.bitmask = server->attr_bitmask,
+		.dir_fh = dirfh,
+		.name = name,
+	};
+	struct nfs4_lookup_res res = {
+		.server = server,
+		.fattr = fattr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],	/* reuses the LOOKUP procedure */
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	nfs_fattr_init(fattr);	/* reset the caller's fattr before issuing the RPC */
+
+	dprintk("NFS call  lookupfh %s\n", name->name);
+	status = rpc_call_sync(server->client, &msg, 0);
+	dprintk("NFS reply lookupfh: %d\n", status);
+	if (status == -NFS4ERR_MOVED)	/* callers see NFS4ERR_MOVED as -EREMOTE */
+		status = -EREMOTE;
+	return status;
+}
+
+static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+			      struct qstr *name, struct nfs_fh *fhandle,
+			      struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };	/* passed to nfs4_handle_exception() */
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_proc_lookupfh(server, dirfh, name,
+						    fhandle, fattr),
+				&exception);
+	} while (exception.retry);	/* the handler asked for another attempt */
+	return err;
+}
+
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
@@ -3723,6 +3769,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
+	.lookupfh	= nfs4_proc_lookupfh,
 	.lookup		= nfs4_proc_lookup,
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 41e5a19199e90..26879771831d1 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -770,6 +770,9 @@ struct nfs_rpc_ops {
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
+	int	(*lookupfh)(struct nfs_server *, struct nfs_fh *,
+			    struct qstr *, struct nfs_fh *,
+			    struct nfs_fattr *);
 	int	(*getattr) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fattr *);
 	int	(*setattr) (struct dentry *, struct nfs_fattr *,
-- 
GitLab


From e9326dcab413848e70ab746c7c5363da13e5f801 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:10 -0400
Subject: [PATCH 0817/1063] NFS: Add a server capabilities NFS RPC op

Add a set_capabilities NFS RPC op so that the server capabilities can be set.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 1 +
 include/linux/nfs_xdr.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de2006f754ef9..850f0851023a6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3790,6 +3790,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.statfs		= nfs4_proc_statfs,
 	.fsinfo		= nfs4_proc_fsinfo,
 	.pathconf	= nfs4_proc_pathconf,
+	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
 	.read_done	= nfs4_read_done,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 26879771831d1..dd9ae6761f717 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -809,6 +809,7 @@ struct nfs_rpc_ops {
 			    struct nfs_fsinfo *);
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
+	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	u32 *	(*decode_dirent)(u32 *, struct nfs_entry *, int plus);
 	void	(*read_setup)   (struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
-- 
GitLab


From 24c8dbbb5f777187d660393599641ab3307b4b97 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:10 -0400
Subject: [PATCH 0818/1063] NFS: Generalise the nfs_client structure

Generalise the nfs_client structure by:

 (1) Moving nfs_client to a more general place (nfs_fs_sb.h).

 (2) Renaming its maintenance routines to be non-NFS4 specific.

 (3) Moving those maintenance routines to a new non-NFS4 specific file
     (client.c) and moving the declarations to internal.h.

 (4) Making nfs_find/get_client() take a full sockaddr_in to include the port
     number (will be required for NFS2/3).

 (5) Making nfs_find/get_client() take the NFS protocol version (again will be
     required to differentiate NFS2, 3 & 4 client records).

Also:

 (6) Make nfs_client construction proceed akin to inodes, marking them as under
     construction and providing a function to indicate completion.

 (7) Make nfs_get_client() wait interruptibly if it finds a client that it can
     share, but that client is currently being constructed.

 (8) Make nfs4_create_client() use (6) and (7) instead of locking cl_sem.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/Makefile           |   6 +-
 fs/nfs/callback.c         |   9 +-
 fs/nfs/callback_proc.c    |   9 +-
 fs/nfs/client.c           | 312 ++++++++++++++++++++++++++++++++++++++
 fs/nfs/delegation.c       |   9 +-
 fs/nfs/internal.h         |   6 +
 fs/nfs/nfs4_fs.h          |  52 -------
 fs/nfs/nfs4proc.c         |   2 +-
 fs/nfs/nfs4state.c        | 128 +---------------
 fs/nfs/super.c            |  53 +++----
 include/linux/nfs_fs.h    |   1 +
 include/linux/nfs_fs_sb.h |  60 ++++++++
 12 files changed, 425 insertions(+), 222 deletions(-)
 create mode 100644 fs/nfs/client.c

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 0b572a0c19678..3b993a6f81637 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,9 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
-nfs-y 			:= dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \
-			   proc.o read.o symlink.o unlink.o write.o \
-			   namespace.o
+nfs-y 			:= client.o dir.o file.o inode.o super.o nfs2xdr.o \
+			   pagelist.o proc.o read.o symlink.o unlink.o \
+			   write.o namespace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 1b596b6d9dc21..a3ee11364db02 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -19,6 +19,7 @@
 
 #include "nfs4_fs.h"
 #include "callback.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
@@ -166,15 +167,15 @@ void nfs_callback_down(void)
 
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
-	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
+	struct sockaddr_in *addr = &rqstp->rq_addr;
 	struct nfs_client *clp;
 
 	/* Don't talk to strangers */
-	clp = nfs4_find_client(addr);
+	clp = nfs_find_client(addr, 4);
 	if (clp == NULL)
 		return SVC_DROP;
-	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
-	nfs4_put_client(clp);
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
+	nfs_put_client(clp);
 	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
 			if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 55d6e2ec157fc..97cf8f71451ff 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -10,6 +10,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
  
@@ -22,7 +23,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres
 	
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -48,7 +49,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres
 	up_read(&nfsi->rwsem);
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
 	return res->status;
@@ -61,7 +62,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	unsigned res;
 	
 	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	}
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
 	return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
new file mode 100644
index 0000000000000..cb5e92463bdb0
--- /dev/null
+++ b/fs/nfs/client.c
@@ -0,0 +1,312 @@
+/* client.c: NFS client sharing and management code
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/inet.h>
+#include <linux/nfs_xdr.h>
+
+#include <asm/system.h>
+
+#include "nfs4_fs.h"
+#include "callback.h"
+#include "delegation.h"
+#include "iostat.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+
+static DEFINE_SPINLOCK(nfs_client_lock);
+static LIST_HEAD(nfs_client_list);
+static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+
+/*
+ * Allocate a shared client record; returns NULL on any failure.
+ * Brings up rpciod (and the NFSv4 callback service when nfsversion is 4),
+ * noting each in cl_res_state so nfs_free_client() knows what to release.
+ * These are allocated/deallocated very rarely, so no slab cache is used.
+ */
+static struct nfs_client *nfs_alloc_client(const char *hostname,
+					   const struct sockaddr_in *addr,
+					   int nfsversion)
+{
+	struct nfs_client *clp;
+	int error;
+
+	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+		goto error_0;
+
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
+		goto error_1;
+	}
+	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+
+	if (nfsversion == 4) {
+		if (nfs_callback_up() < 0)
+			goto error_2;
+		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+	}
+
+	atomic_set(&clp->cl_count, 1);
+	clp->cl_cons_state = NFS_CS_INITING;
+
+	clp->cl_nfsversion = nfsversion;
+	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+
+	if (hostname) {
+		clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+		if (!clp->cl_hostname)
+			goto error_3;
+	}
+
+	INIT_LIST_HEAD(&clp->cl_superblocks);
+	clp->cl_rpcclient = ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_NFS_V4
+	init_rwsem(&clp->cl_sem);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_state_owners);
+	INIT_LIST_HEAD(&clp->cl_unused);
+	spin_lock_init(&clp->cl_lock);
+	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
+	clp->cl_boot_time = CURRENT_TIME;
+	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+#endif
+
+	return clp;
+
+error_3:
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
+error_2:
+	rpciod_down();
+	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+error_1:
+	kfree(clp);
+error_0:
+	return NULL;
+}
+
+/*
+ * Destroy a shared client record, releasing what cl_res_state says it holds
+ */
+static void nfs_free_client(struct nfs_client *clp)
+{
+	dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+
+#ifdef CONFIG_NFS_V4
+	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) {
+		while (!list_empty(&clp->cl_unused)) {
+			struct nfs4_state_owner *sp;
+
+			sp = list_entry(clp->cl_unused.next,
+					struct nfs4_state_owner,
+					so_list);
+			list_del(&sp->so_list);
+			kfree(sp);
+		}
+		BUG_ON(!list_empty(&clp->cl_state_owners));
+		nfs_idmap_delete(clp);
+	}
+#endif
+
+	/* -EIO all pending I/O */
+	if (!IS_ERR(clp->cl_rpcclient))
+		rpc_shutdown_client(clp->cl_rpcclient);
+
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
+
+	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+		rpciod_down();
+
+	kfree(clp->cl_hostname);
+	kfree(clp);
+
+	dprintk("<-- nfs_free_client()\n");
+}
+
+/*
+ * Release a client reference; the last one unlinks and frees the record
+ */
+void nfs_put_client(struct nfs_client *clp)
+{
+	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
+
+	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
+		list_del(&clp->cl_share_link);
+		spin_unlock(&nfs_client_lock);
+
+		BUG_ON(!list_empty(&clp->cl_superblocks));
+
+		nfs_free_client(clp);
+	}
+}
+
+/*
+ * Find a client matching NFS version, IP address and port; takes a reference
+ * - caller must hold nfs_client_lock
+ */
+static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		/* Different NFS versions cannot share the same nfs_client */
+		if (clp->cl_nfsversion != nfsversion)
+			continue;
+
+		if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
+			   sizeof(clp->cl_addr.sin_addr)) != 0)
+			continue;
+
+		if (clp->cl_addr.sin_port == addr->sin_port)
+			goto found;
+	}
+
+	return NULL;
+
+found:
+	atomic_inc(&clp->cl_count);
+	return clp;
+}
+
+/*
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client, else the client with a reference held
+ */
+struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	clp = __nfs_find_client(addr, nfsversion);
+	spin_unlock(&nfs_client_lock);
+
+	BUG_ON(clp && clp->cl_cons_state == 0);
+
+	return clp;
+}
+
+/*
+ * Look up a client by IP address and protocol version, creating a new record
+ * (returned still under construction) if none exists; ERR_PTR() on failure
+ */
+struct nfs_client *nfs_get_client(const char *hostname,
+				  const struct sockaddr_in *addr,
+				  int nfsversion)
+{
+	struct nfs_client *clp, *new = NULL;
+	int error;
+
+	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
+		hostname ?: "", NIPQUAD(addr->sin_addr),
+		addr->sin_port, nfsversion);
+
+	/* see if the client already exists */
+	do {
+		spin_lock(&nfs_client_lock);
+
+		clp = __nfs_find_client(addr, nfsversion);
+		if (clp)
+			goto found_client;
+		if (new)
+			goto install_client;
+
+		spin_unlock(&nfs_client_lock);
+
+		new = nfs_alloc_client(hostname, addr, nfsversion);
+	} while (new);
+
+	return ERR_PTR(-ENOMEM);
+
+	/* install a new client and return with it unready */
+install_client:
+	clp = new;
+	list_add(&clp->cl_share_link, &nfs_client_list);
+	spin_unlock(&nfs_client_lock);
+	dprintk("--> nfs_get_client() = %p [new]\n", clp);
+	return clp;
+
+	/* found an existing client
+	 * - make sure it's ready before returning
+	 */
+found_client:
+	spin_unlock(&nfs_client_lock);
+
+	if (new)
+		nfs_free_client(new);
+
+	if (clp->cl_cons_state == NFS_CS_INITING) {
+		DECLARE_WAITQUEUE(myself, current);
+
+		add_wait_queue(&nfs_client_active_wq, &myself);
+
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (signal_pending(current) ||
+			    clp->cl_cons_state != NFS_CS_INITING)
+				break;
+			schedule();
+		}
+
+		remove_wait_queue(&nfs_client_active_wq, &myself);
+
+		if (signal_pending(current)) {
+			nfs_put_client(clp);
+			return ERR_PTR(-ERESTARTSYS);
+		}
+	}
+
+	if (clp->cl_cons_state < NFS_CS_READY) {
+		error = clp->cl_cons_state;
+		nfs_put_client(clp);
+		return ERR_PTR(error);
+	}
+
+	dprintk("--> nfs_get_client() = %p [share]\n", clp);
+	return clp;
+}
+
+/*
+ * Record a client's construction result in cl_cons_state and wake all waiters
+ */
+void nfs_mark_client_ready(struct nfs_client *clp, int state)
+{
+	clp->cl_cons_state = state;
+	wake_up_all(&nfs_client_active_wq);
+}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index cfe239736ac03..57133678db166 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -18,6 +18,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "internal.h"
 
 static struct nfs_delegation *nfs_alloc_delegation(void)
 {
@@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 					sizeof(delegation->stateid)) != 0 ||
 				delegation->type != nfsi->delegation->type) {
 			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
-					__FUNCTION__, NIPQUAD(clp->cl_addr));
+					__FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
 			status = -EIO;
 		}
 	}
@@ -254,7 +255,7 @@ int nfs_do_expire_all_delegations(void *ptr)
 	}
 out:
 	spin_unlock(&clp->cl_lock);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 }
 
@@ -266,10 +267,10 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(nfs_do_expire_all_delegations, clp,
 			"%u.%u.%u.%u-delegreturn",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4802157963f8d..ac370d5d44947 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -15,6 +15,12 @@ struct nfs_clone_mount {
 	rpc_authflavor_t authflavor;
 };
 
+/* client.c */
+extern void nfs_put_client(struct nfs_client *);
+extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
+extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int);
+extern void nfs_mark_client_ready(struct nfs_client *, int);
+
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4e334cb484986..e7879245361eb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -42,55 +42,6 @@ enum nfs4_client_state {
 	NFS4CLNT_LEASE_EXPIRED,
 };
 
-/*
- * The nfs_client identifies our client state to the server.
- */
-struct nfs_client {
-	struct list_head	cl_servers;	/* Global list of servers */
-	struct in_addr		cl_addr;	/* Server identifier */
-	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
-	unsigned long		cl_state;
-
-	u32			cl_lockowner_id;
-
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
-	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
-	spinlock_t		cl_lock;
-	atomic_t		cl_count;
-
-	struct rpc_clnt *	cl_rpcclient;
-
-	struct list_head	cl_superblocks;	/* List of nfs_server structs */
-
-	unsigned long		cl_lease_time;
-	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
-
-	struct rpc_wait_queue	cl_rpcwaitq;
-
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
-	/* idmapper */
-	struct idmap *		cl_idmap;
-
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			cl_ipaddr[16];
-	unsigned char		cl_id_uniquifier;
-};
-
 /*
  * struct rpc_sequence ensures that RPC calls are sent in the exact
  * order that they appear on the list.
@@ -239,9 +190,6 @@ extern void nfs4_renew_state(void *);
 /* nfs4state.c */
 extern void init_nfsv4_state(struct nfs_server *);
 extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs_client *clp);
-extern struct nfs_client *nfs4_find_client(struct in_addr *);
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
 extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 850f0851023a6..803c31b88bb52 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2968,7 +2968,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 	for(;;) {
 		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
 				sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
-				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
+				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
 				cred->cr_ops->cr_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index fa51a7d4c0222..058811e395550 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -50,12 +50,12 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define OPENOWNER_POOL_SIZE	8
 
 const nfs4_stateid zero_stateid;
 
-static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 
 void
@@ -71,127 +71,11 @@ destroy_nfsv4_state(struct nfs_server *server)
 	kfree(server->mnt_path);
 	server->mnt_path = NULL;
 	if (server->nfs_client) {
-		nfs4_put_client(server->nfs_client);
+		nfs_put_client(server->nfs_client);
 		server->nfs_client = NULL;
 	}
 }
 
-/*
- * nfs4_get_client(): returns an empty client structure
- * nfs4_put_client(): drops reference to client structure
- *
- * Since these are allocated/deallocated very rarely, we don't
- * bother putting them in a slab cache...
- */
-static struct nfs_client *
-nfs4_alloc_client(struct in_addr *addr)
-{
-	struct nfs_client *clp;
-
-	if (nfs_callback_up() < 0)
-		return NULL;
-	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
-		nfs_callback_down();
-		return NULL;
-	}
-	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
-	init_rwsem(&clp->cl_sem);
-	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_state_owners);
-	INIT_LIST_HEAD(&clp->cl_unused);
-	spin_lock_init(&clp->cl_lock);
-	atomic_set(&clp->cl_count, 1);
-	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
-	INIT_LIST_HEAD(&clp->cl_superblocks);
-	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
-	clp->cl_rpcclient = ERR_PTR(-EINVAL);
-	clp->cl_boot_time = CURRENT_TIME;
-	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
-	return clp;
-}
-
-static void
-nfs4_free_client(struct nfs_client *clp)
-{
-	struct nfs4_state_owner *sp;
-
-	while (!list_empty(&clp->cl_unused)) {
-		sp = list_entry(clp->cl_unused.next,
-				struct nfs4_state_owner,
-				so_list);
-		list_del(&sp->so_list);
-		kfree(sp);
-	}
-	BUG_ON(!list_empty(&clp->cl_state_owners));
-	nfs_idmap_delete(clp);
-	if (!IS_ERR(clp->cl_rpcclient))
-		rpc_shutdown_client(clp->cl_rpcclient);
-	kfree(clp);
-	nfs_callback_down();
-}
-
-static struct nfs_client *__nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs_client *clp;
-	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
-		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
-			atomic_inc(&clp->cl_count);
-			return clp;
-		}
-	}
-	return NULL;
-}
-
-struct nfs_client *nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs_client *clp;
-	spin_lock(&state_spinlock);
-	clp = __nfs4_find_client(addr);
-	spin_unlock(&state_spinlock);
-	return clp;
-}
-
-struct nfs_client *
-nfs4_get_client(struct in_addr *addr)
-{
-	struct nfs_client *clp, *new = NULL;
-
-	spin_lock(&state_spinlock);
-	for (;;) {
-		clp = __nfs4_find_client(addr);
-		if (clp != NULL)
-			break;
-		clp = new;
-		if (clp != NULL) {
-			list_add(&clp->cl_servers, &nfs4_clientid_list);
-			new = NULL;
-			break;
-		}
-		spin_unlock(&state_spinlock);
-		new = nfs4_alloc_client(addr);
-		spin_lock(&state_spinlock);
-		if (new == NULL)
-			break;
-	}
-	spin_unlock(&state_spinlock);
-	if (new)
-		nfs4_free_client(new);
-	return clp;
-}
-
-void
-nfs4_put_client(struct nfs_client *clp)
-{
-	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
-		return;
-	list_del(&clp->cl_servers);
-	spin_unlock(&state_spinlock);
-	BUG_ON(!list_empty(&clp->cl_superblocks));
-	rpc_wake_up(&clp->cl_rpcwaitq);
-	nfs4_kill_renewd(clp);
-	nfs4_free_client(clp);
-}
-
 static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
@@ -771,11 +655,11 @@ static void nfs4_recover_state(struct nfs_client *clp)
 	__module_get(THIS_MODULE);
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
@@ -970,12 +854,12 @@ static int reclaimer(void *ptr)
 	if (status == -NFS4ERR_CB_PATH_DOWN)
 		nfs_handle_cb_pathdown(clp);
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 	return 0;
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
-				NIPQUAD(clp->cl_addr.s_addr), -status);
+				NIPQUAD(clp->cl_addr.sin_addr), -status);
 	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 	goto out;
 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3ee85c4e65d86..f97d7d9c5c32e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1104,47 +1104,46 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	struct rpc_clnt *clnt = NULL;
 	int err = -EIO;
 
-	clp = nfs4_get_client(&server->addr.sin_addr);
+	clp = nfs_get_client(server->hostname, &server->addr, 4);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
 		return ERR_PTR(err);
 	}
 
 	/* Now create transport and client */
-	down_write(&clp->cl_sem);
-	if (IS_ERR(clp->cl_rpcclient)) {
+	if (clp->cl_cons_state == NFS_CS_INITING) {
 		xprt = xprt_create_proto(proto, &server->addr, timeparms);
 		if (IS_ERR(xprt)) {
-			up_write(&clp->cl_sem);
 			err = PTR_ERR(xprt);
 			dprintk("%s: cannot create RPC transport. Error = %d\n",
 					__FUNCTION__, err);
-			goto out_fail;
+			goto client_init_error;
 		}
 		/* Bind to a reserved port! */
 		xprt->resvport = 1;
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				server->rpc_ops->version, flavor);
 		if (IS_ERR(clnt)) {
-			up_write(&clp->cl_sem);
 			err = PTR_ERR(clnt);
 			dprintk("%s: cannot create RPC client. Error = %d\n",
 					__FUNCTION__, err);
-			goto out_fail;
+			goto client_init_error;
 		}
 		clnt->cl_intr     = 1;
 		clnt->cl_softrtry = 1;
 		clp->cl_rpcclient = clnt;
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		if (nfs_idmap_new(clp) < 0)
-			goto out_fail;
+		err = nfs_idmap_new(clp);
+		if (err < 0) {
+			dprintk("%s: failed to create idmapper.\n",
+				__FUNCTION__);
+			goto client_init_error;
+		}
+		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+		nfs_mark_client_ready(clp, 0);
 	}
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+
 	clnt = rpc_clone_client(clp->cl_rpcclient);
-	if (!IS_ERR(clnt))
-		server->nfs_client = clp;
-	up_write(&clp->cl_sem);
-	clp = NULL;
 
 	if (IS_ERR(clnt)) {
 		dprintk("%s: cannot create RPC client. Error = %d\n",
@@ -1152,11 +1151,6 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		return clnt;
 	}
 
-	if (server->nfs_client->cl_idmap == NULL) {
-		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	if (clnt->cl_auth->au_flavor != flavor) {
 		struct rpc_auth *auth;
 
@@ -1166,11 +1160,16 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 			return (struct rpc_clnt *)auth;
 		}
 	}
+
+	server->nfs_client = clp;
+	down_write(&clp->cl_sem);
+	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+	up_write(&clp->cl_sem);
 	return clnt;
 
- out_fail:
-	if (clp)
-		nfs4_put_client(clp);
+client_init_error:
+	nfs_mark_client_ready(clp, err);
+	nfs_put_client(clp);
 	return ERR_PTR(err);
 }
 
@@ -1329,14 +1328,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		goto out_free;
 	}
 
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_free;
-	}
-
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
@@ -1383,8 +1374,6 @@ static void nfs4_kill_super(struct super_block *sb)
 
 	destroy_nfsv4_state(server);
 
-	rpciod_down();
-
 	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	kfree(server);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a36e01cd6321e..70e1dc9162e21 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -586,6 +586,7 @@ extern void * nfs_root_data(void);
 #define NFSDBG_FILE		0x0040
 #define NFSDBG_ROOT		0x0080
 #define NFSDBG_CALLBACK		0x0100
+#define NFSDBG_CLIENT		0x0200
 #define NFSDBG_ALL		0xFFFF
 
 #ifdef __KERNEL__
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fc20d6b934fb6..a727657e0ad31 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -6,6 +6,66 @@
 
 struct nfs_iostats;
 
+/*
+ * The nfs_client identifies our client state to the server.
+ */
+struct nfs_client {
+	atomic_t		cl_count;
+	int			cl_cons_state;	/* current construction state (-ve: init error) */
+#define NFS_CS_READY		0		/* ready to be used */
+#define NFS_CS_INITING		1		/* busy initialising */
+	int			cl_nfsversion;	/* NFS protocol version */
+	unsigned long		cl_res_state;	/* NFS resources state */
+#define NFS_CS_RPCIOD		0		/* - rpciod started */
+#define NFS_CS_CALLBACK		1		/* - callback started */
+#define NFS_CS_IDMAP		2		/* - idmap started */
+	struct sockaddr_in	cl_addr;	/* server identifier */
+	char *			cl_hostname;	/* hostname of server */
+	struct list_head	cl_share_link;	/* link in global client list */
+	struct list_head	cl_superblocks;	/* List of nfs_server structs */
+
+	struct rpc_clnt *	cl_rpcclient;
+
+#ifdef CONFIG_NFS_V4
+	u64			cl_clientid;	/* constant */
+	nfs4_verifier		cl_confirm;
+	unsigned long		cl_state;
+
+	u32			cl_lockowner_id;
+
+	/*
+	 * The following rwsem ensures exclusive access to the server
+	 * while we recover the state following a lease expiration.
+	 */
+	struct rw_semaphore	cl_sem;
+
+	struct list_head	cl_delegations;
+	struct list_head	cl_state_owners;
+	struct list_head	cl_unused;
+	int			cl_nunused;
+	spinlock_t		cl_lock;
+
+	unsigned long		cl_lease_time;
+	unsigned long		cl_last_renewal;
+	struct work_struct	cl_renewd;
+	struct work_struct	cl_recoverd;
+
+	struct rpc_wait_queue	cl_rpcwaitq;
+
+	/* used for the setclientid verifier */
+	struct timespec		cl_boot_time;
+
+	/* idmapper */
+	struct idmap *		cl_idmap;
+
+	/* Our own IP address, as a null-terminated string.
+	 * This is used to generate the clientid, and the callback address.
+	 */
+	char			cl_ipaddr[16];
+	unsigned char		cl_id_uniquifier;
+#endif
+};
+
 /*
  * NFS client parameters stored in the superblock.
  */
-- 
GitLab


From 0c7d90cfed91a283228017ba6faf37ee0bcd32b1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:10 -0400
Subject: [PATCH 0819/1063] NFS: Use the dentry superblock directly in
 nfs_statfs()

Use the nominated dentry's superblock directly in the NFS statfs() op to get a
file handle, rather than using s_root (which will become a dummy dentry in a
future patch).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f97d7d9c5c32e..a41d516ed595b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -240,11 +240,10 @@ void __exit unregister_nfs_fs(void)
  */
 static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct super_block *sb = dentry->d_sb;
-	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_server *server = NFS_SB(dentry->d_sb);
 	unsigned char blockbits;
 	unsigned long blockres;
-	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fh *fh = NFS_FH(dentry->d_inode);
 	struct nfs_fattr fattr;
 	struct nfs_fsstat res = {
 			.fattr = &fattr,
@@ -253,7 +252,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, rootfh, &res);
+	error = server->rpc_ops->statfs(server, fh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
@@ -263,7 +262,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 * case where f_frsize != f_bsize.  Eventually we want to
 	 * report the value of wtmult in this field.
 	 */
-	buf->f_frsize = sb->s_blocksize;
+	buf->f_frsize = dentry->d_sb->s_blocksize;
 
 	/*
 	 * On most *nix systems, f_blocks, f_bfree, and f_bavail
@@ -272,8 +271,8 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 * thus historically Linux's sys_statfs reports these
 	 * fields in units of f_bsize.
 	 */
-	buf->f_bsize = sb->s_blocksize;
-	blockbits = sb->s_blocksize_bits;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
+	blockbits = dentry->d_sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
-- 
GitLab


From 509de8111656a7d89b4a1a5f430f4460ce510f0f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:11 -0400
Subject: [PATCH 0820/1063] NFS: Add extra const qualifiers

Add some extra const qualifiers into NFS.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/namespace.c        |  3 ++-
 fs/nfs/nfs3proc.c         |  2 +-
 fs/nfs/nfs4namespace.c    |  8 ++++----
 fs/nfs/nfs4proc.c         |  2 +-
 fs/nfs/proc.c             |  2 +-
 fs/nfs/super.c            | 10 +++++-----
 include/linux/nfs_fs_sb.h |  2 +-
 include/linux/nfs_xdr.h   |  6 +++---
 8 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 86b3169c8cac0..85d9ed1dcf42a 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -172,7 +172,8 @@ void nfs_release_automount_timer(void)
 /*
  * Clone a mountpoint of the appropriate type
  */
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname,
+static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
+					   const char *devname,
 					   struct nfs_clone_mount *mountdata)
 {
 #ifdef CONFIG_NFS_V4
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7143b1f82cea4..3e5371241cea4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -886,7 +886,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
 }
 
-struct nfs_rpc_ops	nfs_v3_clientops = {
+const struct nfs_rpc_ops nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index ea38d27b74e6f..faed9bcba50fb 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -23,7 +23,7 @@
 /*
  * Check if fs_root is valid
  */
-static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
+static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
 					 char *buffer, ssize_t buflen)
 {
 	char *end = buffer + buflen;
@@ -34,7 +34,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
 
 	n = pathname->ncomponents;
 	while (--n >= 0) {
-		struct nfs4_string *component = &pathname->components[n];
+		const struct nfs4_string *component = &pathname->components[n];
 		buflen -= component->len + 1;
 		if (buflen < 0)
 			goto Elong;
@@ -60,7 +60,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
  */
 static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 					    const struct dentry *dentry,
-					    struct nfs4_fs_locations *locations)
+					    const struct nfs4_fs_locations *locations)
 {
 	struct vfsmount *mnt = ERR_PTR(-ENOENT);
 	struct nfs_clone_mount mountdata = {
@@ -108,7 +108,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 
 	loc = 0;
 	while (loc < locations->nlocations && IS_ERR(mnt)) {
-		struct nfs4_fs_location *location = &locations->locations[loc];
+		const struct nfs4_fs_location *location = &locations->locations[loc];
 		char *mnt_path;
 
 		if (location == NULL || location->nservers <= 0 ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 803c31b88bb52..061be713b206d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3761,7 +3761,7 @@ static struct inode_operations nfs4_file_inode_operations = {
 	.listxattr	= nfs4_listxattr,
 };
 
-struct nfs_rpc_ops	nfs_v4_clientops = {
+const struct nfs_rpc_ops nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b3899ea3229e3..77676903e0f52 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -671,7 +671,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 }
 
 
-struct nfs_rpc_ops	nfs_v2_clientops = {
+const struct nfs_rpc_ops nfs_v2_clientops = {
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a41d516ed595b..c97f30967955b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -329,10 +329,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
  */
 static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
-	static struct proc_nfs_info {
+	static const struct proc_nfs_info {
 		int flag;
-		char *str;
-		char *nostr;
+		const char *str;
+		const char *nostr;
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
@@ -342,9 +342,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
-	struct proc_nfs_info *nfs_infop;
+	const struct proc_nfs_info *nfs_infop;
 	char buf[12];
-	char *proto;
+	const char *proto;
 
 	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index a727657e0ad31..95f32d5f6e9cd 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -73,7 +73,7 @@ struct nfs_server {
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
-	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
+	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
 	int			flags;		/* various flags */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index dd9ae6761f717..2426b11b6cce5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -833,9 +833,9 @@ struct nfs_rpc_ops {
 /*
  * Function vectors etc. for the NFS client
  */
-extern struct nfs_rpc_ops	nfs_v2_clientops;
-extern struct nfs_rpc_ops	nfs_v3_clientops;
-extern struct nfs_rpc_ops	nfs_v4_clientops;
+extern const struct nfs_rpc_ops	nfs_v2_clientops;
+extern const struct nfs_rpc_ops	nfs_v3_clientops;
+extern const struct nfs_rpc_ops	nfs_v4_clientops;
 extern struct rpc_version	nfs_version2;
 extern struct rpc_version	nfs_version3;
 extern struct rpc_version	nfs_version4;
-- 
GitLab


From 27951bd26031f6c27d38df9e94623bbe208a2464 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:11 -0400
Subject: [PATCH 0821/1063] NFS: Maintain a common server record for NFS2/3 as
 well as for NFS4

Maintain a common server record for NFS2/3 as well as for NFS4 so that common
stuff can be moved there from struct nfs_server.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c            | 21 ++++++++++++++++++++-
 include/linux/nfs_fs_sb.h |  2 +-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c97f30967955b..d1b4a5b36e338 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -658,11 +658,19 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned
 static struct rpc_clnt *
 nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 {
+	struct nfs_client	*clp;
 	struct rpc_timeout	timeparms;
 	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
 	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 
+	clp = nfs_get_client(server->hostname, &server->addr,
+			     server->rpc_ops->version);
+	if (!clp) {
+		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
+		return ERR_PTR(PTR_ERR(clp));
+	}
+
 	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
 	server->retrans_timeo = timeparms.to_initval;
@@ -673,6 +681,8 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	if (IS_ERR(xprt)) {
 		dprintk("%s: cannot create RPC transport. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(xprt));
+		nfs_mark_client_ready(clp, PTR_ERR(xprt));
+		nfs_put_client(clp);
 		return (struct rpc_clnt *)xprt;
 	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
@@ -686,9 +696,13 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	clnt->cl_intr     = 1;
 	clnt->cl_softrtry = 1;
 
+	nfs_mark_client_ready(clp, 0);
+	server->nfs_client = clp;
 	return clnt;
 
 out_fail:
+	nfs_mark_client_ready(clp, PTR_ERR(xprt));
+	nfs_put_client(clp);
 	return clnt;
 }
 
@@ -764,6 +778,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 	if (server == NULL)
 		goto out_err;
 	memcpy(server, parent, sizeof(*server));
+	atomic_inc(&server->nfs_client->cl_count);
 	hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
 	len = strlen(hostname) + 1;
 	server->hostname = kmalloc(len, GFP_KERNEL);
@@ -796,6 +811,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 out_rpciod_down:
 	rpciod_down();
 	kfree(server->hostname);
+	nfs_put_client(server->nfs_client);
 	kfree(server);
 	return simple_set_mnt(mnt, sb);
 kill_rpciod:
@@ -803,6 +819,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 free_hostname:
 	kfree(server->hostname);
 free_server:
+	nfs_put_client(server->nfs_client);
 	kfree(server);
 out_err:
 	return error;
@@ -1071,6 +1088,7 @@ static void nfs_kill_super(struct super_block *s)
 
 	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
+	nfs_put_client(server->nfs_client);
 	kfree(server);
 	nfs_release_automount_timer();
 }
@@ -1421,7 +1439,6 @@ static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_c
 	nfs4_server_capabilities(server, &server->fh);
 
 	down_write(&clp->cl_sem);
-	atomic_inc(&clp->cl_count);
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	up_write(&clp->cl_sem);
 	return sb;
@@ -1476,6 +1493,8 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf
 	retrans = 1;
 	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
 
+	nfs_put_client(server->nfs_client);
+	server->nfs_client = NULL;
 	server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
 	if (IS_ERR((err = server->client)))
 		goto out_err;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 95f32d5f6e9cd..e7d7662f51fd9 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -70,6 +70,7 @@ struct nfs_client {
  * NFS client parameters stored in the superblock.
  */
 struct nfs_server {
+	struct nfs_client *	nfs_client;	/* shared client and NFS4 state */
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
@@ -103,7 +104,6 @@ struct nfs_server {
 	 */
 	char			ip_addr[16];
 	char *			mnt_path;
-	struct nfs_client *	nfs_client;	/* all NFSv4 state starts here */
 	struct list_head	nfs4_siblings;	/* List of other nfs_server structs
 						 * that share the same clientid
 						 */
-- 
GitLab


From 1f163415dc05983830bcc47b33c155b2528b1574 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:11 -0400
Subject: [PATCH 0822/1063] NFS: Make better use of inode* dereferencing macros

Make better use of inode* dereferencing macros to hide dereferencing chains
(including NFS_PROTO and NFS_CLIENT).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c     |  2 +-
 fs/nfs/nfs4proc.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 48e892880d5b9..a146ed338534a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -111,7 +111,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
+	res = NFS_PROTO(inode)->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 061be713b206d..b731b19452700 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1268,7 +1268,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		BUG_ON(nd->intent.open.flags & O_CREAT);
 	}
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
 	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
@@ -1291,7 +1291,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
@@ -1565,7 +1565,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	nfs_fattr_init(fattr);
 	
-	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 
@@ -1927,7 +1927,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	struct rpc_cred *cred;
 	int status = 0;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred)) {
 		status = PTR_ERR(cred);
 		goto out;
@@ -2816,7 +2816,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		return -EOPNOTSUPP;
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
-	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (ret == 0)
 		nfs4_write_cached_acl(inode, buf, buflen);
 	return ret;
-- 
GitLab


From 8fa5c000d7f986ef9cdc6d95f9f7fcee20e0a7d6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:12 -0400
Subject: [PATCH 0823/1063] NFS: Move rpc_ops from nfs_server to nfs_client

Move the rpc_ops from the nfs_server struct to the nfs_client struct as they're
common to all server records of a particular NFS protocol version.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c              |  2 +-
 fs/nfs/inode.c            |  4 +--
 fs/nfs/namespace.c        |  6 ++--
 fs/nfs/nfs4proc.c         |  2 +-
 fs/nfs/super.c            | 59 ++++++++++++++++++++++-----------------
 include/linux/nfs_fs.h    |  2 +-
 include/linux/nfs_fs_sb.h |  2 +-
 7 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 067d144d141b0..19362712452f0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1147,7 +1147,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	}
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
-		error = server->rpc_ops->getattr(server, fhandle, fattr);
+		error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
 		if (error < 0)
 			goto out_err;
 	}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6ed018c9aad2e..771c3b833757d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -237,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
-		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
+		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
-			inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
+			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 85d9ed1dcf42a..d8b8d56266cbd 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -104,7 +104,9 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 		goto out_follow;
 	/* Look it up again */
 	parent = dget_parent(nd->dentry);
-	err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr);
+	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+						  &nd->dentry->d_name,
+						  &fh, &fattr);
 	dput(parent);
 	if (err != 0)
 		goto out_err;
@@ -178,7 +180,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
 {
 #ifdef CONFIG_NFS_V4
 	struct vfsmount *mnt = NULL;
-	switch (server->rpc_ops->version) {
+	switch (server->nfs_client->cl_nfsversion) {
 		case 2:
 		case 3:
 			mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b731b19452700..1573eeb07ce10 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	}
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
-		return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+		return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 	return 0;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d1b4a5b36e338..e1e5eab0259b6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -252,7 +252,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, fh, &res);
+	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
@@ -343,10 +343,11 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ 0, NULL, NULL }
 	};
 	const struct proc_nfs_info *nfs_infop;
+	struct nfs_client *clp = nfss->nfs_client;
 	char buf[12];
 	const char *proto;
 
-	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
+	seq_printf(m, ",vers=%d", clp->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
 	if (nfss->acregmin != 3*HZ || showdefaults)
@@ -427,7 +428,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",namelen=%d", nfss->namelen);
 
 #ifdef CONFIG_NFS_V4
-	if (nfss->rpc_ops->version == 4) {
+	if (nfss->nfs_client->cl_nfsversion == 4) {
 		seq_printf(m, "\n\tnfsv4:\t");
 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@ -503,7 +504,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
 	struct nfs_server	*server = NFS_SB(sb);
 	int			error;
 
-	error = server->rpc_ops->getroot(server, rootfh, fsinfo);
+	error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo);
 	if (error < 0) {
 		dprintk("nfs_get_root: getattr error = %d\n", -error);
 		return ERR_PTR(error);
@@ -553,14 +554,14 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 		no_root_error = -ENOMEM;
 		goto out_no_root;
 	}
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
+	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
 
 	/* mount time stamp, in seconds */
 	server->mount_time = jiffies;
 
 	/* Get some general file system info */
 	if (server->namelen == 0 &&
-	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+	    server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
 		server->namelen = pathinfo.max_namelen;
 	/* Work out a lot of parameters */
 	if (server->rsize == 0)
@@ -663,9 +664,14 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
 	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+	int			nfsversion = 2;
 
-	clp = nfs_get_client(server->hostname, &server->addr,
-			     server->rpc_ops->version);
+#ifdef CONFIG_NFS_V3
+	if (server->flags & NFS_MOUNT_VER3)
+		nfsversion = 3;
+#endif
+
+	clp = nfs_get_client(server->hostname, &server->addr, nfsversion);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
 		return ERR_PTR(PTR_ERR(clp));
@@ -676,6 +682,19 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	server->retrans_timeo = timeparms.to_initval;
 	server->retrans_count = timeparms.to_retries;
 
+	/* Check NFS protocol revision and initialize RPC op vector
+	 * and file handle pool. */
+#ifdef CONFIG_NFS_V3
+	if (nfsversion == 3) {
+		clp->rpc_ops = &nfs_v3_clientops;
+		server->caps |= NFS_CAP_READDIRPLUS;
+	} else {
+		clp->rpc_ops = &nfs_v2_clientops;
+	}
+#else
+	clp->rpc_ops = &nfs_v2_clientops;
+#endif
+
 	/* create transport and client */
 	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
@@ -686,7 +705,7 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 		return (struct rpc_clnt *)xprt;
 	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 server->rpc_ops->version, data->pseudoflavor);
+				 clp->cl_nfsversion, data->pseudoflavor);
 	if (IS_ERR(clnt)) {
 		dprintk("%s: cannot create RPC client. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(xprt));
@@ -750,7 +769,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl
 	fsinfo.fattr = data->fattr;
 	if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
 		nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
+	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
 	sb->s_flags |= MS_ACTIVE;
 	return server;
 out_put_root:
@@ -865,19 +884,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		return -ENOMEM;
 	strcpy(server->hostname, data->hostname);
 
-	/* Check NFS protocol revision and initialize RPC op vector
-	 * and file handle pool. */
-#ifdef CONFIG_NFS_V3
-	if (server->flags & NFS_MOUNT_VER3) {
-		server->rpc_ops = &nfs_v3_clientops;
-		server->caps |= NFS_CAP_READDIRPLUS;
-	} else {
-		server->rpc_ops = &nfs_v2_clientops;
-	}
-#else
-	server->rpc_ops = &nfs_v2_clientops;
-#endif
-
 	/* Fill in pseudoflavor for mount version < 5 */
 	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
 		data->pseudoflavor = RPC_AUTH_UNIX;
@@ -888,6 +894,7 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 	server->client = nfs_create_client(server, data);
 	if (IS_ERR(server->client))
 		return PTR_ERR(server->client);
+
 	/* RFC 2623, sec 2.3.2 */
 	if (authflavor != RPC_AUTH_UNIX) {
 		struct rpc_auth *auth;
@@ -1129,6 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 
 	/* Now create transport and client */
 	if (clp->cl_cons_state == NFS_CS_INITING) {
+		clp->rpc_ops = &nfs_v4_clientops;
+
 		xprt = xprt_create_proto(proto, &server->addr, timeparms);
 		if (IS_ERR(xprt)) {
 			err = PTR_ERR(xprt);
@@ -1139,7 +1148,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		/* Bind to a reserved port! */
 		xprt->resvport = 1;
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				server->rpc_ops->version, flavor);
+				clp->cl_nfsversion, flavor);
 		if (IS_ERR(clnt)) {
 			err = PTR_ERR(clnt);
 			dprintk("%s: cannot create RPC client. Error = %d\n",
@@ -1215,8 +1224,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	server->acdirmin = data->acdirmin*HZ;
 	server->acdirmax = data->acdirmax*HZ;
 
-	server->rpc_ops = &nfs_v4_clientops;
-
 	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
 	server->retrans_timeo = timeparms.to_initval;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 70e1dc9162e21..51e9bd90dedcc 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -215,7 +215,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode)
 #define NFS_FH(inode)			(&NFS_I(inode)->fh)
 #define NFS_SERVER(inode)		(NFS_SB(inode->i_sb))
 #define NFS_CLIENT(inode)		(NFS_SERVER(inode)->client)
-#define NFS_PROTO(inode)		(NFS_SERVER(inode)->rpc_ops)
+#define NFS_PROTO(inode)		(NFS_SERVER(inode)->nfs_client->rpc_ops)
 #define NFS_ADDR(inode)			(RPC_PEERADDR(NFS_CLIENT(inode)))
 #define NFS_COOKIEVERF(inode)		(NFS_I(inode)->cookieverf)
 #define NFS_READTIME(inode)		(NFS_I(inode)->read_cache_jiffies)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e7d7662f51fd9..aae7c117597a6 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -25,6 +25,7 @@ struct nfs_client {
 	struct list_head	cl_superblocks;	/* List of nfs_server structs */
 
 	struct rpc_clnt *	cl_rpcclient;
+	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
 
 #ifdef CONFIG_NFS_V4
 	u64			cl_clientid;	/* constant */
@@ -74,7 +75,6 @@ struct nfs_server {
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
-	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
 	int			flags;		/* various flags */
-- 
GitLab


From 5006a76cca8f86c6975c16fcf67e83b8b0eee2b6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:12 -0400
Subject: [PATCH 0824/1063] NFS: Eliminate client_sys in favour of cl_rpcclient

Eliminate nfs_server::client_sys in favour of nfs_client::cl_rpcclient as we
only really need one per server that we're talking to since it doesn't have any
security on it.

The retransmission management variables are also moved to the common struct as
they're required to set up the cl_rpcclient connection.

The NFS2/3 client and client_acl connections are thenceforth derived by cloning
the cl_rpcclient connection and post-applying the authorisation flavour.

The code for setting up the initial common connection has been moved to
client.c as nfs_create_rpc_client().  All the NFS program definition tables are
also moved there as that's where they're now required rather than super.c.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 119 ++++++++++++++++++++
 fs/nfs/internal.h         |   2 +
 fs/nfs/nfs3proc.c         |   6 +-
 fs/nfs/proc.c             |   4 +-
 fs/nfs/super.c            | 222 +++++++++-----------------------------
 include/linux/nfs_fs_sb.h |   5 +-
 6 files changed, 179 insertions(+), 179 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index cb5e92463bdb0..c08cab935ad51 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -50,6 +50,48 @@ static DEFINE_SPINLOCK(nfs_client_lock);
 static LIST_HEAD(nfs_client_list);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 
+/*
+ * RPC cruft for NFS
+ */
+static struct rpc_version *nfs_version[5] = {
+	[2]			= &nfs_version2,
+#ifdef CONFIG_NFS_V3
+	[3]			= &nfs_version3,
+#endif
+#ifdef CONFIG_NFS_V4
+	[4]			= &nfs_version4,
+#endif
+};
+
+struct rpc_program nfs_program = {
+	.name			= "nfs",
+	.number			= NFS_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfs_version),
+	.version		= nfs_version,
+	.stats			= &nfs_rpcstat,
+	.pipe_dir_name		= "/nfs",
+};
+
+struct rpc_stat nfs_rpcstat = {
+	.program		= &nfs_program
+};
+
+
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
+static struct rpc_version *	nfsacl_version[] = {
+	[3]			= &nfsacl_version3,
+};
+
+struct rpc_program		nfsacl_program = {
+	.name			= "nfsacl",
+	.number			= NFS_ACL_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfsacl_version),
+	.version		= nfsacl_version,
+	.stats			= &nfsacl_rpcstat,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
+
 /*
  * Allocate a shared client record
  *
@@ -310,3 +352,80 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state)
 	clp->cl_cons_state = state;
 	wake_up_all(&nfs_client_active_wq);
 }
+
+/*
+ * Initialise the timeout values for a connection
+ */
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
+				    unsigned int timeo, unsigned int retrans)
+{
+	to->to_initval = timeo * HZ / 10;
+	to->to_retries = retrans;
+	if (!to->to_retries)
+		to->to_retries = 2;
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		if (!to->to_initval)
+			to->to_initval = 60 * HZ;
+		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+			to->to_initval = NFS_MAX_TCP_TIMEOUT;
+		to->to_increment = to->to_initval;
+		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		to->to_exponential = 0;
+		break;
+	case IPPROTO_UDP:
+	default:
+		if (!to->to_initval)
+			to->to_initval = 11 * HZ / 10;
+		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+			to->to_initval = NFS_MAX_UDP_TIMEOUT;
+		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
+		to->to_exponential = 1;
+		break;
+	}
+}
+
+/*
+ * Create an RPC client handle
+ */
+int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+			  unsigned int timeo,
+			  unsigned int retrans,
+			  rpc_authflavor_t flavor)
+{
+	struct rpc_timeout	timeparms;
+	struct rpc_xprt		*xprt = NULL;
+	struct rpc_clnt		*clnt = NULL;
+
+	if (!IS_ERR(clp->cl_rpcclient))
+		return 0;
+
+	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
+	clp->retrans_timeo = timeparms.to_initval;
+	clp->retrans_count = timeparms.to_retries;
+
+	/* create transport and client */
+	xprt = xprt_create_proto(proto, &clp->cl_addr, &timeparms);
+	if (IS_ERR(xprt)) {
+		dprintk("%s: cannot create RPC transport. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
+		return PTR_ERR(xprt);
+	}
+
+	/* Bind to a reserved port! */
+	xprt->resvport = 1;
+	/* Create the client RPC handle */
+	clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program,
+				 clp->rpc_ops->version, RPC_AUTH_UNIX);
+	if (IS_ERR(clnt)) {
+		dprintk("%s: cannot create RPC client. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(clnt));
+		return PTR_ERR(clnt);
+	}
+
+	clnt->cl_intr     = 1;
+	clnt->cl_softrtry = 1;
+	clp->cl_rpcclient = clnt;
+	return 0;
+}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ac370d5d44947..2f3aa52fbefc7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -20,6 +20,8 @@ extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
 extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int);
 extern void nfs_mark_client_ready(struct nfs_client *, int);
+extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int,
+				 unsigned int, rpc_authflavor_t);
 
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3e5371241cea4..0622af0122bef 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	status = do_proc_get_root(server->client, fhandle, info);
-	if (status && server->client_sys != server->client)
-		status = do_proc_get_root(server->client_sys, fhandle, info);
+	if (status && server->nfs_client->cl_rpcclient != server->client)
+		status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
 	return status;
 }
 
@@ -785,7 +785,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 77676903e0f52..5a8b9407ee9a8 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
 	msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
 	msg.rpc_resp = &fsinfo;
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e1e5eab0259b6..85583414a3ca2 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -60,52 +60,6 @@
  */
 #define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
 
-/*
- * RPC cruft for NFS
- */
-static struct rpc_version * nfs_version[] = {
-	NULL,
-	NULL,
-	&nfs_version2,
-#if defined(CONFIG_NFS_V3)
-	&nfs_version3,
-#elif defined(CONFIG_NFS_V4)
-	NULL,
-#endif
-#if defined(CONFIG_NFS_V4)
-	&nfs_version4,
-#endif
-};
-
-static struct rpc_program nfs_program = {
-	.name			= "nfs",
-	.number			= NFS_PROGRAM,
-	.nrvers			= ARRAY_SIZE(nfs_version),
-	.version		= nfs_version,
-	.stats			= &nfs_rpcstat,
-	.pipe_dir_name		= "/nfs",
-};
-
-struct rpc_stat nfs_rpcstat = {
-	.program		= &nfs_program
-};
-
-
-#ifdef CONFIG_NFS_V3_ACL
-static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
-static struct rpc_version *	nfsacl_version[] = {
-	[3]			= &nfsacl_version3,
-};
-
-struct rpc_program		nfsacl_program = {
-	.name =			"nfsacl",
-	.number =		NFS_ACL_PROGRAM,
-	.nrvers =		ARRAY_SIZE(nfsacl_version),
-	.version =		nfsacl_version,
-	.stats =		&nfsacl_rpcstat,
-};
-#endif  /* CONFIG_NFS_V3_ACL */
-
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -376,8 +330,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 			proto = buf;
 	}
 	seq_printf(m, ",proto=%s", proto);
-	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
-	seq_printf(m, ",retrans=%u", nfss->retrans_count);
+	seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
+	seq_printf(m, ",retrans=%u", clp->retrans_count);
 	seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
 }
 
@@ -621,38 +575,6 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 	return no_root_error;
 }
 
-/*
- * Initialise the timeout values for a connection
- */
-static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
-{
-	to->to_initval = timeo * HZ / 10;
-	to->to_retries = retrans;
-	if (!to->to_retries)
-		to->to_retries = 2;
-
-	switch (proto) {
-	case IPPROTO_TCP:
-		if (!to->to_initval)
-			to->to_initval = 60 * HZ;
-		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
-			to->to_initval = NFS_MAX_TCP_TIMEOUT;
-		to->to_increment = to->to_initval;
-		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
-		to->to_exponential = 0;
-		break;
-	case IPPROTO_UDP:
-	default:
-		if (!to->to_initval)
-			to->to_initval = 11 * HZ / 10;
-		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
-			to->to_initval = NFS_MAX_UDP_TIMEOUT;
-		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
-		to->to_exponential = 1;
-		break;
-	}
-}
-
 /*
  * Create an RPC client handle.
  */
@@ -660,11 +582,10 @@ static struct rpc_clnt *
 nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 {
 	struct nfs_client	*clp;
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt		*xprt = NULL;
-	struct rpc_clnt		*clnt = NULL;
+	struct rpc_clnt		*clnt;
 	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 	int			nfsversion = 2;
+	int			err;
 
 #ifdef CONFIG_NFS_V3
 	if (server->flags & NFS_MOUNT_VER3)
@@ -677,52 +598,54 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 		return ERR_PTR(PTR_ERR(clp));
 	}
 
-	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
-	/* Check NFS protocol revision and initialize RPC op vector
-	 * and file handle pool. */
+	if (clp->cl_cons_state == NFS_CS_INITING) {
+		/* Check NFS protocol revision and initialize RPC op
+		 * vector and file handle pool. */
 #ifdef CONFIG_NFS_V3
-	if (nfsversion == 3) {
-		clp->rpc_ops = &nfs_v3_clientops;
-		server->caps |= NFS_CAP_READDIRPLUS;
-	} else {
-		clp->rpc_ops = &nfs_v2_clientops;
-	}
+		if (nfsversion == 3) {
+			clp->rpc_ops = &nfs_v3_clientops;
+			server->caps |= NFS_CAP_READDIRPLUS;
+		} else {
+			clp->rpc_ops = &nfs_v2_clientops;
+		}
 #else
-	clp->rpc_ops = &nfs_v2_clientops;
+		clp->rpc_ops = &nfs_v2_clientops;
 #endif
 
-	/* create transport and client */
-	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("%s: cannot create RPC transport. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		nfs_mark_client_ready(clp, PTR_ERR(xprt));
-		nfs_put_client(clp);
-		return (struct rpc_clnt *)xprt;
+		/* create transport and client */
+		err = nfs_create_rpc_client(clp, proto, data->timeo,
+					    data->retrans, RPC_AUTH_UNIX);
+		if (err < 0)
+			goto client_init_error;
+
+		nfs_mark_client_ready(clp, 0);
 	}
-	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 clp->cl_nfsversion, data->pseudoflavor);
+
+	/* create an nfs_server-specific client */
+	clnt = rpc_clone_client(clp->cl_rpcclient);
 	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		goto out_fail;
+		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
+		nfs_put_client(clp);
+		return ERR_PTR(PTR_ERR(clnt));
 	}
 
-	clnt->cl_intr     = 1;
-	clnt->cl_softrtry = 1;
+	if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) {
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(data->pseudoflavor, server->client);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return ERR_PTR(PTR_ERR(auth));
+		}
+	}
 
-	nfs_mark_client_ready(clp, 0);
 	server->nfs_client = clp;
 	return clnt;
 
-out_fail:
-	nfs_mark_client_ready(clp, PTR_ERR(xprt));
+client_init_error:
+	nfs_mark_client_ready(clp, err);
 	nfs_put_client(clp);
-	return clnt;
+	return ERR_PTR(err);
 }
 
 /*
@@ -741,7 +664,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl
 	sb->s_blocksize_bits = data->sb->s_blocksize_bits;
 	sb->s_maxbytes = data->sb->s_maxbytes;
 
-	server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	server->client_acl = ERR_PTR(-EINVAL);
 	server->io_stats = nfs_alloc_iostats();
 	if (server->io_stats == NULL)
 		goto out;
@@ -750,11 +673,6 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl
 	if (IS_ERR((err = server->client)))
 		goto out;
 
-	if (!IS_ERR(parent->client_sys)) {
-		server->client_sys = rpc_clone_client(parent->client_sys);
-		if (IS_ERR((err = server->client_sys)))
-			goto out;
-	}
 	if (!IS_ERR(parent->client_acl)) {
 		server->client_acl = rpc_clone_client(parent->client_acl);
 		if (IS_ERR((err = server->client_acl)))
@@ -813,7 +731,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 		error = PTR_ERR(sb);
 		goto kill_rpciod;
 	}
-		
+
 	if (sb->s_root)
 		goto out_rpciod_down;
 
@@ -896,19 +814,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		return PTR_ERR(server->client);
 
 	/* RFC 2623, sec 2.3.2 */
-	if (authflavor != RPC_AUTH_UNIX) {
-		struct rpc_auth *auth;
-
-		server->client_sys = rpc_clone_client(server->client);
-		if (IS_ERR(server->client_sys))
-			return PTR_ERR(server->client_sys);
-		auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
-		if (IS_ERR(auth))
-			return PTR_ERR(auth);
-	} else {
-		atomic_inc(&server->client->cl_count);
-		server->client_sys = server->client;
-	}
 	if (server->flags & NFS_MOUNT_VER3) {
 #ifdef CONFIG_NFS_V3_ACL
 		if (!(server->flags & NFS_MOUNT_NOACL)) {
@@ -1012,7 +917,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 		goto out_err_noserver;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	server->client = server->client_acl = ERR_PTR(-EINVAL);
 
 	root = &server->fh;
 	if (data->flags & NFS_MOUNT_VER3)
@@ -1083,8 +988,6 @@ static void nfs_kill_super(struct super_block *s)
 
 	if (!IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
-	if (!IS_ERR(server->client_sys))
-		rpc_shutdown_client(server->client_sys);
 	if (!IS_ERR(server->client_acl))
 		rpc_shutdown_client(server->client_acl);
 
@@ -1121,10 +1024,9 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
 
 #ifdef CONFIG_NFS_V4
 static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
-	struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
+	int timeo, int retrans, int proto, rpc_authflavor_t flavor)
 {
 	struct nfs_client *clp;
-	struct rpc_xprt *xprt = NULL;
 	struct rpc_clnt *clnt = NULL;
 	int err = -EIO;
 
@@ -1138,26 +1040,10 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	if (clp->cl_cons_state == NFS_CS_INITING) {
 		clp->rpc_ops = &nfs_v4_clientops;
 
-		xprt = xprt_create_proto(proto, &server->addr, timeparms);
-		if (IS_ERR(xprt)) {
-			err = PTR_ERR(xprt);
-			dprintk("%s: cannot create RPC transport. Error = %d\n",
-					__FUNCTION__, err);
+		err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor);
+		if (err < 0)
 			goto client_init_error;
-		}
-		/* Bind to a reserved port! */
-		xprt->resvport = 1;
-		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				clp->cl_nfsversion, flavor);
-		if (IS_ERR(clnt)) {
-			err = PTR_ERR(clnt);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto client_init_error;
-		}
-		clnt->cl_intr     = 1;
-		clnt->cl_softrtry = 1;
-		clp->cl_rpcclient = clnt;
+
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
 		err = nfs_idmap_new(clp);
 		if (err < 0) {
@@ -1205,7 +1091,6 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
 {
 	struct nfs_server *server;
-	struct rpc_timeout timeparms;
 	rpc_authflavor_t authflavour;
 	int err = -EIO;
 
@@ -1224,11 +1109,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	server->acdirmin = data->acdirmin*HZ;
 	server->acdirmax = data->acdirmax*HZ;
 
-	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
 	/* Now create transport and client */
 	authflavour = RPC_AUTH_UNIX;
 	if (data->auth_flavourlen != 0) {
@@ -1244,7 +1124,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 		}
 	}
 
-	server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour);
+	server->client = nfs4_create_client(server, data->timeo, data->retrans,
+					    data->proto, authflavour);
 	if (IS_ERR(server->client)) {
 		err = PTR_ERR(server->client);
 			dprintk("%s: cannot create RPC client. Error = %d\n",
@@ -1318,7 +1199,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		return -ENOMEM;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	server->client = server->client_acl = ERR_PTR(-EINVAL);
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
@@ -1489,7 +1370,6 @@ static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nf
 static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
 {
 	struct nfs_server *server = NFS_SB(sb);
-	struct rpc_timeout timeparms;
 	int proto, timeo, retrans;
 	void *err;
 
@@ -1498,11 +1378,11 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf
 	   set the timeouts and retries to low values */
 	timeo = 2;
 	retrans = 1;
-	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
 
 	nfs_put_client(server->nfs_client);
 	server->nfs_client = NULL;
-	server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
+	server->client = nfs4_create_client(server, timeo, retrans, proto,
+					    data->authflavor);
 	if (IS_ERR((err = server->client)))
 		goto out_err;
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index aae7c117597a6..d404ceca9168f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -26,6 +26,8 @@ struct nfs_client {
 
 	struct rpc_clnt *	cl_rpcclient;
 	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
+	unsigned long		retrans_timeo;	/* retransmit timeout */
+	unsigned int		retrans_count;	/* number of retransmit tries */
 
 #ifdef CONFIG_NFS_V4
 	u64			cl_clientid;	/* constant */
@@ -73,7 +75,6 @@ struct nfs_client {
 struct nfs_server {
 	struct nfs_client *	nfs_client;	/* shared client and NFS4 state */
 	struct rpc_clnt *	client;		/* RPC client handle */
-	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
@@ -90,8 +91,6 @@ struct nfs_server {
 	unsigned int		acregmax;
 	unsigned int		acdirmin;
 	unsigned int		acdirmax;
-	unsigned long		retrans_timeo;	/* retransmit timeout */
-	unsigned int		retrans_count;	/* number of retransmit tries */
 	unsigned int		namelen;
 	char *			hostname;	/* remote hostname */
 	struct nfs_fh		fh;
-- 
GitLab


From cf6d7b5de8535a9f0088c5cc28ee2dae87371b4a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:12 -0400
Subject: [PATCH 0825/1063] NFS: Start rpciod in server common management

Start rpciod in the server common (nfs_client struct) management code rather
than in the superblock management code.  This means we only need to "start" it
once per server instead of once per superblock.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 31 ++++++-------------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 85583414a3ca2..5842d510d732c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -722,18 +722,15 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 	if (server->hostname == NULL)
 		goto free_server;
 	memcpy(server->hostname, hostname, len);
-	error = rpciod_up();
-	if (error != 0)
-		goto free_hostname;
 
 	sb = fill_sb(server, data);
 	if (IS_ERR(sb)) {
 		error = PTR_ERR(sb);
-		goto kill_rpciod;
+		goto free_hostname;
 	}
 
 	if (sb->s_root)
-		goto out_rpciod_down;
+		goto out_share;
 
 	server = fill_server(sb, data);
 	if (IS_ERR(server)) {
@@ -745,14 +742,11 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
 	return error;
-out_rpciod_down:
-	rpciod_down();
+out_share:
 	kfree(server->hostname);
 	nfs_put_client(server->nfs_client);
 	kfree(server);
 	return simple_set_mnt(mnt, sb);
-kill_rpciod:
-	rpciod_down();
 free_hostname:
 	kfree(server->hostname);
 free_server:
@@ -939,22 +933,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 		goto out_err;
 	}
 
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_err;
-	}
-
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
-		goto out_err_rpciod;
+		goto out_err;
 	}
 
 	if (s->s_root)
-		goto out_rpciod_down;
+		goto out_share;
 
 	s->s_flags = flags;
 
@@ -967,13 +953,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	s->s_flags |= MS_ACTIVE;
 	return simple_set_mnt(mnt, s);
 
-out_rpciod_down:
-	rpciod_down();
+out_share:
 	kfree(server);
 	return simple_set_mnt(mnt, s);
 
-out_err_rpciod:
-	rpciod_down();
 out_err:
 	kfree(server);
 out_err_noserver:
@@ -994,8 +977,6 @@ static void nfs_kill_super(struct super_block *s)
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_down();	/* release rpc.lockd */
 
-	rpciod_down();		/* release rpciod */
-
 	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	nfs_put_client(server->nfs_client);
-- 
GitLab


From 54ceac4515986030c2502960be620198dd8fe25b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:13 -0400
Subject: [PATCH 0826/1063] NFS: Share NFS superblocks per-protocol per-server
 per-FSID

The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.

It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.

We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal
filesystem activity to be located in another tree within the superblock, the
anonymous root is named and materialises attached to the second tree at the
appropriate point.

Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:

 (1) The root and intervening nodes may not be accessible to the client.

     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).

     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.

 (2) Inaccessible symbolic links.

     If we're asked to mount two exports from the server, eg:

	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn

     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.

     This would lead to having constructed an erroneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.

     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.

This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).

This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.

Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.

This patch makes the following changes:

 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.

     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.

 (2) The sequence of events undertaken by NFS mount is now reordered:

     (a) A volume representation (struct nfs_server) is allocated.

     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.

     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.

     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.

     (e) The volume FSID is probed for on the root FH.

     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.

     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.

     (h) If allocated, the superblock is initialised.

     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.

     (j) The root dentry for this mount is looked up from the root FH.

     (k) The root dentry for this mount is assigned to the vfsmount.

 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).

     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.

 (4) The client management code uses a new debug facility, NFSDBG_CLIENT, which
     is enabled by echoing 1024 to /proc/sys/sunrpc/nfs_debug.

 (5) Clone mounts are now called xdev mounts.

 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/Makefile           |    2 +-
 fs/nfs/client.c           |  735 +++++++++++++++++++++-
 fs/nfs/dir.c              |   16 +-
 fs/nfs/getroot.c          |  306 ++++++++++
 fs/nfs/idmap.c            |    3 +-
 fs/nfs/inode.c            |    2 +-
 fs/nfs/internal.h         |   82 +--
 fs/nfs/namespace.c        |   25 +-
 fs/nfs/nfs3proc.c         |    2 +-
 fs/nfs/nfs4_fs.h          |    6 -
 fs/nfs/nfs4namespace.c    |  110 +++-
 fs/nfs/nfs4proc.c         |   59 +-
 fs/nfs/nfs4renewd.c       |   13 -
 fs/nfs/nfs4state.c        |   18 -
 fs/nfs/read.c             |    2 +-
 fs/nfs/super.c            | 1207 ++++++++++++++-----------------------
 fs/nfs/write.c            |    2 +-
 include/linux/nfs_fs_sb.h |   21 +-
 18 files changed, 1655 insertions(+), 956 deletions(-)
 create mode 100644 fs/nfs/getroot.c

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 3b993a6f81637..f4580b44eef4b 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
-nfs-y 			:= client.o dir.o file.o inode.o super.o nfs2xdr.o \
+nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
 			   pagelist.o proc.o read.o symlink.o unlink.o \
 			   write.o namespace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c08cab935ad51..dafba608c0a05 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
 
 static DEFINE_SPINLOCK(nfs_client_lock);
 static LIST_HEAD(nfs_client_list);
+static LIST_HEAD(nfs_volume_list);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 
 /*
@@ -268,9 +269,9 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
  * Look up a client by IP address and protocol version
  * - creates a new record if one doesn't yet exist
  */
-struct nfs_client *nfs_get_client(const char *hostname,
-				  const struct sockaddr_in *addr,
-				  int nfsversion)
+static struct nfs_client *nfs_get_client(const char *hostname,
+					 const struct sockaddr_in *addr,
+					 int nfsversion)
 {
 	struct nfs_client *clp, *new = NULL;
 	int error;
@@ -340,6 +341,8 @@ struct nfs_client *nfs_get_client(const char *hostname,
 		return ERR_PTR(error);
 	}
 
+	BUG_ON(clp->cl_cons_state != NFS_CS_READY);
+
 	dprintk("--> nfs_get_client() = %p [share]\n", clp);
 	return clp;
 }
@@ -347,7 +350,7 @@ struct nfs_client *nfs_get_client(const char *hostname,
 /*
  * Mark a server as ready or failed
  */
-void nfs_mark_client_ready(struct nfs_client *clp, int state)
+static void nfs_mark_client_ready(struct nfs_client *clp, int state)
 {
 	clp->cl_cons_state = state;
 	wake_up_all(&nfs_client_active_wq);
@@ -389,10 +392,10 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 /*
  * Create an RPC client handle
  */
-int nfs_create_rpc_client(struct nfs_client *clp, int proto,
-			  unsigned int timeo,
-			  unsigned int retrans,
-			  rpc_authflavor_t flavor)
+static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+						unsigned int timeo,
+						unsigned int retrans,
+						rpc_authflavor_t flavor)
 {
 	struct rpc_timeout	timeparms;
 	struct rpc_xprt		*xprt = NULL;
@@ -429,3 +432,719 @@ int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 	clp->cl_rpcclient = clnt;
 	return 0;
 }
+
+/*
+ * Version 2 or 3 client destruction
+ */
+static void nfs_destroy_server(struct nfs_server *server)
+{
+	if (!IS_ERR(server->client_acl))
+		rpc_shutdown_client(server->client_acl);
+
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_down();	/* release rpc.lockd */
+}
+
+/*
+ * Version 2 or 3 lockd setup
+ */
+static int nfs_start_lockd(struct nfs_server *server)
+{
+	int error = 0;
+
+	if (server->nfs_client->cl_nfsversion > 3)
+		goto out;
+	if (server->flags & NFS_MOUNT_NONLM)
+		goto out;
+	error = lockd_up();
+	if (error < 0)
+		server->flags |= NFS_MOUNT_NONLM;
+	else
+		server->destroy = nfs_destroy_server;
+out:
+	return error;
+}
+
+/*
+ * Initialise an NFSv3 ACL client connection
+ */
+#ifdef CONFIG_NFS_V3_ACL
+static void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	if (server->nfs_client->cl_nfsversion != 3)
+		goto out_noacl;
+	if (server->flags & NFS_MOUNT_NOACL)
+		goto out_noacl;
+
+	server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+	if (IS_ERR(server->client_acl))
+		goto out_noacl;
+
+	/* No errors! Assume that Sun nfsacls are supported */
+	server->caps |= NFS_CAP_ACLS;
+	return;
+
+out_noacl:
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#else
+static inline void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	server->flags &= ~NFS_MOUNT_NOACL;
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#endif
+
+/*
+ * Create a general RPC client
+ */
+static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+{
+	struct nfs_client *clp = server->nfs_client;
+
+	server->client = rpc_clone_client(clp->cl_rpcclient);
+	if (IS_ERR(server->client)) {
+		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
+		return PTR_ERR(server->client);
+	}
+
+	if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(pseudoflavour, server->client);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return PTR_ERR(auth);
+		}
+	}
+	server->client->cl_softrtry = 0;
+	if (server->flags & NFS_MOUNT_SOFT)
+		server->client->cl_softrtry = 1;
+
+	server->client->cl_intr = 0;
+	if (server->flags & NFS4_MOUNT_INTR)
+		server->client->cl_intr = 1;
+
+	return 0;
+}
+
+/*
+ * Initialise an NFS2 or NFS3 client
+ */
+static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
+{
+	int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is already initialised */
+		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Check NFS protocol revision and initialize RPC op vector */
+	clp->rpc_ops = &nfs_v2_clientops;
+#ifdef CONFIG_NFS_V3
+	if (clp->cl_nfsversion == 3)
+		clp->rpc_ops = &nfs_v3_clientops;
+#endif
+	/*
+	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
+	 * - RFC 2623, sec 2.3.2
+	 */
+	error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
+			RPC_AUTH_UNIX);
+	if (error < 0)
+		goto error;
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Initialise a version 2 or 3 volume record and bind it to a client record
+ */
+static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
+{
+	struct nfs_client *clp;
+	int error, nfsvers = 2;
+
+	dprintk("--> nfs_init_server()\n");
+
+#ifdef CONFIG_NFS_V3
+	if (data->flags & NFS_MOUNT_VER3)
+		nfsvers = 3;
+#endif
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
+	if (IS_ERR(clp)) {
+		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+		return PTR_ERR(clp);
+	}
+
+	error = nfs_init_client(clp, data);
+	if (error < 0)
+		goto error;
+
+	server->nfs_client = clp;
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	/* Start lockd here, before we might error out */
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto error;
+
+	error = nfs_init_server_rpcclient(server, data->pseudoflavor);
+	if (error < 0)
+		goto error;
+
+	server->namelen  = data->namlen;
+	/* Create a client RPC handle for the NFSv3 ACL management interface */
+	nfs_init_server_aclclient(server);
+	if (clp->cl_nfsversion == 3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
+		server->caps |= NFS_CAP_READDIRPLUS;
+	} else {
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
+	}
+
+	dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
+	return 0;
+
+error:
+	server->nfs_client = NULL;
+	nfs_put_client(clp);
+	dprintk("<-- nfs_init_server() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Load up the server record from information gained in an fsinfo record
+ */
+static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
+{
+	unsigned long max_rpc_payload;
+
+	/* Work out a lot of parameters */
+	if (server->rsize == 0)
+		server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
+	if (server->wsize == 0)
+		server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
+
+	if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
+		server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
+	if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
+		server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
+
+	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+	if (server->rsize > max_rpc_payload)
+		server->rsize = max_rpc_payload;
+	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+		server->rsize = NFS_MAX_FILE_IO_SIZE;
+	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+
+	if (server->wsize > max_rpc_payload)
+		server->wsize = max_rpc_payload;
+	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+		server->wsize = NFS_MAX_FILE_IO_SIZE;
+	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+
+	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+	if (server->dtsize > PAGE_CACHE_SIZE)
+		server->dtsize = PAGE_CACHE_SIZE;
+	if (server->dtsize > server->rsize)
+		server->dtsize = server->rsize;
+
+	if (server->flags & NFS_MOUNT_NOAC) {
+		server->acregmin = server->acregmax = 0;
+		server->acdirmin = server->acdirmax = 0;
+	}
+
+	server->maxfilesize = fsinfo->maxfilesize;
+
+	/* We're airborne. Set socket buffersize */
+	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+}
+
+/*
+ * Probe filesystem information, including the FSID on v2/v3
+ */
+static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_client *clp = server->nfs_client;
+	int error;
+
+	dprintk("--> nfs_probe_fsinfo()\n");
+
+	if (clp->rpc_ops->set_capabilities != NULL) {
+		error = clp->rpc_ops->set_capabilities(server, mntfh);
+		if (error < 0)
+			goto out_error;
+	}
+
+	fsinfo.fattr = fattr;
+	nfs_fattr_init(fattr);
+	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+	if (error < 0)
+		goto out_error;
+
+	nfs_server_set_fsinfo(server, &fsinfo);
+
+	/* Get some general file system info */
+	if (server->namelen == 0) {
+		struct nfs_pathconf pathinfo;
+
+		pathinfo.fattr = fattr;
+		nfs_fattr_init(fattr);
+
+		if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
+			server->namelen = pathinfo.max_namelen;
+	}
+
+	dprintk("<-- nfs_probe_fsinfo() = 0\n");
+	return 0;
+
+out_error:
+	dprintk("nfs_probe_fsinfo: error = %d\n", -error);
+	return error;
+}
+
+/*
+ * Copy useful information when duplicating a server record
+ */
+static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
+{
+	target->flags = source->flags;
+	target->acregmin = source->acregmin;
+	target->acregmax = source->acregmax;
+	target->acdirmin = source->acdirmin;
+	target->acdirmax = source->acdirmax;
+	target->caps = source->caps;
+}
+
+/*
+ * Allocate and initialise a server record
+ */
+static struct nfs_server *nfs_alloc_server(void)
+{
+	struct nfs_server *server;
+
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return NULL;
+
+	server->client = server->client_acl = ERR_PTR(-EINVAL);
+
+	/* Zero out the NFS state stuff */
+	INIT_LIST_HEAD(&server->client_link);
+	INIT_LIST_HEAD(&server->master_link);
+
+	server->io_stats = nfs_alloc_iostats();
+	if (!server->io_stats) {
+		kfree(server);
+		return NULL;
+	}
+
+	return server;
+}
+
+/*
+ * Free up a server record
+ */
+void nfs_free_server(struct nfs_server *server)
+{
+	dprintk("--> nfs_free_server()\n");
+
+	spin_lock(&nfs_client_lock);
+	list_del(&server->client_link);
+	list_del(&server->master_link);
+	spin_unlock(&nfs_client_lock);
+
+	if (server->destroy != NULL)
+		server->destroy(server);
+	if (!IS_ERR(server->client))
+		rpc_shutdown_client(server->client);
+
+	nfs_put_client(server->nfs_client);
+
+	nfs_free_iostats(server->io_stats);
+	kfree(server);
+	nfs_release_automount_timer();
+	dprintk("<-- nfs_free_server()\n");
+}
+
+/*
+ * Create a version 2 or 3 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+				     struct nfs_fh *mntfh)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr;
+	int error;
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client representation */
+	error = nfs_init_server(server, data);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Probe the root fh to retrieve its FSID */
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+	if (!(fattr.valid & NFS_ATTR_FATTR)) {
+		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+		if (error < 0) {
+			dprintk("nfs_create_server: getattr error = %d\n", -error);
+			goto error;
+		}
+	}
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+
+	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	return server;
+
+error:
+	nfs_free_server(server);
+	return ERR_PTR(error);
+}
+
+#ifdef CONFIG_NFS_V4
+/*
+ * Initialise an NFS4 client record
+ */
+static int nfs4_init_client(struct nfs_client *clp,
+		int proto, int timeo, int retrans,
+		rpc_authflavor_t authflavour)
+{
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is initialised already */
+		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Check NFS protocol revision and initialize RPC op vector */
+	clp->rpc_ops = &nfs_v4_clientops;
+
+	error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error;
+
+	error = nfs_idmap_new(clp);
+	if (error < 0) {
+		dprintk("%s: failed to create idmapper. Error = %d\n",
+			__FUNCTION__, error);
+		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+		goto error;
+	}
+
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Set up an NFS4 client
+ */
+static int nfs4_set_client(struct nfs_server *server,
+		const char *hostname, const struct sockaddr_in *addr,
+		rpc_authflavor_t authflavour,
+		int proto, int timeo, int retrans)
+{
+	struct nfs_client *clp;
+	int error;
+
+	dprintk("--> nfs4_set_client()\n");
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(hostname, addr, 4);
+	if (IS_ERR(clp)) {
+		error = PTR_ERR(clp);
+		goto error;
+	}
+	error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error_put;
+
+	server->nfs_client = clp;
+	dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
+	return 0;
+
+error_put:
+	nfs_put_client(clp);
+error:
+	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Initialise a version 4 volume record from the mount data
+ */
+static int nfs4_init_server(struct nfs_server *server,
+		const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
+{
+	int error;
+
+	dprintk("--> nfs4_init_server()\n");
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	error = nfs_init_server_rpcclient(server, authflavour);
+
+	/* Done */
+	dprintk("<-- nfs4_init_server() = %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 4 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
+				      const char *hostname,
+				      const struct sockaddr_in *addr,
+				      const char *mntpath,
+				      const char *ip_addr,
+				      rpc_authflavor_t authflavour,
+				      struct nfs_fh *mntfh)
+{
+	struct nfs_fattr fattr;
+	struct nfs_server *server;
+	int error;
+
+	dprintk("--> nfs4_create_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client record */
+	error = nfs4_set_client(server, hostname, addr, authflavour,
+			data->proto, data->timeo, data->retrans);
+	if (error < 0)
+		goto error;
+
+	/* set up the general RPC client */
+	error = nfs4_init_server(server, data, authflavour);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Probe the root fh to retrieve its FSID */
+	error = nfs4_path_walk(server, mntfh, mntpath);
+	if (error < 0)
+		goto error;
+
+	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+	dprintk("Mount FH: %d\n", mntfh->size);
+
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	dprintk("<-- nfs4_create_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+/*
+ * Create an NFS4 referral server record (ERR_PTR() is returned on failure)
+ */
+struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
+					       struct nfs_fh *fh)
+{
+	struct nfs_client *parent_client;
+	struct nfs_server *server, *parent_server;
+	struct nfs_fattr fattr;
+	int error;
+
+	dprintk("--> nfs4_create_referral_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	parent_server = NFS_SB(data->sb);
+	parent_client = parent_server->nfs_client;
+
+	/* Get a client representation.  Note: NFSv4 always uses TCP. */
+	error = nfs4_set_client(server, data->hostname, data->addr,
+			data->authflavor,
+			parent_server->client->cl_xprt->prot,
+			parent_client->retrans_timeo,
+			parent_client->retrans_count);
+	if (error < 0)
+		goto error;	/* server->nfs_client was never set */
+
+	/* Initialise the client representation from the parent server */
+	nfs_server_copy_userdata(server, parent_server);
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	error = nfs_init_server_rpcclient(server, data->authflavor);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr);
+	if (error < 0)
+		goto error;
+
+	dprintk("Referral FSID: %llx:%llx\n",
+		server->fsid.major, server->fsid.minor);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	dprintk("<-- nfs4_create_referral_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+#endif /* CONFIG_NFS_V4 */
+
+/*
+ * Clone an NFS2, NFS3 or NFS4 server record
+ */
+struct nfs_server *nfs_clone_server(struct nfs_server *source,
+				    struct nfs_fh *fh,
+				    struct nfs_fattr *fattr)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr_fsinfo;
+	int error;
+
+	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+		fattr->fsid.major, fattr->fsid.minor);
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy data from the source */
+	server->nfs_client = source->nfs_client;
+	atomic_inc(&server->nfs_client->cl_count);
+	nfs_server_copy_userdata(server, source);
+
+	server->fsid = fattr->fsid;
+
+	error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+	if (error < 0)
+		goto out_free_server;
+	if (!IS_ERR(source->client_acl))
+		nfs_init_server_aclclient(server);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
+	if (error < 0)
+		goto out_free_server;
+
+	dprintk("Cloned FSID: %llx:%llx\n",
+		server->fsid.major, server->fsid.minor);
+
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto out_free_server;
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+
+	dprintk("<-- nfs_clone_server() = %p\n", server);
+	return server;
+
+out_free_server:
+	nfs_free_server(server);
+	dprintk("<-- nfs_clone_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 19362712452f0..9b496ef4abeae 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -870,14 +871,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
-static inline int nfs_reval_fsid(struct inode *dir,
-		struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+				 struct nfs_fh *fh, struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 
 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
 		/* Revalidate fsid on root dir */
-		return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode);
+		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
 	return 0;
 }
 
@@ -913,7 +914,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(dir, &fhandle, &fattr);
+	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out_unlock;
@@ -922,8 +923,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
 		goto out_unlock;
+
 no_entry:
-	res = d_add_unique(dentry, inode);
+	res = d_materialise_unique(dentry, inode);
 	if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
@@ -1117,11 +1119,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 		dput(dentry);
 		return NULL;
 	}
-	alias = d_add_unique(dentry, inode);
+
+	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
 		dput(dentry);
 		dentry = alias;
 	}
+
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	return dentry;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
new file mode 100644
index 0000000000000..977e59088eeb6
--- /dev/null
+++ b/fs/nfs/getroot.c
@@ -0,0 +1,306 @@
+/* getroot.c: get the root dentry for an NFS mount
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "nfs4_fs.h"
+#include "delegation.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+#define NFS_PARANOIA 1
+
+/* get an NFS2/NFS3 root dentry for the mount whose root filehandle is mntfh
+ * - returns the dentry, or an ERR_PTR()-encoded negative errno on failure
+ */
+struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	/* if the superblock has no root yet, give it a dummy dentry + inode */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the actual root for this mount (attributes arrive via fsinfo) */
+	fsinfo.fattr = &fattr;
+
+	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 * (so its d_op may already be set and is left alone below) */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!mntroot->d_op)
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	return mntroot;
+}
+
+#ifdef CONFIG_NFS_V4
+
+/*
+ * Do a simple pathwalk from the root FH of the server to the nominated target
+ * of the mountpoint, leaving its FH in *mntfh and its fsid in server->fsid
+ * - path must begin with '/'; empty and "." components are skipped
+ * - give -EINVAL on "..", -ENOTDIR on a non-directory (eg. a symlink)
+ * - give -EREMOTE on a referral; otherwise return 0 or the RPC op's error
+ */
+int nfs4_path_walk(struct nfs_server *server,
+		   struct nfs_fh *mntfh,
+		   const char *path)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct nfs_fh lastfh;
+	struct qstr name;
+	int ret;
+
+	dprintk("--> nfs4_path_walk(,,%s)\n", path);
+
+	fsinfo.fattr = &fattr;
+	nfs_fattr_init(&fattr);
+
+	if (*path++ != '/') {
+		dprintk("nfs4_get_root: Path does not begin with a slash\n");
+		return -EINVAL;
+	}
+
+	/* Start by getting the root filehandle from the server */
+	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot obtained referral\n");
+		return -EREMOTE;
+	}
+
+next_component:
+	dprintk("Next: %s\n", path);
+
+eat_dot_dir:
+	/* skip separators, then vet the component that is about to be used */
+	while (*path == '/')
+		path++;
+
+	if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+		/* step over the '.' only: it may be directly followed by NUL */
+		path++;
+		goto eat_dot_dir;
+	}
+
+	if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " Mount path contains reference to \"..\"\n");
+		return -EINVAL;
+	}
+
+	/* extract the next bit of the path */
+	if (!*path)
+		goto path_walk_complete;
+
+	name.name = path;
+	while (*path && *path != '/')
+		path++;
+	name.len = path - (const char *) name.name;
+
+	/* lookup the next FH in the sequence */
+	memcpy(&lastfh, mntfh, sizeof(lastfh));
+
+	dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+
+	ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+						    mntfh, &fattr);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: lookupfh error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh obtained referral\n");
+		return -EREMOTE;
+	}
+
+	goto next_component;
+
+path_walk_complete:
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+	dprintk("<-- nfs4_path_walk() = 0\n");
+	return 0;
+}
+
+/* get an NFS4 root dentry for the mount whose root filehandle is mntfh
+ * - returns the dentry or an ERR_PTR() errno; NB: the failure dprintk()s
+ *   below are tagged "nfs_get_root", not "nfs4_get_root" */
+struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	dprintk("--> nfs4_get_root()\n");
+
+	/* if the superblock has no root yet, give it a dummy dentry + inode */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the info about the server and filesystem */
+	error = nfs4_server_capabilities(server, mntfh);
+	if (error < 0) {
+		dprintk("nfs_get_root: getcaps error = %d\n",
+			-error);
+		return ERR_PTR(error);
+	}
+
+	/* get the actual root for this mount (its attributes land in fattr) */
+	error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, &fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 * (so its d_op may already be set and is left alone below) */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!mntroot->d_op)
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	dprintk("<-- nfs4_get_root()\n");
+	return mntroot;
+}
+
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 231c20ffc0ff7..f96dfac7dc9a6 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -114,8 +114,7 @@ nfs_idmap_new(struct nfs_client *clp)
 	struct idmap *idmap;
 	int error;
 
-	if (clp->cl_idmap != NULL)
-		return 0;
+	BUG_ON(clp->cl_idmap != NULL);
 
         if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return -ENOMEM;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 771c3b833757d..a547c58a83e6b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1020,7 +1020,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  out_fileid:
 	printk(KERN_ERR "NFS: server %s error: fileid changed\n"
 		"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
-		NFS_SERVER(inode)->hostname, inode->i_sb->s_id,
+		NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
 		(long long)nfsi->fileid, (long long)fattr->fileid);
 	goto out_err;
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2f3aa52fbefc7..e73ba4f1052a7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,18 @@
 
 #include <linux/mount.h>
 
+struct nfs_string;
+struct nfs_mount_data;
+struct nfs4_mount_data;
+
+/* Maximum number of readahead requests
+ * FIXME: this should really be a sysctl so that users may tune it to suit
+ *        their needs. People that do NFS over a slow network, might for
+ *        instance want to reduce it to something closer to 1 for improved
+ *        interactive response.
+ */
+#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
+
 struct nfs_clone_mount {
 	const struct super_block *sb;
 	const struct dentry *dentry;
@@ -16,12 +28,25 @@ struct nfs_clone_mount {
 };
 
 /* client.c */
+extern struct rpc_program nfs_program;
+
 extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
-extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int);
-extern void nfs_mark_client_ready(struct nfs_client *, int);
-extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int,
-				 unsigned int, rpc_authflavor_t);
+extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
+					    struct nfs_fh *);
+extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
+					     const char *,
+					     const struct sockaddr_in *,
+					     const char *,
+					     const char *,
+					     rpc_authflavor_t,
+					     struct nfs_fh *);
+extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
+						      struct nfs_fh *);
+extern void nfs_free_server(struct nfs_server *server);
+extern struct nfs_server *nfs_clone_server(struct nfs_server *,
+					   struct nfs_fh *,
+					   struct nfs_fattr *);
 
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
@@ -89,10 +114,10 @@ extern void nfs4_clear_inode(struct inode *);
 #endif
 
 /* super.c */
-extern struct file_system_type nfs_referral_nfs4_fs_type;
-extern struct file_system_type clone_nfs_fs_type;
+extern struct file_system_type nfs_xdev_fs_type;
 #ifdef CONFIG_NFS_V4
-extern struct file_system_type clone_nfs4_fs_type;
+extern struct file_system_type nfs4_xdev_fs_type;
+extern struct file_system_type nfs4_referral_fs_type;
 #endif
 
 extern struct rpc_stat nfs_rpcstat;
@@ -101,28 +126,30 @@ extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
 
 /* namespace.c */
-extern char *nfs_path(const char *base, const struct dentry *dentry,
+extern char *nfs_path(const char *base,
+		      const struct dentry *droot,
+		      const struct dentry *dentry,
 		      char *buffer, ssize_t buflen);
 
-/*
- * Determine the mount path as a string
- */
+/* getroot.c */
+extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
 #ifdef CONFIG_NFS_V4
-static inline char *
-nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
-{
-	return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
-}
+extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+
+extern int nfs4_path_walk(struct nfs_server *server,
+			  struct nfs_fh *mntfh,
+			  const char *path);
 #endif
 
 /*
  * Determine the device name as a string
  */
 static inline char *nfs_devname(const struct vfsmount *mnt_parent,
-			 const struct dentry *dentry,
-			 char *buffer, ssize_t buflen)
+				const struct dentry *dentry,
+				char *buffer, ssize_t buflen)
 {
-	return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);
+	return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
+			dentry, buffer, buflen);
 }
 
 /*
@@ -178,20 +205,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
 	if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 }
-
-/*
- * Check if the string represents a "valid" IPv4 address
- */
-static inline int valid_ipaddr4(const char *buf)
-{
-	int rc, count, in[4];
-
-	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
-	if (rc != 4)
-		return -EINVAL;
-	for (count = 0; count < 4; count++) {
-		if (in[count] > 255)
-			return -EINVAL;
-	}
-	return 0;
-}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d8b8d56266cbd..77b00684894d9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFS namespace
  */
@@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
  * @base - arbitrary string to prepend to the path
+ * @droot - pointer to root dentry for mountpoint
  * @dentry - pointer to dentry
  * @buffer - result buffer
  * @buflen - length of buffer
@@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
  * This is mainly for use in figuring out the path on the
  * server side when automounting on top of an existing partition.
  */
-char *nfs_path(const char *base, const struct dentry *dentry,
+char *nfs_path(const char *base,
+	       const struct dentry *droot,
+	       const struct dentry *dentry,
 	       char *buffer, ssize_t buflen)
 {
 	char *end = buffer+buflen;
@@ -47,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry,
 	*--end = '\0';
 	buflen--;
 	spin_lock(&dcache_lock);
-	while (!IS_ROOT(dentry)) {
+	while (!IS_ROOT(dentry) && dentry != droot) {
 		namelen = dentry->d_name.len;
 		buflen -= namelen + 1;
 		if (buflen < 0)
@@ -96,12 +100,13 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	struct nfs_fattr fattr;
 	int err;
 
+	dprintk("--> nfs_follow_mountpoint()\n");
+
 	BUG_ON(IS_ROOT(dentry));
 	dprintk("%s: enter\n", __FUNCTION__);
 	dput(nd->dentry);
 	nd->dentry = dget(dentry);
-	if (d_mountpoint(nd->dentry))
-		goto out_follow;
+
 	/* Look it up again */
 	parent = dget_parent(nd->dentry);
 	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
@@ -134,6 +139,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
 out:
 	dprintk("%s: done, returned %d\n", __FUNCTION__, err);
+
+	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
 	return ERR_PTR(err);
 out_err:
 	path_release(nd);
@@ -183,14 +190,14 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
 	switch (server->nfs_client->cl_nfsversion) {
 		case 2:
 		case 3:
-			mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 			break;
 		case 4:
-			mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
 	}
 	return mnt;
 #else
-	return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+	return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 #endif
 }
 
@@ -216,6 +223,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
 	char *page = (char *) __get_free_page(GFP_USER);
 	char *devname;
 
+	dprintk("--> nfs_do_submount()\n");
+
 	dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name);
@@ -230,5 +239,7 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
 	free_page((unsigned long)page);
 out:
 	dprintk("%s: done\n", __FUNCTION__);
+
+	dprintk("<-- nfs_do_submount() = %p\n", mnt);
 	return mnt;
 }
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0622af0122bef..9e8258ece6fd8 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
 }
 
 /*
- * Bare-bones access to getattr: this is for nfs_read_super.
+ * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
  */
 static int
 nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e7879245361eb..61095fe4b5ca5 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -188,8 +188,6 @@ extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(void *);
 
 /* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
 extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
@@ -224,10 +222,6 @@ extern struct svc_version nfs4_callback_version1;
 
 #else
 
-#define init_nfsv4_state(server)  do { } while (0)
-#define destroy_nfsv4_state(server)       do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
 #define nfs4_close_state(a, b) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index faed9bcba50fb..24e47f3bbd173 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/nfs4namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFSv4 namespace
  */
@@ -47,6 +48,68 @@ static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
+/*
+ * Render the server-side path of a dentry as a string
+ * - the base is whatever follows the first ':' in the parent mount's device
+ *   name, or the whole device name if it contains no ':'
+ * - the dentry walk stops at the parent mount's root dentry
+ * - the string is built in buffer/buflen by nfs_path(), whose result we return
+ */
+static char *nfs4_path(const struct vfsmount *mnt_parent,
+		       const struct dentry *dentry,
+		       char *buffer, ssize_t buflen)
+{
+	const char *devname = mnt_parent->mnt_devname;
+	const char *colon = strchr(devname, ':');
+	const char *base = colon ? colon + 1 : devname;
+
+	return nfs_path(base, mnt_parent->mnt_root, dentry, buffer, buflen);
+}
+
+/*
+ * Check that fs_locations::fs_root [RFC3530 6.3] is a leading substring of
+ * the server path we reconstruct for this dentry (page/page2 are buffers)
+ * - returns 0 on a match, -ENOENT on a mismatch, else the helper's error
+ */
+static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
+				const struct dentry *dentry,
+				const struct nfs4_fs_locations *locations,
+				char *page, char *page2)
+{
+	const char *ours, *theirs;
+
+	ours = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
+	if (IS_ERR(ours))
+		return PTR_ERR(ours);
+
+	theirs = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
+	if (IS_ERR(theirs))
+		return PTR_ERR(theirs);
+
+	if (strncmp(ours, theirs, strlen(theirs)) == 0)
+		return 0;
+
+	dprintk("%s: path %s does not begin with fsroot %s\n",
+		__FUNCTION__, ours, theirs);
+	return -ENOENT;
+}
+
+/* Check if the string starts with a dotted quad whose fields are all 0-255
+ * - returns 0 if so, -EINVAL otherwise (text after the quad is not checked)
+ */
+static inline int valid_ipaddr4(const char *buf)
+{
+	int rc, count, in[4];
+
+	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
+	if (rc != 4)
+		return -EINVAL;
+	/* %d accepts a leading '-', so check the lower bound as well */
+	for (count = 0; count < 4; count++)
+		if (in[count] < 0 || in[count] > 255)
+			return -EINVAL;
+	return 0;
+}
 
 /**
  * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
@@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 		.dentry = dentry,
 		.authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
 	};
-	char *page, *page2;
-	char *path, *fs_path;
+	char *page = NULL, *page2 = NULL;
 	char *devname;
-	int loc, s;
+	int loc, s, error;
 
 	if (locations == NULL || locations->nlocations <= 0)
 		goto out;
@@ -79,31 +141,25 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 	dprintk("%s: referral at %s/%s\n", __FUNCTION__,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	/* Ensure fs path is a prefix of current dentry path */
 	page = (char *) __get_free_page(GFP_USER);
-	if (page == NULL)
+	if (!page)
 		goto out;
+
 	page2 = (char *) __get_free_page(GFP_USER);
-	if (page2 == NULL)
+	if (!page2)
 		goto out;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (IS_ERR(path))
-		goto out_free;
-
-	fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
-	if (IS_ERR(fs_path))
-		goto out_free;
-
-	if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
-		dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
-		goto out_free;
+	/* Ensure fs path is a prefix of current dentry path */
+	error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
+	if (error < 0) {
+		mnt = ERR_PTR(error);
+		goto out;
 	}
 
 	devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
 	if (IS_ERR(devname)) {
 		mnt = (struct vfsmount *)devname;
-		goto out_free;
+		goto out;
 	}
 
 	loc = 0;
@@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 			addr.sin_port = htons(NFS_PORT);
 			mountdata.addr = &addr;
 
-			mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata);
+			mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
 			if (!IS_ERR(mnt)) {
 				break;
 			}
@@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 		loc++;
 	}
 
-out_free:
-	free_page((unsigned long)page);
-	free_page((unsigned long)page2);
 out:
+	free_page((unsigned long) page);
+	free_page((unsigned long) page2);
 	dprintk("%s: done\n", __FUNCTION__);
 	return mnt;
 }
@@ -165,7 +220,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
  */
 struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
 {
-	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	struct dentry *parent;
 	struct nfs4_fs_locations *fs_locations = NULL;
 	struct page *page;
@@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
 		goto out_free;
 
 	/* Get locations */
+	mnt = ERR_PTR(-ENOENT);
+
 	parent = dget_parent(dentry);
-	dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name);
+	dprintk("%s: getting locations for %s/%s\n",
+		__FUNCTION__, parent->d_name.name, dentry->d_name.name);
+
 	err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
 	dput(parent);
-	if (err != 0 || fs_locations->nlocations <= 0 ||
+	if (err != 0 ||
+	    fs_locations->nlocations <= 0 ||
 	    fs_locations->fs_path.ncomponents <= 0)
 		goto out_free;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1573eeb07ce10..a825547e8214f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1393,70 +1393,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	return err;
 }
 
+/*
+ * get the file handle for the "/" directory on the server
+ */
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
-		struct nfs_fsinfo *info)
+			      struct nfs_fsinfo *info)
 {
-	struct nfs_fattr *	fattr = info->fattr;
-	unsigned char *		p;
-	struct qstr		q;
-	struct nfs4_lookup_arg args = {
-		.dir_fh = fhandle,
-		.name = &q,
-		.bitmask = nfs4_fattr_bitmap,
-	};
-	struct nfs4_lookup_res res = {
-		.server = server,
-		.fattr = fattr,
-		.fh = fhandle,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
 	int status;
 
-	/*
-	 * Now we do a separate LOOKUP for each component of the mount path.
-	 * The LOOKUPs are done separately so that we can conveniently
-	 * catch an ERR_WRONGSEC if it occurs along the way...
-	 */
 	status = nfs4_lookup_root(server, fhandle, info);
-	if (status)
-		goto out;
-
-	p = server->mnt_path;
-	for (;;) {
-		struct nfs4_exception exception = { };
-
-		while (*p == '/')
-			p++;
-		if (!*p)
-			break;
-		q.name = p;
-		while (*p && (*p != '/'))
-			p++;
-		q.len = p - q.name;
-
-		do {
-			nfs_fattr_init(fattr);
-			status = nfs4_handle_exception(server,
-					rpc_call_sync(server->client, &msg, 0),
-					&exception);
-		} while (exception.retry);
-		if (status == 0)
-			continue;
-		if (status == -ENOENT) {
-			printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
-			printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
-		}
-		break;
-	}
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
 	if (status == 0)
 		status = nfs4_do_fsinfo(server, fhandle, info);
-out:
 	return nfs4_map_errors(status);
 }
 
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index ff947ecb8b815..f2c893690ac49 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -127,26 +127,13 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs_client;
-
-	if (!clp)
-		return;
 	flush_scheduled_work();
-	down_write(&clp->cl_sem);
-	if (!list_empty(&server->nfs4_siblings))
-		list_del_init(&server->nfs4_siblings);
-	up_write(&clp->cl_sem);
 }
 
-/* Must be called with clp->cl_sem locked for writes */
 void
 nfs4_kill_renewd(struct nfs_client *clp)
 {
 	down_read(&clp->cl_sem);
-	if (!list_empty(&clp->cl_superblocks)) {
-		up_read(&clp->cl_sem);
-		return;
-	}
 	cancel_delayed_work(&clp->cl_renewd);
 	up_read(&clp->cl_sem);
 	flush_scheduled_work();
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 058811e395550..5fffbdfa971f4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -58,24 +58,6 @@ const nfs4_stateid zero_stateid;
 
 static LIST_HEAD(nfs4_clientid_list);
 
-void
-init_nfsv4_state(struct nfs_server *server)
-{
-	server->nfs_client = NULL;
-	INIT_LIST_HEAD(&server->nfs4_siblings);
-}
-
-void
-destroy_nfsv4_state(struct nfs_server *server)
-{
-	kfree(server->mnt_path);
-	server->mnt_path = NULL;
-	if (server->nfs_client) {
-		nfs_put_client(server->nfs_client);
-		server->nfs_client = NULL;
-	}
-}
-
 static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f0aff824a291b..dae33c1e8a77b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
 
 		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
-			NFS_SERVER(inode)->hostname,
+			NFS_SERVER(inode)->nfs_client->cl_hostname,
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			(unsigned long long)rdata->args.pgbase,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5842d510d732c..867b5dcd3a40a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -13,6 +13,11 @@
  *
  *  Split from inode.c by David Howells <dhowells@redhat.com>
  *
+ * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a
+ *   particular server are held in the same superblock
+ * - NFS superblocks can have several effective roots to the dentry tree
+ * - directory type roots are spliced into the tree when a path from one root reaches the root
+ *   of another (see nfs_lookup())
  */
 
 #include <linux/config.h>
@@ -52,20 +57,12 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- *        their needs. People that do NFS over a slow network, might for
- *        instance want to reduce it to something closer to 1 for improved
- *        interactive response.
- */
-#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
-
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
+static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
 
@@ -77,10 +74,10 @@ static struct file_system_type nfs_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs_fs_type = {
+struct file_system_type nfs_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs",
-	.get_sb		= nfs_clone_nfs_sb,
+	.get_sb		= nfs_xdev_get_sb,
 	.kill_sb	= nfs_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -99,10 +96,10 @@ static struct super_operations nfs_sops = {
 #ifdef CONFIG_NFS_V4
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_referral_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs4_kill_super(struct super_block *sb);
 
 static struct file_system_type nfs4_fs_type = {
@@ -113,18 +110,18 @@ static struct file_system_type nfs4_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs4_fs_type = {
+struct file_system_type nfs4_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_clone_nfs4_sb,
+	.get_sb		= nfs4_xdev_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type nfs_referral_nfs4_fs_type = {
+struct file_system_type nfs4_referral_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_referral_nfs4_sb,
+	.get_sb		= nfs4_referral_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -345,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	nfs_show_mount_options(m, nfss, 0);
 
 	seq_puts(m, ",addr=");
-	seq_escape(m, nfss->hostname, " \t\n\\");
+	seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
 
 	return 0;
 }
@@ -429,714 +426,351 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 
 /*
  * Begin unmount by attempting to remove all automounted mountpoints we added
- * in response to traversals
+ * in response to xdev traversals and referrals
  */
 static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 {
-	struct nfs_server *server;
-	struct rpc_clnt	*rpc;
-
 	shrink_submounts(vfsmnt, &nfs_automount_list);
-	if (!(flags & MNT_FORCE))
-		return;
-	/* -EIO all pending I/O */
-	server = NFS_SB(vfsmnt->mnt_sb);
-	rpc = server->client;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
-	rpc = server->client_acl;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
 }
 
 /*
- * Obtain the root inode of the file system.
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
  */
-static struct inode *
-nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+static int nfs_validate_mount_data(struct nfs_mount_data *data,
+				   struct nfs_fh *mntfh)
 {
-	struct nfs_server	*server = NFS_SB(sb);
-	int			error;
-
-	error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo);
-	if (error < 0) {
-		dprintk("nfs_get_root: getattr error = %d\n", -error);
-		return ERR_PTR(error);
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		return -EINVAL;
 	}
 
-	server->fsid = fsinfo->fattr->fsid;
-	return nfs_fhget(sb, rootfh, fsinfo->fattr);
-}
-
-/*
- * Do NFS version-independent mount processing, and sanity checking
- */
-static int
-nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
-{
-	struct nfs_server	*server;
-	struct inode		*root_inode;
-	struct nfs_fattr	fattr;
-	struct nfs_fsinfo	fsinfo = {
-					.fattr = &fattr,
-				};
-	struct nfs_pathconf pathinfo = {
-			.fattr = &fattr,
-	};
-	int no_root_error = 0;
-	unsigned long max_rpc_payload;
-
-	/* We probably want something more informative here */
-	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
-	server = NFS_SB(sb);
-
-	sb->s_magic      = NFS_SUPER_MAGIC;
-
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		return -ENOMEM;
+	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
+		return -EINVAL;
+	}
 
-	root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
-	/* Did getting the root inode fail? */
-	if (IS_ERR(root_inode)) {
-		no_root_error = PTR_ERR(root_inode);
-		goto out_no_root;
+	switch (data->version) {
+		case 1:
+			data->namlen = 0;
+		case 2:
+			data->bsize  = 0;
+		case 3:
+			if (data->flags & NFS_MOUNT_VER3) {
+				dprintk("%s: mount structure version %d does not support NFSv3\n",
+						__FUNCTION__,
+						data->version);
+				return -EINVAL;
+			}
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		case 4:
+			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+				dprintk("%s: mount structure version %d does not support strong security\n",
+						__FUNCTION__,
+						data->version);
+				return -EINVAL;
+			}
+			/* Fill in pseudoflavor for mount version < 5 */
+			data->pseudoflavor = RPC_AUTH_UNIX;
+		case 5:
+			memset(data->context, 0, sizeof(data->context));
 	}
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root) {
-		no_root_error = -ENOMEM;
-		goto out_no_root;
+
+#ifndef CONFIG_NFS_V3
+	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+	if (data->flags & NFS_MOUNT_VER3) {
+		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+		return -EPROTONOSUPPORT;
 	}
-	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
-
-	/* mount time stamp, in seconds */
-	server->mount_time = jiffies;
-
-	/* Get some general file system info */
-	if (server->namelen == 0 &&
-	    server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
-		server->namelen = pathinfo.max_namelen;
-	/* Work out a lot of parameters */
-	if (server->rsize == 0)
-		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
-	if (server->wsize == 0)
-		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-
-	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
-		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
-	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
-		server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
-
-	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
-	if (server->rsize > max_rpc_payload)
-		server->rsize = max_rpc_payload;
-	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
-		server->rsize = NFS_MAX_FILE_IO_SIZE;
-	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	if (server->wsize > max_rpc_payload)
-		server->wsize = max_rpc_payload;
-	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
-		server->wsize = NFS_MAX_FILE_IO_SIZE;
-	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+#endif /* CONFIG_NFS_V3 */
 
-	if (sb->s_blocksize == 0)
-		sb->s_blocksize = nfs_block_bits(server->wsize,
-							 &sb->s_blocksize_bits);
-	server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
-
-	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
-	if (server->dtsize > PAGE_CACHE_SIZE)
-		server->dtsize = PAGE_CACHE_SIZE;
-	if (server->dtsize > server->rsize)
-		server->dtsize = server->rsize;
-
-	if (server->flags & NFS_MOUNT_NOAC) {
-		server->acregmin = server->acregmax = 0;
-		server->acdirmin = server->acdirmax = 0;
-		sb->s_flags |= MS_SYNCHRONOUS;
+	/* We now require that the mount process passes the remote address */
+	if (data->addr.sin_addr.s_addr == INADDR_ANY) {
+		dprintk("%s: mount program didn't pass remote address!\n",
+			__FUNCTION__);
+		return -EINVAL;
 	}
-	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
 
-	nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
+	/* Prepare the root filehandle */
+	if (data->flags & NFS_MOUNT_VER3)
+		mntfh->size = data->root.size;
+	else
+		mntfh->size = NFS2_FHSIZE;
 
-	server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
-	server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+	if (mntfh->size > sizeof(mntfh->data)) {
+		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+		return -EINVAL;
+	}
+
+	memcpy(mntfh->data, data->root.data, mntfh->size);
+	if (mntfh->size < sizeof(mntfh->data))
+		memset(mntfh->data + mntfh->size, 0,
+		       sizeof(mntfh->data) - mntfh->size);
 
-	/* We're airborne Set socket buffersize */
-	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
 	return 0;
-	/* Yargs. It didn't work out. */
-out_no_root:
-	dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
-	if (!IS_ERR(root_inode))
-		iput(root_inode);
-	return no_root_error;
 }
 
 /*
- * Create an RPC client handle.
+ * Initialise the common bits of the superblock
  */
-static struct rpc_clnt *
-nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
+static inline void nfs_initialise_sb(struct super_block *sb)
 {
-	struct nfs_client	*clp;
-	struct rpc_clnt		*clnt;
-	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
-	int			nfsversion = 2;
-	int			err;
-
-#ifdef CONFIG_NFS_V3
-	if (server->flags & NFS_MOUNT_VER3)
-		nfsversion = 3;
-#endif
-
-	clp = nfs_get_client(server->hostname, &server->addr, nfsversion);
-	if (!clp) {
-		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-		return ERR_PTR(PTR_ERR(clp));
-	}
-
-	if (clp->cl_cons_state == NFS_CS_INITING) {
-		/* Check NFS protocol revision and initialize RPC op
-		 * vector and file handle pool. */
-#ifdef CONFIG_NFS_V3
-		if (nfsversion == 3) {
-			clp->rpc_ops = &nfs_v3_clientops;
-			server->caps |= NFS_CAP_READDIRPLUS;
-		} else {
-			clp->rpc_ops = &nfs_v2_clientops;
-		}
-#else
-		clp->rpc_ops = &nfs_v2_clientops;
-#endif
-
-		/* create transport and client */
-		err = nfs_create_rpc_client(clp, proto, data->timeo,
-					    data->retrans, RPC_AUTH_UNIX);
-		if (err < 0)
-			goto client_init_error;
-
-		nfs_mark_client_ready(clp, 0);
-	}
+	struct nfs_server *server = NFS_SB(sb);
 
-	/* create an nfs_server-specific client */
-	clnt = rpc_clone_client(clp->cl_rpcclient);
-	if (IS_ERR(clnt)) {
-		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
-		nfs_put_client(clp);
-		return ERR_PTR(PTR_ERR(clnt));
-	}
+	sb->s_magic = NFS_SUPER_MAGIC;
 
-	if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) {
-		struct rpc_auth *auth;
+	/* We probably want something more informative here */
+	snprintf(sb->s_id, sizeof(sb->s_id),
+		 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
 
-		auth = rpcauth_create(data->pseudoflavor, server->client);
-		if (IS_ERR(auth)) {
-			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return ERR_PTR(PTR_ERR(auth));
-		}
-	}
+	if (sb->s_blocksize == 0)
+		sb->s_blocksize = nfs_block_bits(server->wsize,
+						 &sb->s_blocksize_bits);
 
-	server->nfs_client = clp;
-	return clnt;
+	if (server->flags & NFS_MOUNT_NOAC)
+		sb->s_flags |= MS_SYNCHRONOUS;
 
-client_init_error:
-	nfs_mark_client_ready(clp, err);
-	nfs_put_client(clp);
-	return ERR_PTR(err);
+	nfs_super_set_maxbytes(sb, server->maxfilesize);
 }
 
 /*
- * Clone a server record
+ * Finish setting up an NFS2/3 superblock
  */
-static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data)
+static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
 {
 	struct nfs_server *server = NFS_SB(sb);
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct inode *root_inode;
-	struct nfs_fsinfo fsinfo;
-	void *err = ERR_PTR(-ENOMEM);
-
-	sb->s_op = data->sb->s_op;
-	sb->s_blocksize = data->sb->s_blocksize;
-	sb->s_blocksize_bits = data->sb->s_blocksize_bits;
-	sb->s_maxbytes = data->sb->s_maxbytes;
-
-	server->client_acl = ERR_PTR(-EINVAL);
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		goto out;
-
-	server->client = rpc_clone_client(parent->client);
-	if (IS_ERR((err = server->client)))
-		goto out;
-
-	if (!IS_ERR(parent->client_acl)) {
-		server->client_acl = rpc_clone_client(parent->client_acl);
-		if (IS_ERR((err = server->client_acl)))
-			goto out;
-	}
-	root_inode = nfs_fhget(sb, data->fh, data->fattr);
-	if (!root_inode)
-		goto out;
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root)
-		goto out_put_root;
-	fsinfo.fattr = data->fattr;
-	if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
-		nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
-	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
-	sb->s_flags |= MS_ACTIVE;
-	return server;
-out_put_root:
-	iput(root_inode);
-out:
-	return err;
-}
-
-/*
- * Copy an existing superblock and attach revised data
- */
-static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
-		struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
-		struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
-		struct vfsmount *mnt)
-{
-	struct nfs_server *server;
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct super_block *sb = ERR_PTR(-EINVAL);
-	char *hostname;
-	int error = -ENOMEM;
-	int len;
-
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (server == NULL)
-		goto out_err;
-	memcpy(server, parent, sizeof(*server));
-	atomic_inc(&server->nfs_client->cl_count);
-	hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
-	len = strlen(hostname) + 1;
-	server->hostname = kmalloc(len, GFP_KERNEL);
-	if (server->hostname == NULL)
-		goto free_server;
-	memcpy(server->hostname, hostname, len);
-
-	sb = fill_sb(server, data);
-	if (IS_ERR(sb)) {
-		error = PTR_ERR(sb);
-		goto free_hostname;
-	}
 
-	if (sb->s_root)
-		goto out_share;
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	if (data->bsize)
+		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
 
-	server = fill_server(sb, data);
-	if (IS_ERR(server)) {
-		error = PTR_ERR(server);
-		goto out_deactivate;
+	if (server->flags & NFS_MOUNT_VER3) {
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
+		sb->s_time_gran = 1;
 	}
-	return simple_set_mnt(mnt, sb);
-out_deactivate:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
-	return error;
-out_share:
-	kfree(server->hostname);
-	nfs_put_client(server->nfs_client);
-	kfree(server);
-	return simple_set_mnt(mnt, sb);
-free_hostname:
-	kfree(server->hostname);
-free_server:
-	nfs_put_client(server->nfs_client);
-	kfree(server);
-out_err:
-	return error;
+
+	sb->s_op = &nfs_sops;
+ 	nfs_initialise_sb(sb);
 }
 
 /*
- * Set up an NFS2/3 superblock
- *
- * The way this works is that the mount process passes a structure
- * in the data argument which contains the server's IP address
- * and the root file handle obtained from the server's mount
- * daemon. We stash these away in the private superblock fields.
+ * Finish setting up a cloned NFS2/3 superblock
  */
-static int
-nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+static void nfs_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
 {
-	struct nfs_server	*server;
-	rpc_authflavor_t	authflavor;
+	struct nfs_server *server = NFS_SB(sb);
+
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
 
-	server           = NFS_SB(sb);
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	if (data->bsize)
-		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
-	if (data->rsize)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	/* Start lockd here, before we might error out */
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-
-	server->namelen  = data->namlen;
-	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
-	if (!server->hostname)
-		return -ENOMEM;
-	strcpy(server->hostname, data->hostname);
-
-	/* Fill in pseudoflavor for mount version < 5 */
-	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-		data->pseudoflavor = RPC_AUTH_UNIX;
-	authflavor = data->pseudoflavor;	/* save for sb_init() */
-	/* XXX maybe we want to add a server->pseudoflavor field */
-
-	/* Create RPC client handles */
-	server->client = nfs_create_client(server, data);
-	if (IS_ERR(server->client))
-		return PTR_ERR(server->client);
-
-	/* RFC 2623, sec 2.3.2 */
 	if (server->flags & NFS_MOUNT_VER3) {
-#ifdef CONFIG_NFS_V3_ACL
-		if (!(server->flags & NFS_MOUNT_NOACL)) {
-			server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
-			/* No errors! Assume that Sun nfsacls are supported */
-			if (!IS_ERR(server->client_acl))
-				server->caps |= NFS_CAP_ACLS;
-		}
-#else
-		server->flags &= ~NFS_MOUNT_NOACL;
-#endif /* CONFIG_NFS_V3_ACL */
-		/*
-		 * The VFS shouldn't apply the umask to mode bits. We will
-		 * do so ourselves when necessary.
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
 		 */
 		sb->s_flags |= MS_POSIXACL;
-		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
-			server->namelen = NFS3_MAXNAMLEN;
 		sb->s_time_gran = 1;
-	} else {
-		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
-			server->namelen = NFS2_MAXNAMLEN;
 	}
 
-	sb->s_op = &nfs_sops;
-	return nfs_sb_init(sb, authflavor);
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
 }
 
-static int nfs_set_super(struct super_block *s, void *data)
+static int nfs_set_super(struct super_block *s, void *_server)
 {
-	s->s_fs_info = data;
-	return set_anon_super(s, data);
+	struct nfs_server *server = _server;
+	int ret;
+
+	s->s_fs_info = server;
+	ret = set_anon_super(s, server);
+	if (ret == 0)
+		server->s_dev = s->s_dev;
+	return ret;
 }
 
 static int nfs_compare_super(struct super_block *sb, void *data)
 {
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
+	struct nfs_server *server = data, *old = NFS_SB(sb);
 
-	if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
+	if (old->nfs_client != server->nfs_client)
 		return 0;
-	if (old->addr.sin_port != server->addr.sin_port)
+	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
 		return 0;
-	return !nfs_compare_fh(&old->fh, &server->fh);
+	return 1;
 }
 
 static int nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	int error;
 	struct nfs_server *server = NULL;
 	struct super_block *s;
-	struct nfs_fh *root;
+	struct nfs_fh mntfh;
 	struct nfs_mount_data *data = raw_data;
+	struct dentry *mntroot;
+	int error;
 
-	error = -EINVAL;
-	if (data == NULL) {
-		dprintk("%s: missing data argument\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
-		dprintk("%s: bad mount version\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-	switch (data->version) {
-		case 1:
-			data->namlen = 0;
-		case 2:
-			data->bsize  = 0;
-		case 3:
-			if (data->flags & NFS_MOUNT_VER3) {
-				dprintk("%s: mount structure version %d does not support NFSv3\n",
-						__FUNCTION__,
-						data->version);
-				goto out_err_noserver;
-			}
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		case 4:
-			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
-				dprintk("%s: mount structure version %d does not support strong security\n",
-						__FUNCTION__,
-						data->version);
-				goto out_err_noserver;
-			}
-		case 5:
-			memset(data->context, 0, sizeof(data->context));
-	}
-#ifndef CONFIG_NFS_V3
-	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	error = -EPROTONOSUPPORT;
-	if (data->flags & NFS_MOUNT_VER3) {
-		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-#endif /* CONFIG_NFS_V3 */
+	/* Validate the mount data */
+	error = nfs_validate_mount_data(data, &mntfh);
+	if (error < 0)
+		return error;
 
-	error = -ENOMEM;
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
+	/* Get a volume representation */
+	server = nfs_create_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
 		goto out_err_noserver;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_acl = ERR_PTR(-EINVAL);
-
-	root = &server->fh;
-	if (data->flags & NFS_MOUNT_VER3)
-		root->size = data->root.size;
-	else
-		root->size = NFS2_FHSIZE;
-	error = -EINVAL;
-	if (root->size > sizeof(root->data)) {
-		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
-		goto out_err;
-	}
-	memcpy(root->data, data->root.data, root->size);
-
-	/* We now require that the mount process passes the remote address */
-	memcpy(&server->addr, &data->addr, sizeof(server->addr));
-	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote address!\n",
-				__FUNCTION__);
-		goto out_err;
 	}
 
+	/* Get a superblock - note that we may end up sharing one that already exists */
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
-		goto out_err;
+		goto out_err_nosb;
 	}
 
-	if (s->s_root)
-		goto out_share;
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
 
-	s->s_flags = flags;
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_fill_super(s, data);
+	}
 
-	error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	mntroot = nfs_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
-	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
 
-out_share:
-	kfree(server);
-	return simple_set_mnt(mnt, s);
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	return 0;
 
-out_err:
-	kfree(server);
+out_err_nosb:
+	nfs_free_server(server);
 out_err_noserver:
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	return error;
 }
 
+/*
+ * Destroy an NFS2/3 superblock
+ */
 static void nfs_kill_super(struct super_block *s)
 {
 	struct nfs_server *server = NFS_SB(s);
 
 	kill_anon_super(s);
-
-	if (!IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
-	if (!IS_ERR(server->client_acl))
-		rpc_shutdown_client(server->client_acl);
-
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_down();	/* release rpc.lockd */
-
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	nfs_put_client(server->nfs_client);
-	kfree(server);
-	nfs_release_automount_timer();
+	nfs_free_server(server);
 }
 
-static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
-	if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-	return sb;
-}
-
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ */
+static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			   const char *dev_name, void *raw_data,
+			   struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt);
-}
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
 
-#ifdef CONFIG_NFS_V4
-static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
-	int timeo, int retrans, int proto, rpc_authflavor_t flavor)
-{
-	struct nfs_client *clp;
-	struct rpc_clnt *clnt = NULL;
-	int err = -EIO;
-
-	clp = nfs_get_client(server->hostname, &server->addr, 4);
-	if (!clp) {
-		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-		return ERR_PTR(err);
-	}
+	dprintk("--> nfs_xdev_get_sb()\n");
 
-	/* Now create transport and client */
-	if (clp->cl_cons_state == NFS_CS_INITING) {
-		clp->rpc_ops = &nfs_v4_clientops;
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
 
-		err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor);
-		if (err < 0)
-			goto client_init_error;
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
 
-		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		err = nfs_idmap_new(clp);
-		if (err < 0) {
-			dprintk("%s: failed to create idmapper.\n",
-				__FUNCTION__);
-			goto client_init_error;
-		}
-		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
-		nfs_mark_client_ready(clp, 0);
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
 	}
 
-	clnt = rpc_clone_client(clp->cl_rpcclient);
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_clone_super(s, data->sb);
+	}
 
-	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %d\n",
-				__FUNCTION__, err);
-		return clnt;
+	mntroot = nfs_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
 
-	if (clnt->cl_auth->au_flavor != flavor) {
-		struct rpc_auth *auth;
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
 
-		auth = rpcauth_create(flavor, clnt);
-		if (IS_ERR(auth)) {
-			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return (struct rpc_clnt *)auth;
-		}
-	}
+	dprintk("<-- nfs_xdev_get_sb() = 0\n");
+	return 0;
 
-	server->nfs_client = clp;
-	down_write(&clp->cl_sem);
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	up_write(&clp->cl_sem);
-	return clnt;
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
+	return error;
 
-client_init_error:
-	nfs_mark_client_ready(clp, err);
-	nfs_put_client(clp);
-	return ERR_PTR(err);
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
+#ifdef CONFIG_NFS_V4
+
 /*
- * Set up an NFS4 superblock
+ * Finish setting up a cloned NFS4 superblock
  */
-static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+static void nfs4_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
 {
-	struct nfs_server *server;
-	rpc_authflavor_t authflavour;
-	int err = -EIO;
-
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	server = NFS_SB(sb);
-	if (data->rsize != 0)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize != 0)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
-	server->caps = NFS_CAP_ATOMIC_OPEN;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	/* Now create transport and client */
-	authflavour = RPC_AUTH_UNIX;
-	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen != 1) {
-			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-					__FUNCTION__, data->auth_flavourlen);
-			err = -EINVAL;
-			goto out_fail;
-		}
-		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
-			err = -EFAULT;
-			goto out_fail;
-		}
-	}
-
-	server->client = nfs4_create_client(server, data->timeo, data->retrans,
-					    data->proto, authflavour);
-	if (IS_ERR(server->client)) {
-		err = PTR_ERR(server->client);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-	}
-
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
 	sb->s_time_gran = 1;
-
-	sb->s_op = &nfs4_sops;
-	err = nfs_sb_init(sb, authflavour);
-
- out_fail:
-	return err;
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
 }
 
-static int nfs4_compare_super(struct super_block *sb, void *data)
+/*
+ * Set up an NFS4 superblock
+ */
+static void nfs4_fill_super(struct super_block *sb)
 {
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
-
-	if (strcmp(server->hostname, old->hostname) != 0)
-		return 0;
-	if (strcmp(server->mnt_path, old->mnt_path) != 0)
-		return 0;
-	return 1;
+	sb->s_time_gran = 1;
+	sb->s_op = &nfs4_sops;
+	nfs_initialise_sb(sb);
 }
 
-static void *
-nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 {
 	void *p = NULL;
 
@@ -1157,14 +791,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 	return dst;
 }
 
+/*
+ * Get the superblock for an NFS4 mountpoint
+ */
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	int error;
-	struct nfs_server *server;
-	struct super_block *s;
 	struct nfs4_mount_data *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct sockaddr_in addr;
+	rpc_authflavor_t authflavour;
+	struct nfs_fh mntfh;
+	struct dentry *mntroot;
+	char *mntpath = NULL, *hostname = NULL, ip_addr[16];
 	void *p;
+	int error;
 
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
@@ -1175,75 +817,107 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		return -EINVAL;
 	}
 
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
-		return -ENOMEM;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_acl = ERR_PTR(-EINVAL);
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(addr))
+		return -EINVAL;
+
+	if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
+		return -EFAULT;
+
+	if (addr.sin_family != AF_INET ||
+	    addr.sin_addr.s_addr == INADDR_ANY
+	    ) {
+		dprintk("%s: mount program didn't pass remote IP address!\n",
+				__FUNCTION__);
+		return -EINVAL;
+	}
+
+	/* Grab the authentication type */
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			error = -EINVAL;
+			goto out_err_noserver;
+		}
+
+		if (copy_from_user(&authflavour, data->auth_flavours,
+				   sizeof(authflavour))) {
+			error = -EFAULT;
+			goto out_err_noserver;
+		}
+	}
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
 		goto out_err;
-	server->hostname = p;
+	hostname = p;
 
 	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
 	if (IS_ERR(p))
 		goto out_err;
-	server->mnt_path = p;
+	mntpath = p;
 
-	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
-			sizeof(server->ip_addr) - 1);
+	dprintk("MNTPATH: %s\n", mntpath);
+
+	p = nfs_copy_user_string(ip_addr, &data->client_addr,
+				 sizeof(ip_addr) - 1);
 	if (IS_ERR(p))
 		goto out_err;
 
-	/* We now require that the mount process passes the remote address */
-	if (data->host_addrlen != sizeof(server->addr)) {
-		error = -EINVAL;
-		goto out_free;
-	}
-	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
-		error = -EFAULT;
-		goto out_free;
-	}
-	if (server->addr.sin_family != AF_INET ||
-	    server->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote IP address!\n",
-				__FUNCTION__);
-		error = -EINVAL;
-		goto out_free;
+	/* Get a volume representation */
+	server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
+				    authflavour, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
 
-	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
 	}
 
-	if (s->s_root) {
-		kfree(server->mnt_path);
-		kfree(server->hostname);
-		kfree(server);
-		return simple_set_mnt(mnt, s);
-	}
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
 
-	s->s_flags = flags;
+		nfs4_fill_super(s);
+	} else {
+		nfs_free_server(server);
+	}
 
-	error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	mntroot = nfs4_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
+
 	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	kfree(mntpath);
+	kfree(hostname);
+	return 0;
+
 out_err:
 	error = PTR_ERR(p);
+	goto out_err_noserver;
+
 out_free:
-	kfree(server->mnt_path);
-	kfree(server->hostname);
-	kfree(server);
+	nfs_free_server(server);
+out_err_noserver:
+	kfree(mntpath);
+	kfree(hostname);
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	goto out_err_noserver;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
@@ -1254,133 +928,140 @@ static void nfs4_kill_super(struct super_block *sb)
 	kill_anon_super(sb);
 
 	nfs4_renewd_prepare_shutdown(server);
-
-	if (server->client != NULL && !IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
-
-	destroy_nfsv4_state(server);
-
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	kfree(server);
-	nfs_release_automount_timer();
+	nfs_free_server(server);
 }
 
 /*
- * Constructs the SERVER-side path
+ * Clone an NFS4 server record on xdev traversal (FSID-change)
  */
-static inline char *nfs4_dup_path(const struct dentry *dentry)
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *raw_data,
+			    struct vfsmount *mnt)
 {
-	char *page = (char *) __get_free_page(GFP_USER);
-	char *path;
+	struct nfs_clone_mount *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (!IS_ERR(path)) {
-		int len = PAGE_SIZE + page - path;
-		char *tmp = path;
+	dprintk("--> nfs4_xdev_get_sb()\n");
 
-		path = kmalloc(len, GFP_KERNEL);
-		if (path)
-			memcpy(path, tmp, len);
-		else
-			path = ERR_PTR(-ENOMEM);
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
-	free_page((unsigned long)page);
-	return path;
-}
 
-static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	const struct dentry *dentry = data->dentry;
-	struct nfs_client *clp = server->nfs_client;
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	server->mnt_path = nfs4_dup_path(dentry);
-	if (IS_ERR(server->mnt_path)) {
-		sb = (struct super_block *)server->mnt_path;
-		goto err;
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
 	}
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	nfs4_server_capabilities(server, &server->fh);
-
-	down_write(&clp->cl_sem);
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	up_write(&clp->cl_sem);
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
 
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
-{
-	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
-}
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
 
-static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb = ERR_PTR(-ENOMEM);
-	int len;
-
-	len = strlen(data->mnt_path) + 1;
-	server->mnt_path = kmalloc(len, GFP_KERNEL);
-	if (server->mnt_path == NULL)
-		goto err;
-	memcpy(server->mnt_path, data->mnt_path, len);
-	memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
-
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_clone_super(s, data->sb);
+	}
 
-static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
-{
-	struct nfs_server *server = NFS_SB(sb);
-	int proto, timeo, retrans;
-	void *err;
-
-	proto = IPPROTO_TCP;
-	/* Since we are following a referral and there may be alternatives,
-	   set the timeouts and retries to low values */
-	timeo = 2;
-	retrans = 1;
-
-	nfs_put_client(server->nfs_client);
-	server->nfs_client = NULL;
-	server->client = nfs4_create_client(server, timeo, retrans, proto,
-					    data->authflavor);
-	if (IS_ERR((err = server->client)))
-		goto out_err;
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
 
-	sb->s_time_gran = 1;
-	sb->s_op = &nfs4_sops;
-	err = ERR_PTR(nfs_sb_init(sb, data->authflavor));
-	if (!IS_ERR(err))
-		return server;
-out_err:
-	return (struct nfs_server *)err;
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs4_xdev_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Create an NFS4 server record on referral traversal
+ */
+static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data,
+				struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt);
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	struct nfs_fh mntfh;
+	int error;
+
+	dprintk("--> nfs4_referral_get_sb()\n");
+
+	/* create a new volume representation */
+	server = nfs4_create_referral_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
+
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
+
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
+
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_fill_super(s);
+	}
+
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
+
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs4_referral_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
-#endif
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7084ac9a64555..453d44666ea58 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1273,7 +1273,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS: faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(data->inode)->hostname,
+				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
 				resp->verf->committed, argp->stable);
 			complain = jiffies + 300 * HZ;
 		}
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index d404ceca9168f..6d0be0efd1b52 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -51,7 +51,6 @@ struct nfs_client {
 	unsigned long		cl_lease_time;
 	unsigned long		cl_last_renewal;
 	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
 
 	struct rpc_wait_queue	cl_rpcwaitq;
 
@@ -74,6 +73,10 @@ struct nfs_client {
  */
 struct nfs_server {
 	struct nfs_client *	nfs_client;	/* shared client and NFS4 state */
+	struct list_head	client_link;	/* List of other nfs_server structs
+						 * that share the same client
+						 */
+	struct list_head	master_link;	/* link in master servers list */
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
@@ -92,20 +95,13 @@ struct nfs_server {
 	unsigned int		acdirmin;
 	unsigned int		acdirmax;
 	unsigned int		namelen;
-	char *			hostname;	/* remote hostname */
-	struct nfs_fh		fh;
-	struct sockaddr_in	addr;
+
 	struct nfs_fsid		fsid;
+	__u64			maxfilesize;	/* maximum file size */
 	unsigned long		mount_time;	/* when this fs was mounted */
+	dev_t			s_dev;		/* superblock dev numbers */
+
 #ifdef CONFIG_NFS_V4
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			ip_addr[16];
-	char *			mnt_path;
-	struct list_head	nfs4_siblings;	/* List of other nfs_server structs
-						 * that share the same clientid
-						 */
 	u32			attr_bitmask[2];/* V4 bitmask representing the set
 						   of attributes supported on this
 						   filesystem */
@@ -113,6 +109,7 @@ struct nfs_server {
 						   that are supported on this
 						   filesystem */
 #endif
+	void (*destroy)(struct nfs_server *);
 };
 
 /* Server capabilities */
-- 
GitLab


From 6aaca566503296a73f956908ec98173946134fe2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:13 -0400
Subject: [PATCH 0827/1063] NFS: Add server and volume lists to /proc

Make two new proc files available:

	/proc/fs/nfsfs/servers
	/proc/fs/nfsfs/volumes

The first lists the servers with which we are currently dealing (struct
nfs_client), and the second lists the volumes we have on those servers (struct
nfs_server).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c   | 284 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/inode.c    |   7 ++
 fs/nfs/internal.h |  12 ++
 3 files changed, 303 insertions(+)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index dafba608c0a05..27f64781444ae 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1148,3 +1148,287 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	dprintk("<-- nfs_clone_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_fs_nfs;
+
+static int nfs_server_list_open(struct inode *inode, struct file *file);
+static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_server_list_stop(struct seq_file *p, void *v);
+static int nfs_server_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_server_list_ops = {
+	.start	= nfs_server_list_start,
+	.next	= nfs_server_list_next,
+	.stop	= nfs_server_list_stop,
+	.show	= nfs_server_list_show,
+};
+
+static struct file_operations nfs_server_list_fops = {
+	.open		= nfs_server_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int nfs_volume_list_open(struct inode *inode, struct file *file);
+static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_volume_list_stop(struct seq_file *p, void *v);
+static int nfs_volume_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_volume_list_ops = {
+	.start	= nfs_volume_list_start,
+	.next	= nfs_volume_list_next,
+	.stop	= nfs_volume_list_stop,
+	.show	= nfs_volume_list_show,
+};
+
+static struct file_operations nfs_volume_list_fops = {
+	.open		= nfs_volume_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
+ * we're dealing
+ */
+static int nfs_server_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &nfs_server_list_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+}
+
+/*
+ * set up the iterator to start reading from the server list and return the first item
+ */
+static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_client_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * move to next server
+ */
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
+
+	return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the transports list
+ */
+static void nfs_server_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by a load of call lines
+ */
+static int nfs_server_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_client *clp;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT USE HOSTNAME\n");
+		return 0;
+	}
+
+	/* display one transport per line on subsequent lines */
+	clp = list_entry(v, struct nfs_client, cl_share_link);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   atomic_read(&clp->cl_count),
+		   clp->cl_hostname);
+
+	return 0;
+}
+
+/*
+ * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
+ */
+static int nfs_volume_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &nfs_volume_list_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+}
+
+/*
+ * set up the iterator to start reading from the volume list and return the first item
+ */
+static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_volume_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * move to next volume
+ */
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
+
+	return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the transports list
+ */
+static void nfs_volume_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by a load of call lines
+ */
+static int nfs_volume_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_server *server;
+	struct nfs_client *clp;
+	char dev[8], fsid[17];
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT DEV     FSID\n");
+		return 0;
+	}
+	/* display one transport per line on subsequent lines */
+	server = list_entry(v, struct nfs_server, master_link);
+	clp = server->nfs_client;
+
+	snprintf(dev, 8, "%u:%u",
+		 MAJOR(server->s_dev), MINOR(server->s_dev));
+
+	snprintf(fsid, 17, "%llx:%llx",
+		 server->fsid.major, server->fsid.minor);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   dev,
+		   fsid);
+
+	return 0;
+}
+
+/*
+ * initialise the /proc/fs/nfsfs/ directory
+ */
+int __init nfs_fs_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs);
+	if (!proc_fs_nfs)
+		goto error_0;
+
+	proc_fs_nfs->owner = THIS_MODULE;
+
+	/* a file of servers with which we're dealing */
+	p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_1;
+
+	p->proc_fops = &nfs_server_list_fops;
+	p->owner = THIS_MODULE;
+
+	/* a file of volumes that we have mounted */
+	p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_2;
+
+	p->proc_fops = &nfs_volume_list_fops;
+	p->owner = THIS_MODULE;
+	return 0;
+
+error_2:
+	remove_proc_entry("servers", proc_fs_nfs);
+error_1:
+	remove_proc_entry("nfsfs", proc_root_fs);
+error_0:
+	return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/nfsfs/ directory
+ */
+void nfs_fs_proc_exit(void)
+{
+	remove_proc_entry("volumes", proc_fs_nfs);
+	remove_proc_entry("servers", proc_fs_nfs);
+	remove_proc_entry("nfsfs", proc_root_fs);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a547c58a83e6b..cb5c65f0bc12f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1141,6 +1141,10 @@ static int __init init_nfs_fs(void)
 {
 	int err;
 
+	err = nfs_fs_proc_init();
+	if (err)
+		goto out5;
+
 	err = nfs_init_nfspagecache();
 	if (err)
 		goto out4;
@@ -1181,6 +1185,8 @@ static int __init init_nfs_fs(void)
 out3:
 	nfs_destroy_nfspagecache();
 out4:
+	nfs_fs_proc_exit();
+out5:
 	return err;
 }
 
@@ -1195,6 +1201,7 @@ static void __exit exit_nfs_fs(void)
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_nfs_fs();
+	nfs_fs_proc_exit();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e73ba4f1052a7..bea0b016bd709 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -47,6 +47,18 @@ extern void nfs_free_server(struct nfs_server *server);
 extern struct nfs_server *nfs_clone_server(struct nfs_server *,
 					   struct nfs_fh *,
 					   struct nfs_fattr *);
+#ifdef CONFIG_PROC_FS
+extern int __init nfs_fs_proc_init(void);
+extern void nfs_fs_proc_exit(void);
+#else
+static inline int nfs_fs_proc_init(void)
+{
+	return 0;
+}
+static inline void nfs_fs_proc_exit(void)
+{
+}
+#endif
 
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
-- 
GitLab


From 27ba851244f627a302d0fc6469d1ad413fc34fcb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 30 Jul 2006 14:40:56 -0400
Subject: [PATCH 0828/1063] NFS: Fix error handling

Fix an error handling problem: nfs_put_client() can be given a NULL pointer if
nfs_free_server() is asked to destroy a partially initialised record.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 27f64781444ae..700bd58012239 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -208,6 +208,9 @@ static void nfs_free_client(struct nfs_client *clp)
  */
 void nfs_put_client(struct nfs_client *clp)
 {
+	if (!clp)
+		return;
+
 	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
 
 	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
-- 
GitLab


From 738a35195941ecf604d3070e2a053e1df3de350b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 30 Jul 2006 14:58:27 -0400
Subject: [PATCH 0829/1063] NFS: Secure the roots of the NFS subtrees in a
 shared superblock

Invoke security_d_instantiate() on root dentries after allocating them with
d_alloc_anon().  Normally d_alloc_root() would do that, but we don't
call that as we don't want to assign a name to the root dentry at this point
(we may discover the real name later).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/getroot.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 977e59088eeb6..76b08ae9ed82f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -33,6 +33,7 @@
 #include <linux/vfs.h>
 #include <linux/namei.h>
 #include <linux/namespace.h>
+#include <linux/security.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -109,6 +110,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	security_d_instantiate(mntroot, inode);
+
 	if (!mntroot->d_op)
 		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
 
@@ -296,6 +299,8 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	security_d_instantiate(mntroot, inode);
+
 	if (!mntroot->d_op)
 		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
 
-- 
GitLab


From 36b15c54cd0d6f707a3ac03e4a2a60bb530a95b9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:14 -0400
Subject: [PATCH 0830/1063] NFS: Ensure NFSv2/v3 mounts respect the
 NFS_MOUNT_SECFLAVOUR flag

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 867b5dcd3a40a..97cfb143e09fe 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -471,12 +471,14 @@ static int nfs_validate_mount_data(struct nfs_mount_data *data,
 						data->version);
 				return -EINVAL;
 			}
-			/* Fill in pseudoflavor for mount version < 5 */
-			data->pseudoflavor = RPC_AUTH_UNIX;
 		case 5:
 			memset(data->context, 0, sizeof(data->context));
 	}
 
+	/* Set the pseudoflavor */
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+		data->pseudoflavor = RPC_AUTH_UNIX;
+
 #ifndef CONFIG_NFS_V3
 	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
 	if (data->flags & NFS_MOUNT_VER3) {
-- 
GitLab


From 9c5bf38d85a31b946664bcc21078ef5bb10672f7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:14 -0400
Subject: [PATCH 0831/1063] NFS: Fix nfs_alloc_client()

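The scheme to indicate which services have been started up appears to be
seriously broken: the NFS_CS_RPCIOD and NFS_CS_IDMAP bits were being set on
the failure paths instead of after the service had started successfully, and
the error path called nfs_callback_down() unconditionally rather than only
when NFS_CS_CALLBACK had been set.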

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 700bd58012239..471d975e63c38 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -113,9 +113,9 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	if (error < 0) {
 		dprintk("%s: couldn't start rpciod! Error = %d\n",
 				__FUNCTION__, error);
-		__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
 		goto error_1;
 	}
+	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
 
 	if (nfsversion == 4) {
 		if (nfs_callback_up() < 0)
@@ -153,8 +153,8 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	return clp;
 
 error_3:
-	nfs_callback_down();
-	__clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
 error_2:
 	rpciod_down();
 	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
@@ -195,7 +195,7 @@ static void nfs_free_client(struct nfs_client *clp)
 		nfs_callback_down();
 
 	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
-	rpciod_down();
+		rpciod_down();
 
 	kfree(clp->cl_hostname);
 	kfree(clp);
@@ -881,9 +881,9 @@ static int nfs4_init_client(struct nfs_client *clp,
 	if (error < 0) {
 		dprintk("%s: failed to create idmapper. Error = %d\n",
 			__FUNCTION__, error);
-		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
 		goto error;
 	}
+	__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
 
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 	return 0;
-- 
GitLab


From ec739ef03dc926d05051c8c5838971445504470a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:15 -0400
Subject: [PATCH 0832/1063] SUNRPC: Create a helper to tell whether a transport
 is bound

Hide the contents and format of xprt->addr by eliminating direct uses
of the xprt->addr.sin_port field.  This change is required to support
alternate RPC host address formats (e.g. IPv6).

Test-plan:
Destructive testing (unplugging the network temporarily).  Repeated runs of
Connectathon locking suite with UDP and TCP.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h | 16 ++++++++++++++++
 net/sunrpc/clnt.c           | 10 +++++-----
 net/sunrpc/pmap_clnt.c      |  5 ++++-
 net/sunrpc/xprt.c           |  2 +-
 net/sunrpc/xprtsock.c       | 14 ++++++++++----
 5 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3a0cca255b76e..a71106723d719 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -269,6 +269,7 @@ int			xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
 #define XPRT_CONNECTED		(1)
 #define XPRT_CONNECTING		(2)
 #define XPRT_CLOSE_WAIT		(3)
+#define XPRT_BOUND		(4)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
@@ -312,6 +313,21 @@ static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt)
 	return test_and_set_bit(XPRT_CONNECTING, &xprt->state);
 }
 
+static inline void xprt_set_bound(struct rpc_xprt *xprt)
+{
+	test_and_set_bit(XPRT_BOUND, &xprt->state);
+}
+
+static inline int xprt_bound(struct rpc_xprt *xprt)
+{
+	return test_bit(XPRT_BOUND, &xprt->state);
+}
+
+static inline void xprt_clear_bound(struct rpc_xprt *xprt)
+{
+	clear_bit(XPRT_BOUND, &xprt->state);
+}
+
 #endif /* __KERNEL__*/
 
 #endif /* _LINUX_SUNRPC_XPRT_H */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 3e19d321067a9..0b8d03d085611 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -148,7 +148,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_protname = program->name;
 	clnt->cl_pmap	  = &clnt->cl_pmap_default;
-	clnt->cl_port     = xprt->addr.sin_port;
 	clnt->cl_prog     = program->number;
 	clnt->cl_vers     = version->number;
 	clnt->cl_prot     = xprt->prot;
@@ -156,7 +155,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
 	rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
 
-	if (!clnt->cl_port)
+	if (!xprt_bound(clnt->cl_xprt))
 		clnt->cl_autobind = 1;
 
 	clnt->cl_rtt = &clnt->cl_rtt_default;
@@ -570,7 +569,7 @@ EXPORT_SYMBOL(rpc_max_payload);
 void rpc_force_rebind(struct rpc_clnt *clnt)
 {
 	if (clnt->cl_autobind)
-		clnt->cl_port = 0;
+		xprt_clear_bound(clnt->cl_xprt);
 }
 EXPORT_SYMBOL(rpc_force_rebind);
 
@@ -782,14 +781,15 @@ static void
 call_bind(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_xprt *xprt = task->tk_xprt;
 
 	dprintk("RPC: %4d call_bind (status %d)\n",
 				task->tk_pid, task->tk_status);
 
 	task->tk_action = call_connect;
-	if (!clnt->cl_port) {
+	if (!xprt_bound(xprt)) {
 		task->tk_action = call_bind_status;
-		task->tk_timeout = task->tk_xprt->bind_timeout;
+		task->tk_timeout = xprt->bind_timeout;
 		rpc_getport(task, clnt);
 	}
 }
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 623180f224c97..209ffdfee10be 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -142,15 +142,17 @@ pmap_getport_done(struct rpc_task *task)
 	dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
 			task->tk_pid, task->tk_status, clnt->cl_port);
 
-	xprt->ops->set_port(xprt, 0);
 	if (task->tk_status < 0) {
 		/* Make the calling task exit with an error */
+		xprt->ops->set_port(xprt, 0);
 		task->tk_action = rpc_exit_task;
 	} else if (clnt->cl_port == 0) {
 		/* Program not registered */
+		xprt->ops->set_port(xprt, 0);
 		rpc_exit(task, -EACCES);
 	} else {
 		xprt->ops->set_port(xprt, clnt->cl_port);
+		xprt_set_bound(xprt);
 		clnt->cl_port = htons(clnt->cl_port);
 	}
 	spin_lock(&pmap_lock);
@@ -218,6 +220,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
 	if (IS_ERR(xprt))
 		return (struct rpc_clnt *)xprt;
 	xprt->ops->set_port(xprt, RPC_PMAP_PORT);
+	xprt_set_bound(xprt);
 	if (!privileged)
 		xprt->resvport = 0;
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e8c2bc4977f3a..e239ef985ef74 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task)
 	dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
 			xprt, (xprt_connected(xprt) ? "is" : "is not"));
 
-	if (!xprt->addr.sin_port) {
+	if (!xprt_bound(xprt)) {
 		task->tk_status = -EIO;
 		return;
 	}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 441bd53f5eca8..123ac1e5ba15a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1016,7 +1016,7 @@ static void xs_udp_connect_worker(void *args)
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
-	if (xprt->shutdown || xprt->addr.sin_port == 0)
+	if (xprt->shutdown || !xprt_bound(xprt))
 		goto out;
 
 	dprintk("RPC:      xs_udp_connect_worker for xprt %p\n", xprt);
@@ -1099,7 +1099,7 @@ static void xs_tcp_connect_worker(void *args)
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
-	if (xprt->shutdown || xprt->addr.sin_port == 0)
+	if (xprt->shutdown || !xprt_bound(xprt))
 		goto out;
 
 	dprintk("RPC:      xs_tcp_connect_worker for xprt %p\n", xprt);
@@ -1307,8 +1307,11 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	if (xprt->slot == NULL)
 		return -ENOMEM;
 
-	xprt->prot = IPPROTO_UDP;
+	if (ntohs(xprt->addr.sin_port) != 0)
+		xprt_set_bound(xprt);
 	xprt->port = xs_get_random_port();
+
+	xprt->prot = IPPROTO_UDP;
 	xprt->tsh_size = 0;
 	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
@@ -1348,8 +1351,11 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	if (xprt->slot == NULL)
 		return -ENOMEM;
 
-	xprt->prot = IPPROTO_TCP;
+	if (ntohs(xprt->addr.sin_port) != 0)
+		xprt_set_bound(xprt);
 	xprt->port = xs_get_random_port();
+
+	xprt->prot = IPPROTO_TCP;
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
-- 
GitLab


From 4a68179d38874c37be2802442a71b847f5d1a2a9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:15 -0400
Subject: [PATCH 0833/1063] SUNRPC: Make RPC portmapper use per-transport
 storage

Move connection and bind state that was maintained in the rpc_clnt
structure to the rpc_xprt structure.  This will allow the creation of
a clean API for plugging in different types of bind mechanisms.

This brings improvements such as the elimination of a single spin lock to
control serialization for all in-kernel RPC binding.  A set of per-xprt
bitops is used to serialize tasks during RPC binding, just like it now
works for making RPC transport connections.

Test-plan:
Destructive testing (unplugging the network temporarily).  Connectathon
with UDP and TCP.  NFSv2/3 and NFSv4 mounting should be carefully checked.
Probably need to rig a server where certain services aren't running, or
that returns an error for some typical operation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  23 +-----
 include/linux/sunrpc/xprt.h |  14 ++++
 net/sunrpc/clnt.c           |   8 +-
 net/sunrpc/pmap_clnt.c      | 158 ++++++++++++++++++++++++------------
 net/sunrpc/xprt.c           |   1 +
 5 files changed, 123 insertions(+), 81 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 8fe9f35eba31f..00e9dbaec9c5e 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -18,18 +18,6 @@
 #include <linux/sunrpc/timer.h>
 #include <asm/signal.h>
 
-/*
- * This defines an RPC port mapping
- */
-struct rpc_portmap {
-	__u32			pm_prog;
-	__u32			pm_vers;
-	__u32			pm_prot;
-	__u16			pm_port;
-	unsigned char		pm_binding : 1;	/* doing a getport() */
-	struct rpc_wait_queue	pm_bindwait;	/* waiting on getport() */
-};
-
 struct rpc_inode;
 
 /*
@@ -40,7 +28,9 @@ struct rpc_clnt {
 	atomic_t		cl_users;	/* number of references */
 	struct rpc_xprt *	cl_xprt;	/* transport */
 	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
-	u32			cl_maxproc;	/* max procedure number */
+	u32			cl_prog,	/* RPC program number */
+				cl_vers,	/* RPC version number */
+				cl_maxproc;	/* max procedure number */
 
 	char *			cl_server;	/* server machine name */
 	char *			cl_protname;	/* protocol name */
@@ -55,7 +45,6 @@ struct rpc_clnt {
 				cl_dead     : 1;/* abandoned */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
-	struct rpc_portmap *	cl_pmap;	/* port mapping */
 
 	int			cl_nodelen;	/* nodename length */
 	char 			cl_nodename[UNX_MAXNODENAME];
@@ -64,14 +53,8 @@ struct rpc_clnt {
 	struct dentry *		cl_dentry;	/* inode */
 	struct rpc_clnt *	cl_parent;	/* Points to parent of clones */
 	struct rpc_rtt		cl_rtt_default;
-	struct rpc_portmap	cl_pmap_default;
 	char			cl_inline_name[32];
 };
-#define cl_timeout		cl_xprt->timeout
-#define cl_prog			cl_pmap->pm_prog
-#define cl_vers			cl_pmap->pm_vers
-#define cl_port			cl_pmap->pm_port
-#define cl_prot			cl_pmap->pm_prot
 
 /*
  * General RPC program info
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index a71106723d719..4ce82616873d8 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -138,6 +138,7 @@ struct rpc_xprt {
 	unsigned int		tsh_size;	/* size of transport specific
 						   header */
 
+	struct rpc_wait_queue	binding;	/* requests waiting on rpcbind */
 	struct rpc_wait_queue	sending;	/* requests waiting to send */
 	struct rpc_wait_queue	resend;		/* requests waiting to resend */
 	struct rpc_wait_queue	pending;	/* requests in flight */
@@ -270,6 +271,7 @@ int			xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
 #define XPRT_CONNECTING		(2)
 #define XPRT_CLOSE_WAIT		(3)
 #define XPRT_BOUND		(4)
+#define XPRT_BINDING		(5)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
@@ -328,6 +330,18 @@ static inline void xprt_clear_bound(struct rpc_xprt *xprt)
 	clear_bit(XPRT_BOUND, &xprt->state);
 }
 
+static inline void xprt_clear_binding(struct rpc_xprt *xprt)
+{
+	smp_mb__before_clear_bit();
+	clear_bit(XPRT_BINDING, &xprt->state);
+	smp_mb__after_clear_bit();
+}
+
+static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
+{
+	return test_and_set_bit(XPRT_BINDING, &xprt->state);
+}
+
 #endif /* __KERNEL__*/
 
 #endif /* _LINUX_SUNRPC_XPRT_H */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b8d03d085611..cee504162a3fb 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -147,13 +147,10 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_protname = program->name;
-	clnt->cl_pmap	  = &clnt->cl_pmap_default;
 	clnt->cl_prog     = program->number;
 	clnt->cl_vers     = version->number;
-	clnt->cl_prot     = xprt->prot;
 	clnt->cl_stats    = program->stats;
 	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
-	rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
 
 	if (!xprt_bound(clnt->cl_xprt))
 		clnt->cl_autobind = 1;
@@ -243,8 +240,6 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	atomic_set(&new->cl_users, 0);
 	new->cl_parent = clnt;
 	atomic_inc(&clnt->cl_count);
-	/* Duplicate portmapper */
-	rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
 	/* Turn off autobind on clones */
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
@@ -254,8 +249,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
-	new->cl_pmap		= &new->cl_pmap_default;
-	new->cl_metrics         = rpc_alloc_iostats(clnt);
+	new->cl_metrics = rpc_alloc_iostats(clnt);
 	return new;
 out_no_clnt:
 	printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 209ffdfee10be..59d542436ca93 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -24,11 +24,57 @@
 #define PMAP_UNSET		2
 #define PMAP_GETPORT		3
 
+struct portmap_args {
+	u32			pm_prog;
+	u32			pm_vers;
+	u32			pm_prot;
+	unsigned short		pm_port;
+	struct rpc_task *	pm_task;
+};
+
 static struct rpc_procinfo	pmap_procedures[];
 static struct rpc_clnt *	pmap_create(char *, struct sockaddr_in *, int, int);
-static void			pmap_getport_done(struct rpc_task *);
+static void			pmap_getport_done(struct rpc_task *, void *);
 static struct rpc_program	pmap_program;
-static DEFINE_SPINLOCK(pmap_lock);
+
+static void pmap_getport_prepare(struct rpc_task *task, void *calldata)
+{
+	struct portmap_args *map = calldata;
+	struct rpc_message msg = {
+		.rpc_proc	= &pmap_procedures[PMAP_GETPORT],
+		.rpc_argp	= map,
+		.rpc_resp	= &map->pm_port,
+	};
+
+	rpc_call_setup(task, &msg, 0);
+}
+
+static inline struct portmap_args *pmap_map_alloc(void)
+{
+	return kmalloc(sizeof(struct portmap_args), GFP_NOFS);
+}
+
+static inline void pmap_map_free(struct portmap_args *map)
+{
+	kfree(map);
+}
+
+static void pmap_map_release(void *data)
+{
+	pmap_map_free(data);
+}
+
+static const struct rpc_call_ops pmap_getport_ops = {
+	.rpc_call_prepare	= pmap_getport_prepare,
+	.rpc_call_done		= pmap_getport_done,
+	.rpc_release		= pmap_map_release,
+};
+
+static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt)
+{
+	xprt_clear_binding(xprt);
+	rpc_wake_up(&xprt->binding);
+}
 
 /*
  * Obtain the port for a given RPC service on a given host. This one can
@@ -37,67 +83,71 @@ static DEFINE_SPINLOCK(pmap_lock);
 void
 rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
 {
-	struct rpc_portmap *map = clnt->cl_pmap;
-	struct sockaddr_in *sap = &clnt->cl_xprt->addr;
-	struct rpc_message msg = {
-		.rpc_proc	= &pmap_procedures[PMAP_GETPORT],
-		.rpc_argp	= map,
-		.rpc_resp	= &clnt->cl_port,
-		.rpc_cred	= NULL
-	};
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct sockaddr_in *sap = &xprt->addr;
+	struct portmap_args *map;
 	struct rpc_clnt	*pmap_clnt;
-	struct rpc_task	*child;
+	struct rpc_task *child;
 
-	dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n",
+	dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n",
 			task->tk_pid, clnt->cl_server,
-			map->pm_prog, map->pm_vers, map->pm_prot);
+			clnt->cl_prog, clnt->cl_vers, xprt->prot);
 
 	/* Autobind on cloned rpc clients is discouraged */
 	BUG_ON(clnt->cl_parent != clnt);
 
-	spin_lock(&pmap_lock);
-	if (map->pm_binding) {
-		rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
-		spin_unlock(&pmap_lock);
+	if (xprt_test_and_set_binding(xprt)) {
+		task->tk_status = -EACCES;	/* tell caller to check again */
+		rpc_sleep_on(&xprt->binding, task, NULL, NULL);
 		return;
 	}
-	map->pm_binding = 1;
-	spin_unlock(&pmap_lock);
+
+	/* Someone else may have bound if we slept */
+	if (xprt_bound(xprt)) {
+		task->tk_status = 0;
+		goto bailout_nofree;
+	}
+
+	map = pmap_map_alloc();
+	if (!map) {
+		task->tk_status = -ENOMEM;
+		goto bailout_nofree;
+	}
+	map->pm_prog = clnt->cl_prog;
+	map->pm_vers = clnt->cl_vers;
+	map->pm_prot = xprt->prot;
+	map->pm_port = 0;
+	map->pm_task = task;
 
 	pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0);
 	if (IS_ERR(pmap_clnt)) {
 		task->tk_status = PTR_ERR(pmap_clnt);
 		goto bailout;
 	}
-	task->tk_status = 0;
 
-	/*
-	 * Note: rpc_new_child will release client after a failure.
-	 */
-	if (!(child = rpc_new_child(pmap_clnt, task)))
+	child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
+	if (IS_ERR(child)) {
+		task->tk_status = -EIO;
 		goto bailout;
+	}
+	rpc_release_task(child);
 
-	/* Setup the call info struct */
-	rpc_call_setup(child, &msg, 0);
+	rpc_sleep_on(&xprt->binding, task, NULL, NULL);
 
-	/* ... and run the child task */
 	task->tk_xprt->stat.bind_count++;
-	rpc_run_child(task, child, pmap_getport_done);
 	return;
 
 bailout:
-	spin_lock(&pmap_lock);
-	map->pm_binding = 0;
-	rpc_wake_up(&map->pm_bindwait);
-	spin_unlock(&pmap_lock);
-	rpc_exit(task, -EIO);
+	pmap_map_free(map);
+bailout_nofree:
+	pmap_wake_portmap_waiters(xprt);
 }
 
 #ifdef CONFIG_ROOT_NFS
 int
 rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
 {
-	struct rpc_portmap map = {
+	struct portmap_args map = {
 		.pm_prog	= prog,
 		.pm_vers	= vers,
 		.pm_prot	= prot,
@@ -133,32 +183,32 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
 #endif
 
 static void
-pmap_getport_done(struct rpc_task *task)
+pmap_getport_done(struct rpc_task *child, void *data)
 {
-	struct rpc_clnt	*clnt = task->tk_client;
+	struct portmap_args *map = data;
+	struct rpc_task *task = map->pm_task;
 	struct rpc_xprt *xprt = task->tk_xprt;
-	struct rpc_portmap *map = clnt->cl_pmap;
-
-	dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
-			task->tk_pid, task->tk_status, clnt->cl_port);
+	int status = child->tk_status;
 
-	if (task->tk_status < 0) {
-		/* Make the calling task exit with an error */
+	if (status < 0) {
+		/* Portmapper not available */
 		xprt->ops->set_port(xprt, 0);
-		task->tk_action = rpc_exit_task;
-	} else if (clnt->cl_port == 0) {
-		/* Program not registered */
+		task->tk_status = status;
+	} else if (map->pm_port == 0) {
+		/* Requested RPC service wasn't registered */
 		xprt->ops->set_port(xprt, 0);
-		rpc_exit(task, -EACCES);
+		task->tk_status = -EACCES;
 	} else {
-		xprt->ops->set_port(xprt, clnt->cl_port);
+		/* Succeeded */
+		xprt->ops->set_port(xprt, map->pm_port);
 		xprt_set_bound(xprt);
-		clnt->cl_port = htons(clnt->cl_port);
+		task->tk_status = 0;
 	}
-	spin_lock(&pmap_lock);
-	map->pm_binding = 0;
-	rpc_wake_up(&map->pm_bindwait);
-	spin_unlock(&pmap_lock);
+
+	dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n",
+			child->tk_pid, child->tk_status, map->pm_port);
+
+	pmap_wake_portmap_waiters(xprt);
 }
 
 /*
@@ -172,7 +222,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 		.sin_family	= AF_INET,
 		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
 	};
-	struct rpc_portmap	map = {
+	struct portmap_args	map = {
 		.pm_prog	= prog,
 		.pm_vers	= vers,
 		.pm_prot	= prot,
@@ -239,7 +289,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
  * XDR encode/decode functions for PMAP
  */
 static int
-xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
+xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map)
 {
 	dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
 		map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e239ef985ef74..b45abd0743cb8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -928,6 +928,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
 	xprt->last_used = jiffies;
 	xprt->cwnd = RPC_INITCWND;
 
+	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
 	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
 	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
 	rpc_init_wait_queue(&xprt->resend, "xprt_resend");
-- 
GitLab


From c4a5692fb83f23008c720fe84454d5603e80b103 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:16 -0400
Subject: [PATCH 0834/1063] SUNRPC: Clean-up after recent changes to
 sunrpc/pmap_clnt.c

Add comments for external functions, use modern function definition style,
and fix up dprintk formatting.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/pmap_clnt.c | 70 +++++++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 59d542436ca93..0efcbf1302a20 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -1,7 +1,9 @@
 /*
- * linux/net/sunrpc/pmap.c
+ * linux/net/sunrpc/pmap_clnt.c
  *
- * Portmapper client.
+ * In-kernel RPC portmapper client.
+ *
+ * Portmapper supports version 2 of the rpcbind protocol (RFC 1833).
  *
  * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
  */
@@ -76,12 +78,15 @@ static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt)
 	rpc_wake_up(&xprt->binding);
 }
 
-/*
- * Obtain the port for a given RPC service on a given host. This one can
- * be called for an ongoing RPC request.
+/**
+ * rpc_getport - obtain the port for a given RPC service on a given host
+ * @task: task that is waiting for portmapper request
+ * @clnt: controlling rpc_clnt
+ *
+ * This one can be called for an ongoing RPC request, and can be used in
+ * an async (rpciod) context.
  */
-void
-rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
+void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
 {
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct sockaddr_in *sap = &xprt->addr;
@@ -144,8 +149,16 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
 }
 
 #ifdef CONFIG_ROOT_NFS
-int
-rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
+/**
+ * rpc_getport_external - obtain the port for a given RPC service on a given host
+ * @sin: address of remote peer
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ *
+ * This one is called from outside the RPC client in a synchronous task context.
+ */
+int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
 {
 	struct portmap_args map = {
 		.pm_prog	= prog,
@@ -162,7 +175,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
 	char		hostname[32];
 	int		status;
 
-	dprintk("RPC:      rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n",
+	dprintk("RPC:      rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
 			NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
 	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
@@ -182,8 +195,10 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
 }
 #endif
 
-static void
-pmap_getport_done(struct rpc_task *child, void *data)
+/*
+ * Portmapper child task invokes this callback via its rpc_call_done op.
+ */
+static void pmap_getport_done(struct rpc_task *child, void *data)
 {
 	struct portmap_args *map = data;
 	struct rpc_task *task = map->pm_task;
@@ -211,12 +226,17 @@ pmap_getport_done(struct rpc_task *child, void *data)
 	pmap_wake_portmap_waiters(xprt);
 }
 
-/*
- * Set or unset a port registration with the local portmapper.
+/**
+ * rpc_register - set or unset a port registration with the local portmapper
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ * @port: port value to register
+ * @okay: result code
+ *
  * port == 0 means unregister, port != 0 means register.
  */
-int
-rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 {
 	struct sockaddr_in	sin = {
 		.sin_family	= AF_INET,
@@ -236,7 +256,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 	struct rpc_clnt		*pmap_clnt;
 	int error = 0;
 
-	dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
+	dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n",
 			prog, vers, prot, port);
 
 	pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
@@ -259,13 +279,11 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 	return error;
 }
 
-static struct rpc_clnt *
-pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
+static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
 {
 	struct rpc_xprt	*xprt;
 	struct rpc_clnt	*clnt;
 
-	/* printk("pmap: create xprt\n"); */
 	xprt = xprt_create_proto(proto, srvaddr, NULL);
 	if (IS_ERR(xprt))
 		return (struct rpc_clnt *)xprt;
@@ -274,7 +292,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
 	if (!privileged)
 		xprt->resvport = 0;
 
-	/* printk("pmap: create clnt\n"); */
 	clnt = rpc_new_client(xprt, hostname,
 				&pmap_program, RPC_PMAP_VERSION,
 				RPC_AUTH_UNIX);
@@ -288,10 +305,9 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
 /*
  * XDR encode/decode functions for PMAP
  */
-static int
-xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map)
+static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map)
 {
-	dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
+	dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
 		map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
 	*p++ = htonl(map->pm_prog);
 	*p++ = htonl(map->pm_vers);
@@ -302,15 +318,13 @@ xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map)
 	return 0;
 }
 
-static int
-xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
+static int xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
 {
 	*portp = (unsigned short) ntohl(*p++);
 	return 0;
 }
 
-static int
-xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
+static int xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
 {
 	*boolp = (unsigned int) ntohl(*p++);
 	return 0;
-- 
GitLab


From 5b1eacbcd78930d976eb50a93f1779d311b553d1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:16 -0400
Subject: [PATCH 0835/1063] SUNRPC: Support for RPC child tasks no longer
 needed

The previous patches removed the last user of RPC child tasks, so we can
remove support for child tasks from net/sunrpc/sched.c now.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  5 ---
 net/sunrpc/sched.c           | 82 ------------------------------------
 2 files changed, 87 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 82a91bb223621..f399c138f79de 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -127,7 +127,6 @@ struct rpc_call_ops {
  */
 #define RPC_TASK_ASYNC		0x0001		/* is an async task */
 #define RPC_TASK_SWAPPER	0x0002		/* is swapping in/out */
-#define RPC_TASK_CHILD		0x0008		/* is child of other task */
 #define RPC_CALL_MAJORSEEN	0x0020		/* major timeout seen */
 #define RPC_TASK_ROOTCREDS	0x0040		/* force root creds */
 #define RPC_TASK_DYNAMIC	0x0080		/* task was kmalloc'ed */
@@ -136,7 +135,6 @@ struct rpc_call_ops {
 #define RPC_TASK_NOINTR		0x0400		/* uninterruptible task */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
-#define RPC_IS_CHILD(t)		((t)->tk_flags & RPC_TASK_CHILD)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
 #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)
 #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED)
@@ -253,7 +251,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags,
 				const struct rpc_call_ops *ops, void *data);
 struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 				const struct rpc_call_ops *ops, void *data);
-struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent);
 void		rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
 				int flags, const struct rpc_call_ops *ops,
 				void *data);
@@ -261,8 +258,6 @@ void		rpc_release_task(struct rpc_task *);
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_killall_tasks(struct rpc_clnt *);
 int		rpc_execute(struct rpc_task *);
-void		rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
-					rpc_action action);
 void		rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
 void		rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
 void		rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5c3eee7685045..015ffe423a2f9 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -44,12 +44,6 @@ static void			__rpc_default_timer(struct rpc_task *task);
 static void			rpciod_killall(void);
 static void			rpc_async_schedule(void *);
 
-/*
- * RPC tasks that create another task (e.g. for contacting the portmapper)
- * will wait on this queue for their child's completion
- */
-static RPC_WAITQ(childq, "childq");
-
 /*
  * RPC tasks sit here while waiting for conditions to improve.
  */
@@ -323,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task)
 		wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
 }
 
-/*
- * Place a newly initialized task on the workqueue.
- */
-static inline void
-rpc_schedule_run(struct rpc_task *task)
-{
-	rpc_set_active(task);
-	rpc_make_runnable(task);
-}
-
 /*
  * Prepare for sleeping on a wait queue.
  * By always appending tasks to the list we ensure FIFO behavior.
@@ -933,72 +917,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 }
 EXPORT_SYMBOL(rpc_run_task);
 
-/**
- * rpc_find_parent - find the parent of a child task.
- * @child: child task
- * @parent: parent task
- *
- * Checks that the parent task is still sleeping on the
- * queue 'childq'. If so returns a pointer to the parent.
- * Upon failure returns NULL.
- *
- * Caller must hold childq.lock
- */
-static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent)
-{
-	struct rpc_task	*task;
-	struct list_head *le;
-
-	task_for_each(task, le, &childq.tasks[0])
-		if (task == parent)
-			return parent;
-
-	return NULL;
-}
-
-static void rpc_child_exit(struct rpc_task *child, void *calldata)
-{
-	struct rpc_task	*parent;
-
-	spin_lock_bh(&childq.lock);
-	if ((parent = rpc_find_parent(child, calldata)) != NULL) {
-		parent->tk_status = child->tk_status;
-		__rpc_wake_up_task(parent);
-	}
-	spin_unlock_bh(&childq.lock);
-}
-
-static const struct rpc_call_ops rpc_child_ops = {
-	.rpc_call_done = rpc_child_exit,
-};
-
-/*
- * Note: rpc_new_task releases the client after a failure.
- */
-struct rpc_task *
-rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
-{
-	struct rpc_task	*task;
-
-	task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent);
-	if (!task)
-		goto fail;
-	return task;
-
-fail:
-	parent->tk_status = -ENOMEM;
-	return NULL;
-}
-
-void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
-{
-	spin_lock_bh(&childq.lock);
-	/* N.B. Is it possible for the child to have already finished? */
-	__rpc_sleep_on(&childq, task, func, NULL);
-	rpc_schedule_run(child);
-	spin_unlock_bh(&childq.lock);
-}
-
 /*
  * Kill all tasks for the given client.
  * XXX: kill their descendants as well?
-- 
GitLab


From bbf7c1dd2ae2b4040b41b1065ee9b1b6905b1605 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:16 -0400
Subject: [PATCH 0836/1063] SUNRPC: Introduce transport switch callout for
 pluggable rpcbind

Introduce a clean transport switch API for plugging in different types of
rpcbind mechanisms.  For instance, rpcbind can cleanly replace the
existing portmapper client, or a transport can choose to implement RPC
binding any way it likes.

Test plan:
Destructive testing (unplugging the network temporarily).  Connectathon
with UDP and TCP.  NFSv2/3 and NFSv4 mounting should be carefully checked.
Probably need to rig a server where certain services aren't running, or
that returns an error for some typical operation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 +-
 include/linux/sunrpc/xprt.h | 1 +
 net/sunrpc/clnt.c           | 3 +--
 net/sunrpc/pmap_clnt.c      | 4 ++--
 net/sunrpc/xprtsock.c       | 2 ++
 5 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 00e9dbaec9c5e..2e68ac0aa0225 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -106,7 +106,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
-void		rpc_getport(struct rpc_task *, struct rpc_clnt *);
+void		rpc_getport(struct rpc_task *);
 int		rpc_register(u32, u32, int, unsigned short, int *);
 
 void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 4ce82616873d8..84122559fa17d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -105,6 +105,7 @@ struct rpc_xprt_ops {
 	void		(*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
 	int		(*reserve_xprt)(struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
+	void		(*rpcbind)(struct rpc_task *task);
 	void		(*set_port)(struct rpc_xprt *xprt, unsigned short port);
 	void		(*connect)(struct rpc_task *task);
 	void *		(*buf_alloc)(struct rpc_task *task, size_t size);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cee504162a3fb..d003c2f5688f8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -774,7 +774,6 @@ call_encode(struct rpc_task *task)
 static void
 call_bind(struct rpc_task *task)
 {
-	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_xprt *xprt = task->tk_xprt;
 
 	dprintk("RPC: %4d call_bind (status %d)\n",
@@ -784,7 +783,7 @@ call_bind(struct rpc_task *task)
 	if (!xprt_bound(xprt)) {
 		task->tk_action = call_bind_status;
 		task->tk_timeout = xprt->bind_timeout;
-		rpc_getport(task, clnt);
+		xprt->ops->rpcbind(task);
 	}
 }
 
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 0efcbf1302a20..f7b279a63baa8 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -81,13 +81,13 @@ static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt)
 /**
  * rpc_getport - obtain the port for a given RPC service on a given host
  * @task: task that is waiting for portmapper request
- * @clnt: controlling rpc_clnt
  *
  * This one can be called for an ongoing RPC request, and can be used in
  * an async (rpciod) context.
  */
-void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
+void rpc_getport(struct rpc_task *task)
 {
+	struct rpc_clnt *clnt = task->tk_client;
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct sockaddr_in *sap = &xprt->addr;
 	struct portmap_args *map;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 123ac1e5ba15a..4c98b89a5b48a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1262,6 +1262,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
+	.rpcbind		= rpc_getport,
 	.set_port		= xs_set_port,
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
@@ -1278,6 +1279,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 static struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xs_tcp_release_xprt,
+	.rpcbind		= rpc_getport,
 	.set_port		= xs_set_port,
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
-- 
GitLab


From ed39440a2573abc926f230267000f21fa5a87822 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:17 -0400
Subject: [PATCH 0837/1063] SUNRPC: create API for getting remote peer address

Provide an API for retrieving the remote peer address without allowing
direct access to the rpc_xprt struct.

Test-plan:
Compile kernel with CONFIG_NFS enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 2e68ac0aa0225..65196b03f0abe 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -123,6 +123,7 @@ void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
 void		rpc_force_rebind(struct rpc_clnt *);
 int		rpc_ping(struct rpc_clnt *clnt, int flags);
+size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 
 /*
  * Helper function for NFSroot support
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d003c2f5688f8..94768cf5fd5bb 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -533,6 +533,27 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 		task->tk_action = rpc_exit_task;
 }
 
+/**
+ * rpc_peeraddr - extract remote peer address from clnt's xprt
+ * @clnt: RPC client structure
+ * @buf: target buffer
+ * @bufsize: length of target buffer
+ *
+ * Returns the number of bytes that are actually in the stored address.
+ */
+size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
+{
+	size_t bytes;
+	struct rpc_xprt *xprt = clnt->cl_xprt;
+
+	bytes = sizeof(xprt->addr);
+	if (bytes > bufsize)
+		bytes = bufsize;
+	memcpy(buf, &clnt->cl_xprt->addr, bytes);
+	return sizeof(xprt->addr);
+}
+EXPORT_SYMBOL(rpc_peeraddr);
+
 void
 rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
 {
-- 
GitLab


From 44c31be261540acf66ddd730631ead8009cc361d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:17 -0400
Subject: [PATCH 0838/1063] LOCKD: Teach lockd to use the new rpc_peeraddr()
 API

Hide the details of how the RPC client stores remote peer addresses from
the Network Lock Manager.

Test plan:
Destructive testing (unplugging the network temporarily).  Connectathon
with UDP and TCP.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 89ba0df14c220..50dbb67ae0c4b 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -151,11 +151,13 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
+	struct rpc_clnt		*client = NFS_CLIENT(inode);
+	struct sockaddr_in	addr;
 	struct nlm_host		*host;
 	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
-	int			status, proto, vers;
+	int			status, vers;
 
 	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
 	if (NFS_PROTO(inode)->version > 3) {
@@ -163,10 +165,8 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 		return -ENOLCK;
 	}
 
-	/* Retrieve transport protocol from NFS client */
-	proto = NFS_CLIENT(inode)->cl_xprt->prot;
-
-	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
+	host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
 	if (host == NULL)
 		return -ENOLCK;
 
-- 
GitLab


From 081f79a9b09b634f0dc08ed014e0195464d52535 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:17 -0400
Subject: [PATCH 0839/1063] SUNRPC: Teach the RPC portmapper to use the new
 rpc_peeraddr() API.

Hide the details of how the RPC client stores remote peer addresses from
the RPC portmapper.

Test plan:
Destructive testing (unplugging the network temporarily).  Connectathon
with UDP and TCP.  NFSv2/3 and NFSv4 mounting should be carefully checked.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/pmap_clnt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index f7b279a63baa8..3eee8e907275c 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -89,7 +89,7 @@ void rpc_getport(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
 	struct rpc_xprt *xprt = task->tk_xprt;
-	struct sockaddr_in *sap = &xprt->addr;
+	struct sockaddr_in addr;
 	struct portmap_args *map;
 	struct rpc_clnt	*pmap_clnt;
 	struct rpc_task *child;
@@ -124,7 +124,8 @@ void rpc_getport(struct rpc_task *task)
 	map->pm_port = 0;
 	map->pm_task = task;
 
-	pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0);
+	rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
+	pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
 	if (IS_ERR(pmap_clnt)) {
 		task->tk_status = PTR_ERR(pmap_clnt);
 		goto bailout;
-- 
GitLab


From 39d7bbcb5ba5e9d8d658b70903dd7939400e57db Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:18 -0400
Subject: [PATCH 0840/1063] SUNRPC: remove extraneous header inclusions

include/linux/sunrpc/clnt.h already includes include/linux/sunrpc/xprt.h.
We can remove xprt.h from source files that already include clnt.h.
Likewise include/linux/sunrpc/timer.h.

Test plan:
Compile kernel with CONFIG_NFS enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/mount_clnt.c     | 1 -
 include/linux/nfs_xdr.h | 1 -
 net/sunrpc/pmap_clnt.c  | 1 -
 net/sunrpc/sched.c      | 1 -
 net/sunrpc/timer.c      | 2 --
 5 files changed, 6 deletions(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 445abb4d42146..41274874b9a57 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,7 +14,6 @@
 #include <linux/net.h>
 #include <linux/in.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/nfs_fs.h>
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2426b11b6cce5..0f33e621892f5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_NFS_XDR_H
 #define _LINUX_NFS_XDR_H
 
-#include <linux/sunrpc/xprt.h>
 #include <linux/nfsacl.h>
 
 /*
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 3eee8e907275c..523f0e825dea9 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -15,7 +15,6 @@
 #include <linux/uio.h>
 #include <linux/in.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 
 #ifdef RPC_DEBUG
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 015ffe423a2f9..ecf366351bf71 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -21,7 +21,6 @@
 #include <linux/mutex.h>
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 
 #ifdef RPC_DEBUG
 #define RPCDBG_FACILITY		RPCDBG_SCHED
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index bcbdf6430d5c0..8142fdb8a9306 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -19,8 +19,6 @@
 #include <linux/unistd.h>
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
-#include <linux/sunrpc/timer.h>
 
 #define RPC_RTO_MAX (60*HZ)
 #define RPC_RTO_INIT (HZ/5)
-- 
GitLab


From edb267a688fcee5335d596752f117a30c7152e44 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:18 -0400
Subject: [PATCH 0841/1063] SUNRPC: add xprt switch API for printing formatted
 remote peer addresses

Add a new method to the transport switch API to provide a way to convert
the opaque contents of xprt->addr to a human-readable string.

Test plan:
Compile kernel with CONFIG_NFS enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h | 11 ++++++
 net/sunrpc/xprtsock.c       | 79 +++++++++++++++++++++++++++++++++----
 2 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 84122559fa17d..8372ab8fc9b58 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -51,6 +51,14 @@ struct rpc_timeout {
 	unsigned char		to_exponential;
 };
 
+enum rpc_display_format_t {
+	RPC_DISPLAY_ADDR = 0,	/* peer address only */
+	RPC_DISPLAY_PORT,	/* peer port only */
+	RPC_DISPLAY_PROTO,	/* transport protocol name */
+	RPC_DISPLAY_ALL,	/* address, port and protocol in one string */
+	RPC_DISPLAY_MAX,	/* number of formats; sizes address_strings[] */
+};
+
 struct rpc_task;
 struct rpc_xprt;
 struct seq_file;
@@ -103,6 +111,7 @@ struct rpc_rqst {
 
 struct rpc_xprt_ops {
 	void		(*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
+	char *		(*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format);
 	int		(*reserve_xprt)(struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*rpcbind)(struct rpc_task *task);
@@ -207,6 +216,8 @@ struct rpc_xprt {
 	void			(*old_data_ready)(struct sock *, int);
 	void			(*old_state_change)(struct sock *);
 	void			(*old_write_space)(struct sock *);
+
+	char *			address_strings[RPC_DISPLAY_MAX];
 };
 
 #define XPRT_LAST_FRAG		(1 << 0)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4c98b89a5b48a..cb8e6c34e12f7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
 }
 #endif
 
+/*
+ * Cache printable forms of the IPv4 peer address in xprt->address_strings[].
+ */
+static void xs_format_peer_addresses(struct rpc_xprt *xprt)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *) &xprt->addr;
+	char *proto = (xprt->prot == IPPROTO_UDP) ? "udp" : "tcp";
+	char *str;
+
+	/* dotted-quad address; a failed allocation leaves the slot NULL */
+	str = kzalloc(20, GFP_KERNEL);
+	if (str != NULL)
+		snprintf(str, 20, "%u.%u.%u.%u",
+				NIPQUAD(sin->sin_addr.s_addr));
+	xprt->address_strings[RPC_DISPLAY_ADDR] = str;
+
+	/* port number, converted to host byte order */
+	str = kzalloc(8, GFP_KERNEL);
+	if (str != NULL)
+		snprintf(str, 8, "%u", ntohs(sin->sin_port));
+	xprt->address_strings[RPC_DISPLAY_PORT] = str;
+
+	/* a string literal, not an allocation: never kfree() this slot */
+	xprt->address_strings[RPC_DISPLAY_PROTO] = proto;
+
+	/* address, port and protocol combined */
+	str = kzalloc(48, GFP_KERNEL);
+	if (str != NULL)
+		snprintf(str, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
+			NIPQUAD(sin->sin_addr.s_addr),
+			ntohs(sin->sin_port), proto);
+	xprt->address_strings[RPC_DISPLAY_ALL] = str;
+}
+
+static void xs_free_peer_addresses(struct rpc_xprt *xprt)
+{
+	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);	/* PROTO is a literal */
+}
+
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
 
 static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
@@ -490,6 +531,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
 
 	xprt_disconnect(xprt);
 	xs_close(xprt);
+	xs_free_peer_addresses(xprt);
 	kfree(xprt->slot);
 }
 
@@ -964,6 +1006,19 @@ static unsigned short xs_get_random_port(void)
 	return rand + xprt_min_resvport;
 }
 
+/**
+ * xs_print_peer_address - look up a pre-formatted peer address string
+ * @xprt: generic transport
+ * @format: selects which of the cached strings to return
+ */
+static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format)
+{
+	char *str = xprt->address_strings[format];
+
+	/* a NULL slot means no string was formatted for it */
+	return (str != NULL) ? str : "unprintable";
+}
+
 /**
  * xs_set_port - reset the port number in the remote endpoint address
  * @xprt: generic transport
@@ -1019,8 +1074,6 @@ static void xs_udp_connect_worker(void *args)
 	if (xprt->shutdown || !xprt_bound(xprt))
 		goto out;
 
-	dprintk("RPC:      xs_udp_connect_worker for xprt %p\n", xprt);
-
 	/* Start by resetting any existing state */
 	xs_close(xprt);
 
@@ -1034,6 +1087,9 @@ static void xs_udp_connect_worker(void *args)
 		goto out;
 	}
 
+	dprintk("RPC:      worker connecting xprt %p to address: %s\n",
+			xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	if (!xprt->inet) {
 		struct sock *sk = sock->sk;
 
@@ -1102,8 +1158,6 @@ static void xs_tcp_connect_worker(void *args)
 	if (xprt->shutdown || !xprt_bound(xprt))
 		goto out;
 
-	dprintk("RPC:      xs_tcp_connect_worker for xprt %p\n", xprt);
-
 	if (!xprt->sock) {
 		/* start from scratch */
 		if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
@@ -1119,6 +1173,9 @@ static void xs_tcp_connect_worker(void *args)
 		/* "close" the socket, preserving the local port */
 		xs_tcp_reuse_connection(xprt);
 
+	dprintk("RPC:      worker connecting xprt %p to address: %s\n",
+			xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	if (!xprt->inet) {
 		struct sock *sk = sock->sk;
 
@@ -1260,6 +1317,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 
 static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
+	.print_addr		= xs_print_peer_address,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
 	.rpcbind		= rpc_getport,
@@ -1277,6 +1335,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 };
 
 static struct rpc_xprt_ops xs_tcp_ops = {
+	.print_addr		= xs_print_peer_address,
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xs_tcp_release_xprt,
 	.rpcbind		= rpc_getport,
@@ -1301,8 +1360,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 {
 	size_t slot_table_size;
 
-	dprintk("RPC:      setting up udp-ipv4 transport...\n");
-
 	xprt->max_reqs = xprt_udp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
 	xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
@@ -1332,6 +1389,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	else
 		xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
 
+	xs_format_peer_addresses(xprt);
+	dprintk("RPC:      set up transport to address %s\n",
+			xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	return 0;
 }
 
@@ -1345,8 +1406,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 {
 	size_t slot_table_size;
 
-	dprintk("RPC:      setting up tcp-ipv4 transport...\n");
-
 	xprt->max_reqs = xprt_tcp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
 	xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
@@ -1375,5 +1434,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	else
 		xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
 
+	xs_format_peer_addresses(xprt);
+	dprintk("RPC:      set up transport to address %s\n",
+			xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	return 0;
 }
-- 
GitLab


From f425eba437f0051bde979ea2eef8bc875a77cd00 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:18 -0400
Subject: [PATCH 0842/1063] SUNRPC: Create API for displaying remote peer
 address

Provide an API for formatting the remote peer address for printing without
exposing its internal structure.  The address could be dynamic, so we
support a function call to get the address rather than reading it straight
out of a structure.

Test-plan:
Destructive testing (unplugging the network temporarily).  Probably need
to rig a server where certain services aren't running, or that returns an
error for some typical operation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 65196b03f0abe..b7d47f018353e 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -124,6 +124,7 @@ size_t		rpc_max_payload(struct rpc_clnt *);
 void		rpc_force_rebind(struct rpc_clnt *);
 int		rpc_ping(struct rpc_clnt *clnt, int flags);
 size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
+char *		rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 
 /*
  * Helper function for NFSroot support
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 94768cf5fd5bb..e5b19e348d88d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -554,6 +554,19 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
 }
 EXPORT_SYMBOL(rpc_peeraddr);
 
+/**
+ * rpc_peeraddr2str - return remote peer address in printable format
+ * @clnt: RPC client structure
+ * @format: which rendering of the address to return
+ *
+ * Hands the request to the print_addr method of the client's transport.
+ */
+char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
+{
+	return clnt->cl_xprt->ops->print_addr(clnt->cl_xprt, format);
+}
+EXPORT_SYMBOL(rpc_peeraddr2str);
+
 void
 rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
 {
-- 
GitLab


From e7f7865743fff3d3938ec7540e5a784d662426da Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:19 -0400
Subject: [PATCH 0843/1063] SUNRPC: Teach rpc_pipe.c to use new rpc_peeraddr()
 API

Hide the details of how the RPC client stores remote peer addresses from
the RPC pipefs implementation.

Test plan:
Connectathon with Kerberos 5 authentication.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpc_pipe.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b1a1ac8a4bc8..c21dc07f2a8cc 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -327,10 +327,8 @@ rpc_show_info(struct seq_file *m, void *v)
 	seq_printf(m, "RPC server: %s\n", clnt->cl_server);
 	seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
 			clnt->cl_prog, clnt->cl_vers);
-	seq_printf(m, "address: %u.%u.%u.%u\n",
-			NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr));
-	seq_printf(m, "protocol: %s\n",
-			clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+	seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+	seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
 	return 0;
 }
 
-- 
GitLab


From c4efcb1d3e0bc76aeb9ca6301d19a5079893c6c9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:19 -0400
Subject: [PATCH 0844/1063] SUNRPC: Use "sockaddr_storage" for storing RPC
 client's remote peer address

IPv6 addresses are big (128 bits).  Now that all RPC client consumers treat
the addr field in rpc_xprt structs as opaque, and access it only via the
API calls, we can safely widen the field in the rpc_xprt struct to
accommodate larger addresses.

Test plan:
Compile kernel with CONFIG_NFS enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  3 ++-
 net/sunrpc/clnt.c           |  2 +-
 net/sunrpc/xprt.c           |  3 ++-
 net/sunrpc/xprtsock.c       | 15 ++++++++++-----
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 8372ab8fc9b58..fc05cfbd58056 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -134,7 +134,8 @@ struct rpc_xprt {
 	struct sock *		inet;		/* INET layer */
 
 	struct rpc_timeout	timeout;	/* timeout parms */
-	struct sockaddr_in	addr;		/* server address */
+	struct sockaddr_storage	addr;		/* server address */
+	size_t			addrlen;	/* size of server address */
 	int			prot;		/* IP protocol */
 
 	unsigned long		cong;		/* current congestion */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e5b19e348d88d..ff1e90fd81ab8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -550,7 +550,7 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
 	if (bytes > bufsize)
 		bytes = bufsize;
 	memcpy(buf, &clnt->cl_xprt->addr, bytes);
-	return sizeof(xprt->addr);
+	return xprt->addrlen;
 }
 EXPORT_SYMBOL(rpc_peeraddr);
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b45abd0743cb8..4987517cc74bb 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -896,7 +896,8 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
 	if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	xprt->addr = *ap;
+	memcpy(&xprt->addr, ap, sizeof(*ap));
+	xprt->addrlen = sizeof(*ap);
 
 	switch (proto) {
 	case IPPROTO_UDP:
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index cb8e6c34e12f7..17179aa4c2071 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -341,7 +341,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 
 	req->rq_xtime = jiffies;
 	status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
-				sizeof(xprt->addr), xdr, req->rq_bytes_sent);
+				xprt->addrlen, xdr, req->rq_bytes_sent);
 
 	dprintk("RPC:      xs_udp_send_request(%u) = %d\n",
 			xdr->len - req->rq_bytes_sent, status);
@@ -1027,8 +1027,11 @@ static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_forma
  */
 static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 {
+	struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
+
 	dprintk("RPC:      setting port for xprt %p to %u\n", xprt, port);
-	xprt->addr.sin_port = htons(port);
+
+	sap->sin_port = htons(port);
 }
 
 static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
@@ -1209,7 +1212,7 @@ static void xs_tcp_connect_worker(void *args)
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
 	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
-			sizeof(xprt->addr), O_NONBLOCK);
+			xprt->addrlen, O_NONBLOCK);
 	dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
 			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
 	if (status < 0) {
@@ -1359,6 +1362,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 {
 	size_t slot_table_size;
+	struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
 
 	xprt->max_reqs = xprt_udp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
@@ -1366,7 +1370,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	if (xprt->slot == NULL)
 		return -ENOMEM;
 
-	if (ntohs(xprt->addr.sin_port) != 0)
+	if (ntohs(addr->sin_port) != 0)
 		xprt_set_bound(xprt);
 	xprt->port = xs_get_random_port();
 
@@ -1405,6 +1409,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 {
 	size_t slot_table_size;
+	struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
 
 	xprt->max_reqs = xprt_tcp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
@@ -1412,7 +1417,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	if (xprt->slot == NULL)
 		return -ENOMEM;
 
-	if (ntohs(xprt->addr.sin_port) != 0)
+	if (ntohs(addr->sin_port) != 0)
 		xprt_set_bound(xprt);
 	xprt->port = xs_get_random_port();
 
-- 
GitLab


From 6ca948238724c945bd353f51d54ae7d285f3889f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:19 -0400
Subject: [PATCH 0845/1063] SUNRPC: Clean-up after previous patches.

Remove some unused macros related to accessing an RPC peer address

Test plan:
Compile kernel with CONFIG_NFS option enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c             | 1 -
 include/linux/nfs_fs.h      | 1 -
 include/linux/sunrpc/clnt.h | 3 ---
 3 files changed, 5 deletions(-)

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 38b0e8a1aec09..a516a01561b8b 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,7 +26,6 @@
 #define NLM_HOST_REBIND		(60 * HZ)
 #define NLM_HOST_EXPIRE		((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
 #define NLM_HOST_COLLECT	((nrhosts > NLM_HOST_MAX)? 120 * HZ :  60 * HZ)
-#define NLM_HOST_ADDR(sv)	(&(sv)->s_nlmclnt->cl_xprt->addr)
 
 static struct nlm_host *	nlm_hosts[NLM_HOST_NRHASH];
 static unsigned long		next_gc;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 51e9bd90dedcc..3b5b04193feeb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -216,7 +216,6 @@ static inline struct nfs_inode *NFS_I(struct inode *inode)
 #define NFS_SERVER(inode)		(NFS_SB(inode->i_sb))
 #define NFS_CLIENT(inode)		(NFS_SERVER(inode)->client)
 #define NFS_PROTO(inode)		(NFS_SERVER(inode)->nfs_client->rpc_ops)
-#define NFS_ADDR(inode)			(RPC_PEERADDR(NFS_CLIENT(inode)))
 #define NFS_COOKIEVERF(inode)		(NFS_I(inode)->cookieverf)
 #define NFS_READTIME(inode)		(NFS_I(inode)->read_cache_jiffies)
 #define NFS_CHANGE_ATTR(inode)		(NFS_I(inode)->change_attr)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index b7d47f018353e..a26d69583c7a3 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -89,9 +89,6 @@ struct rpc_procinfo {
 	char *			p_name;		/* name of procedure */
 };
 
-#define RPC_CONGESTED(clnt)	(RPCXPRT_CONGESTED((clnt)->cl_xprt))
-#define RPC_PEERADDR(clnt)	(&(clnt)->cl_xprt->addr)
-
 #ifdef __KERNEL__
 
 struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
-- 
GitLab


From c2866763b4029411d166040306691773c12d4caf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:20 -0400
Subject: [PATCH 0846/1063] SUNRPC: use sockaddr + size when creating remote
 transport endpoints

Prepare for more generic transport endpoint handling needed by transports
that might use different forms of addressing, such as IPv6.

Introduce a single function call to replace the two-call
xprt_create_proto/rpc_create_client API.  Define a new rpc_create_args
structure that allows callers to pass in remote endpoint addresses of
varying length.

Test-plan:
Compile kernel with CONFIG_NFS enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 22 +++++++++++
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/clnt.c           | 61 ++++++++++++++++++++++++++++++
 net/sunrpc/xprt.c           | 75 +++++++++++++++++++++++++++++++++++++
 4 files changed, 159 insertions(+)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index a26d69583c7a3..7817ba82f1b2e 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -97,6 +97,28 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
 struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
 				struct rpc_program *info,
 				u32 version, rpc_authflavor_t authflavor);
+
+struct rpc_create_args {
+	int			protocol;	/* transport protocol, e.g. IPPROTO_UDP */
+	struct sockaddr		*address;	/* remote peer address */
+	size_t			addrsize;	/* length of *address in bytes */
+	struct rpc_timeout	*timeout;	/* transport timeout parameters */
+	char			*servername;
+	struct rpc_program	*program;
+	u32			version;
+	rpc_authflavor_t	authflavor;
+	unsigned long		flags;		/* RPC_CLNT_CREATE_* bits below */
+};
+
+/* Values for "flags" field */
+#define RPC_CLNT_CREATE_HARDRTRY	(1UL << 0)	/* clear cl_softrtry */
+#define RPC_CLNT_CREATE_INTR		(1UL << 1)	/* set cl_intr */
+#define RPC_CLNT_CREATE_AUTOBIND	(1UL << 2)	/* set cl_autobind */
+#define RPC_CLNT_CREATE_ONESHOT		(1UL << 3)	/* set cl_oneshot */
+#define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 4)	/* clear xprt->resvport */
+#define RPC_CLNT_CREATE_NOPING		(1UL << 5)	/* skip the initial rpc_ping() */
+
+struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 				struct rpc_program *, int);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index fc05cfbd58056..bc80fcfdd892f 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -237,6 +237,7 @@ void			xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long
 /*
  * Generic internal transport functions
  */
+struct rpc_xprt *	xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms);
 void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
 int			xprt_reserve_xprt(struct rpc_task *task);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ff1e90fd81ab8..dbb93bdf6cc98 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -192,6 +192,67 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 	return ERR_PTR(err);
 }
 
+/**
+ * rpc_create - create an RPC client and transport with one call
+ * @args: rpc_clnt create argument structure
+ *
+ * Builds a transport from the protocol, address and timeout in @args,
+ * then an RPC client for the requested program on top of it.
+ *
+ * Unless RPC_CLNT_CREATE_NOPING is set, the server is pinged to find out
+ * whether it is up and supports this program and version.  Setting the
+ * flag skips the ping so asynchronous tasks can also use rpc_create.
+ *
+ * Returns the new client, or an ERR_PTR() value if any step failed.
+ */
+struct rpc_clnt *rpc_create(struct rpc_create_args *args)
+{
+	unsigned long flags = args->flags;
+	struct rpc_clnt *clnt;
+	struct rpc_xprt *xprt;
+	int status;
+
+	xprt = xprt_create_transport(args->protocol, args->address,
+					args->addrsize, args->timeout);
+	if (IS_ERR(xprt))
+		return (struct rpc_clnt *)xprt;
+
+	/*
+	 * Kernel RPC clients connect from a reserved port unless told
+	 * otherwise.  Unprivileged requesters lack CAP_NET_BIND_SERVICE,
+	 * but rpciod, which performs the connect, always has it.
+	 */
+	xprt->resvport = !(flags & RPC_CLNT_CREATE_NONPRIVPORT);
+
+	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
+		args->program->name, args->servername, xprt);
+
+	clnt = rpc_new_client(xprt, args->servername, args->program,
+				args->version, args->authflavor);
+	if (IS_ERR(clnt))
+		return clnt;
+
+	if (!(flags & RPC_CLNT_CREATE_NOPING)) {
+		status = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+		if (status != 0) {
+			rpc_shutdown_client(clnt);
+			return ERR_PTR(status);
+		}
+	}
+
+	/* soft retries are the default; the remaining options are opt-in */
+	clnt->cl_softrtry = !(flags & RPC_CLNT_CREATE_HARDRTRY);
+	if (flags & RPC_CLNT_CREATE_INTR)
+		clnt->cl_intr = 1;
+	if (flags & RPC_CLNT_CREATE_AUTOBIND)
+		clnt->cl_autobind = 1;
+	if (flags & RPC_CLNT_CREATE_ONESHOT)
+		clnt->cl_oneshot = 1;
+
+	return clnt;
+}
+EXPORT_SYMBOL(rpc_create);
+
 /**
  * Create an RPC client
  * @xprt - pointer to xprt struct
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4987517cc74bb..17f56cfe24127 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -887,6 +887,81 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
 	to->to_exponential = 0;
 }
 
+/**
+ * xprt_create_transport - create an RPC transport
+ * @proto: requested transport protocol (IPPROTO_UDP or IPPROTO_TCP)
+ * @ap: remote peer address, copied into the new transport
+ * @size: length of address; must not exceed sizeof(xprt->addr)
+ * @to: timeout parameters
+ *
+ */
+struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
+{
+	int result;
+	struct rpc_xprt	*xprt;
+	struct rpc_rqst	*req;
+
+	if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) {
+		dprintk("RPC:      xprt_create_transport: no memory\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	if (size <= sizeof(xprt->addr)) {
+		memcpy(&xprt->addr, ap, size);
+		xprt->addrlen = size;
+	} else {
+		kfree(xprt);
+		dprintk("RPC:      xprt_create_transport: address too large\n");
+		return ERR_PTR(-EBADF);
+	}
+
+	switch (proto) {
+	case IPPROTO_UDP:
+		result = xs_setup_udp(xprt, to);
+		break;
+	case IPPROTO_TCP:
+		result = xs_setup_tcp(xprt, to);
+		break;
+	default:
+		printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
+				proto);
+		result = -EIO;	/* xprt is freed by the result check below */
+	}
+	if (result) {
+		kfree(xprt);
+		dprintk("RPC:      xprt_create_transport: failed, %d\n", result);
+		return ERR_PTR(result);
+	}
+
+	spin_lock_init(&xprt->transport_lock);
+	spin_lock_init(&xprt->reserve_lock);
+
+	INIT_LIST_HEAD(&xprt->free);
+	INIT_LIST_HEAD(&xprt->recv);
+	INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt);
+	init_timer(&xprt->timer);
+	xprt->timer.function = xprt_init_autodisconnect;
+	xprt->timer.data = (unsigned long) xprt;
+	xprt->last_used = jiffies;
+	xprt->cwnd = RPC_INITCWND;
+
+	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
+	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
+	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
+	rpc_init_wait_queue(&xprt->resend, "xprt_resend");
+	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
+
+	/* initialize free list */
+	for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
+		list_add(&req->rq_list, &xprt->free);
+
+	xprt_init_xid(xprt);
+
+	dprintk("RPC:      created transport %p with %u slots\n", xprt,
+			xprt->max_reqs);
+
+	return xprt;
+}
+
 static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
 {
 	int result;
-- 
GitLab


From e1ec78928b4d5a31b7a847e65c6009f4229f7c0f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:20 -0400
Subject: [PATCH 0847/1063] LOCKD: Convert to use new rpc_create() API

Replace xprt_create_proto/rpc_create_client with new rpc_create()
interface in the Network Lock Manager.

Note that the semantics of NLM transports is now "hard" instead of "soft"
to provide a better guarantee that lock requests will get to the server.

Test plan:
Repeated runs of Connectathon locking suite.  Check network trace to ensure
NLM requests are working correctly.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c | 50 ++++++++++++++++++++++++++-----------------------
 fs/lockd/mon.c  | 41 +++++++++++++++++-----------------------
 2 files changed, 44 insertions(+), 47 deletions(-)

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index a516a01561b8b..703fb038c813c 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -166,7 +166,6 @@ struct rpc_clnt *
 nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
-	struct rpc_xprt	*xprt;
 
 	dprintk("lockd: nlm_bind_host(%08x)\n",
 			(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
@@ -178,7 +177,6 @@ nlm_bind_host(struct nlm_host *host)
 	 * RPC rebind is required
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
-		xprt = clnt->cl_xprt;
 		if (time_after_eq(jiffies, host->h_nextrebind)) {
 			rpc_force_rebind(clnt);
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -186,31 +184,37 @@ nlm_bind_host(struct nlm_host *host)
 					host->h_nextrebind - jiffies);
 		}
 	} else {
-		xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL);
-		if (IS_ERR(xprt))
-			goto forgetit;
-
-		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
-		xprt->resvport = 1;	/* NLM requires a reserved port */
-
-		/* Existing NLM servers accept AUTH_UNIX only */
-		clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
-					host->h_version, RPC_AUTH_UNIX);
-		if (IS_ERR(clnt))
-			goto forgetit;
-		clnt->cl_autobind = 1;	/* turn on pmap queries */
-		clnt->cl_softrtry = 1; /* All queries are soft */
-
-		host->h_rpcclnt = clnt;
+		unsigned long increment = nlmsvc_timeout * HZ;
+		struct rpc_timeout timeparms = {
+			.to_initval	= increment,
+			.to_increment	= increment,
+			.to_maxval	= increment * 6UL,
+			.to_retries	= 5U,
+		};
+		struct rpc_create_args args = {
+			.protocol	= host->h_proto,
+			.address	= (struct sockaddr *)&host->h_addr,
+			.addrsize	= sizeof(host->h_addr),
+			.timeout	= &timeparms,
+			.servername	= host->h_name,
+			.program	= &nlm_program,
+			.version	= host->h_version,
+			.authflavor	= RPC_AUTH_UNIX,
+			.flags		= (RPC_CLNT_CREATE_HARDRTRY |
+					   RPC_CLNT_CREATE_AUTOBIND),
+		};
+
+		clnt = rpc_create(&args);
+		if (!IS_ERR(clnt))
+			host->h_rpcclnt = clnt;
+		else {
+			printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
+			clnt = NULL;
+		}
 	}
 
 	mutex_unlock(&host->h_mutex);
 	return clnt;
-
-forgetit:
-	printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
-	mutex_unlock(&host->h_mutex);
-	return NULL;
 }
 
 /*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3fc683f46b3e1..5954dcb497e4e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -109,30 +109,23 @@ nsm_unmonitor(struct nlm_host *host)
 static struct rpc_clnt *
 nsm_create(void)
 {
-	struct rpc_xprt		*xprt;
-	struct rpc_clnt		*clnt;
-	struct sockaddr_in	sin;
-
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	sin.sin_port = 0;
-
-	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-	xprt->resvport = 1;	/* NSM requires a reserved port */
-
-	clnt = rpc_create_client(xprt, "localhost",
-				&nsm_program, SM_VERSION,
-				RPC_AUTH_NULL);
-	if (IS_ERR(clnt))
-		goto out_err;
-	clnt->cl_softrtry = 1;
-	clnt->cl_oneshot  = 1;
-	return clnt;
-
-out_err:
-	return clnt;
+	struct sockaddr_in	sin = {
+		.sin_family	= AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		.sin_port	= 0,
+	};
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_UDP,
+		.address	= (struct sockaddr *)&sin,
+		.addrsize	= sizeof(sin),
+		.servername	= "localhost",
+		.program	= &nsm_program,
+		.version	= SM_VERSION,
+		.authflavor	= RPC_AUTH_NULL,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT),
+	};
+
+	return rpc_create(&args);
 }
 
 /*
-- 
GitLab


From 41877d207c46f050b709f452703ade20c3b4a096 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:20 -0400
Subject: [PATCH 0848/1063] NFS: Convert NFS client to use new rpc_create() API

Convert NFS client mount logic to use rpc_create() instead of the old
xprt_create_proto/rpc_create_client API.

Test plan:
Mount stress tests.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 471d975e63c38..12941a8a6d752 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -401,8 +401,17 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 						rpc_authflavor_t flavor)
 {
 	struct rpc_timeout	timeparms;
-	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
+	struct rpc_create_args args = {
+		.protocol	= proto,
+		.address	= (struct sockaddr *)&clp->cl_addr,
+		.addrsize	= sizeof(clp->cl_addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_hostname,
+		.program	= &nfs_program,
+		.version	= clp->rpc_ops->version,
+		.authflavor	= flavor,
+	};
 
 	if (!IS_ERR(clp->cl_rpcclient))
 		return 0;
@@ -411,27 +420,13 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 	clp->retrans_timeo = timeparms.to_initval;
 	clp->retrans_count = timeparms.to_retries;
 
-	/* create transport and client */
-	xprt = xprt_create_proto(proto, &clp->cl_addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("%s: cannot create RPC transport. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		return PTR_ERR(xprt);
-	}
-
-	/* Bind to a reserved port! */
-	xprt->resvport = 1;
-	/* Create the client RPC handle */
-	clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program,
-				 clp->rpc_ops->version, RPC_AUTH_UNIX);
+	clnt = rpc_create(&args);
 	if (IS_ERR(clnt)) {
 		dprintk("%s: cannot create RPC client. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(clnt));
 		return PTR_ERR(clnt);
 	}
 
-	clnt->cl_intr     = 1;
-	clnt->cl_softrtry = 1;
 	clp->cl_rpcclient = clnt;
 	return 0;
 }
-- 
GitLab


From ae5c79476f36512d1100e162606bb5691f2cce5a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:21 -0400
Subject: [PATCH 0849/1063] NFSD: Convert NFS server callback logic to use new
 rpc_create API

Convert the xprt_create_proto/rpc_create_client call in the NFS server
callback functions to use the new rpc_create() API.

Test plan:
NFSv4 delegation functionality tests.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4callback.c | 66 +++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 39 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 54b37b1d2e3a9..8583d99ee7407 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -375,16 +375,28 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 {
 	struct sockaddr_in	addr;
 	struct nfs4_callback    *cb = &clp->cl_callback;
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt *	xprt;
+	struct rpc_timeout	timeparms = {
+		.to_initval	= (NFSD_LEASE_TIME/4) * HZ,
+		.to_retries	= 5,
+		.to_maxval	= (NFSD_LEASE_TIME/2) * HZ,
+		.to_exponential	= 1,
+	};
 	struct rpc_program *	program = &cb->cb_program;
-	struct rpc_stat *	stat = &cb->cb_stat;
-	struct rpc_clnt *	clnt;
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_TCP,
+		.address	= (struct sockaddr *)&addr,
+		.addrsize	= sizeof(addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_name.data,
+		.program	= program,
+		.version	= nfs_cb_version[1]->number,
+		.authflavor	= RPC_AUTH_UNIX,	/* XXX: need AUTH_GSS... */
+		.flags		= (RPC_CLNT_CREATE_NOPING),
+	};
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
-	char                    hostname[32];
 	int status;
 
 	if (atomic_read(&cb->cb_set))
@@ -396,51 +408,27 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	addr.sin_port = htons(cb->cb_port);
 	addr.sin_addr.s_addr = htonl(cb->cb_addr);
 
-	/* Initialize timeout */
-	timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
-	timeparms.to_retries = 0;
-	timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
-	timeparms.to_exponential = 1;
-
-	/* Create RPC transport */
-	xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("NFSD: couldn't create callback transport!\n");
-		goto out_err;
-	}
-
 	/* Initialize rpc_program */
 	program->name = "nfs4_cb";
 	program->number = cb->cb_prog;
 	program->nrvers = ARRAY_SIZE(nfs_cb_version);
 	program->version = nfs_cb_version;
-	program->stats = stat;
+	program->stats = &cb->cb_stat;
 
 	/* Initialize rpc_stat */
-	memset(stat, 0, sizeof(struct rpc_stat));
-	stat->program = program;
-
-	/* Create RPC client
- 	 *
-	 * XXX AUTH_UNIX only - need AUTH_GSS....
-	 */
-	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-	clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
+	memset(program->stats, 0, sizeof(cb->cb_stat));
+	program->stats->program = program;
+
+	/* Create RPC client */
+	cb->cb_client = rpc_create(&args);
+	if (!cb->cb_client) {
 		dprintk("NFSD: couldn't create callback client\n");
 		goto out_err;
 	}
-	clnt->cl_intr = 0;
-	clnt->cl_softrtry = 1;
 
 	/* Kick rpciod, put the call on the wire. */
-
-	if (rpciod_up() != 0) {
-		dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+	if (rpciod_up() != 0)
 		goto out_clnt;
-	}
-
-	cb->cb_client = clnt;
 
 	/* the task holds a reference to the nfs4_client struct */
 	atomic_inc(&clp->cl_count);
@@ -448,7 +436,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
 		goto out_rpciod;
-	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
@@ -462,7 +450,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	rpciod_down();
 	cb->cb_client = NULL;
 out_clnt:
-	rpc_shutdown_client(clnt);
+	rpc_shutdown_client(cb->cb_client);
 out_err:
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
-- 
GitLab


From 9e1968c58d72c4b85d8a69bda1e194f9701fb224 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:21 -0400
Subject: [PATCH 0850/1063] SUNRPC: Convert RPC portmapper to use new
 rpc_create() API

Replace the xprt_create_proto/rpc_create_client calls in pmap_clnt.c with
the new rpc_create() API.

Test plan:
Repeated runs of Connectathon locking suite.  Check network trace for
proper PMAP calls and replies.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/pmap_clnt.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 523f0e825dea9..f476f4df0f480 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -281,25 +281,22 @@ int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 
 static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
 {
-	struct rpc_xprt	*xprt;
-	struct rpc_clnt	*clnt;
-
-	xprt = xprt_create_proto(proto, srvaddr, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-	xprt->ops->set_port(xprt, RPC_PMAP_PORT);
-	xprt_set_bound(xprt);
+	struct rpc_create_args args = {
+		.protocol	= proto,
+		.address	= (struct sockaddr *)srvaddr,
+		.addrsize	= sizeof(*srvaddr),
+		.servername	= hostname,
+		.program	= &pmap_program,
+		.version	= RPC_PMAP_VERSION,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+				   RPC_CLNT_CREATE_NOPING),
+	};
+
+	srvaddr->sin_port = htons(RPC_PMAP_PORT);
 	if (!privileged)
-		xprt->resvport = 0;
-
-	clnt = rpc_new_client(xprt, hostname,
-				&pmap_program, RPC_PMAP_VERSION,
-				RPC_AUTH_UNIX);
-	if (!IS_ERR(clnt)) {
-		clnt->cl_softrtry = 1;
-		clnt->cl_oneshot  = 1;
-	}
-	return clnt;
+		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+	return rpc_create(&args);
 }
 
 /*
-- 
GitLab


From ff9aa5e56df60cc8565a93cc868fe25ae3f20e49 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:21 -0400
Subject: [PATCH 0851/1063] SUNRPC: Eliminate xprt_create_proto and
 rpc_create_client

The two-function-call API for creating a new RPC client is now obsolete.
Remove it.

Also, remove an unnecessary check to see whether the caller is capable of
using privileged network services.  The kernel RPC client always uses a
privileged ephemeral port by default; callers are responsible for checking
the authority of users to make use of any RPC service, or for specifying
that a nonprivileged port is acceptable.

Test plan:
Repeated runs of Connectathon locking suite.  Check network trace to ensure
correctness of NLM requests and replies.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  7 ----
 include/linux/sunrpc/xprt.h |  1 -
 net/sunrpc/clnt.c           | 42 +-------------------
 net/sunrpc/sunrpc_syms.c    |  3 --
 net/sunrpc/xprt.c           | 79 -------------------------------------
 net/sunrpc/xprtsock.c       |  2 -
 6 files changed, 1 insertion(+), 133 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 7817ba82f1b2e..f6d1d646ce05c 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -91,13 +91,6 @@ struct rpc_procinfo {
 
 #ifdef __KERNEL__
 
-struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
-				struct rpc_program *info,
-				u32 version, rpc_authflavor_t authflavor);
-struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
-				struct rpc_program *info,
-				u32 version, rpc_authflavor_t authflavor);
-
 struct rpc_create_args {
 	int			protocol;
 	struct sockaddr		*address;
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index bc80fcfdd892f..de4efea7c856d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -231,7 +231,6 @@ struct rpc_xprt {
 /*
  * Transport operations used by ULPs
  */
-struct rpc_xprt *	xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to);
 void			xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr);
 
 /*
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index dbb93bdf6cc98..428704dd5b3ea 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
 	}
 }
 
-/*
- * Create an RPC client
- * FIXME: This should also take a flags argument (as in task->tk_flags).
- * It's called (among others) from pmap_create_client, which may in
- * turn be called by an async task. In this case, rpciod should not be
- * made to sleep too long.
- */
-struct rpc_clnt *
-rpc_new_client(struct rpc_xprt *xprt, char *servname,
-		  struct rpc_program *program, u32 vers,
-		  rpc_authflavor_t flavor)
+static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
 {
 	struct rpc_version	*version;
 	struct rpc_clnt		*clnt = NULL;
@@ -253,36 +243,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 }
 EXPORT_SYMBOL(rpc_create);
 
-/**
- * Create an RPC client
- * @xprt - pointer to xprt struct
- * @servname - name of server
- * @info - rpc_program
- * @version - rpc_program version
- * @authflavor - rpc_auth flavour to use
- *
- * Creates an RPC client structure, then pings the server in order to
- * determine if it is up, and if it supports this program and version.
- *
- * This function should never be called by asynchronous tasks such as
- * the portmapper.
- */
-struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
-		struct rpc_program *info, u32 version, rpc_authflavor_t authflavor)
-{
-	struct rpc_clnt *clnt;
-	int err;
-	
-	clnt = rpc_new_client(xprt, servname, info, version, authflavor);
-	if (IS_ERR(clnt))
-		return clnt;
-	err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
-	if (err == 0)
-		return clnt;
-	rpc_shutdown_client(clnt);
-	return ERR_PTR(err);
-}
-
 /*
  * This function clones the RPC client structure. It allows us to share the
  * same transport while varying parameters such as the authentication
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f38f939ce95ff..26c0531d7e253 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -36,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status);
 EXPORT_SYMBOL(rpc_release_task);
 
 /* RPC client functions */
-EXPORT_SYMBOL(rpc_create_client);
-EXPORT_SYMBOL(rpc_new_client);
 EXPORT_SYMBOL(rpc_clone_client);
 EXPORT_SYMBOL(rpc_bind_new_program);
 EXPORT_SYMBOL(rpc_destroy_client);
@@ -57,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall);
 EXPORT_SYMBOL(rpc_mkpipe);
 
 /* Client transport */
-EXPORT_SYMBOL(xprt_create_proto);
 EXPORT_SYMBOL(xprt_set_timeout);
 
 /* Client credential cache */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 17f56cfe24127..e4f64fb58ff27 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -962,85 +962,6 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
 	return xprt;
 }
 
-static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
-{
-	int result;
-	struct rpc_xprt	*xprt;
-	struct rpc_rqst	*req;
-
-	if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	memcpy(&xprt->addr, ap, sizeof(*ap));
-	xprt->addrlen = sizeof(*ap);
-
-	switch (proto) {
-	case IPPROTO_UDP:
-		result = xs_setup_udp(xprt, to);
-		break;
-	case IPPROTO_TCP:
-		result = xs_setup_tcp(xprt, to);
-		break;
-	default:
-		printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
-				proto);
-		result = -EIO;
-		break;
-	}
-	if (result) {
-		kfree(xprt);
-		return ERR_PTR(result);
-	}
-
-	spin_lock_init(&xprt->transport_lock);
-	spin_lock_init(&xprt->reserve_lock);
-
-	INIT_LIST_HEAD(&xprt->free);
-	INIT_LIST_HEAD(&xprt->recv);
-	INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt);
-	init_timer(&xprt->timer);
-	xprt->timer.function = xprt_init_autodisconnect;
-	xprt->timer.data = (unsigned long) xprt;
-	xprt->last_used = jiffies;
-	xprt->cwnd = RPC_INITCWND;
-
-	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
-	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
-	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
-	rpc_init_wait_queue(&xprt->resend, "xprt_resend");
-	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
-
-	/* initialize free list */
-	for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
-		list_add(&req->rq_list, &xprt->free);
-
-	xprt_init_xid(xprt);
-
-	dprintk("RPC:      created transport %p with %u slots\n", xprt,
-			xprt->max_reqs);
-	
-	return xprt;
-}
-
-/**
- * xprt_create_proto - create an RPC client transport
- * @proto: requested transport protocol
- * @sap: remote peer's address
- * @to: timeout parameters for new transport
- *
- */
-struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
-{
-	struct rpc_xprt	*xprt;
-
-	xprt = xprt_setup(proto, sap, to);
-	if (IS_ERR(xprt))
-		dprintk("RPC:      xprt_create_proto failed\n");
-	else
-		dprintk("RPC:      xprt_create_proto created xprt %p\n", xprt);
-	return xprt;
-}
-
 /**
  * xprt_destroy - destroy an RPC transport, killing off all requests.
  * @xprt: transport to destroy
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 17179aa4c2071..0b84fab68d7e9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1376,7 +1376,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 
 	xprt->prot = IPPROTO_UDP;
 	xprt->tsh_size = 0;
-	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
@@ -1423,7 +1422,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 
 	xprt->prot = IPPROTO_TCP;
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
-	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
-- 
GitLab


From b86acd501a34227e0ed2b2d54dc8002c1701ce17 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:22 -0400
Subject: [PATCH 0852/1063] SUNRPC: export new RPC client functions with _GPL

This patch is optional.

It has been suggested that the RPC client internal functions used by upper
layer protocols (such as NFS) be exported via EXPORT_SYMBOL_GPL.  This
patch does that.

Test plan:
Compile kernel with CONFIG_NFS enabled as a module.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 428704dd5b3ea..87efcd207f233 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -241,7 +241,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 
 	return clnt;
 }
-EXPORT_SYMBOL(rpc_create);
+EXPORT_SYMBOL_GPL(rpc_create);
 
 /*
  * This function clones the RPC client structure. It allows us to share the
@@ -573,7 +573,7 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
 	memcpy(buf, &clnt->cl_xprt->addr, bytes);
 	return xprt->addrlen;
 }
-EXPORT_SYMBOL(rpc_peeraddr);
+EXPORT_SYMBOL_GPL(rpc_peeraddr);
 
 /**
  * rpc_peeraddr2str - return remote peer address in printable format
@@ -586,7 +586,7 @@ char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
 	struct rpc_xprt *xprt = clnt->cl_xprt;
 	return xprt->ops->print_addr(xprt, format);
 }
-EXPORT_SYMBOL(rpc_peeraddr2str);
+EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
 
 void
 rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
@@ -608,7 +608,7 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
 {
 	return clnt->cl_xprt->max_payload;
 }
-EXPORT_SYMBOL(rpc_max_payload);
+EXPORT_SYMBOL_GPL(rpc_max_payload);
 
 /**
  * rpc_force_rebind - force transport to check that remote port is unchanged
@@ -620,7 +620,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
 	if (clnt->cl_autobind)
 		xprt_clear_bound(clnt->cl_xprt);
 }
-EXPORT_SYMBOL(rpc_force_rebind);
+EXPORT_SYMBOL_GPL(rpc_force_rebind);
 
 /*
  * Restart an (async) RPC call. Usually called from within the
-- 
GitLab


From d3db90e270791b21cd00d3c094884bffa907cc9e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:22 -0400
Subject: [PATCH 0853/1063] NFS: remove a no-longer-needed error check in
 nfs_symlink()

In the early days of NFS, there was no duplicate reply cache on the server.
Thus retransmitted non-idempotent requests often found that the request had
already completed on the server.  To avoid passing an unanticipated return
code to unsuspecting applications, NFS clients would often shunt error
codes that implied the request had been retried but already completed.

Thanks to NFS over TCP, duplicate reply caches on the server, and network
performance and reliability improvements, it is safe to remove such checks.

Test plan:
None.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9b496ef4abeae..084e8cb41c84f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1476,14 +1476,10 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
 	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
 					  &attr, &sym_fh, &sym_attr);
 	nfs_end_data_update(dir);
-	if (!error) {
+	if (!error)
 		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
-	} else {
-		if (error == -EEXIST)
-			printk("nfs_proc_symlink: %s/%s already exists??\n",
-			       dentry->d_parent->d_name.name, dentry->d_name.name);
+	else
 		d_drop(dentry);
-	}
 	unlock_kernel();
 	return error;
 }
-- 
GitLab


From 4f390c152bc87165da4b1f5b7d870b46fb106d4e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:22 -0400
Subject: [PATCH 0854/1063] NFS: Fix double d_drop in nfs_instantiate() error
 path

If the LOOKUP or GETATTR in nfs_instantiate fails, nfs_instantiate will do a
d_drop before returning.  But some callers already do a d_drop in the case
of an error return.  Make certain we do only one d_drop in all error paths.

This issue was introduced because over time, the symlink proc API diverged
slightly from the create/mkdir/mknod proc API.  To prevent other coding
mistakes of this type, change the symlink proc API to be more like
create/mkdir/mknod and move the nfs_instantiate call into the symlink proc
routines so it is used in exactly the same way for create, mkdir, mknod,
and symlink.

Test plan:
Connectathon, all versions of NFS.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            | 16 ++++------------
 fs/nfs/nfs3proc.c       | 26 ++++++++++++++++----------
 fs/nfs/nfs4proc.c       | 31 ++++++++++++++++---------------
 fs/nfs/proc.c           | 29 +++++++++++++++++++++--------
 include/linux/nfs_xdr.h |  5 ++---
 5 files changed, 59 insertions(+), 48 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 084e8cb41c84f..affd3ae52e551 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1147,23 +1147,20 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 		struct inode *dir = dentry->d_parent->d_inode;
 		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 		if (error)
-			goto out_err;
+			return error;
 	}
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
 		error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
 		if (error < 0)
-			goto out_err;
+			return error;
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	error = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		goto out_err;
+		return error;
 	d_instantiate(dentry, inode);
 	return 0;
-out_err:
-	d_drop(dentry);
-	return error;
 }
 
 /*
@@ -1448,8 +1445,6 @@ static int
 nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
 	struct iattr attr;
-	struct nfs_fattr sym_attr;
-	struct nfs_fh sym_fh;
 	struct qstr qsymname;
 	int error;
 
@@ -1473,12 +1468,9 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
-					  &attr, &sym_fh, &sym_attr);
+	error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr);
 	nfs_end_data_update(dir);
 	if (!error)
-		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
-	else
 		d_drop(dentry);
 	unlock_kernel();
 	return error;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 9e8258ece6fd8..d85ac427c3263 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		  struct iattr *sattr, struct nfs_fh *fhandle,
-		  struct nfs_fattr *fattr)
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
+		  struct iattr *sattr)
 {
-	struct nfs_fattr	dir_attr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_attr;
 	struct nfs3_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
 		.topath		= path->name,
 		.tolen		= path->len,
 		.sattr		= sattr
 	};
 	struct nfs3_diropres	res = {
 		.dir_attr	= &dir_attr,
-		.fh		= fhandle,
-		.fattr		= fattr
+		.fh		= &fhandle,
+		.fattr		= &fattr
 	};
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK],
@@ -571,11 +571,17 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 
 	if (path->len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+
+	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
+			path->name);
 	nfs_fattr_init(&dir_attr);
-	nfs_fattr_init(fattr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fhandle, &fattr);
+out:
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a825547e8214f..2d18eac6bee51 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2084,24 +2084,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
 	return err;
 }
 
-static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct qstr *path, struct iattr *sattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
-	struct nfs_fattr dir_fattr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
-		.name = name,
+		.name = &dentry->d_name,
 		.attrs = sattr,
 		.ftype = NF4LNK,
 		.bitmask = server->attr_bitmask,
 	};
 	struct nfs4_create_res res = {
 		.server = server,
-		.fh = fhandle,
-		.fattr = fattr,
+		.fh = &fhandle,
+		.fattr = &fattr,
 		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
@@ -2113,27 +2113,28 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 
 	if (path->len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
+
 	arg.u.symlink = path;
-	nfs_fattr_init(fattr);
+	nfs_fattr_init(&fattr);
 	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	if (!status)
+	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
-	nfs_post_op_update_inode(dir, res.dir_fattr);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
 	return status;
 }
 
-static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct qstr *path, struct iattr *sattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_symlink(dir, name, path, sattr,
-					fhandle, fattr),
+				_nfs4_proc_symlink(dir, dentry, path, sattr),
 				&exception);
 	} while (exception.retry);
 	return err;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 5a8b9407ee9a8..0b507bf0f330a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -425,14 +425,15 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		 struct iattr *sattr, struct nfs_fh *fhandle,
-		 struct nfs_fattr *fattr)
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
+		 struct iattr *sattr)
 {
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
 	struct nfs_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
 		.topath		= path->name,
 		.tolen		= path->len,
 		.sattr		= sattr
@@ -445,11 +446,23 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 
 	if (path->len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	nfs_fattr_init(fattr);
-	fhandle->size = 0;
+
+	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
+			path->name);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
+
+	/*
+	 * V2 SYMLINK requests don't return any attributes.  Setting the
+	 * filehandle size to zero indicates to nfs_instantiate that it
+	 * should fill in the data with a LOOKUP call on the wire.
+	 */
+	if (status == 0) {
+		nfs_fattr_init(&fattr);
+		fhandle.size = 0;
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
+
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0f33e621892f5..ddf5d75e97a22 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -793,9 +793,8 @@ struct nfs_rpc_ops {
 	int	(*rename)  (struct inode *, struct qstr *,
 			    struct inode *, struct qstr *);
 	int	(*link)    (struct inode *, struct inode *, struct qstr *);
-	int	(*symlink) (struct inode *, struct qstr *, struct qstr *,
-			    struct iattr *, struct nfs_fh *,
-			    struct nfs_fattr *);
+	int	(*symlink) (struct inode *, struct dentry *, struct qstr *,
+			    struct iattr *);
 	int	(*mkdir)   (struct inode *, struct dentry *, struct iattr *);
 	int	(*rmdir)   (struct inode *, struct qstr *);
 	int	(*readdir) (struct dentry *, struct rpc_cred *,
-- 
GitLab


From 873101b33776780d32610fc4c90c7358a5e98f51 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:23 -0400
Subject: [PATCH 0855/1063] NFS: copy symlinks into page cache before sending
 NFS SYMLINK request

Currently the NFS client does not cache symlinks it creates.  They get
cached only when the NFS client reads them back from the server.

Copy the symlink into the page cache before sending it.

Test plan:
Connectathon, all NFS versions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 86 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index affd3ae52e551..b483e5d206cb7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -30,6 +30,7 @@
 #include <linux/nfs_mount.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/pagevec.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
 
@@ -1441,39 +1442,88 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	return error;
 }
 
-static int
-nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+/*
+ * To create a symbolic link, most file systems instantiate a new inode,
+ * add a page to it containing the path, then write it out to the disk
+ * using prepare_write/commit_write.
+ *
+ * Unfortunately the NFS client can't create the in-core inode first
+ * because it needs a file handle to create an in-core inode (see
+ * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
+ * symlink request has completed on the server.
+ *
+ * So instead we allocate a raw page, copy the symname into it, then do
+ * the SYMLINK request with the page as the buffer.  If it succeeds, we
+ * now have a new file handle and can instantiate an in-core NFS inode
+ * and move the raw page into its mapping.
+ */
+static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
+	struct pagevec lru_pvec;
+	struct page *page;
+	char *kaddr;
 	struct iattr attr;
-	struct qstr qsymname;
+	unsigned int pathlen = strlen(symname);
+	struct qstr qsymname = {
+		.name	= symname,
+		.len	= pathlen,
+	};
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name, symname);
 
-#ifdef NFS_PARANOIA
-if (dentry->d_inode)
-printk("nfs_proc_symlink: %s/%s not negative!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-	/*
-	 * Fill in the sattr for the call.
- 	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
-	 */
-	attr.ia_valid = ATTR_MODE;
-	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	if (pathlen > PAGE_SIZE)
+		return -ENAMETOOLONG;
 
-	qsymname.name = symname;
-	qsymname.len  = strlen(symname);
+	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	attr.ia_valid = ATTR_MODE;
 
 	lock_kernel();
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		unlock_kernel();
+		return -ENOMEM;
+	}
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	memcpy(kaddr, symname, pathlen);
+	if (pathlen < PAGE_SIZE)
+		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	/* XXX: eventually this will pass in {page, pathlen},
+	 *	instead of qsymname; need XDR changes for that */
 	nfs_begin_data_update(dir);
 	error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr);
 	nfs_end_data_update(dir);
-	if (!error)
+	if (error != 0) {
+		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+			dir->i_sb->s_id, dir->i_ino,
+			dentry->d_name.name, symname, error);
 		d_drop(dentry);
+		__free_page(page);
+		unlock_kernel();
+		return error;
+	}
+
+	/*
+	 * No big deal if we can't add this page to the page cache here.
+	 * READLINK will get the missing page from the server if needed.
+	 */
+	pagevec_init(&lru_pvec, 0);
+	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+							GFP_KERNEL)) {
+		if (!pagevec_add(&lru_pvec, page))
+			__pagevec_lru_add(&lru_pvec);
+		SetPageUptodate(page);
+		unlock_page(page);
+	} else
+		__free_page(page);
+
 	unlock_kernel();
-	return error;
+	return 0;
 }
 
 static int 
-- 
GitLab


From 94a6d75320b3681e6e728b70e18bd186cb55e682 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:23 -0400
Subject: [PATCH 0856/1063] NFS: Use cached page as buffer for NFS symlink
 requests

Now that we have a copy of the symlink path in the page cache, we can pass
a struct page down to the XDR routines instead of a string buffer.

Test plan:
Connectathon, all NFS versions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            |  8 +-------
 fs/nfs/nfs2xdr.c        | 21 ++++++++++++++++++---
 fs/nfs/nfs3proc.c       | 14 +++++++-------
 fs/nfs/nfs3xdr.c        |  7 +++++--
 fs/nfs/nfs4proc.c       | 12 +++++++-----
 fs/nfs/nfs4xdr.c        |  8 ++++----
 fs/nfs/proc.c           | 14 +++++++-------
 include/linux/nfs_xdr.h | 17 ++++++++++-------
 8 files changed, 59 insertions(+), 42 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b483e5d206cb7..51328ae640dd4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1464,10 +1464,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	char *kaddr;
 	struct iattr attr;
 	unsigned int pathlen = strlen(symname);
-	struct qstr qsymname = {
-		.name	= symname,
-		.len	= pathlen,
-	};
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
@@ -1493,10 +1489,8 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
 	kunmap_atomic(kaddr, KM_USER0);
 
-	/* XXX: eventually this will pass in {page, pathlen},
-	 *	instead of qsymname; need XDR changes for that */
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr);
+	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
 	nfs_end_data_update(dir);
 	if (error != 0) {
 		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 67391eef6b935..b49501fc0a798 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -51,7 +51,7 @@
 #define NFS_createargs_sz	(NFS_diropargs_sz+NFS_sattr_sz)
 #define NFS_renameargs_sz	(NFS_diropargs_sz+NFS_diropargs_sz)
 #define NFS_linkargs_sz		(NFS_fhandle_sz+NFS_diropargs_sz)
-#define NFS_symlinkargs_sz	(NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_symlinkargs_sz	(NFS_diropargs_sz+1+NFS_sattr_sz)
 #define NFS_readdirargs_sz	(NFS_fhandle_sz+2)
 
 #define NFS_attrstat_sz		(1+NFS_fattr_sz)
@@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
 static int
 nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
 {
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	size_t pad;
+
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+	xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+
+	/*
+	 * xdr_encode_pages may have added a few bytes to ensure the
+	 * pathname ends on a 4-byte boundary.  Start encoding the
+	 * attributes after the pad bytes.
+	 */
+	pad = sndbuf->tail->iov_len;
+	if (pad > 0)
+		p++;
 	p = xdr_encode_sattr(p, args->sattr);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d85ac427c3263..f8688eaa0001f 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -544,8 +544,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
-		  struct iattr *sattr)
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		  unsigned int len, struct iattr *sattr)
 {
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr, dir_attr;
@@ -553,8 +553,8 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 		.fromfh		= NFS_FH(dir),
 		.fromname	= dentry->d_name.name,
 		.fromlen	= dentry->d_name.len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct nfs3_diropres	res = {
@@ -569,11 +569,11 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 	};
 	int			status;
 
-	if (path->len > NFS3_MAXPATHLEN)
+	if (len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
-	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
-			path->name);
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0250269e9753a..16556fa4effb1 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -56,7 +56,7 @@
 #define NFS3_writeargs_sz	(NFS3_fh_sz+5)
 #define NFS3_createargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
 #define NFS3_mkdirargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
-#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
+#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+1+NFS3_sattr_sz)
 #define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
 #define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
 #define NFS3_linkargs_sz		(NFS3_fh_sz+NFS3_diropargs_sz)
@@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
 	p = xdr_encode_sattr(p, args->sattr);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Copy the page */
+	xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2d18eac6bee51..7f60beb40df32 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2085,7 +2085,7 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
 }
 
 static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
-		struct qstr *path, struct iattr *sattr)
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_fh fhandle;
@@ -2111,10 +2111,11 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
 	};
 	int			status;
 
-	if (path->len > NFS4_MAXPATHLEN)
+	if (len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
-	arg.u.symlink = path;
+	arg.u.symlink.pages = &page;
+	arg.u.symlink.len = len;
 	nfs_fattr_init(&fattr);
 	nfs_fattr_init(&dir_fattr);
 	
@@ -2128,13 +2129,14 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
 }
 
 static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
-		struct qstr *path, struct iattr *sattr)
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_symlink(dir, dentry, path, sattr),
+				_nfs4_proc_symlink(dir, dentry, page,
+							len, sattr),
 				&exception);
 	} while (exception.retry);
 	return err;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 99926067eca45..3dd413f52da11 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -128,7 +128,7 @@ static int nfs4_stat_to_errno(int);
 #define decode_link_maxsz	(op_decode_hdr_maxsz + 5)
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
-				nfs4_path_maxsz + \
+				1 + \
 				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
@@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
 
 	switch (create->ftype) {
 	case NF4LNK:
-		RESERVE_SPACE(4 + create->u.symlink->len);
-		WRITE32(create->u.symlink->len);
-		WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+		RESERVE_SPACE(4);
+		WRITE32(create->u.symlink.len);
+		xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
 		break;
 
 	case NF4BLK: case NF4CHR:
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0b507bf0f330a..630e50647bbbf 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -425,8 +425,8 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
-		 struct iattr *sattr)
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		 unsigned int len, struct iattr *sattr)
 {
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr;
@@ -434,8 +434,8 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 		.fromfh		= NFS_FH(dir),
 		.fromname	= dentry->d_name.name,
 		.fromlen	= dentry->d_name.len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct rpc_message msg = {
@@ -444,11 +444,11 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 	};
 	int			status;
 
-	if (path->len > NFS2_MAXPATHLEN)
+	if (len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
-	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
-			path->name);
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ddf5d75e97a22..dc5397d9d23cf 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -358,8 +358,8 @@ struct nfs_symlinkargs {
 	struct nfs_fh *		fromfh;
 	const char *		fromname;
 	unsigned int		fromlen;
-	const char *		topath;
-	unsigned int		tolen;
+	struct page **		pages;
+	unsigned int		pathlen;
 	struct iattr *		sattr;
 };
 
@@ -434,8 +434,8 @@ struct nfs3_symlinkargs {
 	struct nfs_fh *		fromfh;
 	const char *		fromname;
 	unsigned int		fromlen;
-	const char *		topath;
-	unsigned int		tolen;
+	struct page **		pages;
+	unsigned int		pathlen;
 	struct iattr *		sattr;
 };
 
@@ -533,7 +533,10 @@ struct nfs4_accessres {
 struct nfs4_create_arg {
 	u32				ftype;
 	union {
-		struct qstr *		symlink;    /* NF4LNK */
+		struct {
+			struct page **	pages;
+			unsigned int	len;
+		} symlink;   /* NF4LNK */
 		struct {
 			u32		specdata1;
 			u32		specdata2;
@@ -793,8 +796,8 @@ struct nfs_rpc_ops {
 	int	(*rename)  (struct inode *, struct qstr *,
 			    struct inode *, struct qstr *);
 	int	(*link)    (struct inode *, struct inode *, struct qstr *);
-	int	(*symlink) (struct inode *, struct dentry *, struct qstr *,
-			    struct iattr *);
+	int	(*symlink) (struct inode *, struct dentry *, struct page *,
+			    unsigned int, struct iattr *);
 	int	(*mkdir)   (struct inode *, struct dentry *, struct iattr *);
 	int	(*rmdir)   (struct inode *, struct qstr *);
 	int	(*readdir) (struct dentry *, struct rpc_cred *,
-- 
GitLab


From 275a082fe9308e710324e26ccb5363c53d8fd45f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:24 -0400
Subject: [PATCH 0857/1063] Add a real API for dealing with
 blk_congestion_wait()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 block/ll_rw_blk.c         | 12 ++++++++++++
 fs/nfs/write.c            |  1 +
 include/linux/blkdev.h    |  1 +
 include/linux/writeback.h |  1 +
 mm/page-writeback.c       |  9 +++++++++
 5 files changed, 24 insertions(+)

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index ddd9253f9d55f..dcbd6ff1fa333 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2734,6 +2734,18 @@ long blk_congestion_wait(int rw, long timeout)
 
 EXPORT_SYMBOL(blk_congestion_wait);
 
+/**
+ * blk_congestion_end - wake up sleepers on a congestion queue
+ * @rw: READ or WRITE
+ */
+void blk_congestion_end(int rw)
+{
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	if (waitqueue_active(wqh))
+		wake_up(wqh);
+}
+
 /*
  * Has to be called with the request spinlock acquired
  */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 453d44666ea58..38ba5c09af08f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
+	writeback_congestion_end();
 	return err;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aafe82788b4ef..96c9040c00a87 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -746,6 +746,7 @@ extern void blk_queue_free_tags(request_queue_t *);
 extern int blk_queue_resize_tags(request_queue_t *, int);
 extern void blk_queue_invalidate_tags(request_queue_t *);
 extern long blk_congestion_wait(int rw, long timeout);
+extern void blk_congestion_end(int rw);
 
 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9e38b566d0e77..0422036af4ebb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -85,6 +85,7 @@ int wakeup_pdflush(long nr_pages);
 void laptop_io_completion(void);
 void laptop_sync_completion(void);
 void throttle_vm_writeout(void);
+void writeback_congestion_end(void);
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e630188ccc40c..77a0bc4e261ab 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -802,6 +802,15 @@ int test_set_page_writeback(struct page *page)
 }
 EXPORT_SYMBOL(test_set_page_writeback);
 
+/*
+ * Wakes up tasks that are being throttled due to writeback congestion
+ */
+void writeback_congestion_end(void)
+{
+	blk_congestion_end(WRITE);
+}
+EXPORT_SYMBOL(writeback_congestion_end);
+
 /*
  * Return true if any of the pages in the mapping are marged with the
  * passed tag.
-- 
GitLab


From 5dd3177ae5012c1e2ad7a9ffdbd0e0d0de2f60e4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 24 Aug 2006 01:03:05 -0400
Subject: [PATCH 0858/1063] NFSv4: Fix a use-after-free issue with the nfs
 server.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 36 +++++++++++++++++++++---------------
 fs/nfs/nfs4renewd.c       |  1 +
 fs/nfs/super.c            |  8 +++++---
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 12941a8a6d752..f1ff2aec2ca53 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -164,6 +164,26 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	return NULL;
 }
 
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4
+	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+		nfs4_kill_renewd(clp);
+	while (!list_empty(&clp->cl_unused)) {
+		struct nfs4_state_owner *sp;
+
+		sp = list_entry(clp->cl_unused.next,
+				struct nfs4_state_owner,
+				so_list);
+		list_del(&sp->so_list);
+		kfree(sp);
+	}
+	BUG_ON(!list_empty(&clp->cl_state_owners));
+	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+		nfs_idmap_delete(clp);
+#endif
+}
+
 /*
  * Destroy a shared client record
  */
@@ -171,21 +191,7 @@ static void nfs_free_client(struct nfs_client *clp)
 {
 	dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
 
-#ifdef CONFIG_NFS_V4
-	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) {
-		while (!list_empty(&clp->cl_unused)) {
-			struct nfs4_state_owner *sp;
-
-			sp = list_entry(clp->cl_unused.next,
-					struct nfs4_state_owner,
-					so_list);
-			list_del(&sp->so_list);
-			kfree(sp);
-		}
-		BUG_ON(!list_empty(&clp->cl_state_owners));
-		nfs_idmap_delete(clp);
-	}
-#endif
+	nfs4_shutdown_client(clp);
 
 	/* -EIO all pending I/O */
 	if (!IS_ERR(clp->cl_rpcclient))
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index f2c893690ac49..7b6df1852e759 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -121,6 +121,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 			__FUNCTION__, (timeout + HZ - 1) / HZ);
 	cancel_delayed_work(&clp->cl_renewd);
 	schedule_delayed_work(&clp->cl_renewd, timeout);
+	set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
 	spin_unlock(&clp->cl_lock);
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 97cfb143e09fe..665949d277981 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -883,13 +883,15 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		goto out_free;
 	}
 
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
+
 	if (!s->s_root) {
 		/* initial superblock/root creation */
 		s->s_flags = flags;
-
 		nfs4_fill_super(s);
-	} else {
-		nfs_free_server(server);
 	}
 
 	mntroot = nfs4_get_root(s, &mntfh);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6d0be0efd1b52..7ccfc7ef0a83a 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -19,6 +19,7 @@ struct nfs_client {
 #define NFS_CS_RPCIOD		0		/* - rpciod started */
 #define NFS_CS_CALLBACK		1		/* - callback started */
 #define NFS_CS_IDMAP		2		/* - idmap started */
+#define NFS_CS_RENEWD		3		/* - renewd started */
 	struct sockaddr_in	cl_addr;	/* server identifier */
 	char *			cl_hostname;	/* hostname of server */
 	struct list_head	cl_share_link;	/* link in global client list */
-- 
GitLab


From 158998b6fe36f6acef087f574c96d44713499cc9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 24 Aug 2006 01:03:17 -0400
Subject: [PATCH 0859/1063] SUNRPC: Make rpc_mkpipe() take the parent dentry as
 an argument

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c                     |  6 +----
 include/linux/sunrpc/rpc_pipe_fs.h |  2 +-
 net/sunrpc/auth_gss/auth_gss.c     |  7 ++----
 net/sunrpc/rpc_pipe.c              | 38 ++++++++++++++++++------------
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f96dfac7dc9a6..82ad7110a1c0b 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -84,7 +84,6 @@ struct idmap_hashtable {
 };
 
 struct idmap {
-	char                  idmap_path[48];
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
@@ -119,10 +118,7 @@ nfs_idmap_new(struct nfs_client *clp)
         if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return -ENOMEM;
 
-	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
-	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
-
-        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+        idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
 	    idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
 		error = PTR_ERR(idmap->idmap_dentry);
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index a481472c9484e..a2eb9b4a9de32 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -43,7 +43,7 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 
 extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
 extern int rpc_rmdir(struct dentry *);
-extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
+extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags);
 extern int rpc_unlink(struct dentry *);
 extern struct vfsmount *rpc_get_mount(void);
 extern void rpc_put_mount(void);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ef1cf5b476c8c..6eed3e166ba33 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -88,7 +88,6 @@ struct gss_auth {
 	struct list_head upcalls;
 	struct rpc_clnt *client;
 	struct dentry *dentry;
-	char path[48];
 	spinlock_t lock;
 };
 
@@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 	if (err)
 		goto err_put_mech;
 
-	snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
-			clnt->cl_pathname,
-			gss_auth->mech->gm_name);
-	gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+	gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
+			clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
 	if (IS_ERR(gss_auth->dentry)) {
 		err = PTR_ERR(gss_auth->dentry);
 		goto err_put_mech;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index c21dc07f2a8cc..11ec12a09d704 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -621,17 +621,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
 }
 
 static struct dentry *
-rpc_lookup_negative(char *path, struct nameidata *nd)
+rpc_lookup_create(struct dentry *parent, const char *name, int len)
 {
+	struct inode *dir = parent->d_inode;
 	struct dentry *dentry;
-	struct inode *dir;
-	int error;
 
-	if ((error = rpc_lookup_parent(path, nd)) != 0)
-		return ERR_PTR(error);
-	dir = nd->dentry->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len);
+	dentry = lookup_one_len(name, parent, len);
 	if (IS_ERR(dentry))
 		goto out_err;
 	if (dentry->d_inode) {
@@ -642,7 +638,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
 	return dentry;
 out_err:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(nd);
+	return dentry;
+}
+
+static struct dentry *
+rpc_lookup_negative(char *path, struct nameidata *nd)
+{
+	struct dentry *dentry;
+	int error;
+
+	if ((error = rpc_lookup_parent(path, nd)) != 0)
+		return ERR_PTR(error);
+	dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
+	if (IS_ERR(dentry))
+		rpc_release_path(nd);
 	return dentry;
 }
 
@@ -701,17 +710,16 @@ rpc_rmdir(struct dentry *dentry)
 }
 
 struct dentry *
-rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
+rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags)
 {
-	struct nameidata nd;
 	struct dentry *dentry;
 	struct inode *dir, *inode;
 	struct rpc_inode *rpci;
 
-	dentry = rpc_lookup_negative(path, &nd);
+	dentry = rpc_lookup_create(parent, name, strlen(name));
 	if (IS_ERR(dentry))
 		return dentry;
-	dir = nd.dentry->d_inode;
+	dir = parent->d_inode;
 	inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
 	if (!inode)
 		goto err_dput;
@@ -726,13 +734,13 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
 	dget(dentry);
 out:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(&nd);
 	return dentry;
 err_dput:
 	dput(dentry);
 	dentry = ERR_PTR(-ENOMEM);
-	printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n",
-			__FILE__, __FUNCTION__, path, -ENOMEM);
+	printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
+			__FILE__, __FUNCTION__, parent->d_name.name, name,
+			-ENOMEM);
 	goto out;
 }
 
-- 
GitLab


From 6daabf1b04c89f1fbd8eab5450261360943c8e20 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 24 Aug 2006 15:44:16 -0400
Subject: [PATCH 0860/1063] NFS: Fix up compiler warnings on 64-bit platforms
 in client.c

Fix up warnings from compiling on ppc64.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f1ff2aec2ca53..a4aa47913a5c9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -836,7 +836,9 @@ struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
 	}
 	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
 
-	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 
 	BUG_ON(!server->nfs_client);
 	BUG_ON(!server->nfs_client->rpc_ops);
@@ -1002,7 +1004,9 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
 	if (error < 0)
 		goto error;
 
-	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 	dprintk("Mount FH: %d\n", mntfh->size);
 
 	error = nfs_probe_fsinfo(server, mntfh, &fattr);
@@ -1074,7 +1078,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 		goto error;
 
 	dprintk("Referral FSID: %llx:%llx\n",
-		server->fsid.major, server->fsid.minor);
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 
 	spin_lock(&nfs_client_lock);
 	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
@@ -1106,7 +1111,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	int error;
 
 	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
-		fattr->fsid.major, fattr->fsid.minor);
+		(unsigned long long) fattr->fsid.major,
+		(unsigned long long) fattr->fsid.minor);
 
 	server = nfs_alloc_server();
 	if (!server)
@@ -1131,7 +1137,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 		goto out_free_server;
 
 	dprintk("Cloned FSID: %llx:%llx\n",
-		server->fsid.major, server->fsid.minor);
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 
 	error = nfs_start_lockd(server);
 	if (error < 0)
@@ -1375,7 +1382,8 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
 		 MAJOR(server->s_dev), MINOR(server->s_dev));
 
 	snprintf(fsid, 17, "%llx:%llx",
-		 server->fsid.major, server->fsid.minor);
+		 (unsigned long long) server->fsid.major,
+		 (unsigned long long) server->fsid.minor);
 
 	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
 		   clp->cl_nfsversion,
-- 
GitLab


From 058ad9cbf14b3c7480d01b20280cb4d5858f7a50 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 27 Aug 2006 17:23:53 -0400
Subject: [PATCH 0861/1063] NFS: NFS_ROOT should use the new rpc_create API

Teach NFS_ROOT to use the new rpc_create API instead of the old two-call
API for creating an RPC transport.

Test plan:
Compile the kernel with the NFS client built-in, and set CONFIG_NFS_ROOT.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/mount_clnt.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 41274874b9a57..d507b021207fd 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -76,22 +76,19 @@ static struct rpc_clnt *
 mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 		int protocol)
 {
-	struct rpc_xprt	*xprt;
-	struct rpc_clnt	*clnt;
-
-	xprt = xprt_create_proto(protocol, srvaddr, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-
-	clnt = rpc_create_client(xprt, hostname,
-				&mnt_program, version,
-				RPC_AUTH_UNIX);
-	if (!IS_ERR(clnt)) {
-		clnt->cl_softrtry = 1;
-		clnt->cl_oneshot  = 1;
-		clnt->cl_intr = 1;
-	}
-	return clnt;
+	struct rpc_create_args args = {
+		.protocol	= protocol,
+		.address	= (struct sockaddr *)srvaddr,
+		.addrsize	= sizeof(*srvaddr),
+		.servername	= hostname,
+		.program	= &mnt_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+				   RPC_CLNT_CREATE_INTR),
+	};
+
+	return rpc_create(&args);
 }
 
 /*
-- 
GitLab


From 297de4f65698ee1e1c75e27d57933b5fa8227e72 Mon Sep 17 00:00:00 2001
From: "andros@citi.umich.edu" <andros@citi.umich.edu>
Date: Tue, 29 Aug 2006 12:19:41 -0400
Subject: [PATCH 0862/1063] Fix a referral error Oops

Fix an oops when the referral server is not responding.
Check the error return from nfs4_set_client() in nfs4_create_referral_server.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a4aa47913a5c9..110f80e7bd4ce 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1059,6 +1059,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 			parent_server->client->cl_xprt->prot,
 			parent_client->retrans_timeo,
 			parent_client->retrans_count);
+	if (error < 0)
+		goto error;
 
 	/* Initialise the client representation from the parent server */
 	nfs_server_copy_userdata(server, parent_server);
-- 
GitLab


From 8014793b1b2869445adfe678d64cdacd10e99d53 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 31 Aug 2006 18:24:08 -0400
Subject: [PATCH 0863/1063] SUNRPC: rpc_delay() should not clobber the
 rpc_task->tk_status

Doing so prevents stuff like call_encode() from working correctly.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/sched.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ecf366351bf71..6390461a97563 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -542,24 +542,20 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 	spin_unlock_bh(&queue->lock);
 }
 
+static void __rpc_atrun(struct rpc_task *task)
+{
+	rpc_wake_up_task(task);
+}
+
 /*
  * Run a task at a later time
  */
-static void	__rpc_atrun(struct rpc_task *);
-void
-rpc_delay(struct rpc_task *task, unsigned long delay)
+void rpc_delay(struct rpc_task *task, unsigned long delay)
 {
 	task->tk_timeout = delay;
 	rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
 }
 
-static void
-__rpc_atrun(struct rpc_task *task)
-{
-	task->tk_status = 0;
-	rpc_wake_up_task(task);
-}
-
 /*
  * Helper to call task->tk_ops->rpc_call_prepare
  */
-- 
GitLab


From 76303992b4701124f4cd0791ae2049ab4332f02c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 30 Aug 2006 14:32:49 -0400
Subject: [PATCH 0864/1063] SUNRPC: Handle ENETUNREACH, EHOSTUNREACH and
 EHOSTDOWN socket errors

In case of any of the above errors occurring, delay for 3 seconds, then
handle it as if it were a timeout error.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 87efcd207f233..355e7863c0aa5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1030,6 +1030,14 @@ call_status(struct rpc_task *task)
 
 	task->tk_status = 0;
 	switch(status) {
+	case -EHOSTDOWN:
+	case -EHOSTUNREACH:
+	case -ENETUNREACH:
+		/*
+		 * Delay any retries for 3 seconds, then handle as if it
+		 * were a timeout.
+		 */
+		rpc_delay(task, 3*HZ);
 	case -ETIMEDOUT:
 		task->tk_action = call_timeout;
 		break;
-- 
GitLab


From da45828e2835057045150b318c4fbe9bb91f18dd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 31 Aug 2006 15:44:52 -0400
Subject: [PATCH 0865/1063] SUNRPC: Clean up soft task error handling

- Ensure that the task aborts the RPC call only when it has actually timed out.
- Ensure that req->rq_majortimeo is initialised correctly.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 34 ++++++++++++++--------------------
 net/sunrpc/xprt.c |  8 +-------
 2 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 355e7863c0aa5..ceadb728f0da2 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -863,15 +863,11 @@ call_bind_status(struct rpc_task *task)
 		dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n",
 				task->tk_pid);
 		rpc_delay(task, 3*HZ);
-		goto retry_bind;
+		goto retry_timeout;
 	case -ETIMEDOUT:
 		dprintk("RPC: %4d rpcbind request timed out\n",
 				task->tk_pid);
-		if (RPC_IS_SOFT(task)) {
-			status = -EIO;
-			break;
-		}
-		goto retry_bind;
+		goto retry_timeout;
 	case -EPFNOSUPPORT:
 		dprintk("RPC: %4d remote rpcbind service unavailable\n",
 				task->tk_pid);
@@ -884,16 +880,13 @@ call_bind_status(struct rpc_task *task)
 		dprintk("RPC: %4d unrecognized rpcbind error (%d)\n",
 				task->tk_pid, -task->tk_status);
 		status = -EIO;
-		break;
 	}
 
 	rpc_exit(task, status);
 	return;
 
-retry_bind:
-	task->tk_status = 0;
-	task->tk_action = call_bind;
-	return;
+retry_timeout:
+	task->tk_action = call_timeout;
 }
 
 /*
@@ -941,14 +934,16 @@ call_connect_status(struct rpc_task *task)
 
 	switch (status) {
 	case -ENOTCONN:
-	case -ETIMEDOUT:
 	case -EAGAIN:
 		task->tk_action = call_bind;
-		break;
-	default:
-		rpc_exit(task, -EIO);
-		break;
+		if (!RPC_IS_SOFT(task))
+			return;
+		/* if soft mounted, test if we've timed out */
+	case -ETIMEDOUT:
+		task->tk_action = call_timeout;
+		return;
 	}
+	rpc_exit(task, -EIO);
 }
 
 /*
@@ -1057,7 +1052,6 @@ call_status(struct rpc_task *task)
 		printk("%s: RPC call returned error %d\n",
 			       clnt->cl_protname, -status);
 		rpc_exit(task, status);
-		break;
 	}
 }
 
@@ -1125,10 +1119,10 @@ call_decode(struct rpc_task *task)
 			clnt->cl_stats->rpcretrans++;
 			goto out_retry;
 		}
-		printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
+		dprintk("%s: too small RPC reply size (%d bytes)\n",
 			clnt->cl_protname, task->tk_status);
-		rpc_exit(task, -EIO);
-		return;
+		task->tk_action = call_timeout;
+		goto out_retry;
 	}
 
 	/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e4f64fb58ff27..a85f82baefc12 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -585,13 +585,6 @@ static void xprt_connect_status(struct rpc_task *task)
 				task->tk_pid, -task->tk_status, task->tk_client->cl_server);
 		xprt_release_write(xprt, task);
 		task->tk_status = -EIO;
-		return;
-	}
-
-	/* if soft mounted, just cause this RPC to fail */
-	if (RPC_IS_SOFT(task)) {
-		xprt_release_write(xprt, task);
-		task->tk_status = -EIO;
 	}
 }
 
@@ -829,6 +822,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
 	req->rq_bufsize = 0;
 	req->rq_xid     = xprt_alloc_xid(xprt);
 	req->rq_release_snd_buf = NULL;
+	xprt_reset_majortimeo(req);
 	dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
 			req, ntohl(req->rq_xid));
 }
-- 
GitLab


From 6b6ca86b77b62b798cf9ca2599036420abce7796 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Sep 2006 12:55:57 -0400
Subject: [PATCH 0866/1063] SUNRPC: Add refcounting to the struct rpc_xprt

In a subsequent patch, this will allow the portmapper to take a reference
to the rpc_xprt for which it is updating the port number, fixing an Oops.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  5 ++++-
 net/sunrpc/clnt.c           |  8 +++-----
 net/sunrpc/xprt.c           | 28 +++++++++++++++++++++++++---
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index de4efea7c856d..bdeba8538c719 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -12,6 +12,7 @@
 #include <linux/uio.h>
 #include <linux/socket.h>
 #include <linux/in.h>
+#include <linux/kref.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
 
@@ -129,6 +130,7 @@ struct rpc_xprt_ops {
 };
 
 struct rpc_xprt {
+	struct kref		kref;		/* Reference count */
 	struct rpc_xprt_ops *	ops;		/* transport methods */
 	struct socket *		sock;		/* BSD socket layer */
 	struct sock *		inet;		/* INET layer */
@@ -248,7 +250,8 @@ int			xprt_adjust_timeout(struct rpc_rqst *req);
 void			xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_release(struct rpc_task *task);
-int			xprt_destroy(struct rpc_xprt *xprt);
+struct rpc_xprt *	xprt_get(struct rpc_xprt *xprt);
+void			xprt_put(struct rpc_xprt *xprt);
 
 static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
 {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ceadb728f0da2..084a0ad5c64ea 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -177,7 +177,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 		kfree(clnt->cl_server);
 	kfree(clnt);
 out_err:
-	xprt_destroy(xprt);
+	xprt_put(xprt);
 out_no_xprt:
 	return ERR_PTR(err);
 }
@@ -261,6 +261,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	atomic_set(&new->cl_users, 0);
 	new->cl_parent = clnt;
 	atomic_inc(&clnt->cl_count);
+	new->cl_xprt = xprt_get(clnt->cl_xprt);
 	/* Turn off autobind on clones */
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
@@ -337,15 +338,12 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		rpc_rmdir(clnt->cl_dentry);
 		rpc_put_mount();
 	}
-	if (clnt->cl_xprt) {
-		xprt_destroy(clnt->cl_xprt);
-		clnt->cl_xprt = NULL;
-	}
 	if (clnt->cl_server != clnt->cl_inline_name)
 		kfree(clnt->cl_server);
 out_free:
 	rpc_free_iostats(clnt->cl_metrics);
 	clnt->cl_metrics = NULL;
+	xprt_put(clnt->cl_xprt);
 	kfree(clnt);
 	return 0;
 }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a85f82baefc12..1f786f68729d4 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -926,6 +926,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
 		return ERR_PTR(result);
 	}
 
+	kref_init(&xprt->kref);
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
 
@@ -958,16 +959,37 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
 
 /**
  * xprt_destroy - destroy an RPC transport, killing off all requests.
- * @xprt: transport to destroy
+ * @kref: kref for the transport to destroy
  *
  */
-int xprt_destroy(struct rpc_xprt *xprt)
+static void xprt_destroy(struct kref *kref)
 {
+	struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
+
 	dprintk("RPC:      destroying transport %p\n", xprt);
 	xprt->shutdown = 1;
 	del_timer_sync(&xprt->timer);
 	xprt->ops->destroy(xprt);
 	kfree(xprt);
+}
 
-	return 0;
+/**
+ * xprt_put - release a reference to an RPC transport.
+ * @xprt: pointer to the transport
+ *
+ */
+void xprt_put(struct rpc_xprt *xprt)
+{
+	kref_put(&xprt->kref, xprt_destroy);
+}
+
+/**
+ * xprt_get - return a reference to an RPC transport.
+ * @xprt: pointer to the transport
+ *
+ */
+struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
+{
+	kref_get(&xprt->kref);
+	return xprt;
 }
-- 
GitLab


From 762d4527c2fc19d821a13d9a3455ccc2d4073731 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 3 Sep 2006 00:51:55 -0400
Subject: [PATCH 0867/1063] SUNRPC: Fix Oops in pmap_getport_done

There is no guarantee that the parent task still exists when we exit from
the portmapper. Save the xprt instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/pmap_clnt.c | 46 ++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index f476f4df0f480..c04609d3476a9 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -30,7 +30,7 @@ struct portmap_args {
 	u32			pm_vers;
 	u32			pm_prot;
 	unsigned short		pm_port;
-	struct rpc_task *	pm_task;
+	struct rpc_xprt *	pm_xprt;
 };
 
 static struct rpc_procinfo	pmap_procedures[];
@@ -71,10 +71,10 @@ static const struct rpc_call_ops pmap_getport_ops = {
 	.rpc_release		= pmap_map_release,
 };
 
-static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt)
+static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
 {
 	xprt_clear_binding(xprt);
-	rpc_wake_up(&xprt->binding);
+	rpc_wake_up_status(&xprt->binding, status);
 }
 
 /**
@@ -92,6 +92,7 @@ void rpc_getport(struct rpc_task *task)
 	struct portmap_args *map;
 	struct rpc_clnt	*pmap_clnt;
 	struct rpc_task *child;
+	int status;
 
 	dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n",
 			task->tk_pid, clnt->cl_server,
@@ -107,34 +108,30 @@ void rpc_getport(struct rpc_task *task)
 	}
 
 	/* Someone else may have bound if we slept */
-	if (xprt_bound(xprt)) {
-		task->tk_status = 0;
+	status = 0;
+	if (xprt_bound(xprt))
 		goto bailout_nofree;
-	}
 
+	status = -ENOMEM;
 	map = pmap_map_alloc();
-	if (!map) {
-		task->tk_status = -ENOMEM;
+	if (!map)
 		goto bailout_nofree;
-	}
 	map->pm_prog = clnt->cl_prog;
 	map->pm_vers = clnt->cl_vers;
 	map->pm_prot = xprt->prot;
 	map->pm_port = 0;
-	map->pm_task = task;
+	map->pm_xprt = xprt_get(xprt);
 
 	rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
 	pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
-	if (IS_ERR(pmap_clnt)) {
-		task->tk_status = PTR_ERR(pmap_clnt);
+	status = PTR_ERR(pmap_clnt);
+	if (IS_ERR(pmap_clnt))
 		goto bailout;
-	}
 
+	status = -EIO;
 	child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
-	if (IS_ERR(child)) {
-		task->tk_status = -EIO;
+	if (IS_ERR(child))
 		goto bailout;
-	}
 	rpc_release_task(child);
 
 	rpc_sleep_on(&xprt->binding, task, NULL, NULL);
@@ -144,8 +141,10 @@ void rpc_getport(struct rpc_task *task)
 
 bailout:
 	pmap_map_free(map);
+	xprt_put(xprt);
 bailout_nofree:
-	pmap_wake_portmap_waiters(xprt);
+	task->tk_status = status;
+	pmap_wake_portmap_waiters(xprt, status);
 }
 
 #ifdef CONFIG_ROOT_NFS
@@ -201,29 +200,28 @@ int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int pr
 static void pmap_getport_done(struct rpc_task *child, void *data)
 {
 	struct portmap_args *map = data;
-	struct rpc_task *task = map->pm_task;
-	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpc_xprt *xprt = map->pm_xprt;
 	int status = child->tk_status;
 
 	if (status < 0) {
 		/* Portmapper not available */
 		xprt->ops->set_port(xprt, 0);
-		task->tk_status = status;
 	} else if (map->pm_port == 0) {
 		/* Requested RPC service wasn't registered */
 		xprt->ops->set_port(xprt, 0);
-		task->tk_status = -EACCES;
+		status = -EACCES;
 	} else {
 		/* Succeeded */
 		xprt->ops->set_port(xprt, map->pm_port);
 		xprt_set_bound(xprt);
-		task->tk_status = 0;
+		status = 0;
 	}
 
 	dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n",
-			child->tk_pid, child->tk_status, map->pm_port);
+			child->tk_pid, status, map->pm_port);
 
-	pmap_wake_portmap_waiters(xprt);
+	pmap_wake_portmap_waiters(xprt, status);
+	xprt_put(xprt);
 }
 
 /**
-- 
GitLab


From fd6840714d9cf6e93f1d42b904860a94df316b85 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Sep 2006 12:27:44 -0400
Subject: [PATCH 0868/1063] NFS: nfs_lookup - don't hash dentry when optimising
 away the lookup

If the open intents tell us that a given lookup is going to result in an
exclusive create, we currently optimize away the lookup call itself. The
reason is that the lookup would not be atomic with the create RPC call, so
why do it in the first place?

A problem occurs, however, if the VFS aborts the exclusive create operation
after the lookup, but before the call to create the file/directory: in this
case we will end up with a hashed negative dentry in the dcache that has
never been looked up.
Fix this by only actually hashing the dentry once the create operation has
been successfully completed.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 51328ae640dd4..3419c2da9ba9d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -904,9 +904,15 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
 	lock_kernel();
 
-	/* If we're doing an exclusive create, optimize away the lookup */
-	if (nfs_is_exclusive_create(dir, nd))
-		goto no_entry;
+	/*
+	 * If we're doing an exclusive create, optimize away the lookup
+	 * but don't hash the dentry.
+	 */
+	if (nfs_is_exclusive_create(dir, nd)) {
+		d_instantiate(dentry, NULL);
+		res = NULL;
+		goto out_unlock;
+	}
 
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error == -ENOENT)
@@ -1161,6 +1167,8 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	if (IS_ERR(inode))
 		return error;
 	d_instantiate(dentry, inode);
+	if (d_unhashed(dentry))
+		d_rehash(dentry);
 	return 0;
 }
 
-- 
GitLab


From 2dec51466a08ac1c67da41bfd0518d43d983a2eb Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 12 Sep 2006 11:53:23 -0400
Subject: [PATCH 0869/1063] NFSv4: It's perfectly legal for clp to be NULL
 here....

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 110f80e7bd4ce..ec1938d4b814f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -269,7 +269,7 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
 	clp = __nfs_find_client(addr, nfsversion);
 	spin_unlock(&nfs_client_lock);
 
-	BUG_ON(clp->cl_cons_state == 0);
+	BUG_ON(clp && clp->cl_cons_state == 0);
 
 	return clp;
 }
-- 
GitLab


From 5f004cf2aa8494708fd8d78e78142b7b2748e765 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Sep 2006 14:03:14 -0400
Subject: [PATCH 0870/1063] NFS: Make read() return an ESTALE if the file has
 been deleted

Currently, a read() request will return EIO even if the file has been
deleted on the server, simply because that is what the VM will return
if the call to readpage() fails to update the page.

Ensure that readpage() marks the inode as stale if it receives an ESTALE.
Then return that error to userland.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index dae33c1e8a77b..69f1549da2b94 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -568,8 +568,13 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 
 	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
-	/* Is this a short read? */
-	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+	if (task->tk_status < 0) {
+		if (task->tk_status == -ESTALE) {
+			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+			nfs_mark_for_revalidate(data->inode);
+		}
+	} else if (resp->count < argp->count && !resp->eof) {
+		/* This is a short read! */
 		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
@@ -616,6 +621,10 @@ int nfs_readpage(struct file *file, struct page *page)
 	if (error)
 		goto out_error;
 
+	error = -ESTALE;
+	if (NFS_STALE(inode))
+		goto out_error;
+
 	if (file == NULL) {
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
@@ -678,7 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	};
 	struct inode *inode = mapping->host;
 	struct nfs_server *server = NFS_SERVER(inode);
-	int ret;
+	int ret = -ESTALE;
 
 	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
 			inode->i_sb->s_id,
@@ -686,6 +695,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			nr_pages);
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
+	if (NFS_STALE(inode))
+		goto out;
+
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (desc.ctx == NULL)
@@ -701,6 +713,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
+out:
 	return ret;
 }
 
-- 
GitLab


From 97db8f41792839a6912fd21be8b61dd6c50db58f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Sep 2006 14:03:14 -0400
Subject: [PATCH 0871/1063] NFS: Don't invalidate the symlink we just stuffed
 into the cache

And slight optimisation of nfs_end_data_update(): directories never have
delegations anyway.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index cb5c65f0bc12f..a56add0bc5b71 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -717,13 +717,11 @@ void nfs_end_data_update(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	if (!nfs_have_delegation(inode, FMODE_READ)) {
-		/* Directories and symlinks: invalidate page cache */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
-			spin_lock(&inode->i_lock);
-			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-			spin_unlock(&inode->i_lock);
-		}
+	/* Directories: invalidate page cache */
+	if (S_ISDIR(inode->i_mode)) {
+		spin_lock(&inode->i_lock);
+		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+		spin_unlock(&inode->i_lock);
 	}
 	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
-- 
GitLab


From 6b30954ebb569fa1b2abdb21f2f4290eec76bf80 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Sep 2006 14:03:14 -0400
Subject: [PATCH 0872/1063] NFSv4: Retry lease recovery if it failed during a
 synchronous operation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7f60beb40df32..c218cc450b921 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -793,10 +793,17 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
+	int ret;
 
-	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+	for (;;) {
+		ret = nfs4_wait_clnt_recover(server->client, clp);
+		if (ret != 0)
+			return ret;
+		if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+			break;
 		nfs4_schedule_state_recovery(clp);
-	return nfs4_wait_clnt_recover(server->client, clp);
+	}
+	return 0;
 }
 
 /*
-- 
GitLab


From c514983d8d2260020543a81589a2b8c7d4bdab4e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 08:25:04 -0400
Subject: [PATCH 0873/1063] NFSv4: Handle the condition NFS4ERR_FILE_OPEN

Retry a few times before we give up: the error is usually due to ordering
issues with asynchronous RPC calls.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c218cc450b921..c49ac3ea1dc32 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2891,6 +2891,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 			if (ret == 0)
 				exception->retry = 1;
 			break;
+		case -NFS4ERR_FILE_OPEN:
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
 			ret = nfs4_delay(server->client, &exception->timeout);
-- 
GitLab


From 2066fe89b459c3c787c811b3369df191cddd93d8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 08:30:46 -0400
Subject: [PATCH 0874/1063] NFSv4: Poll more aggressively when handling
 NFS4ERR_DELAY

Change the initial retry delay from 1s to 0.1s (and then back off
exponentially).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c49ac3ea1dc32..47c7e6e3910d7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,7 +55,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
-#define NFS4_POLL_RETRY_MIN	(1*HZ)
+#define NFS4_POLL_RETRY_MIN	(HZ/10)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
 struct nfs4_opendata;
-- 
GitLab


From 51b6ded4d9a94a61035deba1d8f51a54e3a3dd86 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 16:31:56 -0400
Subject: [PATCH 0875/1063] NFSv4: When mounting with a port=0 argument,
 substitute port=2049

RFC3530 states that the registered port 2049 for the NFS protocol should be
the default configuration in order to allow clients not to use the RPC
binding protocols.
If the mount program sends us a port=0, we therefore substitute port=2049.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 665949d277981..b99113b0f65fa 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -833,6 +833,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 				__FUNCTION__);
 		return -EINVAL;
 	}
+	/* RFC3530: The default port for NFS is 2049 */
+	if (addr.sin_port == 0)
+		addr.sin_port = NFS_PORT;
 
 	/* Grab the authentication type */
 	authflavour = RPC_AUTH_UNIX;
-- 
GitLab


From aec5e175288c711cbe44750276f61efa3fa3d370 Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Date: Sat, 16 Sep 2006 21:09:32 -0400
Subject: [PATCH 0876/1063] NFS: Use SEEK_END instead of hardcoded value

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a146ed338534a..be997d6491273 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -157,7 +157,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
 {
 	/* origin == SEEK_END => we must revalidate the cached file length */
-	if (origin == 2) {
+	if (origin == SEEK_END) {
 		struct inode *inode = filp->f_mapping->host;
 		int retval = nfs_revalidate_file_size(inode, filp);
 		if (retval < 0)
-- 
GitLab


From a53a3c58fd83e572a7c768d88b4c4e9840a57e82 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Wed, 6 Sep 2006 11:51:21 -0400
Subject: [PATCH 0877/1063] NFSv4: rpc_mkpipe creating socket inodes w/out sk
 buffers

This patch stops rpc_mkpipe from creating S_IFSOCK nodes that don't
have associated sk buffers attached (which causes SELinux to oops
during NFSv4 mounts). Instead, the S_IFIFO mode bit is set, which
probably makes more sense and seems to work just fine during
my connectathon and fsx testing...

Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpc_pipe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 11ec12a09d704..dfa504fe383f0 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -720,7 +720,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
 	if (IS_ERR(dentry))
 		return dentry;
 	dir = parent->d_inode;
-	inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
+	inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
 	if (!inode)
 		goto err_dput;
 	inode->i_ino = iunique(dir->i_sb, 100);
-- 
GitLab


From f551e44ff11d3e2ec8f37907bb88ec2433cc8b74 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 20 Sep 2006 14:33:04 -0400
Subject: [PATCH 0878/1063] NFS: add comments clarifying the use of
 nfs_post_op_update()

Comments-only change to clarify a detail of the NFS protocol and how it is
implemented in Linux.

Test plan:
None.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 6 ++++++
 fs/nfs/write.c | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a56add0bc5b71..e8c143d182c4d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -840,6 +840,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
  *
  * After an operation that has changed the inode metadata, mark the
  * attribute cache as being invalid, then try to update it.
+ *
+ * NB: if the server didn't return any post op attributes, this
+ * function will force the retrieval of attributes before the next
+ * NFS request.  Thus it should be used only for operations that
+ * are expected to change one or more attributes, to avoid
+ * unnecessary NFS requests and trips through nfs_update_inode().
  */
 int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 38ba5c09af08f..c12effb46fe50 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1253,7 +1253,13 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
-	/* Call the NFS version-specific code */
+	/*
+	 * ->write_done will attempt to use post-op attributes to detect
+	 * conflicting writes by other clients.  A strict interpretation
+	 * of close-to-open would allow us to continue caching even if
+	 * another writer had changed the file, but some applications
+	 * depend on tighter cache coherency when writing.
+	 */
 	status = NFS_PROTO(data->inode)->write_done(task, data);
 	if (status != 0)
 		return status;
-- 
GitLab


From 026ed5c9185dcc4b2df92e98c3d61a01cea19cbf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 20 Sep 2006 14:33:07 -0400
Subject: [PATCH 0879/1063] NFS: unmark NFS direct I/O as experimental

Remove the EXPERIMENTAL flag from the NFS_DIRECTIO option.

Test plan:
Unset the EXPERIMENTAL kernel build option and check to see that the NFS
direct I/O option is still available.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 5305816283113..a27002668bd36 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1471,8 +1471,8 @@ config NFS_V4
 	  If unsure, say N.
 
 config NFS_DIRECTIO
-	bool "Allow direct I/O on NFS files (EXPERIMENTAL)"
-	depends on NFS_FS && EXPERIMENTAL
+	bool "Allow direct I/O on NFS files"
+	depends on NFS_FS
 	help
 	  This option enables applications to perform uncached I/O on files
 	  in NFS file systems using the O_DIRECT open() flag.  When O_DIRECT
-- 
GitLab


From 42750b04c5baa7c5ffdf0a8be2b9b320efdf069f Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@suse.cz>
Date: Thu, 1 Jun 2006 18:34:01 +0200
Subject: [PATCH 0880/1063] [ALSA] Control API - TLV implementation for
 additional information like dB scale

This patch implements a TLV mechanism to transfer additional information
like dB scale to user space. The types might be extended in the future.
Acked-by: Takashi Iwai <tiwai@suse.de>

Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/asound.h          |  9 ++++++-
 include/sound/control.h         |  2 ++
 include/sound/tlv.h             | 43 +++++++++++++++++++++++++++++++++
 sound/core/control.c            | 41 +++++++++++++++++++++++++++++--
 sound/pci/ca0106/ca0106_mixer.c |  4 +++
 5 files changed, 96 insertions(+), 3 deletions(-)
 create mode 100644 include/sound/tlv.h

diff --git a/include/sound/asound.h b/include/sound/asound.h
index 41885f48ad915..76a20406bd189 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -688,7 +688,7 @@ struct snd_timer_tread {
  *                                                                          *
  ****************************************************************************/
 
-#define SNDRV_CTL_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 3)
+#define SNDRV_CTL_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 4)
 
 struct snd_ctl_card_info {
 	int card;			/* card number */
@@ -818,6 +818,12 @@ struct snd_ctl_elem_value {
         unsigned char reserved[128-sizeof(struct timespec)];
 };
 
+struct snd_ctl_tlv {
+        unsigned int numid;	/* control element numeric identification */
+        unsigned int length;	/* in bytes aligned to 4 */
+        unsigned int tlv[0];	/* first TLV */
+};
+
 enum {
 	SNDRV_CTL_IOCTL_PVERSION = _IOR('U', 0x00, int),
 	SNDRV_CTL_IOCTL_CARD_INFO = _IOR('U', 0x01, struct snd_ctl_card_info),
@@ -831,6 +837,7 @@ enum {
 	SNDRV_CTL_IOCTL_ELEM_ADD = _IOWR('U', 0x17, struct snd_ctl_elem_info),
 	SNDRV_CTL_IOCTL_ELEM_REPLACE = _IOWR('U', 0x18, struct snd_ctl_elem_info),
 	SNDRV_CTL_IOCTL_ELEM_REMOVE = _IOWR('U', 0x19, struct snd_ctl_elem_id),
+	SNDRV_CTL_IOCTL_TLV_READ = _IOWR('U', 0x1a, struct snd_ctl_tlv),
 	SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE = _IOWR('U', 0x20, int),
 	SNDRV_CTL_IOCTL_HWDEP_INFO = _IOR('U', 0x21, struct snd_hwdep_info),
 	SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE = _IOR('U', 0x30, int),
diff --git a/include/sound/control.h b/include/sound/control.h
index 2489b1eb0110a..a93a58d0e6882 100644
--- a/include/sound/control.h
+++ b/include/sound/control.h
@@ -42,6 +42,7 @@ struct snd_kcontrol_new {
 	snd_kcontrol_info_t *info;
 	snd_kcontrol_get_t *get;
 	snd_kcontrol_put_t *put;
+	unsigned int *tlv;
 	unsigned long private_value;
 };
 
@@ -58,6 +59,7 @@ struct snd_kcontrol {
 	snd_kcontrol_info_t *info;
 	snd_kcontrol_get_t *get;
 	snd_kcontrol_put_t *put;
+	unsigned int *tlv;
 	unsigned long private_value;
 	void *private_data;
 	void (*private_free)(struct snd_kcontrol *kcontrol);
diff --git a/include/sound/tlv.h b/include/sound/tlv.h
new file mode 100644
index 0000000000000..b826e1df1da63
--- /dev/null
+++ b/include/sound/tlv.h
@@ -0,0 +1,43 @@
+#ifndef __SOUND_TLV_H
+#define __SOUND_TLV_H
+
+/*
+ *  Advanced Linux Sound Architecture - ALSA - Driver
+ *  Copyright (c) 2006 by Jaroslav Kysela <perex@suse.cz>
+ *
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+/*
+ * TLV structure is right behind the struct snd_ctl_tlv:
+ *   unsigned int type  	- see SNDRV_CTL_TLVT_*
+ *   unsigned int length
+ *   .... data aligned to sizeof(unsigned int), use
+ *        block_length = (length + (sizeof(unsigned int) - 1)) &
+ *                       ~(sizeof(unsigned int) - 1) ....
+ */
+
+#define SNDRV_CTL_TLVT_CONTAINER 0	/* one level down - group of TLVs */
+#define SNDRV_CTL_TLVT_DB_SCALE	1       /* dB scale */
+
+#define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
+unsigned int name[] = { \
+        SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int), \
+        (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) \
+}
+
+#endif /* __SOUND_TLV_H */
diff --git a/sound/core/control.c b/sound/core/control.c
index bb397eaa71879..e9c8854d2f7b4 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -241,6 +241,7 @@ struct snd_kcontrol *snd_ctl_new1(const struct snd_kcontrol_new *ncontrol,
 	kctl.info = ncontrol->info;
 	kctl.get = ncontrol->get;
 	kctl.put = ncontrol->put;
+	kctl.tlv = ncontrol->tlv;
 	kctl.private_value = ncontrol->private_value;
 	kctl.private_data = private_data;
 	return snd_ctl_new(&kctl, access);
@@ -1067,6 +1068,40 @@ static int snd_ctl_subscribe_events(struct snd_ctl_file *file, int __user *ptr)
 	return 0;
 }
 
+static int snd_ctl_tlv_read(struct snd_card *card,
+                            struct snd_ctl_tlv __user *_tlv)
+{
+	struct snd_ctl_tlv tlv;
+	struct snd_kcontrol *kctl;
+	unsigned int len;
+	int err = 0;
+
+	if (copy_from_user(&tlv, _tlv, sizeof(tlv)))
+		return -EFAULT;
+        if (tlv.length < sizeof(unsigned int) * 3)
+                return -EINVAL;
+        down_read(&card->controls_rwsem);
+        kctl = snd_ctl_find_numid(card, tlv.numid);
+        if (kctl == NULL) {
+                err = -ENOENT;
+                goto __kctl_end;
+        }
+        if (kctl->tlv == NULL) {
+                err = -ENXIO;
+                goto __kctl_end;
+        }
+        len = kctl->tlv[1] + 2 * sizeof(unsigned int);
+        if (tlv.length < len) {
+                err = -ENOMEM;
+                goto __kctl_end;
+        }
+        if (copy_to_user(_tlv->tlv, kctl->tlv, len))
+        	err = -EFAULT;
+      __kctl_end:
+        up_read(&card->controls_rwsem);
+        return err;
+}
+
 static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct snd_ctl_file *ctl;
@@ -1086,11 +1121,11 @@ static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 	case SNDRV_CTL_IOCTL_CARD_INFO:
 		return snd_ctl_card_info(card, ctl, cmd, argp);
 	case SNDRV_CTL_IOCTL_ELEM_LIST:
-		return snd_ctl_elem_list(ctl->card, argp);
+		return snd_ctl_elem_list(card, argp);
 	case SNDRV_CTL_IOCTL_ELEM_INFO:
 		return snd_ctl_elem_info_user(ctl, argp);
 	case SNDRV_CTL_IOCTL_ELEM_READ:
-		return snd_ctl_elem_read_user(ctl->card, argp);
+		return snd_ctl_elem_read_user(card, argp);
 	case SNDRV_CTL_IOCTL_ELEM_WRITE:
 		return snd_ctl_elem_write_user(ctl, argp);
 	case SNDRV_CTL_IOCTL_ELEM_LOCK:
@@ -1105,6 +1140,8 @@ static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		return snd_ctl_elem_remove(ctl, argp);
 	case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS:
 		return snd_ctl_subscribe_events(ctl, ip);
+        case SNDRV_CTL_IOCTL_TLV_READ:
+                return snd_ctl_tlv_read(card, argp);
 	case SNDRV_CTL_IOCTL_POWER:
 		return -ENOPROTOOPT;
 	case SNDRV_CTL_IOCTL_POWER_STATE:
diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index 146eed70dce6e..35309b3ed8c0c 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -70,9 +70,12 @@
 #include <sound/pcm.h>
 #include <sound/ac97_codec.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 
 #include "ca0106.h"
 
+static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale, -5150, 75, 1);
+
 static int snd_ca0106_shared_spdif_info(struct snd_kcontrol *kcontrol,
 					struct snd_ctl_elem_info *uinfo)
 {
@@ -472,6 +475,7 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol,
 	.info =	 snd_ca0106_volume_info,			\
 	.get =   snd_ca0106_volume_get,				\
 	.put =   snd_ca0106_volume_put,				\
+	.tlv =	 snd_ca0106_db_scale,				\
 	.private_value = ((chid) << 8) | (reg)			\
 }
 
-- 
GitLab


From 746d4a02e68499fc6c1f8d0c43d2271853ade181 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 23 Jun 2006 14:37:59 +0200
Subject: [PATCH 0881/1063] [ALSA] Fix disconnection of proc interface

- Add the linked list to each proc entry to enable a single-shot
  disconnection (unregister)
- Deprecate snd_info_unregister(), use snd_info_free_entry()
- Removed NULL checks of snd_info_free_entry()

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/info.h                |   7 +-
 sound/core/hwdep.c                  |   2 +-
 sound/core/info.c                   | 102 ++++++++++++++--------------
 sound/core/info_oss.c               |   6 +-
 sound/core/init.c                   |   7 +-
 sound/core/oss/mixer_oss.c          |   6 +-
 sound/core/oss/pcm_oss.c            |   8 +--
 sound/core/pcm.c                    |  53 +++++----------
 sound/core/pcm_memory.c             |   2 +-
 sound/core/rawmidi.c                |   2 +-
 sound/core/seq/oss/seq_oss.c        |   3 +-
 sound/core/seq/seq_device.c         |   2 +-
 sound/core/seq/seq_info.c           |   6 +-
 sound/core/sound.c                  |   3 +-
 sound/core/sound_oss.c              |   3 +-
 sound/core/timer.c                  |   2 +-
 sound/drivers/opl4/opl4_proc.c      |   3 +-
 sound/pci/ac97/ac97_proc.c          |  18 ++---
 sound/pci/cs46xx/dsp_spos.c         |  52 ++++++--------
 sound/pci/cs46xx/dsp_spos_scb_lib.c |   2 +-
 sound/synth/emux/emux_proc.c        |   6 +-
 21 files changed, 123 insertions(+), 172 deletions(-)

diff --git a/include/sound/info.h b/include/sound/info.h
index 74f6996769c7d..97ffc4fb99697 100644
--- a/include/sound/info.h
+++ b/include/sound/info.h
@@ -71,7 +71,6 @@ struct snd_info_entry {
 	mode_t mode;
 	long size;
 	unsigned short content;
-	unsigned short disconnected: 1;
 	union {
 		struct snd_info_entry_text text;
 		struct snd_info_entry_ops *ops;
@@ -83,6 +82,8 @@ struct snd_info_entry {
 	void (*private_free)(struct snd_info_entry *entry);
 	struct proc_dir_entry *p;
 	struct mutex access;
+	struct list_head children;
+	struct list_head list;
 };
 
 #if defined(CONFIG_SND_OSSEMUL) && defined(CONFIG_PROC_FS)
@@ -122,8 +123,8 @@ int snd_info_restore_text(struct snd_info_entry * entry);
 int snd_info_card_create(struct snd_card * card);
 int snd_info_card_register(struct snd_card * card);
 int snd_info_card_free(struct snd_card * card);
+void snd_info_card_disconnect(struct snd_card * card);
 int snd_info_register(struct snd_info_entry * entry);
-int snd_info_unregister(struct snd_info_entry * entry);
 
 /* for card drivers */
 int snd_card_proc_new(struct snd_card *card, const char *name, struct snd_info_entry **entryp);
@@ -156,8 +157,8 @@ static inline void snd_info_free_entry(struct snd_info_entry * entry) { ; }
 static inline int snd_info_card_create(struct snd_card * card) { return 0; }
 static inline int snd_info_card_register(struct snd_card * card) { return 0; }
 static inline int snd_info_card_free(struct snd_card * card) { return 0; }
+static inline void snd_info_card_disconnect(struct snd_card * card) { }
 static inline int snd_info_register(struct snd_info_entry * entry) { return 0; }
-static inline int snd_info_unregister(struct snd_info_entry * entry) { return 0; }
 
 static inline int snd_card_proc_new(struct snd_card *card, const char *name,
 				    struct snd_info_entry **entryp) { return -EINVAL; }
diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c
index 8bd0dcc93eba9..cbd8a63282b69 100644
--- a/sound/core/hwdep.c
+++ b/sound/core/hwdep.c
@@ -497,7 +497,7 @@ static void __init snd_hwdep_proc_init(void)
 
 static void __exit snd_hwdep_proc_done(void)
 {
-	snd_info_unregister(snd_hwdep_proc_entry);
+	snd_info_free_entry(snd_hwdep_proc_entry);
 }
 #else /* !CONFIG_PROC_FS */
 #define snd_hwdep_proc_init()
diff --git a/sound/core/info.c b/sound/core/info.c
index 340332c6d9733..9663b6be9c3a0 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -78,6 +78,7 @@ struct snd_info_private_data {
 
 static int snd_info_version_init(void);
 static int snd_info_version_done(void);
+static void snd_info_disconnect(struct snd_info_entry *entry);
 
 
 /* resize the proc r/w buffer */
@@ -304,7 +305,7 @@ static int snd_info_entry_open(struct inode *inode, struct file *file)
 	mutex_lock(&info_mutex);
 	p = PDE(inode);
 	entry = p == NULL ? NULL : (struct snd_info_entry *)p->data;
-	if (entry == NULL || entry->disconnected) {
+	if (entry == NULL || ! entry->p) {
 		mutex_unlock(&info_mutex);
 		return -ENODEV;
 	}
@@ -586,10 +587,10 @@ int __exit snd_info_done(void)
 	snd_info_version_done();
 	if (snd_proc_root) {
 #if defined(CONFIG_SND_SEQUENCER) || defined(CONFIG_SND_SEQUENCER_MODULE)
-		snd_info_unregister(snd_seq_root);
+		snd_info_free_entry(snd_seq_root);
 #endif
 #ifdef CONFIG_SND_OSSEMUL
-		snd_info_unregister(snd_oss_root);
+		snd_info_free_entry(snd_oss_root);
 #endif
 		snd_remove_proc_entry(&proc_root, snd_proc_root);
 	}
@@ -648,17 +649,28 @@ int snd_info_card_register(struct snd_card *card)
  * de-register the card proc file
  * called from init.c
  */
-int snd_info_card_free(struct snd_card *card)
+void snd_info_card_disconnect(struct snd_card *card)
 {
-	snd_assert(card != NULL, return -ENXIO);
+	snd_assert(card != NULL, return);
+	mutex_lock(&info_mutex);
 	if (card->proc_root_link) {
 		snd_remove_proc_entry(snd_proc_root, card->proc_root_link);
 		card->proc_root_link = NULL;
 	}
-	if (card->proc_root) {
-		snd_info_unregister(card->proc_root);
-		card->proc_root = NULL;
-	}
+	if (card->proc_root)
+		snd_info_disconnect(card->proc_root);
+	mutex_unlock(&info_mutex);
+}
+
+/*
+ * release the card proc file resources
+ * called from init.c
+ */
+int snd_info_card_free(struct snd_card *card)
+{
+	snd_assert(card != NULL, return -ENXIO);
+	snd_info_free_entry(card->proc_root);
+	card->proc_root = NULL;
 	return 0;
 }
 
@@ -767,6 +779,8 @@ static struct snd_info_entry *snd_info_create_entry(const char *name)
 	entry->mode = S_IFREG | S_IRUGO;
 	entry->content = SNDRV_INFO_CONTENT_TEXT;
 	mutex_init(&entry->access);
+	INIT_LIST_HEAD(&entry->children);
+	INIT_LIST_HEAD(&entry->list);
 	return entry;
 }
 
@@ -819,30 +833,35 @@ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card,
 
 EXPORT_SYMBOL(snd_info_create_card_entry);
 
-static int snd_info_dev_free_entry(struct snd_device *device)
+static void snd_info_disconnect(struct snd_info_entry *entry)
 {
-	struct snd_info_entry *entry = device->device_data;
-	snd_info_free_entry(entry);
-	return 0;
-}
+	struct list_head *p, *n;
+	struct proc_dir_entry *root;
 
-static int snd_info_dev_register_entry(struct snd_device *device)
-{
-	struct snd_info_entry *entry = device->device_data;
-	return snd_info_register(entry);
+	list_for_each_safe(p, n, &entry->children) {
+		snd_info_disconnect(list_entry(p, struct snd_info_entry, list));
+	}
+
+	if (! entry->p)
+		return;
+	list_del_init(&entry->list);
+	root = entry->parent == NULL ? snd_proc_root : entry->parent->p;
+	snd_assert(root, return);
+	snd_remove_proc_entry(root, entry->p);
+	entry->p = NULL;
 }
 
-static int snd_info_dev_disconnect_entry(struct snd_device *device)
+static int snd_info_dev_free_entry(struct snd_device *device)
 {
 	struct snd_info_entry *entry = device->device_data;
-	entry->disconnected = 1;
+	snd_info_free_entry(entry);
 	return 0;
 }
 
-static int snd_info_dev_unregister_entry(struct snd_device *device)
+static int snd_info_dev_register_entry(struct snd_device *device)
 {
 	struct snd_info_entry *entry = device->device_data;
-	return snd_info_unregister(entry);
+	return snd_info_register(entry);
 }
 
 /**
@@ -871,8 +890,7 @@ int snd_card_proc_new(struct snd_card *card, const char *name,
 	static struct snd_device_ops ops = {
 		.dev_free = snd_info_dev_free_entry,
 		.dev_register =	snd_info_dev_register_entry,
-		.dev_disconnect = snd_info_dev_disconnect_entry,
-		.dev_unregister = snd_info_dev_unregister_entry
+		/* disconnect is done via snd_info_card_disconnect() */
 	};
 	struct snd_info_entry *entry;
 	int err;
@@ -901,6 +919,11 @@ void snd_info_free_entry(struct snd_info_entry * entry)
 {
 	if (entry == NULL)
 		return;
+	if (entry->p) {
+		mutex_lock(&info_mutex);
+		snd_info_disconnect(entry);
+		mutex_unlock(&info_mutex);
+	}
 	kfree(entry->name);
 	if (entry->private_free)
 		entry->private_free(entry);
@@ -935,38 +958,14 @@ int snd_info_register(struct snd_info_entry * entry)
 	p->size = entry->size;
 	p->data = entry;
 	entry->p = p;
+	if (entry->parent)
+		list_add_tail(&entry->list, &entry->parent->children);
 	mutex_unlock(&info_mutex);
 	return 0;
 }
 
 EXPORT_SYMBOL(snd_info_register);
 
-/**
- * snd_info_unregister - de-register the info entry
- * @entry: the info entry
- *
- * De-registers the info entry and releases the instance.
- *
- * Returns zero if successful, or a negative error code on failure.
- */
-int snd_info_unregister(struct snd_info_entry * entry)
-{
-	struct proc_dir_entry *root;
-
-	if (! entry)
-		return 0;
-	snd_assert(entry->p != NULL, return -ENXIO);
-	root = entry->parent == NULL ? snd_proc_root : entry->parent->p;
-	snd_assert(root, return -ENXIO);
-	mutex_lock(&info_mutex);
-	snd_remove_proc_entry(root, entry->p);
-	mutex_unlock(&info_mutex);
-	snd_info_free_entry(entry);
-	return 0;
-}
-
-EXPORT_SYMBOL(snd_info_unregister);
-
 /*
 
  */
@@ -999,8 +998,7 @@ static int __init snd_info_version_init(void)
 
 static int __exit snd_info_version_done(void)
 {
-	if (snd_info_version_entry)
-		snd_info_unregister(snd_info_version_entry);
+	snd_info_free_entry(snd_info_version_entry);
 	return 0;
 }
 
diff --git a/sound/core/info_oss.c b/sound/core/info_oss.c
index bb2c40d0ab66c..3ebc34919c76f 100644
--- a/sound/core/info_oss.c
+++ b/sound/core/info_oss.c
@@ -131,10 +131,8 @@ int snd_info_minor_register(void)
 
 int snd_info_minor_unregister(void)
 {
-	if (snd_sndstat_proc_entry) {
-		snd_info_unregister(snd_sndstat_proc_entry);
-		snd_sndstat_proc_entry = NULL;
-	}
+	snd_info_free_entry(snd_sndstat_proc_entry);
+	snd_sndstat_proc_entry = NULL;
 	return 0;
 }
 
diff --git a/sound/core/init.c b/sound/core/init.c
index 4d9258884e444..1ecb029ff4c91 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -310,6 +310,7 @@ int snd_card_disconnect(struct snd_card *card)
 	if (err < 0)
 		snd_printk(KERN_ERR "not all devices for card %i can be disconnected\n", card->number);
 
+	snd_info_card_disconnect(card);
 	return 0;	
 }
 
@@ -360,7 +361,7 @@ int snd_card_free(struct snd_card *card)
 	}
 	if (card->private_free)
 		card->private_free(card);
-	snd_info_unregister(card->proc_id);
+	snd_info_free_entry(card->proc_id);
 	if (snd_info_card_free(card) < 0) {
 		snd_printk(KERN_WARNING "unable to free card info\n");
 		/* Not fatal error */
@@ -625,9 +626,9 @@ int __init snd_card_info_init(void)
 
 int __exit snd_card_info_done(void)
 {
-	snd_info_unregister(snd_card_info_entry);
+	snd_info_free_entry(snd_card_info_entry);
 #ifdef MODULE
-	snd_info_unregister(snd_card_module_info_entry);
+	snd_info_free_entry(snd_card_module_info_entry);
 #endif
 	return 0;
 }
diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c
index 75a9505c74455..00c95def95aa5 100644
--- a/sound/core/oss/mixer_oss.c
+++ b/sound/core/oss/mixer_oss.c
@@ -1193,10 +1193,8 @@ static void snd_mixer_oss_proc_init(struct snd_mixer_oss *mixer)
 
 static void snd_mixer_oss_proc_done(struct snd_mixer_oss *mixer)
 {
-	if (mixer->proc_entry) {
-		snd_info_unregister(mixer->proc_entry);
-		mixer->proc_entry = NULL;
-	}
+	snd_info_free_entry(mixer->proc_entry);
+	mixer->proc_entry = NULL;
 }
 #else /* !CONFIG_PROC_FS */
 #define snd_mixer_oss_proc_init(mix)
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 472fce0ee0e81..a92b93e5ebd58 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -2846,11 +2846,9 @@ static void snd_pcm_oss_proc_done(struct snd_pcm *pcm)
 	int stream;
 	for (stream = 0; stream < 2; ++stream) {
 		struct snd_pcm_str *pstr = &pcm->streams[stream];
-		if (pstr->oss.proc_entry) {
-			snd_info_unregister(pstr->oss.proc_entry);
-			pstr->oss.proc_entry = NULL;
-			snd_pcm_oss_proc_free_setup_list(pstr);
-		}
+		snd_info_free_entry(pstr->oss.proc_entry);
+		pstr->oss.proc_entry = NULL;
+		snd_pcm_oss_proc_free_setup_list(pstr);
 	}
 }
 #else /* !CONFIG_SND_VERBOSE_PROCFS */
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 7581edd7b9ffb..b8602471f7e52 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -494,19 +494,13 @@ static int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr)
 static int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr)
 {
 #ifdef CONFIG_SND_PCM_XRUN_DEBUG
-	if (pstr->proc_xrun_debug_entry) {
-		snd_info_unregister(pstr->proc_xrun_debug_entry);
-		pstr->proc_xrun_debug_entry = NULL;
-	}
+	snd_info_free_entry(pstr->proc_xrun_debug_entry);
+	pstr->proc_xrun_debug_entry = NULL;
 #endif
-	if (pstr->proc_info_entry) {
-		snd_info_unregister(pstr->proc_info_entry);
-		pstr->proc_info_entry = NULL;
-	}
-	if (pstr->proc_root) {
-		snd_info_unregister(pstr->proc_root);
-		pstr->proc_root = NULL;
-	}
+	snd_info_free_entry(pstr->proc_info_entry);
+	pstr->proc_info_entry = NULL;
+	snd_info_free_entry(pstr->proc_root);
+	pstr->proc_root = NULL;
 	return 0;
 }
 
@@ -570,29 +564,19 @@ static int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream)
 
 	return 0;
 }
-		
+
 static int snd_pcm_substream_proc_done(struct snd_pcm_substream *substream)
 {
-	if (substream->proc_info_entry) {
-		snd_info_unregister(substream->proc_info_entry);
-		substream->proc_info_entry = NULL;
-	}
-	if (substream->proc_hw_params_entry) {
-		snd_info_unregister(substream->proc_hw_params_entry);
-		substream->proc_hw_params_entry = NULL;
-	}
-	if (substream->proc_sw_params_entry) {
-		snd_info_unregister(substream->proc_sw_params_entry);
-		substream->proc_sw_params_entry = NULL;
-	}
-	if (substream->proc_status_entry) {
-		snd_info_unregister(substream->proc_status_entry);
-		substream->proc_status_entry = NULL;
-	}
-	if (substream->proc_root) {
-		snd_info_unregister(substream->proc_root);
-		substream->proc_root = NULL;
-	}
+	snd_info_free_entry(substream->proc_info_entry);
+	substream->proc_info_entry = NULL;
+	snd_info_free_entry(substream->proc_hw_params_entry);
+	substream->proc_hw_params_entry = NULL;
+	snd_info_free_entry(substream->proc_sw_params_entry);
+	substream->proc_sw_params_entry = NULL;
+	snd_info_free_entry(substream->proc_status_entry);
+	substream->proc_status_entry = NULL;
+	snd_info_free_entry(substream->proc_root);
+	substream->proc_root = NULL;
 	return 0;
 }
 #else /* !CONFIG_SND_VERBOSE_PROCFS */
@@ -1090,8 +1074,7 @@ static void snd_pcm_proc_init(void)
 
 static void snd_pcm_proc_done(void)
 {
-	if (snd_pcm_proc_entry)
-		snd_info_unregister(snd_pcm_proc_entry);
+	snd_info_free_entry(snd_pcm_proc_entry);
 }
 
 #else /* !CONFIG_PROC_FS */
diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
index 067d2056db9a6..be030cb4d3732 100644
--- a/sound/core/pcm_memory.c
+++ b/sound/core/pcm_memory.c
@@ -101,7 +101,7 @@ int snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream)
 {
 	snd_pcm_lib_preallocate_dma_free(substream);
 #ifdef CONFIG_SND_VERBOSE_PROCFS
-	snd_info_unregister(substream->proc_prealloc_entry);
+	snd_info_free_entry(substream->proc_prealloc_entry);
 	substream->proc_prealloc_entry = NULL;
 #endif
 	return 0;
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 8c15c66eb4aa3..51577c22f8ceb 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -1599,7 +1599,7 @@ static int snd_rawmidi_dev_unregister(struct snd_device *device)
 	mutex_lock(&register_mutex);
 	list_del(&rmidi->list);
 	if (rmidi->proc_entry) {
-		snd_info_unregister(rmidi->proc_entry);
+		snd_info_free_entry(rmidi->proc_entry);
 		rmidi->proc_entry = NULL;
 	}
 #ifdef CONFIG_SND_OSSEMUL
diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c
index e7234135641cd..92858cf8b6eb0 100644
--- a/sound/core/seq/oss/seq_oss.c
+++ b/sound/core/seq/oss/seq_oss.c
@@ -303,8 +303,7 @@ register_proc(void)
 static void
 unregister_proc(void)
 {
-	if (info_entry)
-		snd_info_unregister(info_entry);
+	snd_info_free_entry(info_entry);
 	info_entry = NULL;
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c
index 102ff548ce693..b85954e956d47 100644
--- a/sound/core/seq/seq_device.c
+++ b/sound/core/seq/seq_device.c
@@ -573,7 +573,7 @@ static void __exit alsa_seq_device_exit(void)
 {
 	remove_drivers();
 #ifdef CONFIG_PROC_FS
-	snd_info_unregister(info_entry);
+	snd_info_free_entry(info_entry);
 #endif
 	if (num_ops)
 		snd_printk(KERN_ERR "drivers not released (%d)\n", num_ops);
diff --git a/sound/core/seq/seq_info.c b/sound/core/seq/seq_info.c
index 142e9e6882c9a..8a7fe5cca1c90 100644
--- a/sound/core/seq/seq_info.c
+++ b/sound/core/seq/seq_info.c
@@ -64,9 +64,9 @@ int __init snd_seq_info_init(void)
 
 int __exit snd_seq_info_done(void)
 {
-	snd_info_unregister(queues_entry);
-	snd_info_unregister(clients_entry);
-	snd_info_unregister(timer_entry);
+	snd_info_free_entry(queues_entry);
+	snd_info_free_entry(clients_entry);
+	snd_info_free_entry(timer_entry);
 	return 0;
 }
 #endif
diff --git a/sound/core/sound.c b/sound/core/sound.c
index 7edd1fc58b174..b4430db3fa4c6 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -387,8 +387,7 @@ int __init snd_minor_info_init(void)
 
 int __exit snd_minor_info_done(void)
 {
-	if (snd_minor_info_entry)
-		snd_info_unregister(snd_minor_info_entry);
+	snd_info_free_entry(snd_minor_info_entry);
 	return 0;
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c
index 74f0fe5a1ba08..b2fc40aa520b9 100644
--- a/sound/core/sound_oss.c
+++ b/sound/core/sound_oss.c
@@ -270,8 +270,7 @@ int __init snd_minor_info_oss_init(void)
 
 int __exit snd_minor_info_oss_done(void)
 {
-	if (snd_minor_info_oss_entry)
-		snd_info_unregister(snd_minor_info_oss_entry);
+	snd_info_free_entry(snd_minor_info_oss_entry);
 	return 0;
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 0a984e881c108..52ecbe1e9abb1 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1126,7 +1126,7 @@ static void __init snd_timer_proc_init(void)
 
 static void __exit snd_timer_proc_done(void)
 {
-	snd_info_unregister(snd_timer_proc_entry);
+	snd_info_free_entry(snd_timer_proc_entry);
 }
 #else /* !CONFIG_PROC_FS */
 #define snd_timer_proc_init()
diff --git a/sound/drivers/opl4/opl4_proc.c b/sound/drivers/opl4/opl4_proc.c
index e552ec34166fe..11dd811771a4b 100644
--- a/sound/drivers/opl4/opl4_proc.c
+++ b/sound/drivers/opl4/opl4_proc.c
@@ -159,8 +159,7 @@ int snd_opl4_create_proc(struct snd_opl4 *opl4)
 
 void snd_opl4_free_proc(struct snd_opl4 *opl4)
 {
-	if (opl4->proc_entry)
-		snd_info_unregister(opl4->proc_entry);
+	snd_info_free_entry(opl4->proc_entry);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/pci/ac97/ac97_proc.c b/sound/pci/ac97/ac97_proc.c
index 2118df50b9d61..a3fdd7da911cd 100644
--- a/sound/pci/ac97/ac97_proc.c
+++ b/sound/pci/ac97/ac97_proc.c
@@ -457,14 +457,10 @@ void snd_ac97_proc_init(struct snd_ac97 * ac97)
 
 void snd_ac97_proc_done(struct snd_ac97 * ac97)
 {
-	if (ac97->proc_regs) {
-		snd_info_unregister(ac97->proc_regs);
-		ac97->proc_regs = NULL;
-	}
-	if (ac97->proc) {
-		snd_info_unregister(ac97->proc);
-		ac97->proc = NULL;
-	}
+	snd_info_free_entry(ac97->proc_regs);
+	ac97->proc_regs = NULL;
+	snd_info_free_entry(ac97->proc);
+	ac97->proc = NULL;
 }
 
 void snd_ac97_bus_proc_init(struct snd_ac97_bus * bus)
@@ -485,8 +481,6 @@ void snd_ac97_bus_proc_init(struct snd_ac97_bus * bus)
 
 void snd_ac97_bus_proc_done(struct snd_ac97_bus * bus)
 {
-	if (bus->proc) {
-		snd_info_unregister(bus->proc);
-		bus->proc = NULL;
-	}
+	snd_info_free_entry(bus->proc);
+	bus->proc = NULL;
 }
diff --git a/sound/pci/cs46xx/dsp_spos.c b/sound/pci/cs46xx/dsp_spos.c
index 5c9711c0265c8..89c402770a1d1 100644
--- a/sound/pci/cs46xx/dsp_spos.c
+++ b/sound/pci/cs46xx/dsp_spos.c
@@ -868,35 +868,23 @@ int cs46xx_dsp_proc_done (struct snd_cs46xx *chip)
 	struct dsp_spos_instance * ins = chip->dsp_spos_instance;
 	int i;
 
-	if (ins->proc_sym_info_entry) {
-		snd_info_unregister(ins->proc_sym_info_entry);
-		ins->proc_sym_info_entry = NULL;
-	}
-  
-	if (ins->proc_modules_info_entry) {
-		snd_info_unregister(ins->proc_modules_info_entry);
-		ins->proc_modules_info_entry = NULL;
-	}
- 
-	if (ins->proc_parameter_dump_info_entry) {
-		snd_info_unregister(ins->proc_parameter_dump_info_entry);
-		ins->proc_parameter_dump_info_entry = NULL;
-	}
-  
-	if (ins->proc_sample_dump_info_entry) {
-		snd_info_unregister(ins->proc_sample_dump_info_entry);
-		ins->proc_sample_dump_info_entry = NULL;
-	}
-  
-	if (ins->proc_scb_info_entry) {
-		snd_info_unregister(ins->proc_scb_info_entry);
-		ins->proc_scb_info_entry = NULL;
-	}
-  
-	if (ins->proc_task_info_entry) {
-		snd_info_unregister(ins->proc_task_info_entry);
-		ins->proc_task_info_entry = NULL;
-	}
+	snd_info_free_entry(ins->proc_sym_info_entry);
+	ins->proc_sym_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_modules_info_entry);
+	ins->proc_modules_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_parameter_dump_info_entry);
+	ins->proc_parameter_dump_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_sample_dump_info_entry);
+	ins->proc_sample_dump_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_scb_info_entry);
+	ins->proc_scb_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_task_info_entry);
+	ins->proc_task_info_entry = NULL;
 
 	mutex_lock(&chip->spos_mutex);
 	for (i = 0; i < ins->nscb; ++i) {
@@ -905,10 +893,8 @@ int cs46xx_dsp_proc_done (struct snd_cs46xx *chip)
 	}
 	mutex_unlock(&chip->spos_mutex);
 
-	if (ins->proc_dsp_dir) {
-		snd_info_unregister (ins->proc_dsp_dir);
-		ins->proc_dsp_dir = NULL;
-	}
+	snd_info_free_entry(ins->proc_dsp_dir);
+	ins->proc_dsp_dir = NULL;
 
 	return 0;
 }
diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 232b337852fff..343f51d5311b8 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -233,7 +233,7 @@ void cs46xx_dsp_proc_free_scb_desc (struct dsp_scb_descriptor * scb)
 
 		snd_printdd("cs46xx_dsp_proc_free_scb_desc: freeing %s\n",scb->scb_name);
 
-		snd_info_unregister(scb->proc_info);
+		snd_info_free_entry(scb->proc_info);
 		scb->proc_info = NULL;
 
 		snd_assert (scb_info != NULL, return);
diff --git a/sound/synth/emux/emux_proc.c b/sound/synth/emux/emux_proc.c
index 58b9601f3ad0c..59144ec026e4c 100644
--- a/sound/synth/emux/emux_proc.c
+++ b/sound/synth/emux/emux_proc.c
@@ -128,10 +128,8 @@ void snd_emux_proc_init(struct snd_emux *emu, struct snd_card *card, int device)
 
 void snd_emux_proc_free(struct snd_emux *emu)
 {
-	if (emu->proc) {
-		snd_info_unregister(emu->proc);
-		emu->proc = NULL;
-	}
+	snd_info_free_entry(emu->proc);
+	emu->proc = NULL;
 }
 
 #endif /* CONFIG_PROC_FS */
-- 
GitLab


From c461482c8072bb073e6146db320d3da85cdc89ad Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 23 Jun 2006 14:38:23 +0200
Subject: [PATCH 0882/1063] [ALSA] Unregister device files at disconnection

Originally proposed by Sam Revitch <sam.revitch@gmail.com>.
Unregister device files at disconnection to avoid further accesses.
Also, the dev_unregister callback is removed and replaced with the
combination of disconnect + free.
A new function, snd_card_free_when_closed(), is introduced, which is
used in the USB disconnect callback.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/core.h        |  3 +-
 include/sound/timer.h       |  1 -
 sound/core/control.c        | 27 +++++----------
 sound/core/device.c         | 20 +++++------
 sound/core/hwdep.c          | 10 +++---
 sound/core/init.c           | 69 +++++++++++++++++++++++++++++--------
 sound/core/oss/mixer_oss.c  | 16 ++++-----
 sound/core/oss/pcm_oss.c    | 16 ++++-----
 sound/core/pcm.c            | 48 ++++++++++----------------
 sound/core/rawmidi.c        | 35 +++++--------------
 sound/core/rtctimer.c       |  2 +-
 sound/core/seq/seq_device.c | 11 ------
 sound/core/timer.c          | 52 ++++++++++++----------------
 sound/pci/ac97/ac97_codec.c |  8 ++---
 sound/usb/usbaudio.c        |  2 +-
 15 files changed, 148 insertions(+), 172 deletions(-)

diff --git a/include/sound/core.h b/include/sound/core.h
index bab3ff457e40c..cf4001cf62484 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -71,7 +71,6 @@ struct snd_device_ops {
 	int (*dev_free)(struct snd_device *dev);
 	int (*dev_register)(struct snd_device *dev);
 	int (*dev_disconnect)(struct snd_device *dev);
-	int (*dev_unregister)(struct snd_device *dev);
 };
 
 struct snd_device {
@@ -131,6 +130,7 @@ struct snd_card {
 								state */
 	spinlock_t files_lock;		/* lock the files for this card */
 	int shutdown;			/* this card is going down */
+	int free_on_last_close;		/* free in context of file_release */
 	wait_queue_head_t shutdown_sleep;
 	struct work_struct free_workq;	/* for free in workqueue */
 	struct device *dev;
@@ -244,6 +244,7 @@ struct snd_card *snd_card_new(int idx, const char *id,
 			 struct module *module, int extra_size);
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
+int snd_card_free_when_closed(struct snd_card *card);
 int snd_card_free_in_thread(struct snd_card *card);
 int snd_card_register(struct snd_card *card);
 int snd_card_info_init(void);
diff --git a/include/sound/timer.h b/include/sound/timer.h
index 5ece2bf541dce..d42c083db1dac 100644
--- a/include/sound/timer.h
+++ b/include/sound/timer.h
@@ -129,7 +129,6 @@ void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstam
 int snd_timer_global_new(char *id, int device, struct snd_timer **rtimer);
 int snd_timer_global_free(struct snd_timer *timer);
 int snd_timer_global_register(struct snd_timer *timer);
-int snd_timer_global_unregister(struct snd_timer *timer);
 
 int snd_timer_open(struct snd_timer_instance **ti, char *owner, struct snd_timer_id *tid, unsigned int slave_id);
 int snd_timer_close(struct snd_timer_instance *timeri);
diff --git a/sound/core/control.c b/sound/core/control.c
index e9c8854d2f7b4..f0c7272a2d48d 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1375,6 +1375,11 @@ static int snd_ctl_dev_disconnect(struct snd_device *device)
 	struct snd_card *card = device->device_data;
 	struct list_head *flist;
 	struct snd_ctl_file *ctl;
+	int err, cardnum;
+
+	snd_assert(card != NULL, return -ENXIO);
+	cardnum = card->number;
+	snd_assert(cardnum >= 0 && cardnum < SNDRV_CARDS, return -ENXIO);
 
 	down_read(&card->controls_rwsem);
 	list_for_each(flist, &card->ctl_files) {
@@ -1383,6 +1388,10 @@ static int snd_ctl_dev_disconnect(struct snd_device *device)
 		kill_fasync(&ctl->fasync, SIGIO, POLL_ERR);
 	}
 	up_read(&card->controls_rwsem);
+
+	if ((err = snd_unregister_device(SNDRV_DEVICE_TYPE_CONTROL,
+					 card, -1)) < 0)
+		return err;
 	return 0;
 }
 
@@ -1403,23 +1412,6 @@ static int snd_ctl_dev_free(struct snd_device *device)
 	return 0;
 }
 
-/*
- * de-registration of the control device
- */
-static int snd_ctl_dev_unregister(struct snd_device *device)
-{
-	struct snd_card *card = device->device_data;
-	int err, cardnum;
-
-	snd_assert(card != NULL, return -ENXIO);
-	cardnum = card->number;
-	snd_assert(cardnum >= 0 && cardnum < SNDRV_CARDS, return -ENXIO);
-	if ((err = snd_unregister_device(SNDRV_DEVICE_TYPE_CONTROL,
-					 card, -1)) < 0)
-		return err;
-	return snd_ctl_dev_free(device);
-}
-
 /*
  * create control core:
  * called from init.c
@@ -1430,7 +1422,6 @@ int snd_ctl_create(struct snd_card *card)
 		.dev_free = snd_ctl_dev_free,
 		.dev_register =	snd_ctl_dev_register,
 		.dev_disconnect = snd_ctl_dev_disconnect,
-		.dev_unregister = snd_ctl_dev_unregister
 	};
 
 	snd_assert(card != NULL, return -ENXIO);
diff --git a/sound/core/device.c b/sound/core/device.c
index 6ce4da4a10819..ccb25816ac9ec 100644
--- a/sound/core/device.c
+++ b/sound/core/device.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(snd_device_new);
  * @device_data: the data pointer to release
  *
  * Removes the device from the list on the card and invokes the
- * callback, dev_unregister or dev_free, corresponding to the state.
+ * callbacks, dev_disconnect and dev_free, corresponding to the state.
  * Then release the device.
  *
  * Returns zero if successful, or a negative error code on failure or if the
@@ -90,16 +90,14 @@ int snd_device_free(struct snd_card *card, void *device_data)
 			continue;
 		/* unlink */
 		list_del(&dev->list);
-		if ((dev->state == SNDRV_DEV_REGISTERED ||
-		     dev->state == SNDRV_DEV_DISCONNECTED) &&
-		    dev->ops->dev_unregister) {
-			if (dev->ops->dev_unregister(dev))
-				snd_printk(KERN_ERR "device unregister failure\n");
-		} else {
-			if (dev->ops->dev_free) {
-				if (dev->ops->dev_free(dev))
-					snd_printk(KERN_ERR "device free failure\n");
-			}
+		if (dev->state == SNDRV_DEV_REGISTERED &&
+		    dev->ops->dev_disconnect)
+			if (dev->ops->dev_disconnect(dev))
+				snd_printk(KERN_ERR
+					   "device disconnect failure\n");
+		if (dev->ops->dev_free) {
+			if (dev->ops->dev_free(dev))
+				snd_printk(KERN_ERR "device free failure\n");
 		}
 		kfree(dev);
 		return 0;
diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c
index cbd8a63282b69..9aa9d94891f0a 100644
--- a/sound/core/hwdep.c
+++ b/sound/core/hwdep.c
@@ -42,7 +42,7 @@ static DEFINE_MUTEX(register_mutex);
 static int snd_hwdep_free(struct snd_hwdep *hwdep);
 static int snd_hwdep_dev_free(struct snd_device *device);
 static int snd_hwdep_dev_register(struct snd_device *device);
-static int snd_hwdep_dev_unregister(struct snd_device *device);
+static int snd_hwdep_dev_disconnect(struct snd_device *device);
 
 
 static struct snd_hwdep *snd_hwdep_search(struct snd_card *card, int device)
@@ -353,7 +353,7 @@ int snd_hwdep_new(struct snd_card *card, char *id, int device,
 	static struct snd_device_ops ops = {
 		.dev_free = snd_hwdep_dev_free,
 		.dev_register = snd_hwdep_dev_register,
-		.dev_unregister = snd_hwdep_dev_unregister
+		.dev_disconnect = snd_hwdep_dev_disconnect,
 	};
 
 	snd_assert(rhwdep != NULL, return -EINVAL);
@@ -439,7 +439,7 @@ static int snd_hwdep_dev_register(struct snd_device *device)
 	return 0;
 }
 
-static int snd_hwdep_dev_unregister(struct snd_device *device)
+static int snd_hwdep_dev_disconnect(struct snd_device *device)
 {
 	struct snd_hwdep *hwdep = device->device_data;
 
@@ -454,9 +454,9 @@ static int snd_hwdep_dev_unregister(struct snd_device *device)
 		snd_unregister_oss_device(hwdep->oss_type, hwdep->card, hwdep->device);
 #endif
 	snd_unregister_device(SNDRV_DEVICE_TYPE_HWDEP, hwdep->card, hwdep->device);
-	list_del(&hwdep->list);
+	list_del_init(&hwdep->list);
 	mutex_unlock(&register_mutex);
-	return snd_hwdep_free(hwdep);
+	return 0;
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/sound/core/init.c b/sound/core/init.c
index 1ecb029ff4c91..5850d99d21e32 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -327,22 +327,10 @@ EXPORT_SYMBOL(snd_card_disconnect);
  *  Returns zero. Frees all associated devices and frees the control
  *  interface associated to given soundcard.
  */
-int snd_card_free(struct snd_card *card)
+static int snd_card_do_free(struct snd_card *card)
 {
 	struct snd_shutdown_f_ops *s_f_ops;
 
-	if (card == NULL)
-		return -EINVAL;
-	mutex_lock(&snd_card_mutex);
-	snd_cards[card->number] = NULL;
-	mutex_unlock(&snd_card_mutex);
-
-#ifdef CONFIG_PM
-	wake_up(&card->power_sleep);
-#endif
-	/* wait, until all devices are ready for the free operation */
-	wait_event(card->shutdown_sleep, card->files == NULL);
-
 #if defined(CONFIG_SND_MIXER_OSS) || defined(CONFIG_SND_MIXER_OSS_MODULE)
 	if (snd_mixer_oss_notify_callback)
 		snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_FREE);
@@ -371,10 +359,55 @@ int snd_card_free(struct snd_card *card)
 		card->s_f_ops = s_f_ops->next;
 		kfree(s_f_ops);
 	}
+	kfree(card);
+	return 0;
+}
+
+static int snd_card_free_prepare(struct snd_card *card)
+{
+	if (card == NULL)
+		return -EINVAL;
+	(void) snd_card_disconnect(card);
 	mutex_lock(&snd_card_mutex);
+	snd_cards[card->number] = NULL;
 	snd_cards_lock &= ~(1 << card->number);
 	mutex_unlock(&snd_card_mutex);
-	kfree(card);
+#ifdef CONFIG_PM
+	wake_up(&card->power_sleep);
+#endif
+	return 0;
+}
+
+int snd_card_free_when_closed(struct snd_card *card)
+{
+	int free_now = 0;
+	int ret = snd_card_free_prepare(card);
+	if (ret)
+		return ret;
+
+	spin_lock(&card->files_lock);
+	if (card->files == NULL)
+		free_now = 1;
+	else
+		card->free_on_last_close = 1;
+	spin_unlock(&card->files_lock);
+
+	if (free_now)
+		snd_card_do_free(card);
+	return 0;
+}
+
+EXPORT_SYMBOL(snd_card_free_when_closed);
+
+int snd_card_free(struct snd_card *card)
+{
+	int ret = snd_card_free_prepare(card);
+	if (ret)
+		return ret;
+
+	/* wait, until all devices are ready for the free operation */
+	wait_event(card->shutdown_sleep, card->files == NULL);
+	snd_card_do_free(card);
 	return 0;
 }
 
@@ -718,6 +751,7 @@ EXPORT_SYMBOL(snd_card_file_add);
 int snd_card_file_remove(struct snd_card *card, struct file *file)
 {
 	struct snd_monitor_file *mfile, *pfile = NULL;
+	int last_close = 0;
 
 	spin_lock(&card->files_lock);
 	mfile = card->files;
@@ -732,9 +766,14 @@ int snd_card_file_remove(struct snd_card *card, struct file *file)
 		pfile = mfile;
 		mfile = mfile->next;
 	}
-	spin_unlock(&card->files_lock);
 	if (card->files == NULL)
+		last_close = 1;
+	spin_unlock(&card->files_lock);
+	if (last_close) {
 		wake_up(&card->shutdown_sleep);
+		if (card->free_on_last_close)
+			snd_card_do_free(card);
+	}
 	if (!mfile) {
 		snd_printk(KERN_ERR "ALSA card file remove problem (%p)\n", file);
 		return -ENOENT;
diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c
index 00c95def95aa5..f4c67042e3ac1 100644
--- a/sound/core/oss/mixer_oss.c
+++ b/sound/core/oss/mixer_oss.c
@@ -1310,21 +1310,19 @@ static int snd_mixer_oss_notify_handler(struct snd_card *card, int cmd)
 		card->mixer_oss = mixer;
 		snd_mixer_oss_build(mixer);
 		snd_mixer_oss_proc_init(mixer);
-	} else if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT) {
-		mixer = card->mixer_oss;
-		if (mixer == NULL || !mixer->oss_dev_alloc)
-			return 0;
-		snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0);
-		mixer->oss_dev_alloc = 0;
-	} else {		/* free */
+	} else {
 		mixer = card->mixer_oss;
 		if (mixer == NULL)
 			return 0;
+		if (mixer->oss_dev_alloc) {
 #ifdef SNDRV_OSS_INFO_DEV_MIXERS
-		snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number);
+			snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number);
 #endif
-		if (mixer->oss_dev_alloc)
 			snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0);
+			mixer->oss_dev_alloc = 0;
+		}
+		if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT)
+			return 0;
 		snd_mixer_oss_proc_done(mixer);
 		return snd_mixer_oss_free1(mixer);
 	}
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index a92b93e5ebd58..505b23ec4058c 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -2929,25 +2929,23 @@ static int snd_pcm_oss_disconnect_minor(struct snd_pcm *pcm)
 			snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM,
 						  pcm->card, 1);
 		}
-	}
-	return 0;
-}
-
-static int snd_pcm_oss_unregister_minor(struct snd_pcm *pcm)
-{
-	snd_pcm_oss_disconnect_minor(pcm);
-	if (pcm->oss.reg) {
 		if (dsp_map[pcm->card->number] == (int)pcm->device) {
 #ifdef SNDRV_OSS_INFO_DEV_AUDIO
 			snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number);
 #endif
 		}
 		pcm->oss.reg = 0;
-		snd_pcm_oss_proc_done(pcm);
 	}
 	return 0;
 }
 
+static int snd_pcm_oss_unregister_minor(struct snd_pcm *pcm)
+{
+	snd_pcm_oss_disconnect_minor(pcm);
+	snd_pcm_oss_proc_done(pcm);
+	return 0;
+}
+
 static struct snd_pcm_notify snd_pcm_oss_notify =
 {
 	.n_register =	snd_pcm_oss_register_minor,
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index b8602471f7e52..f52178abf1209 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -42,7 +42,6 @@ static int snd_pcm_free(struct snd_pcm *pcm);
 static int snd_pcm_dev_free(struct snd_device *device);
 static int snd_pcm_dev_register(struct snd_device *device);
 static int snd_pcm_dev_disconnect(struct snd_device *device);
-static int snd_pcm_dev_unregister(struct snd_device *device);
 
 static struct snd_pcm *snd_pcm_search(struct snd_card *card, int device)
 {
@@ -680,7 +679,6 @@ int snd_pcm_new(struct snd_card *card, char *id, int device,
 		.dev_free = snd_pcm_dev_free,
 		.dev_register =	snd_pcm_dev_register,
 		.dev_disconnect = snd_pcm_dev_disconnect,
-		.dev_unregister = snd_pcm_dev_unregister
 	};
 
 	snd_assert(rpcm != NULL, return -EINVAL);
@@ -724,6 +722,7 @@ static void snd_pcm_free_stream(struct snd_pcm_str * pstr)
 	substream = pstr->substream;
 	while (substream) {
 		substream_next = substream->next;
+		snd_pcm_timer_done(substream);
 		snd_pcm_substream_proc_done(substream);
 		kfree(substream);
 		substream = substream_next;
@@ -740,7 +739,12 @@ static void snd_pcm_free_stream(struct snd_pcm_str * pstr)
 
 static int snd_pcm_free(struct snd_pcm *pcm)
 {
+	struct snd_pcm_notify *notify;
+
 	snd_assert(pcm != NULL, return -ENXIO);
+	list_for_each_entry(notify, &snd_pcm_notify_list, list) {
+		notify->n_unregister(pcm);
+	}
 	if (pcm->private_free)
 		pcm->private_free(pcm);
 	snd_pcm_lib_preallocate_free_for_all(pcm);
@@ -955,35 +959,22 @@ static int snd_pcm_dev_register(struct snd_device *device)
 static int snd_pcm_dev_disconnect(struct snd_device *device)
 {
 	struct snd_pcm *pcm = device->device_data;
-	struct list_head *list;
+	struct snd_pcm_notify *notify;
 	struct snd_pcm_substream *substream;
-	int cidx;
+	int cidx, devtype;
 
 	mutex_lock(&register_mutex);
+	if (list_empty(&pcm->list))
+		goto unlock;
+
 	list_del_init(&pcm->list);
 	for (cidx = 0; cidx < 2; cidx++)
 		for (substream = pcm->streams[cidx].substream; substream; substream = substream->next)
 			if (substream->runtime)
 				substream->runtime->status->state = SNDRV_PCM_STATE_DISCONNECTED;
-	list_for_each(list, &snd_pcm_notify_list) {
-		struct snd_pcm_notify *notify;
-		notify = list_entry(list, struct snd_pcm_notify, list);
+	list_for_each_entry(notify, &snd_pcm_notify_list, list) {
 		notify->n_disconnect(pcm);
 	}
-	mutex_unlock(&register_mutex);
-	return 0;
-}
-
-static int snd_pcm_dev_unregister(struct snd_device *device)
-{
-	int cidx, devtype;
-	struct snd_pcm_substream *substream;
-	struct list_head *list;
-	struct snd_pcm *pcm = device->device_data;
-
-	snd_assert(pcm != NULL, return -ENXIO);
-	mutex_lock(&register_mutex);
-	list_del(&pcm->list);
 	for (cidx = 0; cidx < 2; cidx++) {
 		devtype = -1;
 		switch (cidx) {
@@ -995,23 +986,20 @@ static int snd_pcm_dev_unregister(struct snd_device *device)
 			break;
 		}
 		snd_unregister_device(devtype, pcm->card, pcm->device);
-		for (substream = pcm->streams[cidx].substream; substream; substream = substream->next)
-			snd_pcm_timer_done(substream);
-	}
-	list_for_each(list, &snd_pcm_notify_list) {
-		struct snd_pcm_notify *notify;
-		notify = list_entry(list, struct snd_pcm_notify, list);
-		notify->n_unregister(pcm);
 	}
+ unlock:
 	mutex_unlock(&register_mutex);
-	return snd_pcm_free(pcm);
+	return 0;
 }
 
 int snd_pcm_notify(struct snd_pcm_notify *notify, int nfree)
 {
 	struct list_head *p;
 
-	snd_assert(notify != NULL && notify->n_register != NULL && notify->n_unregister != NULL, return -EINVAL);
+	snd_assert(notify != NULL &&
+		   notify->n_register != NULL &&
+		   notify->n_unregister != NULL &&
+		   notify->n_disconnect, return -EINVAL);
 	mutex_lock(&register_mutex);
 	if (nfree) {
 		list_del(&notify->list);
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 51577c22f8ceb..8a2bdfae63e39 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -55,7 +55,6 @@ static int snd_rawmidi_free(struct snd_rawmidi *rawmidi);
 static int snd_rawmidi_dev_free(struct snd_device *device);
 static int snd_rawmidi_dev_register(struct snd_device *device);
 static int snd_rawmidi_dev_disconnect(struct snd_device *device);
-static int snd_rawmidi_dev_unregister(struct snd_device *device);
 
 static LIST_HEAD(snd_rawmidi_devices);
 static DEFINE_MUTEX(register_mutex);
@@ -1426,7 +1425,6 @@ int snd_rawmidi_new(struct snd_card *card, char *id, int device,
 		.dev_free = snd_rawmidi_dev_free,
 		.dev_register = snd_rawmidi_dev_register,
 		.dev_disconnect = snd_rawmidi_dev_disconnect,
-		.dev_unregister = snd_rawmidi_dev_unregister
 	};
 
 	snd_assert(rrawmidi != NULL, return -EINVAL);
@@ -1479,6 +1477,14 @@ static void snd_rawmidi_free_substreams(struct snd_rawmidi_str *stream)
 static int snd_rawmidi_free(struct snd_rawmidi *rmidi)
 {
 	snd_assert(rmidi != NULL, return -ENXIO);	
+
+	snd_info_free_entry(rmidi->proc_entry);
+	rmidi->proc_entry = NULL;
+	mutex_lock(&register_mutex);
+	if (rmidi->ops && rmidi->ops->dev_unregister)
+		rmidi->ops->dev_unregister(rmidi);
+	mutex_unlock(&register_mutex);
+
 	snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]);
 	snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]);
 	if (rmidi->private_free)
@@ -1587,21 +1593,6 @@ static int snd_rawmidi_dev_disconnect(struct snd_device *device)
 
 	mutex_lock(&register_mutex);
 	list_del_init(&rmidi->list);
-	mutex_unlock(&register_mutex);
-	return 0;
-}
-
-static int snd_rawmidi_dev_unregister(struct snd_device *device)
-{
-	struct snd_rawmidi *rmidi = device->device_data;
-
-	snd_assert(rmidi != NULL, return -ENXIO);
-	mutex_lock(&register_mutex);
-	list_del(&rmidi->list);
-	if (rmidi->proc_entry) {
-		snd_info_free_entry(rmidi->proc_entry);
-		rmidi->proc_entry = NULL;
-	}
 #ifdef CONFIG_SND_OSSEMUL
 	if (rmidi->ossreg) {
 		if ((int)rmidi->device == midi_map[rmidi->card->number]) {
@@ -1615,17 +1606,9 @@ static int snd_rawmidi_dev_unregister(struct snd_device *device)
 		rmidi->ossreg = 0;
 	}
 #endif /* CONFIG_SND_OSSEMUL */
-	if (rmidi->ops && rmidi->ops->dev_unregister)
-		rmidi->ops->dev_unregister(rmidi);
 	snd_unregister_device(SNDRV_DEVICE_TYPE_RAWMIDI, rmidi->card, rmidi->device);
 	mutex_unlock(&register_mutex);
-#if defined(CONFIG_SND_SEQUENCER) || (defined(MODULE) && defined(CONFIG_SND_SEQUENCER_MODULE))
-	if (rmidi->seq_dev) {
-		snd_device_free(rmidi->card, rmidi->seq_dev);
-		rmidi->seq_dev = NULL;
-	}
-#endif
-	return snd_rawmidi_free(rmidi);
+	return 0;
 }
 
 /**
diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c
index 84704ccb18293..412dd62b654ec 100644
--- a/sound/core/rtctimer.c
+++ b/sound/core/rtctimer.c
@@ -156,7 +156,7 @@ static int __init rtctimer_init(void)
 static void __exit rtctimer_exit(void)
 {
 	if (rtctimer) {
-		snd_timer_global_unregister(rtctimer);
+		snd_timer_global_free(rtctimer);
 		rtctimer = NULL;
 	}
 }
diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c
index b85954e956d47..b79d011813c02 100644
--- a/sound/core/seq/seq_device.c
+++ b/sound/core/seq/seq_device.c
@@ -90,7 +90,6 @@ static int snd_seq_device_free(struct snd_seq_device *dev);
 static int snd_seq_device_dev_free(struct snd_device *device);
 static int snd_seq_device_dev_register(struct snd_device *device);
 static int snd_seq_device_dev_disconnect(struct snd_device *device);
-static int snd_seq_device_dev_unregister(struct snd_device *device);
 
 static int init_device(struct snd_seq_device *dev, struct ops_list *ops);
 static int free_device(struct snd_seq_device *dev, struct ops_list *ops);
@@ -189,7 +188,6 @@ int snd_seq_device_new(struct snd_card *card, int device, char *id, int argsize,
 		.dev_free = snd_seq_device_dev_free,
 		.dev_register = snd_seq_device_dev_register,
 		.dev_disconnect = snd_seq_device_dev_disconnect,
-		.dev_unregister = snd_seq_device_dev_unregister
 	};
 
 	if (result)
@@ -308,15 +306,6 @@ static int snd_seq_device_dev_disconnect(struct snd_device *device)
 	return 0;
 }
 
-/*
- * unregister the existing device
- */
-static int snd_seq_device_dev_unregister(struct snd_device *device)
-{
-	struct snd_seq_device *dev = device->device_data;
-	return snd_seq_device_free(dev);
-}
-
 /*
  * register device driver
  * id = driver id
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 52ecbe1e9abb1..7e5e562fe356e 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -88,7 +88,7 @@ static DEFINE_MUTEX(register_mutex);
 static int snd_timer_free(struct snd_timer *timer);
 static int snd_timer_dev_free(struct snd_device *device);
 static int snd_timer_dev_register(struct snd_device *device);
-static int snd_timer_dev_unregister(struct snd_device *device);
+static int snd_timer_dev_disconnect(struct snd_device *device);
 
 static void snd_timer_reschedule(struct snd_timer * timer, unsigned long ticks_left);
 
@@ -773,7 +773,7 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid,
 	static struct snd_device_ops ops = {
 		.dev_free = snd_timer_dev_free,
 		.dev_register = snd_timer_dev_register,
-		.dev_unregister = snd_timer_dev_unregister
+		.dev_disconnect = snd_timer_dev_disconnect,
 	};
 
 	snd_assert(tid != NULL, return -EINVAL);
@@ -813,6 +813,21 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid,
 static int snd_timer_free(struct snd_timer *timer)
 {
 	snd_assert(timer != NULL, return -ENXIO);
+
+	mutex_lock(&register_mutex);
+	if (! list_empty(&timer->open_list_head)) {
+		struct list_head *p, *n;
+		struct snd_timer_instance *ti;
+		snd_printk(KERN_WARNING "timer %p is busy?\n", timer);
+		list_for_each_safe(p, n, &timer->open_list_head) {
+			list_del_init(p);
+			ti = list_entry(p, struct snd_timer_instance, open_list);
+			ti->timer = NULL;
+		}
+	}
+	list_del(&timer->device_list);
+	mutex_unlock(&register_mutex);
+
 	if (timer->private_free)
 		timer->private_free(timer);
 	kfree(timer);
@@ -867,30 +882,13 @@ static int snd_timer_dev_register(struct snd_device *dev)
 	return 0;
 }
 
-static int snd_timer_unregister(struct snd_timer *timer)
+static int snd_timer_dev_disconnect(struct snd_device *device)
 {
-	struct list_head *p, *n;
-	struct snd_timer_instance *ti;
-
-	snd_assert(timer != NULL, return -ENXIO);
+	struct snd_timer *timer = device->device_data;
 	mutex_lock(&register_mutex);
-	if (! list_empty(&timer->open_list_head)) {
-		snd_printk(KERN_WARNING "timer 0x%lx is busy?\n", (long)timer);
-		list_for_each_safe(p, n, &timer->open_list_head) {
-			list_del_init(p);
-			ti = list_entry(p, struct snd_timer_instance, open_list);
-			ti->timer = NULL;
-		}
-	}
-	list_del(&timer->device_list);
+	list_del_init(&timer->device_list);
 	mutex_unlock(&register_mutex);
-	return snd_timer_free(timer);
-}
-
-static int snd_timer_dev_unregister(struct snd_device *device)
-{
-	struct snd_timer *timer = device->device_data;
-	return snd_timer_unregister(timer);
+	return 0;
 }
 
 void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstamp)
@@ -955,11 +953,6 @@ int snd_timer_global_register(struct snd_timer *timer)
 	return snd_timer_dev_register(&dev);
 }
 
-int snd_timer_global_unregister(struct snd_timer *timer)
-{
-	return snd_timer_unregister(timer);
-}
-
 /*
  *  System timer
  */
@@ -1982,7 +1975,7 @@ static void __exit alsa_timer_exit(void)
 	/* unregister the system timer */
 	list_for_each_safe(p, n, &snd_timer_list) {
 		struct snd_timer *timer = list_entry(p, struct snd_timer, device_list);
-		snd_timer_unregister(timer);
+		snd_timer_free(timer);
 	}
 	snd_timer_proc_done();
 #ifdef SNDRV_OSS_INFO_DEV_TIMERS
@@ -2005,5 +1998,4 @@ EXPORT_SYMBOL(snd_timer_notify);
 EXPORT_SYMBOL(snd_timer_global_new);
 EXPORT_SYMBOL(snd_timer_global_free);
 EXPORT_SYMBOL(snd_timer_global_register);
-EXPORT_SYMBOL(snd_timer_global_unregister);
 EXPORT_SYMBOL(snd_timer_interrupt);
diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index 51e83d7a839a3..b35280ca2465d 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -1817,13 +1817,13 @@ static int snd_ac97_dev_register(struct snd_device *device)
 	return 0;
 }
 
-/* unregister ac97 codec */
-static int snd_ac97_dev_unregister(struct snd_device *device)
+/* disconnect ac97 codec */
+static int snd_ac97_dev_disconnect(struct snd_device *device)
 {
 	struct snd_ac97 *ac97 = device->device_data;
 	if (ac97->dev.bus)
 		device_unregister(&ac97->dev);
-	return snd_ac97_free(ac97);
+	return 0;
 }
 
 /* build_ops to do nothing */
@@ -1860,7 +1860,7 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template,
 	static struct snd_device_ops ops = {
 		.dev_free =	snd_ac97_dev_free,
 		.dev_register =	snd_ac97_dev_register,
-		.dev_unregister =	snd_ac97_dev_unregister,
+		.dev_disconnect =	snd_ac97_dev_disconnect,
 	};
 
 	snd_assert(rac97 != NULL, return -EINVAL);
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 1b7f499c549da..3144313859137 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -3499,7 +3499,7 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr)
 		}
 		usb_chip[chip->index] = NULL;
 		mutex_unlock(&register_mutex);
-		snd_card_free(card);
+		snd_card_free_when_closed(card);
 	} else {
 		mutex_unlock(&register_mutex);
 	}
-- 
GitLab


From 2b29b13c5794f648cd5e839796496704d787f5a6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 23 Jun 2006 14:38:26 +0200
Subject: [PATCH 0883/1063] [ALSA] Deprecate snd_card_free_in_thread()

Deprecated snd_card_free_in_thread(), replaced with
snd_card_free_when_closed().

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../alsa/DocBook/writing-an-alsa-driver.tmpl  |  5 +-
 include/sound/core.h                          |  3 -
 sound/core/init.c                             | 56 +------------------
 sound/drivers/mpu401/mpu401.c                 |  2 +-
 sound/pcmcia/pdaudiocf/pdaudiocf.c            |  2 +-
 sound/pcmcia/vx/vxpocket.c                    |  4 +-
 6 files changed, 9 insertions(+), 63 deletions(-)

diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
index b8dc51ca776c8..4807ef79a94d8 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
@@ -1054,9 +1054,8 @@
 
       <para>
       For a device which allows hotplugging, you can use
-      <function>snd_card_free_in_thread</function>.  This one will
-      postpone the destruction and wait in a kernel-thread until all
-      devices are closed.
+      <function>snd_card_free_when_closed</function>.  This one will
+      postpone the destruction until all devices are closed.
       </para>
 
     </section>
diff --git a/include/sound/core.h b/include/sound/core.h
index cf4001cf62484..1359c532b68e3 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -25,7 +25,6 @@
 #include <linux/sched.h>		/* wake_up() */
 #include <linux/mutex.h>		/* struct mutex */
 #include <linux/rwsem.h>		/* struct rw_semaphore */
-#include <linux/workqueue.h>		/* struct workqueue_struct */
 #include <linux/pm.h>			/* pm_message_t */
 
 /* forward declarations */
@@ -132,7 +131,6 @@ struct snd_card {
 	int shutdown;			/* this card is going down */
 	int free_on_last_close;		/* free in context of file_release */
 	wait_queue_head_t shutdown_sleep;
-	struct work_struct free_workq;	/* for free in workqueue */
 	struct device *dev;
 
 #ifdef CONFIG_PM
@@ -245,7 +243,6 @@ struct snd_card *snd_card_new(int idx, const char *id,
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
 int snd_card_free_when_closed(struct snd_card *card);
-int snd_card_free_in_thread(struct snd_card *card);
 int snd_card_register(struct snd_card *card);
 int snd_card_info_init(void);
 int snd_card_info_done(void);
diff --git a/sound/core/init.c b/sound/core/init.c
index 5850d99d21e32..d7607a25acdf7 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -81,8 +81,6 @@ static inline int init_info_for_card(struct snd_card *card)
 #define init_info_for_card(card)
 #endif
 
-static void snd_card_free_thread(void * __card);
-
 /**
  *  snd_card_new - create and initialize a soundcard structure
  *  @idx: card index (address) [0 ... (SNDRV_CARDS-1)]
@@ -145,7 +143,6 @@ struct snd_card *snd_card_new(int idx, const char *xid,
 	INIT_LIST_HEAD(&card->ctl_files);
 	spin_lock_init(&card->files_lock);
 	init_waitqueue_head(&card->shutdown_sleep);
-	INIT_WORK(&card->free_workq, snd_card_free_thread, card);
 #ifdef CONFIG_PM
 	mutex_init(&card->power_lock);
 	init_waitqueue_head(&card->power_sleep);
@@ -413,53 +410,6 @@ int snd_card_free(struct snd_card *card)
 
 EXPORT_SYMBOL(snd_card_free);
 
-static void snd_card_free_thread(void * __card)
-{
-	struct snd_card *card = __card;
-	struct module * module = card->module;
-
-	if (!try_module_get(module)) {
-		snd_printk(KERN_ERR "unable to lock toplevel module for card %i in free thread\n", card->number);
-		module = NULL;
-	}
-
-	snd_card_free(card);
-
-	module_put(module);
-}
-
-/**
- *  snd_card_free_in_thread - call snd_card_free() in thread
- *  @card: soundcard structure
- *
- *  This function schedules the call of snd_card_free() function in a
- *  work queue.  When all devices are released (non-busy), the work
- *  is woken up and calls snd_card_free().
- *
- *  When a card can be disconnected at any time by hotplug service,
- *  this function should be used in disconnect (or detach) callback
- *  instead of calling snd_card_free() directly.
- *  
- *  Returns - zero otherwise a negative error code if the start of thread failed.
- */
-int snd_card_free_in_thread(struct snd_card *card)
-{
-	if (card->files == NULL) {
-		snd_card_free(card);
-		return 0;
-	}
-
-	if (schedule_work(&card->free_workq))
-		return 0;
-
-	snd_printk(KERN_ERR "schedule_work() failed in snd_card_free_in_thread for card %i\n", card->number);
-	/* try to free the structure immediately */
-	snd_card_free(card);
-	return -EFAULT;
-}
-
-EXPORT_SYMBOL(snd_card_free_in_thread);
-
 static void choose_default_id(struct snd_card *card)
 {
 	int i, len, idx_flag = 0, loops = SNDRV_CARDS;
@@ -742,9 +692,9 @@ EXPORT_SYMBOL(snd_card_file_add);
  *
  *  This function removes the file formerly added to the card via
  *  snd_card_file_add() function.
- *  If all files are removed and the release of the card is
- *  scheduled, it will wake up the the thread to call snd_card_free()
- *  (see snd_card_free_in_thread() function).
+ *  If all files are removed and snd_card_free_when_closed() was
+ *  called beforehand, it processes the pending release of
+ *  resources.
  *
  *  Returns zero or a negative error code.
  */
diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c
index 17cc105b26fcf..2de181ad0b050 100644
--- a/sound/drivers/mpu401/mpu401.c
+++ b/sound/drivers/mpu401/mpu401.c
@@ -211,7 +211,7 @@ static void __devexit snd_mpu401_pnp_remove(struct pnp_dev *dev)
 	struct snd_card *card = (struct snd_card *) pnp_get_drvdata(dev);
 
 	snd_card_disconnect(card);
-	snd_card_free_in_thread(card);
+	snd_card_free_when_closed(card);
 }
 
 static struct pnp_driver snd_mpu401_pnp_driver = {
diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c
index 1c09e5f49da8b..fd3590fcaedbc 100644
--- a/sound/pcmcia/pdaudiocf/pdaudiocf.c
+++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c
@@ -206,7 +206,7 @@ static void snd_pdacf_detach(struct pcmcia_device *link)
 		snd_pdacf_powerdown(chip);
 	chip->chip_status |= PDAUDIOCF_STAT_IS_STALE; /* to be sure */
 	snd_card_disconnect(chip->card);
-	snd_card_free_in_thread(chip->card);
+	snd_card_free_when_closed(chip->card);
 }
 
 /*
diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c
index cafe6640cc1a6..76c85cffb40e6 100644
--- a/sound/pcmcia/vx/vxpocket.c
+++ b/sound/pcmcia/vx/vxpocket.c
@@ -65,7 +65,7 @@ static void vxpocket_release(struct pcmcia_device *link)
 }
 
 /*
- * destructor, called from snd_card_free_in_thread()
+ * destructor, called from snd_card_free_when_closed()
  */
 static int snd_vxpocket_dev_free(struct snd_device *device)
 {
@@ -363,7 +363,7 @@ static void vxpocket_detach(struct pcmcia_device *link)
 	chip->chip_status |= VX_STAT_IS_STALE; /* to be sure */
 	snd_card_disconnect(chip->card);
 	vxpocket_release(link);
-	snd_card_free_in_thread(chip->card);
+	snd_card_free_when_closed(chip->card);
 }
 
 /*
-- 
GitLab


From 6dbe662874ba08585eaf732d126762c25ac8e3f7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 27 Jun 2006 18:28:53 +0200
Subject: [PATCH 0884/1063] [ALSA] Add experimental support of aggressive AC97
 power-saving mode

Added CONFIG_SND_AC97_POWER_SAVE kernel config to enable the support
of aggressive AC97 power-saving mode.  In this mode, the AC97
powerdown register bits are dynamically controlled at each open/close
of PCM streams.
The mode is activated via the power_save option of the snd-ac97-codec
driver.  It is off by default.  It can be turned on/off on the fly
via sysfs, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/ac97_codec.h  |  32 +++++
 sound/drivers/Kconfig       |  13 ++
 sound/pci/ac97/ac97_codec.c | 264 +++++++++++++++++++++++++++++++-----
 sound/pci/ac97/ac97_pcm.c   |  18 +++
 sound/pci/intel8x0.c        |  14 +-
 sound/pci/via82xx.c         |  13 +-
 6 files changed, 315 insertions(+), 39 deletions(-)

diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 758f8bf133c7c..4c43521cc493b 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -27,6 +27,7 @@
 
 #include <linux/bitops.h>
 #include <linux/device.h>
+#include <linux/workqueue.h>
 #include "pcm.h"
 #include "control.h"
 #include "info.h"
@@ -140,6 +141,20 @@
 #define AC97_GP_DRSS_1011	0x0000	/* LR(C) 10+11(+12) */
 #define AC97_GP_DRSS_78		0x0400	/* LR 7+8 */
 
+/* powerdown bits */
+#define AC97_PD_ADC_STATUS	0x0001	/* ADC status (RO) */
+#define AC97_PD_DAC_STATUS	0x0002	/* DAC status (RO) */
+#define AC97_PD_MIXER_STATUS	0x0004	/* Analog mixer status (RO) */
+#define AC97_PD_VREF_STATUS	0x0008	/* Vref status (RO) */
+#define AC97_PD_PR0		0x0100	/* Power down PCM ADCs and input MUX */
+#define AC97_PD_PR1		0x0200	/* Power down PCM front DAC */
+#define AC97_PD_PR2		0x0400	/* Power down Mixer (Vref still on) */
+#define AC97_PD_PR3		0x0800	/* Power down Mixer (Vref off) */
+#define AC97_PD_PR4		0x1000	/* Power down AC-Link */
+#define AC97_PD_PR5		0x2000	/* Disable internal clock usage */
+#define AC97_PD_PR6		0x4000	/* Headphone amplifier */
+#define AC97_PD_EAPD		0x8000	/* External Amplifier Power Down (EAPD) */
+
 /* extended audio ID bit defines */
 #define AC97_EI_VRA		0x0001	/* Variable bit rate supported */
 #define AC97_EI_DRA		0x0002	/* Double rate supported */
@@ -359,6 +374,7 @@
 #define AC97_SCAP_INV_EAPD	(1<<7)	/* inverted EAPD */
 #define AC97_SCAP_DETECT_BY_VENDOR (1<<8) /* use vendor registers for read tests */
 #define AC97_SCAP_NO_SPDIF	(1<<9)	/* don't build SPDIF controls */
+#define AC97_SCAP_EAPD_LED	(1<<10)	/* EAPD as mute LED */
 
 /* ac97->flags */
 #define AC97_HAS_PC_BEEP	(1<<0)	/* force PC Speaker usage */
@@ -491,6 +507,12 @@ struct snd_ac97 {
 	/* jack-sharing info */
 	unsigned char indep_surround;
 	unsigned char channel_mode;
+
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	unsigned int power_up;	/* power states */
+	struct workqueue_struct *power_workq;
+	struct work_struct power_work;
+#endif
 	struct device dev;
 };
 
@@ -532,6 +554,15 @@ unsigned short snd_ac97_read(struct snd_ac97 *ac97, unsigned short reg);
 void snd_ac97_write_cache(struct snd_ac97 *ac97, unsigned short reg, unsigned short value);
 int snd_ac97_update(struct snd_ac97 *ac97, unsigned short reg, unsigned short value);
 int snd_ac97_update_bits(struct snd_ac97 *ac97, unsigned short reg, unsigned short mask, unsigned short value);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, int powerup);
+#else
+static inline int snd_ac97_update_power(struct snd_ac97 *ac97, int reg,
+					int powerup)
+{
+	return 0;
+}
+#endif
 #ifdef CONFIG_PM
 void snd_ac97_suspend(struct snd_ac97 *ac97);
 void snd_ac97_resume(struct snd_ac97 *ac97);
@@ -583,6 +614,7 @@ struct ac97_pcm {
 		     copy_flag: 1,	   /* lowlevel driver must fill all entries */
 		     spdif: 1;		   /* spdif pcm */
 	unsigned short aslots;		   /* active slots */
+	unsigned short cur_dbl;		   /* current double-rate state */
 	unsigned int rates;		   /* available rates */
 	struct {
 		unsigned short slots;	   /* driver input: requested AC97 slot numbers */
diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig
index 395c4ef52ac9b..897dc2dfd7ddb 100644
--- a/sound/drivers/Kconfig
+++ b/sound/drivers/Kconfig
@@ -100,4 +100,17 @@ config SND_MPU401
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-mpu401.
 
+config SND_AC97_POWER_SAVE
+	bool "AC97 Power-Saving Mode"
+	depends on SND_AC97_CODEC && EXPERIMENTAL
+	default n
+	help
+	  Say Y here to enable the aggressive power-saving support of
+	  AC97 codecs.  In this mode, the power-mode is dynamically
+	  controlled at each open/close.
+
+	  The mode is activated by passing power_save=1 option to
+	  snd-ac97-codec driver.  You can toggle it dynamically over
+	  sysfs, too.
+
 endmenu
diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index b35280ca2465d..f82c636e99a9f 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -47,6 +47,11 @@ static int enable_loopback;
 module_param(enable_loopback, bool, 0444);
 MODULE_PARM_DESC(enable_loopback, "Enable AC97 ADC/DAC Loopback Control");
 
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+static int power_save;
+module_param(power_save, bool, 0644);
+MODULE_PARM_DESC(power_save, "Enable AC97 power-saving control");
+#endif
 /*
 
  */
@@ -187,6 +192,8 @@ static const struct ac97_codec_id snd_ac97_codec_ids[] = {
 };
 
 
+static void update_power_regs(struct snd_ac97 *ac97);
+
 /*
  *  I/O routines
  */
@@ -554,6 +561,18 @@ int snd_ac97_put_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value
 	}
 	err = snd_ac97_update_bits(ac97, reg, val_mask, val);
 	snd_ac97_page_restore(ac97, page_save);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	/* check analog mixer power-down */
+	if ((val_mask & 0x8000) &&
+	    (kcontrol->private_value & (1<<30))) {
+		if (val & 0x8000)
+			ac97->power_up &= ~(1 << (reg>>1));
+		else
+			ac97->power_up |= 1 << (reg>>1);
+		if (power_save)
+			update_power_regs(ac97);
+	}
+#endif
 	return err;
 }
 
@@ -962,6 +981,10 @@ static int snd_ac97_bus_dev_free(struct snd_device *device)
 static int snd_ac97_free(struct snd_ac97 *ac97)
 {
 	if (ac97) {
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+		if (ac97->power_workq)
+			destroy_workqueue(ac97->power_workq);
+#endif
 		snd_ac97_proc_done(ac97);
 		if (ac97->bus)
 			ac97->bus->codec[ac97->num] = NULL;
@@ -1117,7 +1140,9 @@ struct snd_kcontrol *snd_ac97_cnew(const struct snd_kcontrol_new *_template, str
 /*
  * create mute switch(es) for normal stereo controls
  */
-static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg, int check_stereo, struct snd_ac97 *ac97)
+static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg,
+				     int check_stereo, int check_amix,
+				     struct snd_ac97 *ac97)
 {
 	struct snd_kcontrol *kctl;
 	int err;
@@ -1137,10 +1162,14 @@ static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg,
 	}
 	if (mute_mask == 0x8080) {
 		struct snd_kcontrol_new tmp = AC97_DOUBLE(name, reg, 15, 7, 1, 1);
+		if (check_amix)
+			tmp.private_value |= (1 << 30);
 		tmp.index = ac97->num;
 		kctl = snd_ctl_new1(&tmp, ac97);
 	} else {
 		struct snd_kcontrol_new tmp = AC97_SINGLE(name, reg, 15, 1, 1);
+		if (check_amix)
+			tmp.private_value |= (1 << 30);
 		tmp.index = ac97->num;
 		kctl = snd_ctl_new1(&tmp, ac97);
 	}
@@ -1186,7 +1215,9 @@ static int snd_ac97_cvol_new(struct snd_card *card, char *name, int reg, unsigne
 /*
  * create a mute-switch and a volume for normal stereo/mono controls
  */
-static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, int reg, int check_stereo, struct snd_ac97 *ac97)
+static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx,
+				    int reg, int check_stereo, int check_amix,
+				    struct snd_ac97 *ac97)
 {
 	int err;
 	char name[44];
@@ -1197,7 +1228,9 @@ static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, int
 
 	if (snd_ac97_try_bit(ac97, reg, 15)) {
 		sprintf(name, "%s Switch", pfx);
-		if ((err = snd_ac97_cmute_new_stereo(card, name, reg, check_stereo, ac97)) < 0)
+		if ((err = snd_ac97_cmute_new_stereo(card, name, reg,
+						     check_stereo, check_amix,
+						     ac97)) < 0)
 			return err;
 	}
 	check_volume_resolution(ac97, reg, &lo_max, &hi_max);
@@ -1209,8 +1242,10 @@ static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, int
 	return 0;
 }
 
-#define snd_ac97_cmix_new(card, pfx, reg, ac97)	snd_ac97_cmix_new_stereo(card, pfx, reg, 0, ac97)
-#define snd_ac97_cmute_new(card, name, reg, ac97)	snd_ac97_cmute_new_stereo(card, name, reg, 0, ac97)
+#define snd_ac97_cmix_new(card, pfx, reg, acheck, ac97) \
+	snd_ac97_cmix_new_stereo(card, pfx, reg, 0, acheck, ac97)
+#define snd_ac97_cmute_new(card, name, reg, acheck, ac97) \
+	snd_ac97_cmute_new_stereo(card, name, reg, 0, acheck, ac97)
 
 static unsigned int snd_ac97_determine_spdif_rates(struct snd_ac97 *ac97);
 
@@ -1226,9 +1261,11 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	/* AD claims to remove this control from AD1887, although spec v2.2 does not allow this */
 	if (snd_ac97_try_volume_mix(ac97, AC97_MASTER)) {
 		if (ac97->flags & AC97_HAS_NO_MASTER_VOL)
-			err = snd_ac97_cmute_new(card, "Master Playback Switch", AC97_MASTER, ac97);
+			err = snd_ac97_cmute_new(card, "Master Playback Switch",
+						 AC97_MASTER, 0, ac97);
 		else
-			err = snd_ac97_cmix_new(card, "Master Playback", AC97_MASTER, ac97);
+			err = snd_ac97_cmix_new(card, "Master Playback",
+						AC97_MASTER, 0, ac97);
 		if (err < 0)
 			return err;
 	}
@@ -1265,19 +1302,23 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	if ((snd_ac97_try_volume_mix(ac97, AC97_SURROUND_MASTER)) 
 		&& !(ac97->flags & AC97_AD_MULTI)) {
 		/* Surround Master (0x38) is with stereo mutes */
-		if ((err = snd_ac97_cmix_new_stereo(card, "Surround Playback", AC97_SURROUND_MASTER, 1, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new_stereo(card, "Surround Playback",
+						    AC97_SURROUND_MASTER, 1, 0,
+						    ac97)) < 0)
 			return err;
 	}
 
 	/* build headphone controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_HEADPHONE)) {
-		if ((err = snd_ac97_cmix_new(card, "Headphone Playback", AC97_HEADPHONE, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new(card, "Headphone Playback",
+					     AC97_HEADPHONE, 0, ac97)) < 0)
 			return err;
 	}
 	
 	/* build master mono controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_MASTER_MONO)) {
-		if ((err = snd_ac97_cmix_new(card, "Master Mono Playback", AC97_MASTER_MONO, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new(card, "Master Mono Playback",
+					     AC97_MASTER_MONO, 0, ac97)) < 0)
 			return err;
 	}
 	
@@ -1310,7 +1351,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	/* build Phone controls */
 	if (!(ac97->flags & AC97_HAS_NO_PHONE)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_PHONE)) {
-			if ((err = snd_ac97_cmix_new(card, "Phone Playback", AC97_PHONE, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Phone Playback",
+						     AC97_PHONE, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1318,7 +1360,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	/* build MIC controls */
 	if (!(ac97->flags & AC97_HAS_NO_MIC)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_MIC)) {
-			if ((err = snd_ac97_cmix_new(card, "Mic Playback", AC97_MIC, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Mic Playback",
+						     AC97_MIC, 1, ac97)) < 0)
 				return err;
 			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_boost, ac97))) < 0)
 				return err;
@@ -1327,14 +1370,16 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 
 	/* build Line controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_LINE)) {
-		if ((err = snd_ac97_cmix_new(card, "Line Playback", AC97_LINE, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new(card, "Line Playback",
+					     AC97_LINE, 1, ac97)) < 0)
 			return err;
 	}
 	
 	/* build CD controls */
 	if (!(ac97->flags & AC97_HAS_NO_CD)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_CD)) {
-			if ((err = snd_ac97_cmix_new(card, "CD Playback", AC97_CD, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "CD Playback",
+						     AC97_CD, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1342,7 +1387,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	/* build Video controls */
 	if (!(ac97->flags & AC97_HAS_NO_VIDEO)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_VIDEO)) {
-			if ((err = snd_ac97_cmix_new(card, "Video Playback", AC97_VIDEO, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Video Playback",
+						     AC97_VIDEO, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1350,7 +1396,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	/* build Aux controls */
 	if (!(ac97->flags & AC97_HAS_NO_AUX)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_AUX)) {
-			if ((err = snd_ac97_cmix_new(card, "Aux Playback", AC97_AUX, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Aux Playback",
+						     AC97_AUX, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1385,9 +1432,12 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	} else {
 		if (!(ac97->flags & AC97_HAS_NO_STD_PCM)) {
 			if (ac97->flags & AC97_HAS_NO_PCM_VOL)
-				err = snd_ac97_cmute_new(card, "PCM Playback Switch", AC97_PCM, ac97);
+				err = snd_ac97_cmute_new(card,
+							 "PCM Playback Switch",
+							 AC97_PCM, 0, ac97);
 			else
-				err = snd_ac97_cmix_new(card, "PCM Playback", AC97_PCM, ac97);
+				err = snd_ac97_cmix_new(card, "PCM Playback",
+							AC97_PCM, 0, ac97);
 			if (err < 0)
 				return err;
 		}
@@ -1398,7 +1448,9 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 		if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_src, ac97))) < 0)
 			return err;
 		if (snd_ac97_try_bit(ac97, AC97_REC_GAIN, 15)) {
-			if ((err = snd_ac97_cmute_new(card, "Capture Switch", AC97_REC_GAIN, ac97)) < 0)
+			err = snd_ac97_cmute_new(card, "Capture Switch",
+						 AC97_REC_GAIN, 0, ac97);
+			if (err < 0)
 				return err;
 		}
 		if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0)
@@ -1829,6 +1881,13 @@ static int snd_ac97_dev_disconnect(struct snd_device *device)
 /* build_ops to do nothing */
 static struct snd_ac97_build_ops null_build_ops;
 
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+static void do_update_power(void *data)
+{
+	update_power_regs(data);
+}
+#endif
+
 /**
  * snd_ac97_mixer - create an Codec97 component
  * @bus: the AC97 bus which codec is attached to
@@ -1883,6 +1942,10 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template,
 	bus->codec[ac97->num] = ac97;
 	mutex_init(&ac97->reg_mutex);
 	mutex_init(&ac97->page_mutex);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	ac97->power_workq = create_workqueue("ac97");
+	INIT_WORK(&ac97->power_work, do_update_power, ac97);
+#endif
 
 #ifdef CONFIG_PCI
 	if (ac97->pci) {
@@ -2117,15 +2180,8 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template,
 			return -ENOMEM;
 		}
 	}
-	/* make sure the proper powerdown bits are cleared */
-	if (ac97->scaps && ac97_is_audio(ac97)) {
-		reg = snd_ac97_read(ac97, AC97_EXTENDED_STATUS);
-		if (ac97->scaps & AC97_SCAP_SURROUND_DAC) 
-			reg &= ~AC97_EA_PRJ;
-		if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) 
-			reg &= ~(AC97_EA_PRI | AC97_EA_PRK);
-		snd_ac97_write_cache(ac97, AC97_EXTENDED_STATUS, reg);
-	}
+	if (ac97_is_audio(ac97))
+		update_power_regs(ac97);
 	snd_ac97_proc_init(ac97);
 	if ((err = snd_device_new(card, SNDRV_DEV_CODEC, ac97, &ops)) < 0) {
 		snd_ac97_free(ac97);
@@ -2153,22 +2209,155 @@ static void snd_ac97_powerdown(struct snd_ac97 *ac97)
 		snd_ac97_write(ac97, AC97_HEADPHONE, 0x9f9f);
 	}
 
-	power = ac97->regs[AC97_POWERDOWN] | 0x8000;	/* EAPD */
-	power |= 0x4000;	/* Headphone amplifier powerdown */
-	power |= 0x0300;	/* ADC & DAC powerdown */
+	/* surround, CLFE, mic powerdown */
+	power = ac97->regs[AC97_EXTENDED_STATUS];
+	if (ac97->scaps & AC97_SCAP_SURROUND_DAC)
+		power |= AC97_EA_PRJ;
+	if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC)
+		power |= AC97_EA_PRI | AC97_EA_PRK;
+	power |= AC97_EA_PRL;
+	snd_ac97_write(ac97, AC97_EXTENDED_STATUS, power);
+
+	/* powerdown external amplifier */
+	if (ac97->scaps & AC97_SCAP_INV_EAPD)
+		power = ac97->regs[AC97_POWERDOWN] & ~AC97_PD_EAPD;
+	else if (! (ac97->scaps & AC97_SCAP_EAPD_LED))
+		power = ac97->regs[AC97_POWERDOWN] | AC97_PD_EAPD;
+	power |= AC97_PD_PR6;	/* Headphone amplifier powerdown */
+	power |= AC97_PD_PR0 | AC97_PD_PR1;	/* ADC & DAC powerdown */
 	snd_ac97_write(ac97, AC97_POWERDOWN, power);
 	udelay(100);
-	power |= 0x0400;	/* Analog Mixer powerdown (Vref on) */
-	snd_ac97_write(ac97, AC97_POWERDOWN, power);
-	udelay(100);
-#if 0
-	/* FIXME: this causes click noises on some boards at resume */
-	power |= 0x3800;	/* AC-link powerdown, internal Clk disable */
+	power |= AC97_PD_PR2 | AC97_PD_PR3;	/* Analog Mixer powerdown */
 	snd_ac97_write(ac97, AC97_POWERDOWN, power);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	if (power_save) {
+		udelay(100);
+		/* AC-link powerdown, internal Clk disable */
+		/* FIXME: this may cause click noises on some boards */
+		power |= AC97_PD_PR4 | AC97_PD_PR5;
+		snd_ac97_write(ac97, AC97_POWERDOWN, power);
+	}
 #endif
 }
 
 
+struct ac97_power_reg {
+	unsigned short reg;
+	unsigned short power_reg;
+	unsigned short mask;
+};
+
+enum { PWIDX_ADC, PWIDX_FRONT, PWIDX_CLFE, PWIDX_SURR, PWIDX_MIC, PWIDX_SIZE };
+
+static struct ac97_power_reg power_regs[PWIDX_SIZE] = {
+	[PWIDX_ADC] = { AC97_PCM_LR_ADC_RATE, AC97_POWERDOWN, AC97_PD_PR0},
+	[PWIDX_FRONT] = { AC97_PCM_FRONT_DAC_RATE, AC97_POWERDOWN, AC97_PD_PR1},
+	[PWIDX_CLFE] = { AC97_PCM_LFE_DAC_RATE, AC97_EXTENDED_STATUS,
+			 AC97_EA_PRI | AC97_EA_PRK},
+	[PWIDX_SURR] = { AC97_PCM_SURR_DAC_RATE, AC97_EXTENDED_STATUS,
+			 AC97_EA_PRJ},
+	[PWIDX_MIC] = { AC97_PCM_MIC_ADC_RATE, AC97_EXTENDED_STATUS,
+			AC97_EA_PRL},
+};
+
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+/**
+ * snd_ac97_update_power - update the powerdown register
+ * @ac97: the codec instance
+ * @reg: the rate register, e.g. AC97_PCM_FRONT_DAC_RATE
+ * @powerup: non-zero to power up the part, zero to power it down
+ *
+ * Update the AC97 powerdown register bits of the given part.
+ */
+int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, int powerup)
+{
+	int i;
+
+	if (! ac97)
+		return 0;
+
+	if (reg) {
+		/* SPDIF requires DAC power, too */
+		if (reg == AC97_SPDIF)
+			reg = AC97_PCM_FRONT_DAC_RATE;
+		for (i = 0; i < PWIDX_SIZE; i++) {
+			if (power_regs[i].reg == reg) {
+				if (powerup)
+					ac97->power_up |= (1 << i);
+				else
+					ac97->power_up &= ~(1 << i);
+				break;
+			}
+		}
+	}
+
+	if (! power_save)
+		return 0;
+
+	if (! powerup && ac97->power_workq)
+		/* adjust power-down bits after two seconds delay
+		 * (for avoiding loud click noises for many (OSS) apps
+		 *  that open/close frequently)
+		 */
+		queue_delayed_work(ac97->power_workq, &ac97->power_work, HZ*2);
+	else
+		update_power_regs(ac97);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(snd_ac97_update_power);
+#endif /* CONFIG_SND_AC97_POWER_SAVE */
+
+static void update_power_regs(struct snd_ac97 *ac97)
+{
+	unsigned int power_up, bits;
+	int i;
+
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	if (power_save)
+		power_up = ac97->power_up;
+	else {
+#endif
+		power_up = (1 << PWIDX_FRONT) | (1 << PWIDX_ADC);
+		power_up |= (1 << PWIDX_MIC);
+		if (ac97->scaps & AC97_SCAP_SURROUND_DAC)
+			power_up |= (1 << PWIDX_SURR);
+		if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC)
+			power_up |= (1 << PWIDX_CLFE);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	}
+#endif
+	if (power_up) {
+		if (ac97->regs[AC97_POWERDOWN] & AC97_PD_PR2) {
+			/* needs power-up analog mix and vref */
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR3, 0);
+			msleep(1);
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR2, 0);
+		}
+	}
+	for (i = 0; i < PWIDX_SIZE; i++) {
+		if (power_up & (1 << i))
+			bits = 0;
+		else
+			bits = power_regs[i].mask;
+		snd_ac97_update_bits(ac97, power_regs[i].power_reg,
+				     power_regs[i].mask, bits);
+	}
+	if (! power_up) {
+		if (! (ac97->regs[AC97_POWERDOWN] & AC97_PD_PR2)) {
+			/* power down analog mix and vref */
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR2, AC97_PD_PR2);
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR3, AC97_PD_PR3);
+		}
+	}
+}
+
+
 #ifdef CONFIG_PM
 /**
  * snd_ac97_suspend - General suspend function for AC97 codec
@@ -2484,6 +2673,7 @@ static int tune_mute_led(struct snd_ac97 *ac97)
 	msw->put = master_mute_sw_put;
 	snd_ac97_remove_ctl(ac97, "External Amplifier", NULL);
 	snd_ac97_update_bits(ac97, AC97_POWERDOWN, 0x8000, 0x8000); /* mute LED on */
+	ac97->scaps |= AC97_SCAP_EAPD_LED;
 	return 0;
 }
 
diff --git a/sound/pci/ac97/ac97_pcm.c b/sound/pci/ac97/ac97_pcm.c
index f684aa2c00676..3758d07182f80 100644
--- a/sound/pci/ac97/ac97_pcm.c
+++ b/sound/pci/ac97/ac97_pcm.c
@@ -269,6 +269,7 @@ int snd_ac97_set_rate(struct snd_ac97 *ac97, int reg, unsigned int rate)
 			return -EINVAL;
 	}
 
+	snd_ac97_update_power(ac97, reg, 1);
 	switch (reg) {
 	case AC97_PCM_MIC_ADC_RATE:
 		if ((ac97->regs[AC97_EXTENDED_STATUS] & AC97_EA_VRM) == 0)	/* MIC VRA */
@@ -606,6 +607,7 @@ int snd_ac97_pcm_open(struct ac97_pcm *pcm, unsigned int rate,
 			goto error;
 		}
 	}
+	pcm->cur_dbl = r;
 	spin_unlock_irq(&pcm->bus->bus_lock);
 	for (i = 3; i < 12; i++) {
 		if (!(slots & (1 << i)))
@@ -651,6 +653,21 @@ int snd_ac97_pcm_close(struct ac97_pcm *pcm)
 	unsigned short slots = pcm->aslots;
 	int i, cidx;
 
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	int r = pcm->cur_dbl;
+	for (i = 3; i < 12; i++) {
+		if (!(slots & (1 << i)))
+			continue;
+		for (cidx = 0; cidx < 4; cidx++) {
+			if (pcm->r[r].rslots[cidx] & (1 << i)) {
+				int reg = get_slot_reg(pcm, cidx, i, r);
+				snd_ac97_update_power(pcm->r[r].codec[cidx],
+						      reg, 0);
+			}
+		}
+	}
+#endif
+
 	bus = pcm->bus;
 	spin_lock_irq(&pcm->bus->bus_lock);
 	for (i = 3; i < 12; i++) {
@@ -660,6 +677,7 @@ int snd_ac97_pcm_close(struct ac97_pcm *pcm)
 			bus->used_slots[pcm->stream][cidx] &= ~(1 << i);
 	}
 	pcm->aslots = 0;
+	pcm->cur_dbl = 0;
 	spin_unlock_irq(&pcm->bus->bus_lock);
 	return 0;
 }
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 6874263f1681f..72dbaedcbdf57 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -2251,6 +2251,16 @@ static int snd_intel8x0_ich_chip_init(struct intel8x0 *chip, int probing)
 	/* ACLink on, 2 channels */
 	cnt = igetdword(chip, ICHREG(GLOB_CNT));
 	cnt &= ~(ICH_ACLINK | ICH_PCM_246_MASK);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	/* do cold reset - the full ac97 powerdown may leave the controller
+	 * in a warm state but actually it cannot communicate with the codec.
+	 */
+	iputdword(chip, ICHREG(GLOB_CNT), cnt & ~ICH_AC97COLD);
+	cnt = igetdword(chip, ICHREG(GLOB_CNT));
+	udelay(10);
+	iputdword(chip, ICHREG(GLOB_CNT), cnt | ICH_AC97COLD);
+	msleep(1);
+#else
 	/* finish cold or do warm reset */
 	cnt |= (cnt & ICH_AC97COLD) == 0 ? ICH_AC97COLD : ICH_AC97WARM;
 	iputdword(chip, ICHREG(GLOB_CNT), cnt);
@@ -2265,6 +2275,7 @@ static int snd_intel8x0_ich_chip_init(struct intel8x0 *chip, int probing)
 	return -EIO;
 
       __ok:
+#endif
 	if (probing) {
 		/* wait for any codec ready status.
 		 * Once it becomes ready it should remain ready
@@ -2485,7 +2496,7 @@ static int intel8x0_resume(struct pci_dev *pci)
 		    card->shortname, chip);
 	chip->irq = pci->irq;
 	synchronize_irq(chip->irq);
-	snd_intel8x0_chip_init(chip, 1);
+	snd_intel8x0_chip_init(chip, 0);
 
 	/* re-initialize mixer stuff */
 	if (chip->device_type == DEVICE_INTEL_ICH4) {
@@ -2615,6 +2626,7 @@ static void __devinit intel8x0_measure_ac97_clock(struct intel8x0 *chip)
 		/* not 48000Hz, tuning the clock.. */
 		chip->ac97_bus->clock = (chip->ac97_bus->clock * 48000) / pos;
 	printk(KERN_INFO "intel8x0: clocking to %d\n", chip->ac97_bus->clock);
+	snd_ac97_update_power(chip->ac97[0], AC97_PCM_FRONT_DAC_RATE, 0);
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
index 08da9234efb38..2c23a665c3e3b 100644
--- a/sound/pci/via82xx.c
+++ b/sound/pci/via82xx.c
@@ -1277,7 +1277,18 @@ static int snd_via82xx_pcm_close(struct snd_pcm_substream *substream)
 	if (! ratep->used)
 		ratep->rate = 0;
 	spin_unlock_irq(&ratep->lock);
-
+	if (! ratep->rate) {
+		if (! viadev->direction) {
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_FRONT_DAC_RATE, 0);
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_SURR_DAC_RATE, 0);
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_LFE_DAC_RATE, 0);
+		} else
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_LR_ADC_RATE, 0);
+	}
 	viadev->substream = NULL;
 	return 0;
 }
-- 
GitLab


From 82466ad76d60c35bf1c48ba1b9c98c35d82fc385 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 29 Jun 2006 17:15:33 +0200
Subject: [PATCH 0885/1063] [ALSA] add codec-specific controls for UCB1400

This patch adds some codec-specific controls for the Philips UCB1400 codec.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ac97_codec.c |  2 +-
 sound/pci/ac97/ac97_patch.c | 38 +++++++++++++++++++++++++++++++++++++
 sound/pci/ac97/ac97_patch.h |  1 +
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index f82c636e99a9f..e5d062d640dfc 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -156,7 +156,7 @@ static const struct ac97_codec_id snd_ac97_codec_ids[] = {
 { 0x4e534300, 0xffffffff, "LM4540,43,45,46,48",	NULL,		NULL }, // only guess --jk
 { 0x4e534331, 0xffffffff, "LM4549",		NULL,		NULL },
 { 0x4e534350, 0xffffffff, "LM4550",		patch_lm4550,  	NULL }, // volume wrap fix 
-{ 0x50534304, 0xffffffff, "UCB1400",		NULL,		NULL },
+{ 0x50534304, 0xffffffff, "UCB1400",		patch_ucb1400,	NULL },
 { 0x53494c20, 0xffffffe0, "Si3036,8",		mpatch_si3036,	mpatch_si3036, AC97_MODEM_PATCH },
 { 0x54524102, 0xffffffff, "TR28022",		NULL,		NULL },
 { 0x54524106, 0xffffffff, "TR28026",		NULL,		NULL },
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 094cfc1f3a190..5267b006c5c85 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -2872,3 +2872,41 @@ int patch_lm4550(struct snd_ac97 *ac97)
 	ac97->res_table = lm4550_restbl;
 	return 0;
 }
+
+/* 
+ *  UCB1400 codec (http://www.semiconductors.philips.com/acrobat_download/datasheets/UCB1400-02.pdf)
+ */
+static const struct snd_kcontrol_new snd_ac97_controls_ucb1400[] = {
+/* enable/disable headphone driver which allows direct connection to
+   stereo headphone without the use of external DC blocking
+   capacitors */
+AC97_SINGLE("Headphone Driver", 0x6a, 6, 1, 0),
+/* Filter used to compensate the DC offset is added in the ADC to remove idle
+   tones from the audio band. */
+AC97_SINGLE("DC Filter", 0x6a, 4, 1, 0),
+/* Control smart-low-power mode feature. Allows automatic power down
+   of unused blocks in the ADC analog front end and the PLL. */
+AC97_SINGLE("Smart Low Power Mode", 0x6c, 4, 3, 0),
+};
+
+static int patch_ucb1400_specific(struct snd_ac97 * ac97)
+{
+	int idx, err;
+	for (idx = 0; idx < ARRAY_SIZE(snd_ac97_controls_ucb1400); idx++)
+		if ((err = snd_ctl_add(ac97->bus->card, snd_ctl_new1(&snd_ac97_controls_ucb1400[idx], ac97))) < 0)
+			return err;
+	return 0;
+}
+
+static struct snd_ac97_build_ops patch_ucb1400_ops = {
+	.build_specific	= patch_ucb1400_specific,
+};
+
+int patch_ucb1400(struct snd_ac97 * ac97)
+{
+	ac97->build_ops = &patch_ucb1400_ops;
+	/* enable headphone driver and smart low power mode by default */
+	snd_ac97_write(ac97, 0x6a, 0x0050);
+	snd_ac97_write(ac97, 0x6c, 0x0030);
+	return 0;
+}
diff --git a/sound/pci/ac97/ac97_patch.h b/sound/pci/ac97/ac97_patch.h
index adcaa04586cbb..7419792172075 100644
--- a/sound/pci/ac97/ac97_patch.h
+++ b/sound/pci/ac97/ac97_patch.h
@@ -58,5 +58,6 @@ int patch_cm9780(struct snd_ac97 * ac97);
 int patch_vt1616(struct snd_ac97 * ac97);
 int patch_vt1617a(struct snd_ac97 * ac97);
 int patch_it2646(struct snd_ac97 * ac97);
+int patch_ucb1400(struct snd_ac97 * ac97);
 int mpatch_si3036(struct snd_ac97 * ac97);
 int patch_lm4550(struct snd_ac97 * ac97);
-- 
GitLab


From e0a5d82a966172c5f1dff6229d4a07be2222e8b3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy@smile.org.ua>
Date: Tue, 4 Jul 2006 12:05:14 +0200
Subject: [PATCH 0886/1063] [ALSA] fm801: Support FM only card

Signed-off-by: Andy Shevchenko <andy@smile.org.ua>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/fm801.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index 13868c9851268..a30257fa6dbcb 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -2,6 +2,7 @@
  *  The driver for the ForteMedia FM801 based soundcards
  *  Copyright (c) by Jaroslav Kysela <perex@suse.cz>
  *
+ *  Support FM only card by Andy Shevchenko <andy@smile.org.ua>
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -54,6 +55,7 @@ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;	/* Enable this card *
  *    1 = MediaForte 256-PCS
  *    2 = MediaForte 256-PCPR
  *    3 = MediaForte 64-PCR
+ *   16 = setup tuner only (this is additional bit), i.e. SF-64-PCR FM card
  *  High 16-bits are video (radio) device number + 1
  */
 static int tea575x_tuner[SNDRV_CARDS];
@@ -1253,6 +1255,9 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume)
 	int id;
 	unsigned short cmdw;
 
+	if (tea575x_tuner & 0x0010)
+		goto __ac97_ok;
+
 	/* codec cold reset + AC'97 warm reset */
 	outw((1<<5) | (1<<6), FM801_REG(chip, CODEC_CTRL));
 	inw(FM801_REG(chip, CODEC_CTRL)); /* flush posting data */
@@ -1394,13 +1399,16 @@ static int __devinit snd_fm801_create(struct snd_card *card,
 	snd_card_set_dev(card, &pci->dev);
 
 #ifdef TEA575X_RADIO
-	if (tea575x_tuner > 0 && (tea575x_tuner & 0xffff) < 4) {
+	if (tea575x_tuner > 0 && (tea575x_tuner & 0x000f) < 4) {
 		chip->tea.dev_nr = tea575x_tuner >> 16;
 		chip->tea.card = card;
 		chip->tea.freq_fixup = 10700;
 		chip->tea.private_data = chip;
-		chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0xffff) - 1];
+		chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0x000f) - 1];
 		snd_tea575x_init(&chip->tea);
+
+		/* Mute FM tuner */
+		outw(0xf800, FM801_REG(chip, GPIO_CTRL));
 	}
 #endif
 
@@ -1439,6 +1447,9 @@ static int __devinit snd_card_fm801_probe(struct pci_dev *pci,
 	sprintf(card->longname, "%s at 0x%lx, irq %i",
 		card->shortname, chip->port, chip->irq);
 
+	if (tea575x_tuner[dev] & 0x0010)
+		goto __fm801_tuner_only;
+
 	if ((err = snd_fm801_pcm(chip, 0, NULL)) < 0) {
 		snd_card_free(card);
 		return err;
@@ -1465,6 +1476,7 @@ static int __devinit snd_card_fm801_probe(struct pci_dev *pci,
 		return err;
 	}
 
+      __fm801_tuner_only:
 	if ((err = snd_card_register(card)) < 0) {
 		snd_card_free(card);
 		return err;
-- 
GitLab


From 6bbe13ecbbce4415a5a7959b3bc35b18313025e0 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@suse.cz>
Date: Tue, 4 Jul 2006 13:39:55 +0200
Subject: [PATCH 0887/1063] [ALSA] fm801: fixed broken previous patch for the
 FM tuner only code

- do not allocate and enable interrupt
- do not do the FM tuner mute (it should be handled more cleanly)

Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/fm801.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index a30257fa6dbcb..88a3e9f3224ab 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -160,6 +160,7 @@ struct fm801 {
 	unsigned int multichannel: 1,	/* multichannel support */
 		     secondary: 1;	/* secondary codec */
 	unsigned char secondary_addr;	/* address of the secondary codec */
+	unsigned int tea575x_tuner;	/* tuner flags */
 
 	unsigned short ply_ctrl; /* playback control */
 	unsigned short cap_ctrl; /* capture control */
@@ -1255,7 +1256,7 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume)
 	int id;
 	unsigned short cmdw;
 
-	if (tea575x_tuner & 0x0010)
+	if (chip->tea575x_tuner & 0x0010)
 		goto __ac97_ok;
 
 	/* codec cold reset + AC'97 warm reset */
@@ -1295,6 +1296,8 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume)
 		wait_for_codec(chip, 0, AC97_VENDOR_ID1, msecs_to_jiffies(750));
 	}
 
+      __ac97_ok:
+
 	/* init volume */
 	outw(0x0808, FM801_REG(chip, PCM_VOL));
 	outw(0x9f1f, FM801_REG(chip, FM_VOL));
@@ -1303,9 +1306,12 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume)
 	/* I2S control - I2S mode */
 	outw(0x0003, FM801_REG(chip, I2S_MODE));
 
-	/* interrupt setup - unmask MPU, PLAYBACK & CAPTURE */
+	/* interrupt setup */
 	cmdw = inw(FM801_REG(chip, IRQ_MASK));
-	cmdw &= ~0x0083;
+	if (chip->irq < 0)
+		cmdw |= 0x00c3;		/* mask everything, no PCM nor MPU */
+	else
+		cmdw &= ~0x0083;	/* unmask MPU, PLAYBACK & CAPTURE */
 	outw(cmdw, FM801_REG(chip, IRQ_MASK));
 
 	/* interrupt clear */
@@ -1370,20 +1376,23 @@ static int __devinit snd_fm801_create(struct snd_card *card,
 	chip->card = card;
 	chip->pci = pci;
 	chip->irq = -1;
+	chip->tea575x_tuner = tea575x_tuner;
 	if ((err = pci_request_regions(pci, "FM801")) < 0) {
 		kfree(chip);
 		pci_disable_device(pci);
 		return err;
 	}
 	chip->port = pci_resource_start(pci, 0);
-	if (request_irq(pci->irq, snd_fm801_interrupt, IRQF_DISABLED|IRQF_SHARED,
-			"FM801", chip)) {
-		snd_printk(KERN_ERR "unable to grab IRQ %d\n", chip->irq);
-		snd_fm801_free(chip);
-		return -EBUSY;
+	if ((tea575x_tuner & 0x0010) == 0) {
+		if (request_irq(pci->irq, snd_fm801_interrupt, IRQF_DISABLED|IRQF_SHARED,
+				"FM801", chip)) {
+			snd_printk(KERN_ERR "unable to grab IRQ %d\n", chip->irq);
+			snd_fm801_free(chip);
+			return -EBUSY;
+		}
+		chip->irq = pci->irq;
+		pci_set_master(pci);
 	}
-	chip->irq = pci->irq;
-	pci_set_master(pci);
 
 	pci_read_config_byte(pci, PCI_REVISION_ID, &rev);
 	if (rev >= 0xb1)	/* FM801-AU */
@@ -1406,9 +1415,6 @@ static int __devinit snd_fm801_create(struct snd_card *card,
 		chip->tea.private_data = chip;
 		chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0x000f) - 1];
 		snd_tea575x_init(&chip->tea);
-
-		/* Mute FM tuner */
-		outw(0xf800, FM801_REG(chip, GPIO_CTRL));
 	}
 #endif
 
-- 
GitLab


From 8aa9b586e42099817163aba01d925c2660c4dbbe Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@suse.cz>
Date: Wed, 5 Jul 2006 17:34:51 +0200
Subject: [PATCH 0888/1063] [ALSA] Control API - more robust TLV implementation

- added callback option
- added READ/WRITE/COMMAND flags to access member
- added WRITE/COMMAND ioctls
- added SNDRV_CTL_EVENT_MASK_TLV for TLV change notifications
- added TLV support to ELEM_ADD ioctl

Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/asound.h  |  10 ++-
 include/sound/control.h |  16 ++++-
 sound/core/control.c    | 139 +++++++++++++++++++++++++++++++---------
 3 files changed, 132 insertions(+), 33 deletions(-)

diff --git a/include/sound/asound.h b/include/sound/asound.h
index 76a20406bd189..c1621c650a9a0 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -727,10 +727,15 @@ typedef int __bitwise snd_ctl_elem_iface_t;
 #define SNDRV_CTL_ELEM_ACCESS_WRITE		(1<<1)
 #define SNDRV_CTL_ELEM_ACCESS_READWRITE		(SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
 #define SNDRV_CTL_ELEM_ACCESS_VOLATILE		(1<<2)	/* control value may be changed without a notification */
-#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP		(1<<2)	/* when was control changed */
+#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP		(1<<3)	/* when was control changed */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_READ		(1<<4)	/* TLV read is possible */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE		(1<<5)	/* TLV write is possible */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE	(SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
+#define SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND	(1<<6)	/* TLV command is possible */
 #define SNDRV_CTL_ELEM_ACCESS_INACTIVE		(1<<8)	/* control does actually nothing, but may be updated */
 #define SNDRV_CTL_ELEM_ACCESS_LOCK		(1<<9)	/* write lock */
 #define SNDRV_CTL_ELEM_ACCESS_OWNER		(1<<10)	/* write lock owner */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK	(1<<28)	/* kernel use a TLV callback */ 
 #define SNDRV_CTL_ELEM_ACCESS_USER		(1<<29) /* user space element */
 #define SNDRV_CTL_ELEM_ACCESS_DINDIRECT		(1<<30)	/* indirect access for matrix dimensions in the info structure */
 #define SNDRV_CTL_ELEM_ACCESS_INDIRECT		(1<<31)	/* indirect access for element value in the value structure */
@@ -838,6 +843,8 @@ enum {
 	SNDRV_CTL_IOCTL_ELEM_REPLACE = _IOWR('U', 0x18, struct snd_ctl_elem_info),
 	SNDRV_CTL_IOCTL_ELEM_REMOVE = _IOWR('U', 0x19, struct snd_ctl_elem_id),
 	SNDRV_CTL_IOCTL_TLV_READ = _IOWR('U', 0x1a, struct snd_ctl_tlv),
+	SNDRV_CTL_IOCTL_TLV_WRITE = _IOWR('U', 0x1b, struct snd_ctl_tlv),
+	SNDRV_CTL_IOCTL_TLV_COMMAND = _IOWR('U', 0x1c, struct snd_ctl_tlv),
 	SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE = _IOWR('U', 0x20, int),
 	SNDRV_CTL_IOCTL_HWDEP_INFO = _IOR('U', 0x21, struct snd_hwdep_info),
 	SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE = _IOR('U', 0x30, int),
@@ -862,6 +869,7 @@ enum sndrv_ctl_event_type {
 #define SNDRV_CTL_EVENT_MASK_VALUE	(1<<0)	/* element value was changed */
 #define SNDRV_CTL_EVENT_MASK_INFO	(1<<1)	/* element info was changed */
 #define SNDRV_CTL_EVENT_MASK_ADD	(1<<2)	/* element was added */
+#define SNDRV_CTL_EVENT_MASK_TLV	(1<<3)	/* element TLV tree was changed */
 #define SNDRV_CTL_EVENT_MASK_REMOVE	(~0U)	/* element was removed */
 
 struct snd_ctl_event {
diff --git a/include/sound/control.h b/include/sound/control.h
index a93a58d0e6882..e3905c5a0950a 100644
--- a/include/sound/control.h
+++ b/include/sound/control.h
@@ -30,6 +30,11 @@ struct snd_kcontrol;
 typedef int (snd_kcontrol_info_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_info * uinfo);
 typedef int (snd_kcontrol_get_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_value * ucontrol);
 typedef int (snd_kcontrol_put_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_value * ucontrol);
+typedef int (snd_kcontrol_tlv_rw_t)(struct snd_kcontrol *kcontrol,
+				    int op_flag, /* 0=read,1=write,-1=command */
+				    unsigned int size,
+				    unsigned int __user *tlv);
+
 
 struct snd_kcontrol_new {
 	snd_ctl_elem_iface_t iface;	/* interface identifier */
@@ -42,7 +47,10 @@ struct snd_kcontrol_new {
 	snd_kcontrol_info_t *info;
 	snd_kcontrol_get_t *get;
 	snd_kcontrol_put_t *put;
-	unsigned int *tlv;
+	union {
+		snd_kcontrol_tlv_rw_t *c;
+		unsigned int *p;
+	} tlv;
 	unsigned long private_value;
 };
 
@@ -59,7 +67,11 @@ struct snd_kcontrol {
 	snd_kcontrol_info_t *info;
 	snd_kcontrol_get_t *get;
 	snd_kcontrol_put_t *put;
-	unsigned int *tlv;
+	snd_kcontrol_tlv_rw_t *tlv_rw;
+	union {
+		snd_kcontrol_tlv_rw_t *c;
+		unsigned int *p;
+	} tlv;
 	unsigned long private_value;
 	void *private_data;
 	void (*private_free)(struct snd_kcontrol *kcontrol);
diff --git a/sound/core/control.c b/sound/core/control.c
index f0c7272a2d48d..31ad58154c06c 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -236,12 +236,16 @@ struct snd_kcontrol *snd_ctl_new1(const struct snd_kcontrol_new *ncontrol,
 	kctl.id.index = ncontrol->index;
 	kctl.count = ncontrol->count ? ncontrol->count : 1;
 	access = ncontrol->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE :
-		 (ncontrol->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|SNDRV_CTL_ELEM_ACCESS_INACTIVE|
-		 		      SNDRV_CTL_ELEM_ACCESS_DINDIRECT|SNDRV_CTL_ELEM_ACCESS_INDIRECT));
+		 (ncontrol->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|
+				      SNDRV_CTL_ELEM_ACCESS_INACTIVE|
+		 		      SNDRV_CTL_ELEM_ACCESS_DINDIRECT|
+		 		      SNDRV_CTL_ELEM_ACCESS_INDIRECT|
+		 		      SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE|
+		 		      SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK));
 	kctl.info = ncontrol->info;
 	kctl.get = ncontrol->get;
 	kctl.put = ncontrol->put;
-	kctl.tlv = ncontrol->tlv;
+	kctl.tlv.p = ncontrol->tlv.p;
 	kctl.private_value = ncontrol->private_value;
 	kctl.private_data = private_data;
 	return snd_ctl_new(&kctl, access);
@@ -883,6 +887,8 @@ struct user_element {
 	struct snd_ctl_elem_info info;
 	void *elem_data;		/* element data */
 	unsigned long elem_data_size;	/* size of element data in bytes */
+	void *tlv_data;			/* TLV data */
+	unsigned long tlv_data_size;	/* TLV data size */
 	void *priv_data;		/* private data (like strings for enumerated type) */
 	unsigned long priv_data_size;	/* size of private data in bytes */
 };
@@ -917,9 +923,46 @@ static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol,
+				 int op_flag,
+				 unsigned int size,
+				 unsigned int __user *tlv)
+{
+	struct user_element *ue = kcontrol->private_data;
+	int change = 0;
+	void *new_data;
+
+	if (op_flag > 0) {
+		if (size > 1024 * 128)	/* sane value */
+			return -EINVAL;
+		new_data = kmalloc(size, GFP_KERNEL);
+		if (new_data == NULL)
+			return -ENOMEM;
+		if (copy_from_user(new_data, tlv, size)) {
+			kfree(new_data);
+			return -EFAULT;
+		}
+		change = ue->tlv_data_size != size;
+		if (!change)
+			change = memcmp(ue->tlv_data, new_data, size);
+		kfree(ue->tlv_data);
+		ue->tlv_data = new_data;
+		ue->tlv_data_size = size;
+	} else {
+		if (size < ue->tlv_data_size)
+			return -ENOSPC;
+		if (copy_to_user(tlv, ue->tlv_data, ue->tlv_data_size))
+			return -EFAULT;
+	}
+	return change;
+}
+
 static void snd_ctl_elem_user_free(struct snd_kcontrol *kcontrol)
 {
-	kfree(kcontrol->private_data);
+	struct user_element *ue = kcontrol->private_data;
+	if (ue->tlv_data)
+		kfree(ue->tlv_data);
+	kfree(ue);
 }
 
 static int snd_ctl_elem_add(struct snd_ctl_file *file,
@@ -938,7 +981,8 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 		return -EINVAL;
 	access = info->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE :
 		(info->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|
-				 SNDRV_CTL_ELEM_ACCESS_INACTIVE));
+				 SNDRV_CTL_ELEM_ACCESS_INACTIVE|
+				 SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE));
 	info->id.numid = 0;
 	memset(&kctl, 0, sizeof(kctl));
 	down_write(&card->controls_rwsem);
@@ -964,6 +1008,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 		kctl.get = snd_ctl_elem_user_get;
 	if (access & SNDRV_CTL_ELEM_ACCESS_WRITE)
 		kctl.put = snd_ctl_elem_user_put;
+	if (access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE) {
+		kctl.tlv.c = snd_ctl_elem_user_tlv;
+		access |= SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
+	}
 	switch (info->type) {
 	case SNDRV_CTL_ELEM_TYPE_BOOLEAN:
 		private_size = sizeof(char);
@@ -1068,38 +1116,65 @@ static int snd_ctl_subscribe_events(struct snd_ctl_file *file, int __user *ptr)
 	return 0;
 }
 
-static int snd_ctl_tlv_read(struct snd_card *card,
-                            struct snd_ctl_tlv __user *_tlv)
+static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file,
+                             struct snd_ctl_tlv __user *_tlv,
+                             int op_flag)
 {
+	struct snd_card *card = file->card;
 	struct snd_ctl_tlv tlv;
 	struct snd_kcontrol *kctl;
+	struct snd_kcontrol_volatile *vd;
 	unsigned int len;
 	int err = 0;
 
 	if (copy_from_user(&tlv, _tlv, sizeof(tlv)))
 		return -EFAULT;
-        if (tlv.length < sizeof(unsigned int) * 3)
-                return -EINVAL;
-        down_read(&card->controls_rwsem);
-        kctl = snd_ctl_find_numid(card, tlv.numid);
-        if (kctl == NULL) {
-                err = -ENOENT;
-                goto __kctl_end;
-        }
-        if (kctl->tlv == NULL) {
-                err = -ENXIO;
-                goto __kctl_end;
-        }
-        len = kctl->tlv[1] + 2 * sizeof(unsigned int);
-        if (tlv.length < len) {
-                err = -ENOMEM;
-                goto __kctl_end;
-        }
-        if (copy_to_user(_tlv->tlv, kctl->tlv, len))
-        	err = -EFAULT;
+	if (tlv.length < sizeof(unsigned int) * 3)
+		return -EINVAL;
+	down_read(&card->controls_rwsem);
+	kctl = snd_ctl_find_numid(card, tlv.numid);
+	if (kctl == NULL) {
+		err = -ENOENT;
+		goto __kctl_end;
+	}
+	if (kctl->tlv.p == NULL) {
+		err = -ENXIO;
+		goto __kctl_end;
+	}
+	vd = &kctl->vd[tlv.numid - kctl->id.numid];
+	if ((op_flag == 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) == 0) ||
+	    (op_flag > 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) == 0) ||
+	    (op_flag < 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND) == 0)) {
+	    	err = -ENXIO;
+	    	goto __kctl_end;
+	}
+	if (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) {
+		if (file && vd->owner != NULL && vd->owner != file) {
+			err = -EPERM;
+			goto __kctl_end;
+		}
+		err = kctl->tlv.c(kctl, op_flag, tlv.length, _tlv->tlv); 
+		if (err > 0) {
+			up_read(&card->controls_rwsem);
+			snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_TLV, &kctl->id);
+			return 0;
+		}
+	} else {
+		if (op_flag) {
+			err = -ENXIO;
+			goto __kctl_end;
+		}
+		len = kctl->tlv.p[1] + 2 * sizeof(unsigned int);
+		if (tlv.length < len) {
+			err = -ENOMEM;
+			goto __kctl_end;
+		}
+		if (copy_to_user(_tlv->tlv, kctl->tlv.p, len))
+			err = -EFAULT;
+	}
       __kctl_end:
-        up_read(&card->controls_rwsem);
-        return err;
+	up_read(&card->controls_rwsem);
+	return err;
 }
 
 static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1140,8 +1215,12 @@ static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		return snd_ctl_elem_remove(ctl, argp);
 	case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS:
 		return snd_ctl_subscribe_events(ctl, ip);
-        case SNDRV_CTL_IOCTL_TLV_READ:
-                return snd_ctl_tlv_read(card, argp);
+	case SNDRV_CTL_IOCTL_TLV_READ:
+		return snd_ctl_tlv_ioctl(ctl, argp, 0);
+	case SNDRV_CTL_IOCTL_TLV_WRITE:
+		return snd_ctl_tlv_ioctl(ctl, argp, 1);
+	case SNDRV_CTL_IOCTL_TLV_COMMAND:
+		return snd_ctl_tlv_ioctl(ctl, argp, -1);
 	case SNDRV_CTL_IOCTL_POWER:
 		return -ENOPROTOOPT;
 	case SNDRV_CTL_IOCTL_POWER_STATE:
-- 
GitLab


From 7f0e2f8bb851f5e0a2e0fef465b7b6f36c7aa7be Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@suse.cz>
Date: Wed, 5 Jul 2006 17:39:14 +0200
Subject: [PATCH 0889/1063] [ALSA] HDA codec - little code & comment cleanup

Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_codec.h |  2 +-
 sound/pci/hda/hda_proc.c  | 12 ++++--------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index 40520e9d5a4b2..c12bc4e8840f0 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -479,7 +479,7 @@ struct hda_codec_ops {
 struct hda_amp_info {
 	u32 key;		/* hash key */
 	u32 amp_caps;		/* amp capabilities */
-	u16 vol[2];		/* current volume & mute*/
+	u16 vol[2];		/* current volume & mute */
 	u16 status;		/* update flag */
 	u16 next;		/* next link */
 };
diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c
index c2f0fe85bf35c..d737f17695a33 100644
--- a/sound/pci/hda/hda_proc.c
+++ b/sound/pci/hda/hda_proc.c
@@ -52,10 +52,9 @@ static void print_amp_caps(struct snd_info_buffer *buffer,
 			   struct hda_codec *codec, hda_nid_t nid, int dir)
 {
 	unsigned int caps;
-	if (dir == HDA_OUTPUT)
-		caps = snd_hda_param_read(codec, nid, AC_PAR_AMP_OUT_CAP);
-	else
-		caps = snd_hda_param_read(codec, nid, AC_PAR_AMP_IN_CAP);
+	caps = snd_hda_param_read(codec, nid,
+				  dir == HDA_OUTPUT ?
+				    AC_PAR_AMP_OUT_CAP : AC_PAR_AMP_IN_CAP);
 	if (caps == -1 || caps == 0) {
 		snd_iprintf(buffer, "N/A\n");
 		return;
@@ -74,10 +73,7 @@ static void print_amp_vals(struct snd_info_buffer *buffer,
 	unsigned int val;
 	int i;
 
-	if (dir == HDA_OUTPUT)
-		dir = AC_AMP_GET_OUTPUT;
-	else
-		dir = AC_AMP_GET_INPUT;
+	dir = dir == HDA_OUTPUT ? AC_AMP_GET_OUTPUT : AC_AMP_GET_INPUT;
 	for (i = 0; i < indices; i++) {
 		snd_iprintf(buffer, " [");
 		if (stereo) {
-- 
GitLab


From 302e9c5af4fb3ea258917ee6a32e9e45f578b231 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@suse.cz>
Date: Wed, 5 Jul 2006 17:39:49 +0200
Subject: [PATCH 0890/1063] [ALSA] HDA codec & CA0106 - add/fix TLV support

Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ca0106/ca0106_mixer.c |  4 +++-
 sound/pci/hda/hda_codec.c       | 33 +++++++++++++++++++++++++++++++++
 sound/pci/hda/hda_local.h       |  5 +++++
 sound/pci/hda/patch_analog.c    | 17 +++++++++++++++++
 4 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index 35309b3ed8c0c..df75270939acc 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -472,10 +472,12 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol,
 #define CA_VOLUME(xname,chid,reg) \
 {								\
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname,	\
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |		\
+	          SNDRV_CTL_ELEM_ACCESS_TLV_READ,		\
 	.info =	 snd_ca0106_volume_info,			\
 	.get =   snd_ca0106_volume_get,				\
 	.put =   snd_ca0106_volume_put,				\
-	.tlv =	 snd_ca0106_db_scale,				\
+	.tlv.p = snd_ca0106_db_scale,				\
 	.private_value = ((chid) << 8) | (reg)			\
 }
 
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 23201f3eeb129..78ff4575699d2 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -29,6 +29,7 @@
 #include <sound/core.h>
 #include "hda_codec.h"
 #include <sound/asoundef.h>
+#include <sound/tlv.h>
 #include <sound/initval.h>
 #include "hda_local.h"
 
@@ -841,6 +842,38 @@ int snd_hda_mixer_amp_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e
 	return change;
 }
 
+int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag,
+			  unsigned int size, unsigned int __user *_tlv)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	hda_nid_t nid = get_amp_nid(kcontrol);
+	int dir = get_amp_direction(kcontrol);
+	u32 caps, val1, val2;
+
+	if (size < 4 * sizeof(unsigned int))
+		return -ENOMEM;
+	caps = query_amp_caps(codec, nid, dir);
+	val2 = (((caps & AC_AMPCAP_STEP_SIZE) >> AC_AMPCAP_STEP_SIZE_SHIFT) + 1) * 25;
+	val1 = -((caps & AC_AMPCAP_OFFSET) >> AC_AMPCAP_OFFSET_SHIFT);
+	val1 = ((int)val1) * ((int)val2);
+	if (caps & AC_AMPCAP_MUTE)
+		val2 |= 0x10000;
+	if ((val2 & 0x10000) == 0 && dir == HDA_OUTPUT) {
+		caps = query_amp_caps(codec, nid, HDA_INPUT);
+		if (caps & AC_AMPCAP_MUTE)
+			val2 |= 0x10000;
+	}
+	if (put_user(SNDRV_CTL_TLVT_DB_SCALE, _tlv))
+		return -EFAULT;
+	if (put_user(2 * sizeof(unsigned int), _tlv + 1))
+		return -EFAULT;
+	if (put_user(val1, _tlv + 2))
+		return -EFAULT;
+	if (put_user(val2, _tlv + 3))
+		return -EFAULT;
+	return 0;
+}
+
 /* switch */
 int snd_hda_mixer_amp_switch_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 14e8aa2806eda..0f0ae685a9c17 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -30,9 +30,13 @@
 /* mono volume with index (index=0,1,...) (channel=1,2) */
 #define HDA_CODEC_VOLUME_MONO_IDX(xname, xcidx, nid, channel, xindex, direction) \
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xcidx,  \
+	  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \
+	  	    SNDRV_CTL_ELEM_ACCESS_TLV_READ | \
+	  	    SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK, \
 	  .info = snd_hda_mixer_amp_volume_info, \
 	  .get = snd_hda_mixer_amp_volume_get, \
 	  .put = snd_hda_mixer_amp_volume_put, \
+	  .tlv.c = snd_hda_mixer_amp_tlv, \
 	  .private_value = HDA_COMPOSE_AMP_VAL(nid, channel, xindex, direction) }
 /* stereo volume with index */
 #define HDA_CODEC_VOLUME_IDX(xname, xcidx, nid, xindex, direction) \
@@ -63,6 +67,7 @@
 int snd_hda_mixer_amp_volume_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo);
 int snd_hda_mixer_amp_volume_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
 int snd_hda_mixer_amp_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
+int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *tlv);
 int snd_hda_mixer_amp_switch_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo);
 int snd_hda_mixer_amp_switch_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
 int snd_hda_mixer_amp_switch_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 6823f2bc10b3b..54506d4e57d57 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -452,6 +452,19 @@ static int ad1986a_pcm_amp_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl
 	return change;
 }
 
+static int ad1986a_pcm_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag,
+			       unsigned int size, unsigned int __user *_tlv)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ad198x_spec *ad = codec->spec;
+
+	mutex_lock(&ad->amp_mutex);
+	snd_hda_mixer_amp_tlv(kcontrol, op_flag, size, _tlv);
+	mutex_unlock(&ad->amp_mutex);
+	return 0;
+}
+
+
 #define ad1986a_pcm_amp_sw_info		snd_hda_mixer_amp_switch_info
 
 static int ad1986a_pcm_amp_sw_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
@@ -488,9 +501,13 @@ static struct snd_kcontrol_new ad1986a_mixers[] = {
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 		.name = "PCM Playback Volume",
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			  SNDRV_CTL_ELEM_ACCESS_TLV_READ |
+			  SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK,
 		.info = ad1986a_pcm_amp_vol_info,
 		.get = ad1986a_pcm_amp_vol_get,
 		.put = ad1986a_pcm_amp_vol_put,
+		.tlv.c = ad1986a_pcm_amp_tlv,
 		.private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT)
 	},
 	{
-- 
GitLab


From 22a27c7f8d0752b38b315d6a192c338d45ea28d5 Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@embeddedalley.com>
Date: Thu, 6 Jul 2006 18:49:10 +0200
Subject: [PATCH 0891/1063] [ALSA] hda: fix sigmatel 9227/8/9 codec support

SigmaTel 9227/8/9 IDs must use the 927x patch.

Signed-off-by: Matt Porter <mporter@embeddedalley.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index ea99083a1024a..ac96336f3484d 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1525,12 +1525,12 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
  	{ .id = 0x83847681, .name = "STAC9220D/9223D A2", .patch = patch_stac922x },
  	{ .id = 0x83847682, .name = "STAC9221 A2", .patch = patch_stac922x },
  	{ .id = 0x83847683, .name = "STAC9221D A2", .patch = patch_stac922x },
- 	{ .id = 0x83847618, .name = "STAC9227", .patch = patch_stac922x },
- 	{ .id = 0x83847619, .name = "STAC9227", .patch = patch_stac922x },
- 	{ .id = 0x83847616, .name = "STAC9228", .patch = patch_stac922x },
- 	{ .id = 0x83847617, .name = "STAC9228", .patch = patch_stac922x },
- 	{ .id = 0x83847614, .name = "STAC9229", .patch = patch_stac922x },
- 	{ .id = 0x83847615, .name = "STAC9229", .patch = patch_stac922x },
+ 	{ .id = 0x83847618, .name = "STAC9227", .patch = patch_stac927x },
+ 	{ .id = 0x83847619, .name = "STAC9227", .patch = patch_stac927x },
+ 	{ .id = 0x83847616, .name = "STAC9228", .patch = patch_stac927x },
+ 	{ .id = 0x83847617, .name = "STAC9228", .patch = patch_stac927x },
+ 	{ .id = 0x83847614, .name = "STAC9229", .patch = patch_stac927x },
+ 	{ .id = 0x83847615, .name = "STAC9229", .patch = patch_stac927x },
  	{ .id = 0x83847620, .name = "STAC9274", .patch = patch_stac927x },
  	{ .id = 0x83847621, .name = "STAC9274D", .patch = patch_stac927x },
  	{ .id = 0x83847622, .name = "STAC9273X", .patch = patch_stac927x },
-- 
GitLab


From 11b3a7555aa1b1629614e919889a4479dfe6f37b Mon Sep 17 00:00:00 2001
From: James Courtier-Dutton <James@superbug.co.uk>
Date: Sat, 8 Jul 2006 16:39:30 +0100
Subject: [PATCH 0892/1063] [ALSA] snd-emu10k1: Implement 24bit capture via
 Philips 1361T ADC for SB0240 card.

Signed-off-by: James Courtier-Dutton <James@superbug.co.uk>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/emu10k1/emu10k1_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c
index 79f24cdf5fbf0..be65d4db8e277 100644
--- a/sound/pci/emu10k1/emu10k1_main.c
+++ b/sound/pci/emu10k1/emu10k1_main.c
@@ -927,6 +927,7 @@ static struct snd_emu_chip_details emu_chip_details[] = {
 	 .ca0151_chip = 1,
 	 .spk71 = 1,
 	 .spdif_bug = 1,
+	 .adc_1361t = 1,  /* 24 bit capture instead of 16bit */
 	 .ac97_chip = 1} ,
 	{.vendor = 0x1102, .device = 0x0004, .subsystem = 0x10051102,
 	 .driver = "Audigy2", .name = "Audigy 2 EX [1005]", 
-- 
GitLab


From 6a65d793b0a82c7e190d9fd92a479401b6a127ca Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 14 Jul 2006 14:39:34 +0200
Subject: [PATCH 0893/1063] [ALSA] Remove unused tlv_rw field from struct
 snd_kcontrol

Remove unused tlv_rw field from struct snd_kcontrol.  The callback is
set in tlv.c field, instead.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/control.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/sound/control.h b/include/sound/control.h
index e3905c5a0950a..1de148b0fd947 100644
--- a/include/sound/control.h
+++ b/include/sound/control.h
@@ -67,7 +67,6 @@ struct snd_kcontrol {
 	snd_kcontrol_info_t *info;
 	snd_kcontrol_get_t *get;
 	snd_kcontrol_put_t *put;
-	snd_kcontrol_tlv_rw_t *tlv_rw;
 	union {
 		snd_kcontrol_tlv_rw_t *c;
 		unsigned int *p;
-- 
GitLab


From 7bc5ba7e02f63a5732fdf99e7471f54738f6f918 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 14 Jul 2006 15:18:19 +0200
Subject: [PATCH 0894/1063] [ALSA] Add TLV support to snd-usb-audio driver

Added TLV-read support to snd-usb-audio driver for passing
the volume dB scale information to user-space.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/usb/usbmixer.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index 491e975a0c87e..e516d6adbb224 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -37,6 +37,7 @@
 #include <sound/control.h>
 #include <sound/hwdep.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 
 #include "usbaudio.h"
 
@@ -416,6 +417,26 @@ static inline int set_cur_mix_value(struct usb_mixer_elem_info *cval, int channe
 	return set_ctl_value(cval, SET_CUR, (cval->control << 8) | channel, value);
 }
 
+/*
+ * TLV callback for mixer volume controls
+ */
+static int mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag,
+			 unsigned int size, unsigned int __user *_tlv)
+{
+	struct usb_mixer_elem_info *cval = kcontrol->private_data;
+	DECLARE_TLV_DB_SCALE(scale, 0, 0, 0);
+
+	if (size < sizeof(scale))
+		return -ENOMEM;
+	/* USB descriptions contain the dB scale in 1/256 dB unit
+	 * while ALSA TLV contains in 1/100 dB unit
+	 */
+	scale[2] = (convert_signed_value(cval, cval->min) * 100) / 256;
+	scale[3] = (convert_signed_value(cval, cval->res) * 100) / 256;
+	if (copy_to_user(_tlv, scale, sizeof(scale)))
+		return -EFAULT;
+	return 0;
+}
 
 /*
  * parser routines begin here...
@@ -933,6 +954,12 @@ static void build_feature_ctl(struct mixer_build *state, unsigned char *desc,
 		}
 		strlcat(kctl->id.name + len, control == USB_FEATURE_MUTE ? " Switch" : " Volume",
 			sizeof(kctl->id.name));
+		if (control == USB_FEATURE_VOLUME) {
+			kctl->tlv.c = mixer_vol_tlv;
+			kctl->vd[0].access |= 
+				SNDRV_CTL_ELEM_ACCESS_TLV_READ |
+				SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
+		}
 		break;
 
 	default:
-- 
GitLab


From 17f48ec3f15ddb8080b151304ee887c68f7e4650 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 17 Jul 2006 16:50:56 +0200
Subject: [PATCH 0895/1063] [ALSA] system timer: fix lost ticks correction
 adjustment

Fix the adjustment of the lost ticks correction variable in the case
when the correction has been fully taken into account in the next timer
expiration value.  Subtracting the scheduled ticks value would result in
an underflow.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 7e5e562fe356e..86357007259d1 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -987,7 +987,7 @@ static int snd_timer_s_start(struct snd_timer * timer)
 		njiff++;
 	} else {
 		njiff += timer->sticks - priv->correction;
-		priv->correction -= timer->sticks;
+		priv->correction = 0;
 	}
 	priv->last_expires = priv->tlist.expires = njiff;
 	add_timer(&priv->tlist);
-- 
GitLab


From 6ed5eff025b72cb84a884d4be05f854f13b1542f Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 17 Jul 2006 16:51:37 +0200
Subject: [PATCH 0896/1063] [ALSA] system timer: accumulate correction for
 multiple lost ticks

When multiple timer interrupts arrive too late, correct for all delays
instead of ignoring the earlier ones.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 86357007259d1..0f6e6727ff7c5 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -971,7 +971,7 @@ static void snd_timer_s_function(unsigned long data)
 	struct snd_timer_system_private *priv = timer->private_data;
 	unsigned long jiff = jiffies;
 	if (time_after(jiff, priv->last_expires))
-		priv->correction = (long)jiff - (long)priv->last_expires;
+		priv->correction += (long)jiff - (long)priv->last_expires;
 	snd_timer_interrupt(timer, (long)jiff - (long)priv->last_jiffies);
 }
 
-- 
GitLab


From de2696d8bc9c81874b3743e0c27708760cb7fb52 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 17 Jul 2006 16:52:09 +0200
Subject: [PATCH 0897/1063] [ALSA] system timer: clear correction value when
 timer stops

Do not retain the old correction value when the timer was stopped.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/timer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 0f6e6727ff7c5..4fcc8549e4a64 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1006,6 +1006,7 @@ static int snd_timer_s_stop(struct snd_timer * timer)
 		timer->sticks = priv->last_expires - jiff;
 	else
 		timer->sticks = 1;
+	priv->correction = 0;
 	return 0;
 }
 
-- 
GitLab


From cd93fe4770ca607c7f39260c02941deccbd77b8b Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 17 Jul 2006 16:53:57 +0200
Subject: [PATCH 0898/1063] [ALSA] timer: fix timer rescheduling

When checking whether a hardware timer needs to be rescheduled, we have
to compare against the previously scheduled interval and not against the
actual interval between the last two interrupts.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 4fcc8549e4a64..3a2f8e2ca401e 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -718,7 +718,7 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left)
 		}
 	}
 	if (timer->flags & SNDRV_TIMER_FLG_RESCHED)
-		snd_timer_reschedule(timer, ticks_left);
+		snd_timer_reschedule(timer, timer->sticks);
 	if (timer->running) {
 		if (timer->hw.flags & SNDRV_TIMER_HW_STOP) {
 			timer->hw.stop(timer);
-- 
GitLab


From 6e9059b05fa733045d7845ac70c5ba0a05e3c2d1 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Fri, 21 Jul 2006 10:45:19 +0200
Subject: [PATCH 0899/1063] [ALSA] system timer: remove unused
 snd_timer_system_private.timer field

Remove the snd_timer_system_private structure's timer field that was never used.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/timer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 3a2f8e2ca401e..10a79aed33f86 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -959,7 +959,6 @@ int snd_timer_global_register(struct snd_timer *timer)
 
 struct snd_timer_system_private {
 	struct timer_list tlist;
-	struct timer * timer;
 	unsigned long last_expires;
 	unsigned long last_jiffies;
 	unsigned long correction;
-- 
GitLab


From f1265391ea002a28933dc1a8a55948c0ed64c9d0 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Fri, 21 Jul 2006 10:46:18 +0200
Subject: [PATCH 0900/1063] [ALSA] usb-audio: add more Yamaha devices

Add some quirks for some unknown Yamaha USB MIDI devices.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/usb/usbquirks.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index 9351846d7a9d9..a7e9563a01df4 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -123,6 +123,10 @@ YAMAHA_DEVICE(0x103e, NULL),
 YAMAHA_DEVICE(0x103f, NULL),
 YAMAHA_DEVICE(0x1040, NULL),
 YAMAHA_DEVICE(0x1041, NULL),
+YAMAHA_DEVICE(0x1042, NULL),
+YAMAHA_DEVICE(0x1043, NULL),
+YAMAHA_DEVICE(0x1044, NULL),
+YAMAHA_DEVICE(0x1045, NULL),
 YAMAHA_DEVICE(0x2000, "DGP-7"),
 YAMAHA_DEVICE(0x2001, "DGP-5"),
 YAMAHA_DEVICE(0x2002, NULL),
@@ -141,6 +145,7 @@ YAMAHA_DEVICE(0x500b, "DME64N"),
 YAMAHA_DEVICE(0x500c, "DME24N"),
 YAMAHA_DEVICE(0x500d, NULL),
 YAMAHA_DEVICE(0x500e, NULL),
+YAMAHA_DEVICE(0x500f, NULL),
 YAMAHA_DEVICE(0x7000, "DTX"),
 YAMAHA_DEVICE(0x7010, "UB99"),
 #undef YAMAHA_DEVICE
-- 
GitLab


From fff36e472b4315df77513f4339c5c199c6aad28b Mon Sep 17 00:00:00 2001
From: James Courtier-Dutton <James@superbug.co.uk>
Date: Sat, 22 Jul 2006 15:02:48 +0100
Subject: [PATCH 0901/1063] [ALSA] snd-ca0106: Fix dB gain TLVs.

Signed-off-by: James Courtier-Dutton <James@superbug.co.uk>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ca0106/ca0106_mixer.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index df75270939acc..6d64438cecc92 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -74,7 +74,8 @@
 
 #include "ca0106.h"
 
-static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale, -5150, 75, 1);
+static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale1, -5175, 25, 1);
+static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale2, -10350, 50, 1);
 
 static int snd_ca0106_shared_spdif_info(struct snd_kcontrol *kcontrol,
 					struct snd_ctl_elem_info *uinfo)
@@ -477,16 +478,19 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol,
 	.info =	 snd_ca0106_volume_info,			\
 	.get =   snd_ca0106_volume_get,				\
 	.put =   snd_ca0106_volume_put,				\
-	.tlv.p = snd_ca0106_db_scale,				\
+	.tlv.p = snd_ca0106_db_scale1,				\
 	.private_value = ((chid) << 8) | (reg)			\
 }
 
 #define I2C_VOLUME(xname,chid) \
 {								\
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname,	\
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |		\
+	          SNDRV_CTL_ELEM_ACCESS_TLV_READ,		\
 	.info =  snd_ca0106_i2c_volume_info,			\
 	.get =   snd_ca0106_i2c_volume_get,			\
 	.put =   snd_ca0106_i2c_volume_put,			\
+	.tlv.p = snd_ca0106_db_scale2,				\
 	.private_value = chid					\
 }
 
-- 
GitLab


From 31508f83f591dc8764427b6321c89f8f9e84bad2 Mon Sep 17 00:00:00 2001
From: James Courtier-Dutton <James@superbug.co.uk>
Date: Sat, 22 Jul 2006 17:02:10 +0100
Subject: [PATCH 0902/1063] [ALSA] snd-emu10k1: Implement dB gain information.

Signed-off-by: James Courtier-Dutton <James@superbug.co.uk>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/emu10k1.h   |  4 ++++
 sound/pci/emu10k1/emufx.c | 11 +++++++++++
 sound/pci/emu10k1/p16v.c  |  5 +++++
 3 files changed, 20 insertions(+)

diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 884bbf54cd361..892e310c504d6 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1524,6 +1524,10 @@ struct snd_emu10k1_fx8010_control_gpr {
 	unsigned int value[32];		/* initial values */
 	unsigned int min;		/* minimum range */
 	unsigned int max;		/* maximum range */
+	union {
+		snd_kcontrol_tlv_rw_t *c;
+		unsigned int *p;
+	} tlv;
 	unsigned int translation;	/* translation type (EMU10K1_GPR_TRANSLATION*) */
 };
 
diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
index dfba00230d4da..00fc904c251dc 100644
--- a/sound/pci/emu10k1/emufx.c
+++ b/sound/pci/emu10k1/emufx.c
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 
 #include <sound/core.h>
+#include <sound/tlv.h>
 #include <sound/emu10k1.h>
 
 #if 0		/* for testing purposes - digital out -> capture */
@@ -290,6 +291,9 @@ static const u32 db_table[101] = {
 	0x7fffffff,
 };
 
+/* EMU10k1/EMU10k2 DSP control db gain */
+static DECLARE_TLV_DB_SCALE(snd_emu10k1_db_scale1, -4000, 40, 1);
+
 static const u32 onoff_table[2] = {
 	0x00000000, 0x00000001
 };
@@ -755,6 +759,11 @@ static int snd_emu10k1_add_controls(struct snd_emu10k1 *emu,
 		knew.device = gctl->id.device;
 		knew.subdevice = gctl->id.subdevice;
 		knew.info = snd_emu10k1_gpr_ctl_info;
+		if (gctl->tlv.p) {
+			knew.tlv.p = gctl->tlv.p;
+			knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+				SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+		} 
 		knew.get = snd_emu10k1_gpr_ctl_get;
 		knew.put = snd_emu10k1_gpr_ctl_put;
 		memset(nctl, 0, sizeof(*nctl));
@@ -1013,6 +1022,7 @@ snd_emu10k1_init_mono_control(struct snd_emu10k1_fx8010_control_gpr *ctl,
 	ctl->gpr[0] = gpr + 0; ctl->value[0] = defval;
 	ctl->min = 0;
 	ctl->max = 100;
+	ctl->tlv.p = snd_emu10k1_db_scale1;
 	ctl->translation = EMU10K1_GPR_TRANSLATION_TABLE100;	
 }
 
@@ -1027,6 +1037,7 @@ snd_emu10k1_init_stereo_control(struct snd_emu10k1_fx8010_control_gpr *ctl,
 	ctl->gpr[1] = gpr + 1; ctl->value[1] = defval;
 	ctl->min = 0;
 	ctl->max = 100;
+	ctl->tlv.p = snd_emu10k1_db_scale1;
 	ctl->translation = EMU10K1_GPR_TRANSLATION_TABLE100;
 }
 
diff --git a/sound/pci/emu10k1/p16v.c b/sound/pci/emu10k1/p16v.c
index 9905651935fb3..1e44714b86236 100644
--- a/sound/pci/emu10k1/p16v.c
+++ b/sound/pci/emu10k1/p16v.c
@@ -100,6 +100,7 @@
 #include <sound/pcm.h>
 #include <sound/ac97_codec.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 #include <sound/emu10k1.h>
 #include "p16v.h"
 
@@ -784,12 +785,16 @@ static int snd_p16v_capture_channel_put(struct snd_kcontrol *kcontrol,
 	}
         return change;
 }
+static DECLARE_TLV_DB_SCALE(snd_p16v_db_scale1, -5175, 25, 1);
 
 #define P16V_VOL(xname,xreg,xhl) { \
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+        .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |             \
+                  SNDRV_CTL_ELEM_ACCESS_TLV_READ,               \
 	.info = snd_p16v_volume_info, \
 	.get = snd_p16v_volume_get, \
 	.put = snd_p16v_volume_put, \
+	.tlv.p = snd_p16v_db_scale1, \
 	.private_value = ((xreg) | ((xhl) << 8)) \
 }
 
-- 
GitLab


From 0a197f005a27766f5c9e0d960e7650748ec1ee4f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 25 Jul 2006 14:51:14 +0200
Subject: [PATCH 0903/1063] [ALSA] Add model entry for Samsung X10 laptop

Added the proper model entry (laptop-eapd) for Samsung X10-T2300
Culesa laptop with AD1986A codec.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_analog.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 54506d4e57d57..e547442e6fed6 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -820,6 +820,8 @@ static struct hda_board_config ad1986a_cfg_tbl[] = {
 	  .config = AD1986A_LAPTOP_EAPD }, /* Samsung X60 Chane */
 	{ .pci_subvendor = 0x144d, .pci_subdevice = 0xc024,
 	  .config = AD1986A_LAPTOP_EAPD }, /* Samsung R65-T2300 Charis */
+	{ .pci_subvendor = 0x144d, .pci_subdevice = 0xc026,
+	  .config = AD1986A_LAPTOP_EAPD }, /* Samsung X10-T2300 Culesa */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1153,
 	  .config = AD1986A_LAPTOP_EAPD }, /* ASUS M9 */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1213,
-- 
GitLab


From 5a053d012d0576e9306009939ca81a86547ef35a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 25 Jul 2006 14:51:15 +0200
Subject: [PATCH 0904/1063] [ALSA] Add model entry for Clevo m665n laptop

Added the proper model entry for Clevo m665n laptop with ALC880 codec.
Also, added a model string 'clevo' to enable the clevo-type model option.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 Documentation/sound/alsa/ALSA-Configuration.txt | 1 +
 sound/pci/hda/patch_realtek.c                   | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index f61af23dd85d7..74ea66d33cf91 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -783,6 +783,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  F1734		2-jack
 	  lg		LG laptop (m1 express dual)
 	  lg-lw		LG LW20 laptop
+	  clevo		Clevo laptops (m520G, m665n)
 	  test		for testing/debugging purpose, almost all controls can be
 			adjusted.  Appearing only when compiled with
 			$CONFIG_SND_DEBUG=y
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 18d105263feae..f4c96aa43be76 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2156,8 +2156,13 @@ static struct hda_board_config alc880_cfg_tbl[] = {
 	{ .modelname = "3stack-digout", .config = ALC880_3ST_DIG },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe308, .config = ALC880_3ST_DIG },
 	{ .pci_subvendor = 0x1025, .pci_subdevice = 0x0070, .config = ALC880_3ST_DIG },
-	/* Clevo m520G NB */
-	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x0520, .config = ALC880_CLEVO },
+
+	/* Clevo laptops */
+	{ .modelname = "clevo", .config = ALC880_CLEVO },
+	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x0520,
+	  .config = ALC880_CLEVO }, /* Clevo m520G NB */
+	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x0660,
+	  .config = ALC880_CLEVO }, /* Clevo m665n */
 
 	/* Back 3 jack plus 1 SPDIF out jack, front 2 jack (Internal add Aux-In)*/
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe305, .config = ALC880_3ST_DIG },
-- 
GitLab


From 6d177ba7839dd7ed391c2f36b121eb09d1eaee4c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 25 Jul 2006 14:51:15 +0200
Subject: [PATCH 0905/1063] [ALSA] Add hp-bpc model type for HP laptops

Added 'hp-bpc' model type for HP xw4400-compatible laptops.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 Documentation/sound/alsa/ALSA-Configuration.txt | 1 +
 sound/pci/hda/patch_realtek.c                   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 74ea66d33cf91..c595acb3bf80a 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -798,6 +798,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
 	ALC262
 	  fujitsu	Fujitsu Laptop
+	  hp-bpc	HP xw4400/6400/8400/9400 laptops
 	  basic		fixed pin assignment w/o SPDIF
 	  auto		auto-config reading BIOS (default)
 
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index f4c96aa43be76..51f76eef9353d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5774,6 +5774,7 @@ static struct hda_board_config alc262_cfg_tbl[] = {
 	{ .modelname = "fujitsu", .config = ALC262_FUJITSU },
 	{ .pci_subvendor = 0x10cf, .pci_subdevice = 0x1397,
 	  .config = ALC262_FUJITSU },
+	{ .modelname = "hp-bpc", .config = ALC262_HP_BPC },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x208c,
 	  .config = ALC262_HP_BPC }, /* xw4400 */
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3014,
-- 
GitLab


From 304dcaac91f0d26543b31fd7e63726f096c826ee Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 25 Jul 2006 14:51:16 +0200
Subject: [PATCH 0906/1063] [ALSA] Add support of Benq laptop with ALC262

Added the support of Benq laptop with ALC262 codec.
A model string 'benq' is added, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         |  1 +
 sound/pci/hda/patch_realtek.c                 | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index c595acb3bf80a..885d2ed88fd0d 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -799,6 +799,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	ALC262
 	  fujitsu	Fujitsu Laptop
 	  hp-bpc	HP xw4400/6400/8400/9400 laptops
+	  benq		Benq ED8
 	  basic		fixed pin assignment w/o SPDIF
 	  auto		auto-config reading BIOS (default)
 
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 51f76eef9353d..42c4f90a92b83 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -79,6 +79,7 @@ enum {
 	ALC262_BASIC,
 	ALC262_FUJITSU,
 	ALC262_HP_BPC,
+	ALC262_BENQ_ED8,
 	ALC262_AUTO,
 	ALC262_MODEL_LAST /* last tag */
 };
@@ -5504,6 +5505,13 @@ static struct snd_kcontrol_new alc262_fujitsu_mixer[] = {
 	{ } /* end */
 };
 
+/* additional init verbs for Benq laptops */
+static struct hda_verb alc262_EAPD_verbs[] = {
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+	{0x20, AC_VERB_SET_PROC_COEF,  0x3070},
+	{}
+};
+
 /* add playback controls from the parsed DAC table */
 static int alc262_auto_create_multi_out_ctls(struct alc_spec *spec, const struct auto_pin_cfg *cfg)
 {
@@ -5783,6 +5791,9 @@ static struct hda_board_config alc262_cfg_tbl[] = {
 	  .config = ALC262_HP_BPC }, /* xw8400 */
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x12fe,
 	  .config = ALC262_HP_BPC }, /* xw9400 */
+	{ .modelname = "benq", .config = ALC262_BENQ_ED8 },
+	{ .pci_subvendor = 0x17ff, .pci_subdevice = 0x0560,
+	  .config = ALC262_BENQ_ED8 },
 	{ .modelname = "auto", .config = ALC262_AUTO },
 	{}
 };
@@ -5820,6 +5831,16 @@ static struct alc_config_preset alc262_presets[] = {
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_HP_capture_source,
 	},	
+	[ALC262_BENQ_ED8] = {
+		.mixers = { alc262_base_mixer },
+		.init_verbs = { alc262_init_verbs, alc262_EAPD_verbs },
+		.num_dacs = ARRAY_SIZE(alc262_dac_nids),
+		.dac_nids = alc262_dac_nids,
+		.hp_nid = 0x03,
+		.num_channel_mode = ARRAY_SIZE(alc262_modes),
+		.channel_mode = alc262_modes,
+		.input_mux = &alc262_capture_source,
+	},		
 };
 
 static int patch_alc262(struct hda_codec *codec)
-- 
GitLab


From 827a56ea3d9c3d5f80c5520ba9d487f9b7069238 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 25 Jul 2006 14:51:16 +0200
Subject: [PATCH 0907/1063] [ALSA] Added model for ASUS M2NPV-VM mobo

Added the proper model (3stack) for ASUS M2NPV-VM mobo with AD1986A
codec.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_analog.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index e547442e6fed6..8955397cca6f8 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -808,6 +808,8 @@ static struct hda_board_config ad1986a_cfg_tbl[] = {
 	  .config = AD1986A_3STACK }, /* ASUS A8N-VM CSM */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x81b3,
 	  .config = AD1986A_3STACK }, /* ASUS P5RD2-VM / P5GPL-X SE */
+	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x81cb,
+	  .config = AD1986A_3STACK }, /* ASUS M2NPV-VM */
 	{ .modelname = "laptop",	.config = AD1986A_LAPTOP },
 	{ .pci_subvendor = 0x144d, .pci_subdevice = 0xc01e,
 	  .config = AD1986A_LAPTOP }, /* FSC V2060 */
-- 
GitLab


From b7c6b03405896bc181e1e2c9c06628c3b1681af5 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 25 Jul 2006 15:29:37 +0200
Subject: [PATCH 0908/1063] [ALSA] via82xx - Add dxs_support entry for a FSC
 machine

Added dxs_support=5 entry for a FSC machine.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/via82xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
index 2c23a665c3e3b..e0e3bfd7a2dbc 100644
--- a/sound/pci/via82xx.c
+++ b/sound/pci/via82xx.c
@@ -2404,6 +2404,7 @@ static int __devinit check_dxs_list(struct pci_dev *pci, int revision)
 		{ .subvendor = 0x16f3, .subdevice = 0x6405, .action = VIA_DXS_SRC }, /* Jetway K8M8MS */
 		{ .subvendor = 0x1734, .subdevice = 0x1078, .action = VIA_DXS_SRC }, /* FSC Amilo L7300 */
 		{ .subvendor = 0x1734, .subdevice = 0x1093, .action = VIA_DXS_SRC }, /* FSC */
+		{ .subvendor = 0x1734, .subdevice = 0x10ab, .action = VIA_DXS_SRC }, /* FSC */
 		{ .subvendor = 0x1849, .subdevice = 0x3059, .action = VIA_DXS_NO_VRA }, /* ASRock K7VM2 */
 		{ .subvendor = 0x1849, .subdevice = 0x9739, .action = VIA_DXS_SRC }, /* ASRock mobo(?) */
 		{ .subvendor = 0x1849, .subdevice = 0x9761, .action = VIA_DXS_SRC }, /* ASRock mobo(?) */
-- 
GitLab


From bc6c531eb53de8a0ba355f76ce2bd28f58e46707 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@suse.cz>
Date: Thu, 27 Jul 2006 10:44:30 +0200
Subject: [PATCH 0909/1063] [ALSA] HDA driver - do not set mute flag for dB
 scale (follow HDA specification)

Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_codec.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 78ff4575699d2..399860c36be9b 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -856,13 +856,6 @@ int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 	val2 = (((caps & AC_AMPCAP_STEP_SIZE) >> AC_AMPCAP_STEP_SIZE_SHIFT) + 1) * 25;
 	val1 = -((caps & AC_AMPCAP_OFFSET) >> AC_AMPCAP_OFFSET_SHIFT);
 	val1 = ((int)val1) * ((int)val2);
-	if (caps & AC_AMPCAP_MUTE)
-		val2 |= 0x10000;
-	if ((val2 & 0x10000) == 0 && dir == HDA_OUTPUT) {
-		caps = query_amp_caps(codec, nid, HDA_INPUT);
-		if (caps & AC_AMPCAP_MUTE)
-			val2 |= 0x10000;
-	}
 	if (put_user(SNDRV_CTL_TLVT_DB_SCALE, _tlv))
 		return -EFAULT;
 	if (put_user(2 * sizeof(unsigned int), _tlv + 1))
-- 
GitLab


From 9265d199616630c2eb993ffe40c9daef3d6873b3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 27 Jul 2006 15:50:14 +0200
Subject: [PATCH 0910/1063] [ALSA] Fix Makefile of cs5535audio

Use ifeq instead of ifdef in Makefile to make the maintenance of
out-of-kernel tree easier.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/cs5535audio/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/cs5535audio/Makefile b/sound/pci/cs5535audio/Makefile
index 2911a8adc1f23..ad947b4c04cce 100644
--- a/sound/pci/cs5535audio/Makefile
+++ b/sound/pci/cs5535audio/Makefile
@@ -4,7 +4,7 @@
 
 snd-cs5535audio-objs := cs5535audio.o cs5535audio_pcm.o
 
-ifdef CONFIG_PM
+ifeq ($(CONFIG_PM),y)
 snd-cs5535audio-objs += cs5535audio_pm.o
 endif
 
-- 
GitLab


From 68ab801e32bbe2caac8b8c6e6e94f41fe7d687ad Mon Sep 17 00:00:00 2001
From: Matthias Koenig <mk@phasorlab.de>
Date: Thu, 27 Jul 2006 16:59:23 +0200
Subject: [PATCH 0911/1063] [ALSA] Add snd-mts64 driver for ESI Miditerminal
 4140

Added snd-mts64 driver for Ego Systems (ESI) Miditerminal 4140
by Matthias Koenig <mk@phasorlab.de>.
The driver requires parport (CONFIG_PARPORT).

Signed-off-by: Matthias Koenig <mk@phasorlab.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         |    8 +
 sound/drivers/Kconfig                         |   13 +
 sound/drivers/Makefile                        |    2 +
 sound/drivers/mts64.c                         | 1091 +++++++++++++++++
 4 files changed, 1114 insertions(+)
 create mode 100644 sound/drivers/mts64.c

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 885d2ed88fd0d..7344815b855e0 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -1216,6 +1216,14 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     
     Module supports only 1 card.  This module has no enable option.
 
+  Module snd-mts64
+  ----------------
+
+    Module for Ego Systems (ESI) Miditerminal 4140
+
+    This module supports multiple devices.
+    Requires parport (CONFIG_PARPORT).
+
   Module snd-nm256
   ----------------
 
diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig
index 897dc2dfd7ddb..952c7f170101b 100644
--- a/sound/drivers/Kconfig
+++ b/sound/drivers/Kconfig
@@ -73,6 +73,19 @@ config SND_MTPAV
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-mtpav.
 
+config SND_MTS64
+	tristate "ESI Miditerminal 4140 driver"
+	depends on SND && PARPORT
+	select SND_RAWMIDI
+	help
+	  The ESI Miditerminal 4140 is a 4 In 4 Out MIDI Interface with 
+          additional SMPTE Timecode capabilities for the parallel port.
+
+	  Say 'Y' to include support for this device.
+
+	  To compile this driver as a module, choose 'M' here: the module 
+          will be called snd-mts64.
+
 config SND_SERIAL_U16550
 	tristate "UART16550 serial MIDI driver"
 	depends on SND
diff --git a/sound/drivers/Makefile b/sound/drivers/Makefile
index cb98c3d662be5..c9bad6d67e73a 100644
--- a/sound/drivers/Makefile
+++ b/sound/drivers/Makefile
@@ -5,6 +5,7 @@
 
 snd-dummy-objs := dummy.o
 snd-mtpav-objs := mtpav.o
+snd-mts64-objs := mts64.o
 snd-serial-u16550-objs := serial-u16550.o
 snd-virmidi-objs := virmidi.o
 
@@ -13,5 +14,6 @@ obj-$(CONFIG_SND_DUMMY) += snd-dummy.o
 obj-$(CONFIG_SND_VIRMIDI) += snd-virmidi.o
 obj-$(CONFIG_SND_SERIAL_U16550) += snd-serial-u16550.o
 obj-$(CONFIG_SND_MTPAV) += snd-mtpav.o
+obj-$(CONFIG_SND_MTS64) += snd-mts64.o
 
 obj-$(CONFIG_SND) += opl3/ opl4/ mpu401/ vx/
diff --git a/sound/drivers/mts64.c b/sound/drivers/mts64.c
new file mode 100644
index 0000000000000..169987302ae41
--- /dev/null
+++ b/sound/drivers/mts64.c
@@ -0,0 +1,1091 @@
+/*     
+ *   ALSA Driver for Ego Systems Inc. (ESI) Miditerminal 4140
+ *   Copyright (c) 2006 by Matthias König <mk@phasorlab.de>
+ *
+ *   This program is free software; you can redistribute it and/or modify 
+ *   it under the terms of the GNU General Public License as published by 
+ *   the Free Software Foundation; either version 2 of the License, or 
+ *   (at your option) any later version. 
+ *
+ *   This program is distributed in the hope that it will be useful, 
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <sound/driver.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/parport.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/rawmidi.h>
+#include <sound/control.h>
+
+#define CARD_NAME "Miditerminal 4140"
+#define DRIVER_NAME "MTS64"
+#define PLATFORM_DRIVER "snd_mts64"
+
+static int index[SNDRV_CARDS]  = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS]   = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+static struct platform_device *platform_devices[SNDRV_CARDS]; 
+static int device_count;
+
+module_param_array(index, int, NULL, S_IRUGO);
+MODULE_PARM_DESC(index, "Index value for " CARD_NAME " soundcard.");
+module_param_array(id, charp, NULL, S_IRUGO);
+MODULE_PARM_DESC(id, "ID string for " CARD_NAME " soundcard.");
+module_param_array(enable, bool, NULL, S_IRUGO);
+MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard.");
+
+MODULE_AUTHOR("Matthias Koenig <mk@phasorlab.de>");
+MODULE_DESCRIPTION("ESI Miditerminal 4140");
+MODULE_LICENSE("GPL");
+MODULE_SUPPORTED_DEVICE("{{ESI,Miditerminal 4140}}");
+
+/*********************************************************************
+ * Chip specific
+ *********************************************************************/
+#define MTS64_NUM_INPUT_PORTS 5
+#define MTS64_NUM_OUTPUT_PORTS 4
+#define MTS64_SMPTE_SUBSTREAM 4
+
+struct mts64 {
+	spinlock_t lock;
+	struct snd_card *card;
+	struct snd_rawmidi *rmidi;
+	struct pardevice *pardev;
+	int pardev_claimed;
+
+	int open_count;
+	int current_midi_output_port;
+	int current_midi_input_port;
+	u8 mode[MTS64_NUM_INPUT_PORTS];
+	struct snd_rawmidi_substream *midi_input_substream[MTS64_NUM_INPUT_PORTS];
+	int smpte_switch;
+	u8 time[4]; /* [0]=hh, [1]=mm, [2]=ss, [3]=ff */
+	u8 fps;
+};
+
+static int snd_mts64_free(struct mts64 *mts)
+{
+	kfree(mts);
+	return 0;
+}
+
+static int __devinit snd_mts64_create(struct snd_card *card, 
+				      struct pardevice *pardev, 
+				      struct mts64 **rchip)
+{
+	struct mts64 *mts;
+
+	*rchip = NULL;
+
+	mts = kzalloc(sizeof(struct mts64), GFP_KERNEL);
+	if (mts == NULL) 
+		return -ENOMEM;
+
+	/* Init chip specific data */
+	spin_lock_init(&mts->lock);
+	mts->card = card;
+	mts->pardev = pardev;
+	mts->current_midi_output_port = -1;
+	mts->current_midi_input_port = -1;
+
+	*rchip = mts;
+
+	return 0;
+}
+
+/*********************************************************************
+ * HW register related constants
+ *********************************************************************/
+
+/* Status Bits */
+#define MTS64_STAT_BSY             0x80
+#define MTS64_STAT_BIT_SET         0x20  /* readout process, bit is set */
+#define MTS64_STAT_PORT            0x10  /* read byte is a port number */
+
+/* Control Bits */
+#define MTS64_CTL_READOUT          0x08  /* enable readout */
+#define MTS64_CTL_WRITE_CMD        0x06  
+#define MTS64_CTL_WRITE_DATA       0x02  
+#define MTS64_CTL_STROBE           0x01  
+
+/* Command */
+#define MTS64_CMD_RESET            0xfe
+#define MTS64_CMD_PROBE            0x8f  /* Used in probing procedure */
+#define MTS64_CMD_SMPTE_SET_TIME   0xe8
+#define MTS64_CMD_SMPTE_SET_FPS    0xee
+#define MTS64_CMD_SMPTE_STOP       0xef
+#define MTS64_CMD_SMPTE_FPS_24     0xe3
+#define MTS64_CMD_SMPTE_FPS_25     0xe2
+#define MTS64_CMD_SMPTE_FPS_2997   0xe4 
+#define MTS64_CMD_SMPTE_FPS_30D    0xe1
+#define MTS64_CMD_SMPTE_FPS_30     0xe0
+#define MTS64_CMD_COM_OPEN         0xf8  /* setting the communication mode */
+#define MTS64_CMD_COM_CLOSE1       0xff  /* clearing communication mode */
+#define MTS64_CMD_COM_CLOSE2       0xf5
+
+/*********************************************************************
+ * Hardware specific functions
+ *********************************************************************/
+static void mts64_enable_readout(struct parport *p);
+static void mts64_disable_readout(struct parport *p);
+static int mts64_device_ready(struct parport *p);
+static int mts64_device_init(struct parport *p);
+static int mts64_device_open(struct mts64 *mts);
+static int mts64_device_close(struct mts64 *mts);
+static u8 mts64_map_midi_input(u8 c);
+static int mts64_probe(struct parport *p);
+static u16 mts64_read(struct parport *p);
+static u8 mts64_read_char(struct parport *p);
+static void mts64_smpte_start(struct parport *p,
+			      u8 hours, u8 minutes,
+			      u8 seconds, u8 frames,
+			      u8 idx);
+static void mts64_smpte_stop(struct parport *p);
+static void mts64_write_command(struct parport *p, u8 c);
+static void mts64_write_data(struct parport *p, u8 c);
+static void mts64_write_midi(struct mts64 *mts, u8 c, int midiport);
+
+
+/*  Enables the readout procedure
+ *
+ *  Before we can read a midi byte from the device, we have to set
+ *  bit 3 of control port.
+ */
+static void mts64_enable_readout(struct parport *p)
+{
+	u8 c;
+
+	c = parport_read_control(p);
+	c |= MTS64_CTL_READOUT;
+	parport_write_control(p, c); 
+}
+
+/*  Disables readout 
+ *
+ *  Readout is disabled by clearing bit 3 of control
+ */
+static void mts64_disable_readout(struct parport *p)
+{
+	u8 c;
+
+	c = parport_read_control(p);
+	c &= ~MTS64_CTL_READOUT;
+	parport_write_control(p, c);
+}
+
+/*  waits for device ready
+ *
+ *  Polls until Bit 7 of status reads 1 (the port inverts BUSY: 1 = not busy)
+ *  1 device ready
+ *  0 failure (bit still 0 after 0xffff reads)
+ */
+static int mts64_device_ready(struct parport *p)
+{
+	int i;
+	u8 c;
+
+	for (i = 0; i < 0xffff; ++i) {
+		c = parport_read_status(p);
+		c &= MTS64_STAT_BSY;
+		if (c != 0) 
+			return 1;
+	} 
+
+	return 0;
+}
+
+/*  Init device (LED blinking startup magic)
+ *
+ *  Returns:
+ *  0 init ok
+ *  -EIO failure
+ */
+static int __devinit mts64_device_init(struct parport *p)
+{
+	int i;
+
+	mts64_write_command(p, MTS64_CMD_RESET);
+
+	for (i = 0; i < 64; ++i) {
+		msleep(100);
+
+		if (mts64_probe(p) == 0) {
+			/* success */
+			mts64_disable_readout(p);
+			return 0;
+		}
+	}
+	mts64_disable_readout(p);
+
+	return -EIO;
+}
+
+/* 
+ *  Opens the device (set communication mode)
+ */
+static int mts64_device_open(struct mts64 *mts)
+{
+	int i;
+	struct parport *p = mts->pardev->port;
+
+	for (i = 0; i < 5; ++i)
+		mts64_write_command(p, MTS64_CMD_COM_OPEN);
+
+	return 0;
+}
+
+/*  
+ *  Close device (clear communication mode)
+ */
+static int mts64_device_close(struct mts64 *mts)
+{
+	int i;
+	struct parport *p = mts->pardev->port;
+
+	for (i = 0; i < 5; ++i) {
+		mts64_write_command(p, MTS64_CMD_COM_CLOSE1);
+		mts64_write_command(p, MTS64_CMD_COM_CLOSE2);
+	}
+
+	return 0;
+}
+
+/*  map hardware port to substream number
+ * 
+ *  When reading a byte from the device, the device tells us
+ *  on what port the byte is. This HW port has to be mapped to
+ *  the midiport (substream number).
+ *  substream 0-3 are Midiports 1-4
+ *  substream 4 is SMPTE Timecode
+ *  The mapping is done by the table:
+ *  HW | 0 | 1 | 2 | 3 | 4 
+ *  SW | 0 | 1 | 4 | 2 | 3
+ */
+static u8 mts64_map_midi_input(u8 c)
+{
+	static u8 map[] = { 0, 1, 4, 2, 3 };
+
+	return map[c];
+}
+
+
+/*  Probe parport for device
+ *
+ *  Do we have a Miditerminal 4140 on parport? 
+ *  Returns:
+ *  0       device found
+ *  -ENODEV no device
+ */
+static int __devinit mts64_probe(struct parport *p)
+{
+	u8 c;
+
+	mts64_smpte_stop(p);
+	mts64_write_command(p, MTS64_CMD_PROBE);
+
+	msleep(50);
+	
+	c = mts64_read(p);
+
+	c &= 0x00ff;
+	if (c != MTS64_CMD_PROBE) 
+		return -ENODEV;
+	else 
+		return 0;
+
+}
+
+/*  Read byte incl. status from device
+ *
+ *  Returns:
+ *  data in lower 8 bits and status in upper 8 bits
+ */
+static u16 mts64_read(struct parport *p)
+{
+	u8 data, status;
+
+	mts64_device_ready(p);
+	mts64_enable_readout(p);
+	status = parport_read_status(p);
+	data = mts64_read_char(p);
+	mts64_disable_readout(p);
+
+	return (status << 8) | data;
+}
+
+/*  Read a byte from device
+ *
+ *  Note, that readout mode has to be enabled.
+ *  readout procedure is as follows: 
+ *  - Write number of the Bit to read to DATA
+ *  - Read STATUS
+ *  - Bit 5 of STATUS indicates if Bit is set
+ *
+ *  Returns:
+ *  Byte read from device
+ */
+static u8 mts64_read_char(struct parport *p)
+{
+	u8 c = 0;
+	u8 status;
+	u8 i;
+
+	for (i = 0; i < 8; ++i) {
+		parport_write_data(p, i);
+		c >>= 1;
+		status = parport_read_status(p);
+		if (status & MTS64_STAT_BIT_SET) 
+			c |= 0x80;
+	}
+	
+	return c;
+}
+
+/*  Starts SMPTE Timecode generation
+ *
+ *  The device creates SMPTE Timecode by hardware; idx selects the frame rate:
+ *  0 24 fps
+ *  1 25 fps
+ *  2 29.97 fps
+ *  3 30 fps (Drop-frame)
+ *  4 30 fps
+ */
+static void mts64_smpte_start(struct parport *p,
+			      u8 hours, u8 minutes,
+			      u8 seconds, u8 frames,
+			      u8 idx)
+{
+	static u8 fps[5] = { MTS64_CMD_SMPTE_FPS_24, 
+			     MTS64_CMD_SMPTE_FPS_25,
+			     MTS64_CMD_SMPTE_FPS_2997, 
+			     MTS64_CMD_SMPTE_FPS_30D,
+			     MTS64_CMD_SMPTE_FPS_30    };
+
+	mts64_write_command(p, MTS64_CMD_SMPTE_SET_TIME);
+	mts64_write_command(p, frames);
+	mts64_write_command(p, seconds);
+	mts64_write_command(p, minutes);
+	mts64_write_command(p, hours);
+
+	mts64_write_command(p, MTS64_CMD_SMPTE_SET_FPS);
+	mts64_write_command(p, fps[idx]);
+}
+
+/*  Stops SMPTE Timecode generation
+ */
+static void mts64_smpte_stop(struct parport *p)
+{
+	mts64_write_command(p, MTS64_CMD_SMPTE_STOP);
+}
+
+/*  Write a command byte to device
+ */
+static void mts64_write_command(struct parport *p, u8 c)
+{
+	mts64_device_ready(p);
+
+	parport_write_data(p, c);
+
+	parport_write_control(p, MTS64_CTL_WRITE_CMD);
+	parport_write_control(p, MTS64_CTL_WRITE_CMD | MTS64_CTL_STROBE);
+	parport_write_control(p, MTS64_CTL_WRITE_CMD);
+}
+
+/*  Write a data byte to device 
+ */
+static void mts64_write_data(struct parport *p, u8 c)
+{
+	mts64_device_ready(p);
+
+	parport_write_data(p, c);
+
+	parport_write_control(p, MTS64_CTL_WRITE_DATA);
+	parport_write_control(p, MTS64_CTL_WRITE_DATA | MTS64_CTL_STROBE);
+	parport_write_control(p, MTS64_CTL_WRITE_DATA);
+}
+
+/*  Write a MIDI byte to midiport
+ *  midiport is written as a command byte to select the port; documented as
+ *  0-3 for Ports 1-4, but the output trigger passes substream->number+1.
+ *  assumptions: communication mode is on
+ */
+static void mts64_write_midi(struct mts64 *mts, u8 c,
+			     int midiport)
+{
+	struct parport *p = mts->pardev->port;
+
+	/* select the port unless it is the cached one (cache not updated here) */
+	if (mts->current_midi_output_port != midiport)
+		mts64_write_command(p, midiport);
+
+	/* write midi byte */
+	mts64_write_data(p, c);
+}
+
+/*********************************************************************
+ * Control elements
+ *********************************************************************/
+
+/* SMPTE Switch */
+static int snd_mts64_ctl_smpte_switch_info(struct snd_kcontrol *kctl,
+					   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_switch_get(struct snd_kcontrol* kctl,
+					  struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+
+	spin_lock_irq(&mts->lock);
+	uctl->value.integer.value[0] = mts->smpte_switch;
+	spin_unlock_irq(&mts->lock);
+
+	return 0;
+}
+
+/* smpte_switch is not accessed from IRQ handler, so we just need
+   to protect the HW access */
+static int snd_mts64_ctl_smpte_switch_put(struct snd_kcontrol* kctl,
+					  struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	int val = !!uctl->value.integer.value[0];	/* force 0 or 1 */
+	int changed = 0;
+
+	spin_lock_irq(&mts->lock);
+	if (mts->smpte_switch == val)
+		goto __out;
+	changed = 1;
+	mts->smpte_switch = val;
+	if (val) {
+		mts64_smpte_start(mts->pardev->port,
+				  mts->time[0], mts->time[1],
+				  mts->time[2], mts->time[3],
+				  mts->fps);
+	} else {
+		mts64_smpte_stop(mts->pardev->port);
+	}
+__out:
+	spin_unlock_irq(&mts->lock);
+	return changed;
+}
+
+static struct snd_kcontrol_new mts64_ctl_smpte_switch __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Playback Switch",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,
+	.info = snd_mts64_ctl_smpte_switch_info,
+	.get  = snd_mts64_ctl_smpte_switch_get,
+	.put  = snd_mts64_ctl_smpte_switch_put
+};
+
+/* Time */
+static int snd_mts64_ctl_smpte_time_h_info(struct snd_kcontrol *kctl,
+					   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 23;
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_f_info(struct snd_kcontrol *kctl,
+					   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 99;
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_info(struct snd_kcontrol *kctl,
+					 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 59;
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_get(struct snd_kcontrol *kctl,
+					struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	int idx = kctl->private_value;
+
+	spin_lock_irq(&mts->lock);
+	uctl->value.integer.value[0] = mts->time[idx];
+	spin_unlock_irq(&mts->lock);
+
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_put(struct snd_kcontrol *kctl,
+					struct snd_ctl_elem_value *uctl)
+{
+	static const long max[] = { 23, 59, 59, 99 };	/* h, m, s, frames */
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	long val = uctl->value.integer.value[0];
+	int idx = kctl->private_value, changed;
+
+	if (val < 0 || val > max[idx])	/* same limits as the info callbacks */
+		return -EINVAL;
+	spin_lock_irq(&mts->lock);
+	changed = (mts->time[idx] != val);
+	mts->time[idx] = val;
+	spin_unlock_irq(&mts->lock);
+	return changed;
+}
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_hours __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Hours",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,
+	.info = snd_mts64_ctl_smpte_time_h_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_minutes __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Minutes",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 1,
+	.info = snd_mts64_ctl_smpte_time_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_seconds __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Seconds",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 2,
+	.info = snd_mts64_ctl_smpte_time_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_frames __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Frames",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 3,
+	.info = snd_mts64_ctl_smpte_time_f_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+/* FPS */
+static int snd_mts64_ctl_smpte_fps_info(struct snd_kcontrol *kctl,
+					struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[5] = { "24",
+				  "25",
+				  "29.97",
+				  "30D",
+				  "30"    };
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = 5;
+	if (uinfo->value.enumerated.item > 4)
+		uinfo->value.enumerated.item = 4;
+	strcpy(uinfo->value.enumerated.name,
+	       texts[uinfo->value.enumerated.item]);
+	
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_fps_get(struct snd_kcontrol *kctl,
+				       struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+
+	spin_lock_irq(&mts->lock);
+	uctl->value.enumerated.item[0] = mts->fps;
+	spin_unlock_irq(&mts->lock);
+
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_fps_put(struct snd_kcontrol *kctl,
+				       struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	unsigned int fps = uctl->value.enumerated.item[0];
+	int changed;
+
+	if (fps > 4)	/* would overrun fps[5] in mts64_smpte_start() */
+		return -EINVAL;
+	spin_lock_irq(&mts->lock);
+	changed = (mts->fps != fps);
+	mts->fps = fps;
+	spin_unlock_irq(&mts->lock);
+	return changed;
+}
+
+static struct snd_kcontrol_new mts64_ctl_smpte_fps __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Fps",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,
+	.info  = snd_mts64_ctl_smpte_fps_info,
+	.get   = snd_mts64_ctl_smpte_fps_get,
+	.put   = snd_mts64_ctl_smpte_fps_put
+};
+
+
+static int __devinit snd_mts64_ctl_create(struct snd_card *card,
+					  struct mts64 *mts)
+{
+	int err, i;
+	static struct snd_kcontrol_new *control[] = {
+		&mts64_ctl_smpte_switch,
+		&mts64_ctl_smpte_time_hours,
+		&mts64_ctl_smpte_time_minutes,
+		&mts64_ctl_smpte_time_seconds,
+		&mts64_ctl_smpte_time_frames,
+		&mts64_ctl_smpte_fps,
+		NULL };	/* sentinel: ends the loop below */
+
+	for (i = 0; control[i]; ++i) {
+		err = snd_ctl_add(card, snd_ctl_new1(control[i], mts));
+		if (err < 0) {
+			snd_printd("Cannot create control: %s\n",
+				   control[i]->name);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/*********************************************************************
+ * Rawmidi
+ *********************************************************************/
+#define MTS64_MODE_INPUT_TRIGGERED 0x01
+
+static int snd_mts64_rawmidi_open(struct snd_rawmidi_substream *substream)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+
+	if (mts->open_count == 0) {
+		/* We don't need a spinlock here, because this is just called 
+		   if the device has not been opened before. 
+		   So there aren't any IRQs from the device */
+		mts64_device_open(mts);
+
+		msleep(50);
+	}
+	++(mts->open_count);
+
+	return 0;
+}
+
+static int snd_mts64_rawmidi_close(struct snd_rawmidi_substream *substream)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+	unsigned long flags;
+
+	--(mts->open_count);
+	if (mts->open_count == 0) {
+		/* We need the spinlock_irqsave here because we can still
+		   have IRQs at this point */
+		spin_lock_irqsave(&mts->lock, flags);
+		mts64_device_close(mts);
+		spin_unlock_irqrestore(&mts->lock, flags);
+
+		msleep(500);
+
+	} else if (mts->open_count < 0)
+		mts->open_count = 0;
+
+	return 0;
+}
+
+static void snd_mts64_rawmidi_output_trigger(struct snd_rawmidi_substream *substream,
+					     int up)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+	u8 data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mts->lock, flags);
+	while (snd_rawmidi_transmit_peek(substream, &data, 1) == 1) {
+		mts64_write_midi(mts, data, substream->number+1);
+		snd_rawmidi_transmit_ack(substream, 1);
+	}
+	spin_unlock_irqrestore(&mts->lock, flags);
+}
+
+static void snd_mts64_rawmidi_input_trigger(struct snd_rawmidi_substream *substream,
+					    int up)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mts->lock, flags);
+	if (up)
+		mts->mode[substream->number] |= MTS64_MODE_INPUT_TRIGGERED;
+	else
+ 		mts->mode[substream->number] &= ~MTS64_MODE_INPUT_TRIGGERED;
+	
+	spin_unlock_irqrestore(&mts->lock, flags);
+}
+
+static struct snd_rawmidi_ops snd_mts64_rawmidi_output_ops = {
+	.open    = snd_mts64_rawmidi_open,
+	.close   = snd_mts64_rawmidi_close,
+	.trigger = snd_mts64_rawmidi_output_trigger
+};
+
+static struct snd_rawmidi_ops snd_mts64_rawmidi_input_ops = {
+	.open    = snd_mts64_rawmidi_open,
+	.close   = snd_mts64_rawmidi_close,
+	.trigger = snd_mts64_rawmidi_input_trigger
+};
+
+/* Create and initialize the rawmidi component */
+static int __devinit snd_mts64_rawmidi_create(struct snd_card *card)
+{
+	struct mts64 *mts = card->private_data;
+	struct snd_rawmidi *rmidi;
+	struct snd_rawmidi_substream *substream;
+	struct list_head *list;
+	int err;
+	
+	err = snd_rawmidi_new(card, CARD_NAME, 0, 
+			      MTS64_NUM_OUTPUT_PORTS, 
+			      MTS64_NUM_INPUT_PORTS, 
+			      &rmidi);
+	if (err < 0) 
+		return err;
+
+	rmidi->private_data = mts;
+	strcpy(rmidi->name, CARD_NAME);
+	rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT |
+		            SNDRV_RAWMIDI_INFO_INPUT |
+                            SNDRV_RAWMIDI_INFO_DUPLEX;
+
+	mts->rmidi = rmidi;
+
+	/* register rawmidi ops */
+	snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, 
+			    &snd_mts64_rawmidi_output_ops);
+	snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, 
+			    &snd_mts64_rawmidi_input_ops);
+
+	/* name substreams */
+	/* output */
+	list_for_each(list, 
+		      &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams) {
+		substream = list_entry(list, struct snd_rawmidi_substream, list);
+		sprintf(substream->name,
+			"Miditerminal %d", substream->number+1);
+	}
+	/* input */
+	list_for_each(list, 
+		      &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams) {
+		substream = list_entry(list, struct snd_rawmidi_substream, list);
+		mts->midi_input_substream[substream->number] = substream;
+		switch(substream->number) {
+		case MTS64_SMPTE_SUBSTREAM:
+			strcpy(substream->name, "Miditerminal SMPTE");
+			break;
+		default:
+			sprintf(substream->name,
+				"Miditerminal %d", substream->number+1);
+		}
+	}
+
+	/* controls */
+	err = snd_mts64_ctl_create(card, mts);
+
+	return err;
+}
+
+/*********************************************************************
+ * parport stuff
+ *********************************************************************/
+static void snd_mts64_interrupt(int irq, void *private, struct pt_regs *r)
+{
+	struct mts64 *mts = ((struct snd_card*)private)->private_data;
+	u16 ret;
+	u8 status, data;
+	struct snd_rawmidi_substream *substream;
+
+	spin_lock(&mts->lock);
+	ret = mts64_read(mts->pardev->port);
+	data = ret & 0x00ff;
+	status = ret >> 8;
+
+	if (status & MTS64_STAT_PORT) {
+		mts->current_midi_input_port = mts64_map_midi_input(data);
+	} else {
+		if (mts->current_midi_input_port == -1) 
+			goto __out;
+		substream = mts->midi_input_substream[mts->current_midi_input_port];
+		if (mts->mode[substream->number] & MTS64_MODE_INPUT_TRIGGERED)
+			snd_rawmidi_receive(substream, &data, 1);
+	}
+__out:
+	spin_unlock(&mts->lock);
+}
+
+static int __devinit snd_mts64_probe_port(struct parport *p)
+{
+	struct pardevice *pardev;
+	int res;
+
+	pardev = parport_register_device(p, DRIVER_NAME,
+					 NULL, NULL, NULL,
+					 0, NULL);
+	if (!pardev)
+		return -EIO;
+	
+	if (parport_claim(pardev)) {
+		parport_unregister_device(pardev);
+		return -EIO;
+	}
+
+	res = mts64_probe(p);
+
+	parport_release(pardev);
+	parport_unregister_device(pardev);
+
+	return res;
+}
+
+static void __devinit snd_mts64_attach(struct parport *p)
+{
+	struct platform_device *device;
+
+	device = platform_device_alloc(PLATFORM_DRIVER, device_count);
+	if (!device)
+		return;
+
+	/* Temporary assignment: hands the parport to snd_mts64_probe() */
+	platform_set_drvdata(device, p);
+
+	if (platform_device_add(device) < 0) {	/* _alloc() pairs with _add() */
+		platform_device_put(device);
+		return;
+	}
+
+	/* We don't get the return value of probe here, so check the drvdata:
+	 * probe clears it and only sets it again (to the card) after init */
+	if (!platform_get_drvdata(device)) {
+		platform_device_unregister(device);
+		return;
+	}
+
+	/* register device in global table */
+	platform_devices[device_count] = device;
+	device_count++;
+}
+
+static void snd_mts64_detach(struct parport *p)
+{
+	/* nothing to do here */
+}
+
+static struct parport_driver mts64_parport_driver = {
+	.name   = "mts64",
+	.attach = snd_mts64_attach,
+	.detach = snd_mts64_detach
+};
+
+/*********************************************************************
+ * platform stuff
+ *********************************************************************/
+static void snd_mts64_card_private_free(struct snd_card *card)
+{
+	struct mts64 *mts = card->private_data;
+	struct pardevice *pardev = mts->pardev;
+
+	if (pardev) {
+		if (mts->pardev_claimed)
+			parport_release(pardev);
+		parport_unregister_device(pardev);
+	}
+
+	snd_mts64_free(mts);
+}
+
+static int __devinit snd_mts64_probe(struct platform_device *pdev)
+{
+	struct pardevice *pardev;
+	struct parport *p;
+	int dev = pdev->id;
+	struct snd_card *card = NULL;
+	struct mts64 *mts = NULL;
+	int err;
+
+	p = platform_get_drvdata(pdev);
+	platform_set_drvdata(pdev, NULL);
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+	if (!enable[dev])
+		return -ENOENT;
+	if ((err = snd_mts64_probe_port(p)) < 0)
+		return err;
+
+	card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
+	if (card == NULL) {
+		snd_printd("Cannot create card\n");
+		return -ENOMEM;
+	}
+	strcpy(card->driver, DRIVER_NAME);
+	strcpy(card->shortname, "ESI " CARD_NAME);
+	sprintf(card->longname,  "%s at 0x%lx, irq %i",
+		card->shortname, p->base, p->irq);
+
+	pardev = parport_register_device(p,                   /* port */
+					 DRIVER_NAME,         /* name */
+					 NULL,                /* preempt */
+					 NULL,                /* wakeup */
+					 snd_mts64_interrupt, /* ISR */
+					 PARPORT_DEV_EXCL,    /* flags */
+					 (void *)card);       /* private */
+	if (pardev == NULL) {
+		snd_printd("Cannot register pardevice\n");
+		err = -EIO;
+		goto __err;
+	}
+
+	if ((err = snd_mts64_create(card, pardev, &mts)) < 0) {
+		snd_printd("Cannot create main component\n");
+		parport_unregister_device(pardev);
+		goto __err;
+	}
+	card->private_data = mts;
+	card->private_free = snd_mts64_card_private_free;
+
+	if ((err = snd_mts64_rawmidi_create(card)) < 0) {
+		snd_printd("Creating Rawmidi component failed\n");
+		goto __err;
+	}
+
+	/* claim parport */
+	if (parport_claim(pardev)) {
+		snd_printd("Cannot claim parport 0x%lx\n", pardev->port->base);
+		err = -EIO;
+		goto __err;
+	}
+	mts->pardev_claimed = 1;
+
+	/* init device */
+	if ((err = mts64_device_init(p)) < 0)
+		goto __err;
+
+	/* At this point card will be usable */
+	if ((err = snd_card_register(card)) < 0) {
+		snd_printd("Cannot register card\n");
+		goto __err;
+	}
+
+	/* publish the card only now, so a failed probe leaves drvdata NULL */
+	platform_set_drvdata(pdev, card);
+	snd_printk(KERN_INFO "ESI Miditerminal 4140 on 0x%lx\n", p->base);
+	return 0;
+
+__err:
+	snd_card_free(card);
+	return err;
+}
+
+static int snd_mts64_remove(struct platform_device *pdev)
+{
+	struct snd_card *card = platform_get_drvdata(pdev);
+
+	if (card)
+		snd_card_free(card);
+
+	return 0;
+}
+
+
+static struct platform_driver snd_mts64_driver = {
+	.probe  = snd_mts64_probe,
+	.remove = snd_mts64_remove,
+	.driver = {
+		.name = PLATFORM_DRIVER
+	}
+};
+
+/*********************************************************************
+ * module init stuff
+ *********************************************************************/
+static void snd_mts64_unregister_all(void)
+{
+	int i;
+
+	for (i = 0; i < SNDRV_CARDS; ++i) {
+		if (platform_devices[i]) {
+			platform_device_unregister(platform_devices[i]);
+			platform_devices[i] = NULL;
+		}
+	}		
+	platform_driver_unregister(&snd_mts64_driver);
+	parport_unregister_driver(&mts64_parport_driver);
+}
+
+static int __init snd_mts64_module_init(void)
+{
+	int err;
+
+	if ((err = platform_driver_register(&snd_mts64_driver)) < 0)
+		return err;
+
+	if (parport_register_driver(&mts64_parport_driver) != 0) {
+		platform_driver_unregister(&snd_mts64_driver);
+		return -EIO;
+	}
+
+	if (device_count == 0) {
+		snd_mts64_unregister_all();
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void __exit snd_mts64_module_exit(void)
+{
+	snd_mts64_unregister_all();
+}
+
+module_init(snd_mts64_module_init);
+module_exit(snd_mts64_module_exit);
-- 
GitLab


From 4b146cb087b4a668511f6c991da1dc40e2e04b0d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 28 Jul 2006 14:42:36 +0200
Subject: [PATCH 0912/1063] [ALSA] Misc fixes for Realtek HD-audio codecs

- Added model=arima for Arima W820Di1 with ALC882 codec chip
- Added EAPD-control verbs to TCL S700 init verbs
- Added missing model strings for Realtek codecs (to be specified
  via module option explicitly for testing/debugging)

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         | 13 ++++-
 sound/pci/hda/patch_realtek.c                 | 48 ++++++++++++++++---
 2 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 7344815b855e0..d0dbc3fb20c2f 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -778,11 +778,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  6stack-digout	6-jack with a SPDIF out
 	  w810		3-jack
 	  z71v		3-jack (HP shared SPDIF)
-	  asus		3-jack
+	  asus		3-jack (ASUS Mobo)
+	  asus-w1v	ASUS W1V
+	  asus-dig	ASUS with SPDIF out
+	  asus-dig2	ASUS with SPDIF out (using GPIO2)
 	  uniwill	3-jack
 	  F1734		2-jack
 	  lg		LG laptop (m1 express dual)
 	  lg-lw		LG LW20 laptop
+	  tcl		TCL S700
 	  clevo		Clevo laptops (m520G, m665n)
 	  test		for testing/debugging purpose, almost all controls can be
 			adjusted.  Appearing only when compiled with
@@ -791,6 +795,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
 	ALC260
 	  hp		HP machines
+	  hp-3013	HP machines (3013-variant)
 	  fujitsu	Fujitsu S7020
 	  acer		Acer TravelMate
 	  basic		fixed pin assignment (old default model)
@@ -806,18 +811,22 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	ALC882/885
 	  3stack-dig	3-jack with SPDIF I/O
 	  6stck-dig	6-jack digital with SPDIF I/O
+	  arima		Arima W820Di1
 	  auto		auto-config reading BIOS (default)
 
 	ALC883/888
 	  3stack-dig	3-jack with SPDIF I/O
 	  6stack-dig	6-jack digital with SPDIF I/O
-	  6stack-dig-demo  6-stack digital for Intel demo board
+	  3stack-6ch    3-jack 6-channel
+	  3stack-6ch-dig 3-jack 6-channel with SPDIF I/O
+	  6stack-dig-demo  6-jack digital for Intel demo board
 	  auto		auto-config reading BIOS (default)
 
 	ALC861/660
 	  3stack	3-jack
 	  3stack-dig	3-jack with SPDIF I/O
 	  6stack-dig	6-jack with SPDIF I/O
+	  3stack-660	3-jack (for ALC660)
 	  auto		auto-config reading BIOS (default)
 
 	CMI9880
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 42c4f90a92b83..378e5f111e348 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -98,6 +98,7 @@ enum {
 enum {
 	ALC882_3ST_DIG,
 	ALC882_6ST_DIG,
+	ALC882_ARIMA,
 	ALC882_AUTO,
 	ALC882_MODEL_LAST,
 };
@@ -1349,6 +1350,10 @@ static struct hda_verb alc880_pin_clevo_init_verbs[] = {
 };
 
 static struct hda_verb alc880_pin_tcl_S700_init_verbs[] = {
+	/* change to EAPD mode */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+	{0x20, AC_VERB_SET_PROC_COEF,  0x3060},
+
 	/* Headphone output */
 	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
 	/* Front output*/
@@ -2146,6 +2151,7 @@ static struct hda_board_config alc880_cfg_tbl[] = {
 	{ .pci_subvendor = 0x107b, .pci_subdevice = 0x4040, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x107b, .pci_subdevice = 0x4041, .config = ALC880_3ST },
 	/* TCL S700 */
+	{ .modelname = "tcl", .config = ALC880_TCL_S700 },
 	{ .pci_subvendor = 0x19db, .pci_subdevice = 0x4188, .config = ALC880_TCL_S700 },
 
 	/* Back 3 jack, front 2 jack (Internal add Aux-In) */
@@ -2232,8 +2238,11 @@ static struct hda_board_config alc880_cfg_tbl[] = {
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1133, .config = ALC880_ASUS },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1123, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1143, .config = ALC880_ASUS },
+	{ .modelname = "asus-w1v", .config = ALC880_ASUS_W1V },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x10b3, .config = ALC880_ASUS_W1V },
+	{ .modelname = "asus-dig", .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x8181, .config = ALC880_ASUS_DIG }, /* ASUS P4GPL-X */
+	{ .modelname = "asus-dig2", .config = ALC880_ASUS_DIG2 },
 	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x5401, .config = ALC880_ASUS_DIG2 },
 
 	{ .modelname = "uniwill", .config = ALC880_UNIWILL_DIG },
@@ -3906,6 +3915,7 @@ static struct hda_board_config alc260_cfg_tbl[] = {
 	{ .pci_subvendor = 0x152d, .pci_subdevice = 0x0729,
 	  .config = ALC260_BASIC }, /* CTL Travel Master U553W */
 	{ .modelname = "hp", .config = ALC260_HP },
+	{ .modelname = "hp-3013", .config = ALC260_HP_3013 },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3012, .config = ALC260_HP_3013 },
@@ -4272,6 +4282,13 @@ static struct hda_verb alc882_init_verbs[] = {
 	{ }
 };
 
+static struct hda_verb alc882_eapd_verbs[] = {
+	/* change to EAPD mode */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x3060},
+	{ } 
+};
+
 /*
  * generic initialization of ADC, input mixers and output mixers
  */
@@ -4403,6 +4420,9 @@ static struct hda_board_config alc882_cfg_tbl[] = {
 	  .config = ALC882_6ST_DIG }, /* Foxconn */
 	{ .pci_subvendor = 0x1019, .pci_subdevice = 0x6668,
 	  .config = ALC882_6ST_DIG }, /* ECS to Intel*/
+	{ .modelname = "arima", .config = ALC882_ARIMA },
+	{ .pci_subvendor = 0x161f, .pci_subdevice = 0x2054,
+	  .config = ALC882_ARIMA }, /* Arima W820Di1 */
 	{ .modelname = "auto", .config = ALC882_AUTO },
 	{}
 };
@@ -4430,6 +4450,15 @@ static struct alc_config_preset alc882_presets[] = {
 		.channel_mode = alc882_sixstack_modes,
 		.input_mux = &alc882_capture_source,
 	},
+	[ALC882_ARIMA] = {
+		.mixers = { alc882_base_mixer, alc882_chmode_mixer },
+		.init_verbs = { alc882_init_verbs, alc882_eapd_verbs },
+		.num_dacs = ARRAY_SIZE(alc882_dac_nids),
+		.dac_nids = alc882_dac_nids,
+		.num_channel_mode = ARRAY_SIZE(alc882_sixstack_modes),
+		.channel_mode = alc882_sixstack_modes,
+		.input_mux = &alc882_capture_source,
+	},
 };
 
 
@@ -5005,16 +5034,18 @@ static struct snd_kcontrol_new alc883_capture_mixer[] = {
  */
 static struct hda_board_config alc883_cfg_tbl[] = {
 	{ .modelname = "3stack-dig", .config = ALC883_3ST_2ch_DIG },
+	{ .modelname = "3stack-6ch-dig", .config = ALC883_3ST_6ch_DIG },
+	{ .pci_subvendor = 0x1019, .pci_subdevice = 0x6668,
+	  .config = ALC883_3ST_6ch_DIG }, /* ECS to Intel*/
+	{ .modelname = "3stack-6ch", .config = ALC883_3ST_6ch },
+	{ .pci_subvendor = 0x108e, .pci_subdevice = 0x534d,
+	  .config = ALC883_3ST_6ch },
 	{ .modelname = "6stack-dig", .config = ALC883_6ST_DIG },
-	{ .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD },
 	{ .pci_subvendor = 0x1462, .pci_subdevice = 0x6668,
 	  .config = ALC883_6ST_DIG }, /* MSI  */
 	{ .pci_subvendor = 0x105b, .pci_subdevice = 0x6668,
 	  .config = ALC883_6ST_DIG }, /* Foxconn */
-	{ .pci_subvendor = 0x1019, .pci_subdevice = 0x6668,
-	  .config = ALC883_3ST_6ch_DIG }, /* ECS to Intel*/
-	{ .pci_subvendor = 0x108e, .pci_subdevice = 0x534d,
-	  .config = ALC883_3ST_6ch },
+	{ .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD },
 	{ .modelname = "auto", .config = ALC883_AUTO },
 	{}
 };
@@ -5223,8 +5254,10 @@ static int patch_alc883(struct hda_codec *codec)
 	spec->stream_digital_playback = &alc883_pcm_digital_playback;
 	spec->stream_digital_capture = &alc883_pcm_digital_capture;
 
-	spec->adc_nids = alc883_adc_nids;
-	spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
+	if (! spec->adc_nids && spec->input_mux) {
+		spec->adc_nids = alc883_adc_nids;
+		spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
+	}
 
 	codec->patch_ops = alc_patch_ops;
 	if (board_config == ALC883_AUTO)
@@ -6504,6 +6537,7 @@ static struct hda_board_config alc861_cfg_tbl[] = {
 	{ .modelname = "3stack", .config = ALC861_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xd600,
 	  .config = ALC861_3ST },
+	{ .modelname = "3stack-660", .config = ALC660_3ST },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x81e7,
 	  .config = ALC660_3ST },
 	{ .modelname = "3stack-dig", .config = ALC861_3ST_DIG },
-- 
GitLab


From 35aec4e2affb99d52b4b744ddb09767eb6e05580 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 28 Jul 2006 14:44:31 +0200
Subject: [PATCH 0913/1063] [ALSA] Don't set up the same PID twice in
 snd_hda_multi_out_analog_prepare

Check the hp_nid whether it's identical with front pin to avoid
the setup of the same widget node twice.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_codec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 399860c36be9b..ff29d0f16903a 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1942,7 +1942,7 @@ int snd_hda_multi_out_analog_prepare(struct hda_codec *codec, struct hda_multi_o
 
 	/* front */
 	snd_hda_codec_setup_stream(codec, nids[HDA_FRONT], stream_tag, 0, format);
-	if (mout->hp_nid)
+	if (mout->hp_nid && mout->hp_nid != nids[HDA_FRONT])
 		/* headphone out will just decode front left/right (stereo) */
 		snd_hda_codec_setup_stream(codec, mout->hp_nid, stream_tag, 0, format);
 	/* extra outputs copied from front */
-- 
GitLab


From 4e195a7b78618c89b06547f3140e67a69ec23272 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 28 Jul 2006 14:47:34 +0200
Subject: [PATCH 0914/1063] [ALSA] Fix noisy output with shared channel mode
 with hd-audio

- Fix the wrong initialization of num_dacs when changing the channel
  mode between 2 and multi-channel modes.  It must be evaluated
  after calling snd_hda_ch_mode_put()
- Added the similar check of num_dacs fix in Realtek code.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_analog.c  |  8 +++++---
 sound/pci/hda/patch_realtek.c | 27 ++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 8955397cca6f8..077f1ce01ee11 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -1647,10 +1647,12 @@ static int ad198x_ch_mode_put(struct snd_kcontrol *kcontrol,
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct ad198x_spec *spec = codec->spec;
-	if (spec->need_dac_fix)
+	int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
+				      spec->num_channel_mode,
+				      &spec->multiout.max_channels);
+	if (! err && spec->need_dac_fix)
 		spec->multiout.num_dacs = spec->multiout.max_channels / 2;
-	return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
-				   spec->num_channel_mode, &spec->multiout.max_channels);
+	return err;
 }
 
 /* 6-stack mode */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 378e5f111e348..991f1079116bb 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -155,6 +155,7 @@ struct alc_spec {
 	/* channel model */
 	const struct hda_channel_mode *channel_mode;
 	int num_channel_mode;
+	int need_dac_fix;
 
 	/* PCM information */
 	struct hda_pcm pcm_rec[3];	/* used in alc_build_pcms() */
@@ -192,6 +193,7 @@ struct alc_config_preset {
 	hda_nid_t dig_in_nid;
 	unsigned int num_channel_mode;
 	const struct hda_channel_mode *channel_mode;
+	int need_dac_fix;
 	unsigned int num_mux_defs;
 	const struct hda_input_mux *input_mux;
 	void (*unsol_event)(struct hda_codec *, unsigned int);
@@ -264,9 +266,12 @@ static int alc_ch_mode_put(struct snd_kcontrol *kcontrol,
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct alc_spec *spec = codec->spec;
-	return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
-				   spec->num_channel_mode,
-				   &spec->multiout.max_channels);
+	int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
+				      spec->num_channel_mode,
+				      &spec->multiout.max_channels);
+	if (! err && spec->need_dac_fix)
+		spec->multiout.num_dacs = spec->multiout.max_channels / 2;
+	return err;
 }
 
 /*
@@ -546,6 +551,7 @@ static void setup_preset(struct alc_spec *spec,
 	
 	spec->channel_mode = preset->channel_mode;
 	spec->num_channel_mode = preset->num_channel_mode;
+	spec->need_dac_fix = preset->need_dac_fix;
 
 	spec->multiout.max_channels = spec->channel_mode[0].channels;
 
@@ -2278,6 +2284,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dac_nids = alc880_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
 		.channel_mode = alc880_threestack_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_3ST_DIG] = {
@@ -2288,6 +2295,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
 		.channel_mode = alc880_threestack_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_TCL_S700] = {
@@ -2380,6 +2388,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dac_nids = alc880_asus_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_ASUS_DIG] = {
@@ -2391,6 +2400,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_ASUS_DIG2] = {
@@ -2402,6 +2412,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_ASUS_W1V] = {
@@ -2413,6 +2424,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_UNIWILL_DIG] = {
@@ -2423,6 +2435,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_CLEVO] = {
@@ -2434,6 +2447,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.hp_nid = 0x03,
 		.num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
 		.channel_mode = alc880_threestack_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_LG] = {
@@ -2445,6 +2459,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_lg_ch_modes),
 		.channel_mode = alc880_lg_ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_lg_capture_source,
 		.unsol_event = alc880_lg_unsol_event,
 		.init_hook = alc880_lg_automute,
@@ -4437,6 +4452,7 @@ static struct alc_config_preset alc882_presets[] = {
 		.dig_in_nid = ALC882_DIGIN_NID,
 		.num_channel_mode = ARRAY_SIZE(alc882_ch_modes),
 		.channel_mode = alc882_ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc882_capture_source,
 	},
 	[ALC882_6ST_DIG] = {
@@ -5075,6 +5091,7 @@ static struct alc_config_preset alc883_presets[] = {
 		.dig_in_nid = ALC883_DIGIN_NID,
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes),
 		.channel_mode = alc883_3ST_6ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
 	},	
 	[ALC883_3ST_6ch] = {
@@ -5086,6 +5103,7 @@ static struct alc_config_preset alc883_presets[] = {
 		.adc_nids = alc883_adc_nids,
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes),
 		.channel_mode = alc883_3ST_6ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
 	},	
 	[ALC883_6ST_DIG] = {
@@ -6554,6 +6572,7 @@ static struct alc_config_preset alc861_presets[] = {
 		.dac_nids = alc861_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
 		.channel_mode = alc861_threestack_modes,
+		.need_dac_fix = 1,
 		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
 		.adc_nids = alc861_adc_nids,
 		.input_mux = &alc861_capture_source,
@@ -6566,6 +6585,7 @@ static struct alc_config_preset alc861_presets[] = {
 		.dig_out_nid = ALC861_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
 		.channel_mode = alc861_threestack_modes,
+		.need_dac_fix = 1,
 		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
 		.adc_nids = alc861_adc_nids,
 		.input_mux = &alc861_capture_source,
@@ -6589,6 +6609,7 @@ static struct alc_config_preset alc861_presets[] = {
 		.dac_nids = alc660_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
 		.channel_mode = alc861_threestack_modes,
+		.need_dac_fix = 1,
 		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
 		.adc_nids = alc861_adc_nids,
 		.input_mux = &alc861_capture_source,
-- 
GitLab


From 7012b2dac71988f61b520b33c70c63be372b5994 Mon Sep 17 00:00:00 2001
From: James Courtier-Dutton <James@superbug.co.uk>
Date: Fri, 28 Jul 2006 22:27:56 +0100
Subject: [PATCH 0915/1063] [ALSA] snd-emu10k1: Add a comment explaining the
 conversion function for dB gain.

Signed-off-by: James Courtier-Dutton <James@superbug.co.uk>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/emu10k1/emufx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
index 00fc904c251dc..13cd6ce898115 100644
--- a/sound/pci/emu10k1/emufx.c
+++ b/sound/pci/emu10k1/emufx.c
@@ -267,6 +267,7 @@ static const u32 treble_table[41][5] = {
 	{ 0x37c4448b, 0xa45ef51d, 0x262f3267, 0x081e36dc, 0xfd8f5d14 }
 };
 
+/* dB gain = (float) 20 * log10( float(db_table_value) / 0x8000000 ) */
 static const u32 db_table[101] = {
 	0x00000000, 0x01571f82, 0x01674b41, 0x01783a1b, 0x0189f540,
 	0x019c8651, 0x01aff763, 0x01c45306, 0x01d9a446, 0x01eff6b8,
-- 
GitLab


From f3302a59cf6961712658db63b66ea5902c17d5e1 Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@embeddedalley.com>
Date: Mon, 31 Jul 2006 12:49:34 +0200
Subject: [PATCH 0916/1063] [ALSA] hda: sigmatel 9205 family support

Adds support for the '9205 family' which includes some other
part numbers but 9205 is the first one. These are 4 channel
codecs, some have digital mic capability. Support for the digital
mic feature will come later.

Signed-off-by: Matt Porter <mporter@embeddedalley.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 102 +++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index ac96336f3484d..d572f030c3e95 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -136,6 +136,14 @@ static hda_nid_t stac927x_mux_nids[3] = {
         0x15, 0x16, 0x17
 };
 
+static hda_nid_t stac9205_adc_nids[2] = {
+        0x12, 0x13
+};
+
+static hda_nid_t stac9205_mux_nids[2] = {
+        0x19, 0x1a
+};
+
 static hda_nid_t stac9200_pin_nids[8] = {
 	0x08, 0x09, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
 };
@@ -151,6 +159,13 @@ static hda_nid_t stac927x_pin_nids[14] = {
 	0x14, 0x21, 0x22, 0x23,
 };
 
+static hda_nid_t stac9205_pin_nids[12] = {
+	0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+	0x0f, 0x14, 0x16, 0x17, 0x18,
+	0x21, 0x22,
+	
+};
+
 static int stac92xx_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
@@ -214,6 +229,12 @@ static struct hda_verb stac927x_core_init[] = {
 	{}
 };
 
+static struct hda_verb stac9205_core_init[] = {
+	/* set master volume and direct control */	
+	{ 0x24, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
+	{}
+};
+
 static struct snd_kcontrol_new stac9200_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0xb, 0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Master Playback Switch", 0xb, 0, HDA_OUTPUT),
@@ -277,6 +298,21 @@ static snd_kcontrol_new_t stac927x_mixer[] = {
 	{ } /* end */
 };
 
+static snd_kcontrol_new_t stac9205_mixer[] = {
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Input Source",
+		.count = 1,
+		.info = stac92xx_mux_enum_info,
+		.get = stac92xx_mux_enum_get,
+		.put = stac92xx_mux_enum_put,
+	},
+	HDA_CODEC_VOLUME("InMux Capture Volume", 0x19, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("InVol Capture Volume", 0x1b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("ADCMux Capture Switch", 0x1d, 0x0, HDA_OUTPUT),
+	{ } /* end */
+};
+
 static int stac92xx_build_controls(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -415,6 +451,24 @@ static struct hda_board_config stac927x_cfg_tbl[] = {
 	{} /* terminator */
 };
 
+static unsigned int ref9205_pin_configs[12] = {
+	0x40000100, 0x40000100, 0x01016011, 0x01014010,
+	0x01813122, 0x01a19021, 0x40000100, 0x40000100, 
+	0x40000100, 0x40000100, 0x01441030, 0x01c41030
+};
+
+static unsigned int *stac9205_brd_tbl[] = {
+	ref9205_pin_configs,
+};
+
+static struct hda_board_config stac9205_cfg_tbl[] = {
+	{ .modelname = "ref",
+	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2668,	/* DFI LanParty */
+	  .config = STAC_REF },		/* SigmaTel reference board */
+	{} /* terminator */
+};
+
 static void stac92xx_set_config_regs(struct hda_codec *codec)
 {
 	int i;
@@ -1354,6 +1408,46 @@ static int patch_stac927x(struct hda_codec *codec)
 	return 0;
 }
 
+static int patch_stac9205(struct hda_codec *codec)
+{
+	struct sigmatel_spec *spec;
+	int err;
+
+	spec  = kzalloc(sizeof(*spec), GFP_KERNEL);
+	if (spec == NULL)
+		return -ENOMEM;
+
+	codec->spec = spec;
+	spec->board_config = snd_hda_check_board_config(codec, stac9205_cfg_tbl);
+	if (spec->board_config < 0)
+                snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9205, using BIOS defaults\n");
+	else {
+		spec->num_pins = ARRAY_SIZE(stac9205_pin_nids); /* 12 pins/configs here, not the 14 of stac927x */
+		spec->pin_nids = stac9205_pin_nids;
+		spec->pin_configs = stac9205_brd_tbl[spec->board_config];
+		stac92xx_set_config_regs(codec);
+	}
+
+	spec->adc_nids = stac9205_adc_nids;
+	spec->mux_nids = stac9205_mux_nids;
+	spec->num_muxes = ARRAY_SIZE(stac9205_mux_nids); /* 2 muxes here, not the 3 of stac927x */
+
+	spec->init = stac9205_core_init;
+	spec->mixer = stac9205_mixer;
+
+	spec->multiout.dac_nids = spec->dac_nids;
+
+	err = stac92xx_parse_auto_config(codec, 0x1f, 0x20);
+	if (err < 0) {
+		stac92xx_free(codec);	/* releases the spec allocated above */
+		return err;
+	}
+
+	codec->patch_ops = stac92xx_patch_ops;
+
+	return 0;
+}
+
 /*
  * STAC 7661(?) hack
  */
@@ -1542,5 +1636,13 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
  	{ .id = 0x83847628, .name = "STAC9274X5NH", .patch = patch_stac927x },
  	{ .id = 0x83847629, .name = "STAC9274D5NH", .patch = patch_stac927x },
  	{ .id = 0x83847661, .name = "STAC7661", .patch = patch_stac7661 },
+ 	{ .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a3, .name = "STAC9204D", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a4, .name = "STAC9255", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a5, .name = "STAC9255D", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a6, .name = "STAC9254", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a7, .name = "STAC9254D", .patch = patch_stac9205 },
 	{} /* terminator */
 };
-- 
GitLab


From 1c3985580445ef9225c1ea7714d6d963f7626eeb Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Mon, 31 Jul 2006 12:51:57 +0200
Subject: [PATCH 0917/1063] [ALSA] es18xx - Add PnP BIOS support

This patch adds PnP BIOS support to es18xx driver. It allows ESS ES18xx sound
chips integrated in some notebooks (such as DTK FortisPro TOP-5A) that don't
appear as ISA cards (they aren't recognized by ISA PnP, only by PnP BIOS)
to 'just work' automatically.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/isa/es18xx.c | 219 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 157 insertions(+), 62 deletions(-)

diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c
index 34998de9968c0..85818200333f3 100644
--- a/sound/isa/es18xx.c
+++ b/sound/isa/es18xx.c
@@ -2038,7 +2038,80 @@ MODULE_PARM_DESC(dma2, "DMA 2 # for ES18xx driver.");
 static struct platform_device *platform_devices[SNDRV_CARDS];
 
 #ifdef CONFIG_PNP
-static int pnp_registered;
+static int pnp_registered, pnpc_registered;
+
+static struct pnp_device_id snd_audiodrive_pnpbiosids[] = {
+	{ .id = "ESS1869" },
+	{ .id = "" }		/* end */
+};
+
+MODULE_DEVICE_TABLE(pnp, snd_audiodrive_pnpbiosids);
+
+/* PnP main device initialization */
+static int __devinit snd_audiodrive_pnp_init_main(int dev, struct pnp_dev *pdev,
+						  struct pnp_resource_table *cfg)
+{
+	int err;
+
+	pnp_init_resource_table(cfg);
+	if (port[dev] != SNDRV_AUTO_PORT)
+		pnp_resource_change(&cfg->port_resource[0], port[dev], 16);
+	if (fm_port[dev] != SNDRV_AUTO_PORT)
+		pnp_resource_change(&cfg->port_resource[1], fm_port[dev], 4);
+	if (mpu_port[dev] != SNDRV_AUTO_PORT)
+		pnp_resource_change(&cfg->port_resource[2], mpu_port[dev], 2);
+	if (dma1[dev] != SNDRV_AUTO_DMA)
+		pnp_resource_change(&cfg->dma_resource[0], dma1[dev], 1);
+	if (dma2[dev] != SNDRV_AUTO_DMA)
+		pnp_resource_change(&cfg->dma_resource[1], dma2[dev], 1);
+	if (irq[dev] != SNDRV_AUTO_IRQ)
+		pnp_resource_change(&cfg->irq_resource[0], irq[dev], 1);
+	if (pnp_device_is_isapnp(pdev)) {
+		err = pnp_manual_config_dev(pdev, cfg, 0);
+		if (err < 0)
+			snd_printk(KERN_ERR PFX "PnP manual resources are invalid, using auto config\n");
+	}
+	err = pnp_activate_dev(pdev);
+	if (err < 0) {
+		snd_printk(KERN_ERR PFX "PnP configure failure (out of resources?)\n");
+		return -EBUSY;
+	}
+	/* ok. hack using Vendor-Defined Card-Level registers */
+	/* skip csn and logdev initialization - already done in isapnp_configure */
+	if (pnp_device_is_isapnp(pdev)) {
+		isapnp_cfg_begin(isapnp_card_number(pdev), isapnp_csn_number(pdev));
+		isapnp_write_byte(0x27, pnp_irq(pdev, 0));	/* Hardware Volume IRQ Number */
+		if (mpu_port[dev] != SNDRV_AUTO_PORT)
+			isapnp_write_byte(0x28, pnp_irq(pdev, 0)); /* MPU-401 IRQ Number */
+		isapnp_write_byte(0x72, pnp_irq(pdev, 0));	/* second IRQ */
+		isapnp_cfg_end();
+	}
+	port[dev] = pnp_port_start(pdev, 0);
+	fm_port[dev] = pnp_port_start(pdev, 1);
+	mpu_port[dev] = pnp_port_start(pdev, 2);
+	dma1[dev] = pnp_dma(pdev, 0);
+	dma2[dev] = pnp_dma(pdev, 1);
+	irq[dev] = pnp_irq(pdev, 0);
+	snd_printdd("PnP ES18xx: port=0x%lx, fm port=0x%lx, mpu port=0x%lx\n", port[dev], fm_port[dev], mpu_port[dev]);
+	snd_printdd("PnP ES18xx: dma1=%i, dma2=%i, irq=%i\n", dma1[dev], dma2[dev], irq[dev]);
+	return 0;
+}
+
+static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard,
+					struct pnp_dev *pdev)
+{
+	struct pnp_resource_table * cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL);
+
+	if (!cfg)
+		return -ENOMEM;
+	acard->dev = pdev;
+	if (snd_audiodrive_pnp_init_main(dev, acard->dev, cfg) < 0) {
+		kfree(cfg);
+		return -EBUSY;
+	}
+	kfree(cfg);
+	return 0;
+}
 
 static struct pnp_card_device_id snd_audiodrive_pnpids[] = {
 	/* ESS 1868 (integrated on Compaq dual P-Pro motherboard and Genius 18PnP 3D) */
@@ -2061,13 +2134,11 @@ static struct pnp_card_device_id snd_audiodrive_pnpids[] = {
 
 MODULE_DEVICE_TABLE(pnp_card, snd_audiodrive_pnpids);
 
-static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard,
+static int __devinit snd_audiodrive_pnpc(int dev, struct snd_audiodrive *acard,
 					struct pnp_card_link *card,
 					const struct pnp_card_device_id *id)
 {
-	struct pnp_dev *pdev;
 	struct pnp_resource_table * cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL);
-	int err;
 
 	if (!cfg)
 		return -ENOMEM;
@@ -2082,58 +2153,16 @@ static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard,
 		return -EBUSY;
 	}
 	/* Control port initialization */
-	err = pnp_activate_dev(acard->devc);
-	if (err < 0) {
+	if (pnp_activate_dev(acard->devc) < 0) {
 		snd_printk(KERN_ERR PFX "PnP control configure failure (out of resources?)\n");
-		kfree(cfg);
 		return -EAGAIN;
 	}
 	snd_printdd("pnp: port=0x%llx\n",
 			(unsigned long long)pnp_port_start(acard->devc, 0));
-	/* PnP initialization */
-	pdev = acard->dev;
-	pnp_init_resource_table(cfg);
-	if (port[dev] != SNDRV_AUTO_PORT)
-		pnp_resource_change(&cfg->port_resource[0], port[dev], 16);
-	if (fm_port[dev] != SNDRV_AUTO_PORT)
-		pnp_resource_change(&cfg->port_resource[1], fm_port[dev], 4);
-	if (mpu_port[dev] != SNDRV_AUTO_PORT)
-		pnp_resource_change(&cfg->port_resource[2], mpu_port[dev], 2);
-	if (dma1[dev] != SNDRV_AUTO_DMA)
-		pnp_resource_change(&cfg->dma_resource[0], dma1[dev], 1);
-	if (dma2[dev] != SNDRV_AUTO_DMA)
-		pnp_resource_change(&cfg->dma_resource[1], dma2[dev], 1);
-	if (irq[dev] != SNDRV_AUTO_IRQ)
-		pnp_resource_change(&cfg->irq_resource[0], irq[dev], 1);
-	err = pnp_manual_config_dev(pdev, cfg, 0);
-	if (err < 0)
-		snd_printk(KERN_ERR PFX "PnP manual resources are invalid, using auto config\n");
-	err = pnp_activate_dev(pdev);
-	if (err < 0) {
-		snd_printk(KERN_ERR PFX "PnP configure failure (out of resources?)\n");
+	if (snd_audiodrive_pnp_init_main(dev, acard->dev, cfg) < 0) {
 		kfree(cfg);
 		return -EBUSY;
 	}
-	/* ok. hack using Vendor-Defined Card-Level registers */
-	/* skip csn and logdev initialization - already done in isapnp_configure */
-	if (pnp_device_is_isapnp(pdev)) {
-		isapnp_cfg_begin(isapnp_card_number(pdev), isapnp_csn_number(pdev));
-		isapnp_write_byte(0x27, pnp_irq(pdev, 0));	/* Hardware Volume IRQ Number */
-		if (mpu_port[dev] != SNDRV_AUTO_PORT)
-			isapnp_write_byte(0x28, pnp_irq(pdev, 0)); /* MPU-401 IRQ Number */
-		isapnp_write_byte(0x72, pnp_irq(pdev, 0));	/* second IRQ */
-		isapnp_cfg_end();
-	} else {
-		snd_printk(KERN_ERR PFX "unable to install ISA PnP hack, expect malfunction\n");
-	}
-	port[dev] = pnp_port_start(pdev, 0);
-	fm_port[dev] = pnp_port_start(pdev, 1);
-	mpu_port[dev] = pnp_port_start(pdev, 2);
-	dma1[dev] = pnp_dma(pdev, 0);
-	dma2[dev] = pnp_dma(pdev, 1);
-	irq[dev] = pnp_irq(pdev, 0);
-	snd_printdd("PnP ES18xx: port=0x%lx, fm port=0x%lx, mpu port=0x%lx\n", port[dev], fm_port[dev], mpu_port[dev]);
-	snd_printdd("PnP ES18xx: dma1=%i, dma2=%i, irq=%i\n", dma1[dev], dma2[dev], irq[dev]);
 	kfree(cfg);
 	return 0;
 }
@@ -2302,7 +2331,69 @@ static struct platform_driver snd_es18xx_nonpnp_driver = {
 #ifdef CONFIG_PNP
 static unsigned int __devinitdata es18xx_pnp_devices;
 
-static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard,
+static int __devinit snd_audiodrive_pnp_detect(struct pnp_dev *pdev,
+					    const struct pnp_device_id *id)
+{
+	static int dev;
+	int err;
+	struct snd_card *card;
+
+	if (pnp_device_is_isapnp(pdev))
+		return -ENOENT;	/* we have another procedure - card */
+	for (; dev < SNDRV_CARDS; dev++) {
+		if (enable[dev] && isapnp[dev])
+			break;
+	}
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+
+	card = snd_es18xx_card_new(dev);
+	if (! card)
+		return -ENOMEM;
+	if ((err = snd_audiodrive_pnp(dev, card->private_data, pdev)) < 0) {
+		snd_card_free(card);
+		return err;
+	}
+	snd_card_set_dev(card, &pdev->dev);
+	if ((err = snd_audiodrive_probe(card, dev)) < 0) {
+		snd_card_free(card);
+		return err;
+	}
+	pnp_set_drvdata(pdev, card);
+	dev++;
+	es18xx_pnp_devices++;
+	return 0;
+}
+
+static void __devexit snd_audiodrive_pnp_remove(struct pnp_dev * pdev)
+{
+	snd_card_free(pnp_get_drvdata(pdev));
+	pnp_set_drvdata(pdev, NULL);
+}
+
+#ifdef CONFIG_PM
+static int snd_audiodrive_pnp_suspend(struct pnp_dev *pdev, pm_message_t state)
+{
+	return snd_es18xx_suspend(pnp_get_drvdata(pdev), state);
+}
+static int snd_audiodrive_pnp_resume(struct pnp_dev *pdev)
+{
+	return snd_es18xx_resume(pnp_get_drvdata(pdev));
+}
+#endif
+
+static struct pnp_driver es18xx_pnp_driver = {
+	.name = "es18xx-pnpbios",
+	.id_table = snd_audiodrive_pnpbiosids,
+	.probe = snd_audiodrive_pnp_detect,
+	.remove = __devexit_p(snd_audiodrive_pnp_remove),
+#ifdef CONFIG_PM
+	.suspend = snd_audiodrive_pnp_suspend,
+	.resume = snd_audiodrive_pnp_resume,
+#endif
+};
+
+static int __devinit snd_audiodrive_pnpc_detect(struct pnp_card_link *pcard,
 					       const struct pnp_card_device_id *pid)
 {
 	static int dev;
@@ -2320,7 +2411,7 @@ static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard,
 	if (! card)
 		return -ENOMEM;
 
-	if ((res = snd_audiodrive_pnp(dev, card->private_data, pcard, pid)) < 0) {
+	if ((res = snd_audiodrive_pnpc(dev, card->private_data, pcard, pid)) < 0) {
 		snd_card_free(card);
 		return res;
 	}
@@ -2336,19 +2427,19 @@ static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard,
 	return 0;
 }
 
-static void __devexit snd_audiodrive_pnp_remove(struct pnp_card_link * pcard)
+static void __devexit snd_audiodrive_pnpc_remove(struct pnp_card_link * pcard)
 {
 	snd_card_free(pnp_get_card_drvdata(pcard));
 	pnp_set_card_drvdata(pcard, NULL);
 }
 
 #ifdef CONFIG_PM
-static int snd_audiodrive_pnp_suspend(struct pnp_card_link *pcard, pm_message_t state)
+static int snd_audiodrive_pnpc_suspend(struct pnp_card_link *pcard, pm_message_t state)
 {
 	return snd_es18xx_suspend(pnp_get_card_drvdata(pcard), state);
 }
 
-static int snd_audiodrive_pnp_resume(struct pnp_card_link *pcard)
+static int snd_audiodrive_pnpc_resume(struct pnp_card_link *pcard)
 {
 	return snd_es18xx_resume(pnp_get_card_drvdata(pcard));
 }
@@ -2359,11 +2450,11 @@ static struct pnp_card_driver es18xx_pnpc_driver = {
 	.flags = PNP_DRIVER_RES_DISABLE,
 	.name = "es18xx",
 	.id_table = snd_audiodrive_pnpids,
-	.probe = snd_audiodrive_pnp_detect,
-	.remove = __devexit_p(snd_audiodrive_pnp_remove),
+	.probe = snd_audiodrive_pnpc_detect,
+	.remove = __devexit_p(snd_audiodrive_pnpc_remove),
 #ifdef CONFIG_PM
-	.suspend	= snd_audiodrive_pnp_suspend,
-	.resume		= snd_audiodrive_pnp_resume,
+	.suspend	= snd_audiodrive_pnpc_suspend,
+	.resume		= snd_audiodrive_pnpc_resume,
 #endif
 };
 #endif /* CONFIG_PNP */
@@ -2373,8 +2464,10 @@ static void __init_or_module snd_es18xx_unregister_all(void)
 	int i;
 
 #ifdef CONFIG_PNP
-	if (pnp_registered)
+	if (pnpc_registered)
 		pnp_unregister_card_driver(&es18xx_pnpc_driver);
+	if (pnp_registered)
+		pnp_unregister_driver(&es18xx_pnp_driver);
 #endif
 	for (i = 0; i < ARRAY_SIZE(platform_devices); ++i)
 		platform_device_unregister(platform_devices[i]);
@@ -2405,11 +2498,13 @@ static int __init alsa_card_es18xx_init(void)
 	}
 
 #ifdef CONFIG_PNP
-	err = pnp_register_card_driver(&es18xx_pnpc_driver);
-	if (!err) {
+	err = pnp_register_driver(&es18xx_pnp_driver);
+	if (!err)
 		pnp_registered = 1;
-		cards += es18xx_pnp_devices;
-	}
+	err = pnp_register_card_driver(&es18xx_pnpc_driver);
+	if (!err)
+		pnpc_registered = 1;
+	cards += es18xx_pnp_devices;
 #endif
 
 	if(!cards) {
-- 
GitLab


From 548a648b98318e4b843b636dd2c7f42377e19a00 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 31 Jul 2006 16:51:51 +0200
Subject: [PATCH 0918/1063] [ALSA] Fix control/status mmap with shared PCM
 substream

The flag that blocks the 32bit-incompatible mmap of the control/status
records must live outside the PCM substream instance, since a substream
can be shared among multiple opens.  The flag is now kept in the
snd_pcm_file record that is assigned directly to file->private_data.
Also removed the snd_pcm_add_file() and snd_pcm_remove_file() functions
and the snd_pcm_str.files field, which were not really used in the code.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/pcm.h     |  4 +---
 sound/core/pcm_compat.c |  2 +-
 sound/core/pcm_native.c | 49 +++++++++--------------------------------
 3 files changed, 12 insertions(+), 43 deletions(-)

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index f84d84993a31e..60d40b34efc0f 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -190,7 +190,7 @@ struct snd_pcm_ops {
 
 struct snd_pcm_file {
 	struct snd_pcm_substream *substream;
-	struct snd_pcm_file *next;
+	int no_compat_mmap;
 };
 
 struct snd_pcm_hw_rule;
@@ -384,7 +384,6 @@ struct snd_pcm_substream {
 	struct snd_info_entry *proc_prealloc_entry;
 #endif
 	/* misc flags */
-	unsigned int no_mmap_ctrl: 1;
 	unsigned int hw_opened: 1;
 };
 
@@ -402,7 +401,6 @@ struct snd_pcm_str {
 	/* -- OSS things -- */
 	struct snd_pcm_oss_stream oss;
 #endif
-	struct snd_pcm_file *files;
 #ifdef CONFIG_SND_VERBOSE_PROCFS
 	struct snd_info_entry *proc_root;
 	struct snd_info_entry *proc_info_entry;
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 2b8aab6fd6cd3..2b539799d23b7 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -478,7 +478,7 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l
 	 * mmap of PCM status/control records because of the size
 	 * incompatibility.
 	 */
-	substream->no_mmap_ctrl = 1;
+	pcm_file->no_compat_mmap = 1;
 
 	switch (cmd) {
 	case SNDRV_PCM_IOCTL_PVERSION:
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 439f047929e18..0224c70414f51 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1992,35 +1992,9 @@ int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static void snd_pcm_add_file(struct snd_pcm_str *str,
-			     struct snd_pcm_file *pcm_file)
-{
-	pcm_file->next = str->files;
-	str->files = pcm_file;
-}
-
-static void snd_pcm_remove_file(struct snd_pcm_str *str,
-				struct snd_pcm_file *pcm_file)
-{
-	struct snd_pcm_file * pcm_file1;
-	if (str->files == pcm_file) {
-		str->files = pcm_file->next;
-	} else {
-		pcm_file1 = str->files;
-		while (pcm_file1 && pcm_file1->next != pcm_file)
-			pcm_file1 = pcm_file1->next;
-		if (pcm_file1 != NULL)
-			pcm_file1->next = pcm_file->next;
-	}
-}
-
 static void pcm_release_private(struct snd_pcm_substream *substream)
 {
-	struct snd_pcm_file *pcm_file = substream->file;
-
 	snd_pcm_unlink(substream);
-	snd_pcm_remove_file(substream->pstr, pcm_file);
-	kfree(pcm_file);
 }
 
 void snd_pcm_release_substream(struct snd_pcm_substream *substream)
@@ -2060,7 +2034,6 @@ int snd_pcm_open_substream(struct snd_pcm *pcm, int stream,
 		return 0;
 	}
 
-	substream->no_mmap_ctrl = 0;
 	err = snd_pcm_hw_constraints_init(substream);
 	if (err < 0) {
 		snd_printd("snd_pcm_hw_constraints_init failed\n");
@@ -2105,19 +2078,16 @@ static int snd_pcm_open_file(struct file *file,
 	if (err < 0)
 		return err;
 
-	if (substream->ref_count > 1)
-		pcm_file = substream->file;
-	else {
-		pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL);
-		if (pcm_file == NULL) {
-			snd_pcm_release_substream(substream);
-			return -ENOMEM;
-		}
+	pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL);
+	if (pcm_file == NULL) {
+		snd_pcm_release_substream(substream);
+		return -ENOMEM;
+	}
+	pcm_file->substream = substream;
+	if (substream->ref_count == 1) {
 		str = substream->pstr;
 		substream->file = pcm_file;
 		substream->pcm_release = pcm_release_private;
-		pcm_file->substream = substream;
-		snd_pcm_add_file(str, pcm_file);
 	}
 	file->private_data = pcm_file;
 	*rpcm_file = pcm_file;
@@ -2209,6 +2179,7 @@ static int snd_pcm_release(struct inode *inode, struct file *file)
 	fasync_helper(-1, file, 0, &substream->runtime->fasync);
 	mutex_lock(&pcm->open_mutex);
 	snd_pcm_release_substream(substream);
+	kfree(pcm_file);
 	mutex_unlock(&pcm->open_mutex);
 	wake_up(&pcm->open_wait);
 	module_put(pcm->card->module);
@@ -3270,11 +3241,11 @@ static int snd_pcm_mmap(struct file *file, struct vm_area_struct *area)
 	offset = area->vm_pgoff << PAGE_SHIFT;
 	switch (offset) {
 	case SNDRV_PCM_MMAP_OFFSET_STATUS:
-		if (substream->no_mmap_ctrl)
+		if (pcm_file->no_compat_mmap)
 			return -ENXIO;
 		return snd_pcm_mmap_status(substream, file, area);
 	case SNDRV_PCM_MMAP_OFFSET_CONTROL:
-		if (substream->no_mmap_ctrl)
+		if (pcm_file->no_compat_mmap)
 			return -ENXIO;
 		return snd_pcm_mmap_control(substream, file, area);
 	default:
-- 
GitLab


From f03d68fe343d70bb06ecdb3d70dcf0e678ed99f9 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@mail.ru>
Date: Thu, 3 Aug 2006 15:06:14 +0200
Subject: [PATCH 0919/1063] [ALSA] ppc-beep - handle errors from
 input_register_device()

ppc-beep: handle errors from input_register_device()
(Also fixed the wrong memory release in the error path.)

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/ppc/beep.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c
index 5fec1e58f3102..5f38f670102c8 100644
--- a/sound/ppc/beep.c
+++ b/sound/ppc/beep.c
@@ -215,15 +215,18 @@ int __init snd_pmac_attach_beep(struct snd_pmac *chip)
 {
 	struct pmac_beep *beep;
 	struct input_dev *input_dev;
+	struct snd_kcontrol *beep_ctl;
 	void *dmabuf;
 	int err = -ENOMEM;
 
 	beep = kzalloc(sizeof(*beep), GFP_KERNEL);
+	if (! beep)
+		return -ENOMEM;
 	dmabuf = dma_alloc_coherent(&chip->pdev->dev, BEEP_BUFLEN * 4,
 				    &beep->addr, GFP_KERNEL);
 	input_dev = input_allocate_device();
-	if (!beep || !dmabuf || !input_dev)
-		goto fail;
+	if (! dmabuf || ! input_dev)
+		goto fail1;
 
 	/* FIXME: set more better values */
 	input_dev->name = "PowerMac Beep";
@@ -244,17 +247,24 @@ int __init snd_pmac_attach_beep(struct snd_pmac *chip)
 	beep->volume = BEEP_VOLUME;
 	beep->running = 0;
 
-	err = snd_ctl_add(chip->card, snd_ctl_new1(&snd_pmac_beep_mixer, chip));
+	beep_ctl = snd_ctl_new1(&snd_pmac_beep_mixer, chip);
+	err = snd_ctl_add(chip->card, beep_ctl);
 	if (err < 0)
-		goto fail;
+		goto fail1;
+ 
+ 	chip->beep = beep;
 
-	chip->beep = beep;
-	input_register_device(beep->dev);
-
-	return 0;
-
- fail:	input_free_device(input_dev);
-	kfree(dmabuf);
+	err = input_register_device(beep->dev);
+	if (err)
+		goto fail2;
+ 
+ 	return 0;
+ 
+ fail2:	snd_ctl_remove(chip->card, beep_ctl);
+ fail1:	input_free_device(input_dev);
+	if (dmabuf)
+		dma_free_coherent(&chip->pdev->dev, BEEP_BUFLEN * 4,
+				  dmabuf, beep->addr);
 	kfree(beep);
 	return err;
 }
-- 
GitLab


From 2529bba7606b23c1b7161d3c2ad486162e8650f9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 4 Aug 2006 12:57:19 +0200
Subject: [PATCH 0920/1063] [ALSA] Fix substream selection in PCM and rawmidi

The PCM and rawmidi substreams can be selected explicitly by opening a
control handle and setting the preference via the *_PREFER_SUBDEVICE
ioctl.  But when multiple control handles are open, the driver gets
confused.  The patch fixes the initialization of prefer_*_subdevice
(now -1, i.e. unset) and the check across multiple controls: the first
control handle that has a subdevice set is the one that is used.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/control.c | 2 ++
 sound/core/pcm.c     | 3 ++-
 sound/core/rawmidi.c | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/sound/core/control.c b/sound/core/control.c
index 31ad58154c06c..ac1442682eace 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -75,6 +75,8 @@ static int snd_ctl_open(struct inode *inode, struct file *file)
 	init_waitqueue_head(&ctl->change_sleep);
 	spin_lock_init(&ctl->read_lock);
 	ctl->card = card;
+	ctl->prefer_pcm_subdevice = -1;
+	ctl->prefer_rawmidi_subdevice = -1;
 	ctl->pid = current->pid;
 	file->private_data = ctl;
 	write_lock_irqsave(&card->ctl_files_rwlock, flags);
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index f52178abf1209..ed3b09469560b 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -792,7 +792,8 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream,
 		kctl = snd_ctl_file(list);
 		if (kctl->pid == current->pid) {
 			prefer_subdevice = kctl->prefer_pcm_subdevice;
-			break;
+			if (prefer_subdevice != -1)
+				break;
 		}
 	}
 	up_read(&card->controls_rwsem);
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 8a2bdfae63e39..269c467ca9bb2 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -430,7 +430,8 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file)
 			kctl = snd_ctl_file(list);
 			if (kctl->pid == current->pid) {
 				subdevice = kctl->prefer_rawmidi_subdevice;
-				break;
+				if (subdevice != -1)
+					break;
 			}
 		}
 		up_read(&card->controls_rwsem);
-- 
GitLab


From 727f317a10da74b4e5c6d968bbba07767bfea794 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 4 Aug 2006 19:08:03 +0200
Subject: [PATCH 0921/1063] [ALSA] usb-audio - Fix a typo of CONFIG_PROC_FS

Fixed a typo of CONFIG_PROC_FS in usbaudio.c.
The stream proc file appears again.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/usb/usbaudio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 3144313859137..087f9b64d8a0c 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -2049,7 +2049,7 @@ static struct usb_driver usb_audio_driver = {
 };
 
 
-#if defined(CONFIG_PROCFS) && defined(CONFIG_SND_VERBOSE_PROCFS)
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_SND_VERBOSE_PROCFS)
 
 /*
  * proc interface for list the supported pcm formats
-- 
GitLab


From 25b6c43b3d6258f3e87244eeb2b9347dc5e83c40 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 8 Aug 2006 13:01:14 +0200
Subject: [PATCH 0922/1063] [ALSA] Fix the preselected model for HP machine

Fixed the preselected model for a HP machine with SSID 103c:3010
to use hp-3013 (ALSA bug#2157).

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_realtek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 991f1079116bb..ac561a5d86675 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3931,7 +3931,7 @@ static struct hda_board_config alc260_cfg_tbl[] = {
 	  .config = ALC260_BASIC }, /* CTL Travel Master U553W */
 	{ .modelname = "hp", .config = ALC260_HP },
 	{ .modelname = "hp-3013", .config = ALC260_HP_3013 },
-	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP },
+	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP_3013 },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3012, .config = ALC260_HP_3013 },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3013, .config = ALC260_HP_3013 },
-- 
GitLab


From f5a5ffad072ec3c1fd636174c30f0ba52fe0259f Mon Sep 17 00:00:00 2001
From: Danny Tholen <danny@mailmij.org>
Date: Tue, 8 Aug 2006 18:59:07 +0200
Subject: [PATCH 0923/1063] [ALSA] [snd-hda-intel] fix sound on some Asus W6A
 chips

This patch adds support in ALSA snd-hda-intel driver for Asus W6A
motherboard as reported in MDV Bugzilla #19962
(see http://qa.mandriva.com/show_bug.cgi?id=19962)

Signed-off-by: Danny Tholen <danny@mailmij.org>
Signed-off-by: Thomas Backlund <tmb@mandriva.org>
Signed-off-by: Thierry Vignaud <tvignaud@mandriva.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ac561a5d86675..a91757316765f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2240,6 +2240,7 @@ static struct hda_board_config alc880_cfg_tbl[] = {
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1113, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1173, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1993, .config = ALC880_ASUS },
+	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x10c2, .config = ALC880_ASUS_DIG }, /* Asus W6A */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x10c3, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1133, .config = ALC880_ASUS },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1123, .config = ALC880_ASUS_DIG },
-- 
GitLab


From 683fe1537e660c322c8af953773921e814791193 Mon Sep 17 00:00:00 2001
From: Jochen Voss <voss@seehuhn.de>
Date: Tue, 8 Aug 2006 21:12:44 +0200
Subject: [PATCH 0924/1063] [ALSA] Revolution 5.1 - add AK5365 ADC support

Add support for the AK5365 ADC.

Signed-off-by: Jochen Voss <voss@seehuhn.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/ak4xxx-adda.h   |  3 ++-
 sound/i2c/other/ak4xxx-adda.c | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h
index 3d98884920261..65ddfa3cac1fb 100644
--- a/include/sound/ak4xxx-adda.h
+++ b/include/sound/ak4xxx-adda.h
@@ -53,7 +53,8 @@ struct snd_akm4xxx {
 	unsigned int idx_offset;		/* control index offset */
 	enum {
 		SND_AK4524, SND_AK4528, SND_AK4529,
-		SND_AK4355, SND_AK4358, SND_AK4381
+		SND_AK4355, SND_AK4358, SND_AK4381,
+		SND_AK5365
 	} type;
 	unsigned int *num_stereo;	/* array of combined counts
 					 * for the mixer
diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c
index dc7cc2001b74a..7d562f0842077 100644
--- a/sound/i2c/other/ak4xxx-adda.c
+++ b/sound/i2c/other/ak4xxx-adda.c
@@ -598,6 +598,31 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
 		if (err < 0)
 			goto __error;
 	}
+
+	if (ak->type == SND_AK5365) {
+		memset(ctl, 0, sizeof(*ctl));
+		if (ak->channel_names == NULL)
+			strcpy(ctl->id.name, "Capture Volume");
+		else
+			strcpy(ctl->id.name, ak->channel_names[0]);
+		ctl->id.index = ak->idx_offset * 2;
+		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		ctl->count = 1;
+		ctl->info = snd_akm4xxx_stereo_volume_info;
+		ctl->get = snd_akm4xxx_stereo_volume_get;
+		ctl->put = snd_akm4xxx_stereo_volume_put;
+		/* Registers 4 & 5 (see AK5365 data sheet, pages 34 and 35):
+		 * valid values are from 0x00 (mute) to 0x98 (+12dB).  */
+		ctl->private_value =
+			AK_COMPOSE(0, 4, 0, 0x98);
+		ctl->private_data = ak;
+		err = snd_ctl_add(ak->card,
+				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
+					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		if (err < 0)
+			goto __error;
+	}
+
 	if (ak->type == SND_AK4355 || ak->type == SND_AK4358)
 		num_emphs = 1;
 	else
-- 
GitLab


From 96d9e9347c9c5ca980bef22b4add7d437d79034f Mon Sep 17 00:00:00 2001
From: Jochen Voss <voss@seehuhn.de>
Date: Tue, 8 Aug 2006 21:13:42 +0200
Subject: [PATCH 0925/1063] [ALSA] Revolution 5.1 - register the AK5365 ADC
 with ALSA

Enable capture support for the M-Audio Revolution 5.1 card,
by registering the ADC with ALSA.

Signed-off-by: Jochen Voss <voss@seehuhn.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ice1712/revo.c | 30 ++++++++++++++++++++++++++++--
 sound/pci/ice1712/revo.h |  2 +-
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c
index fec9440cb3104..ef64be49a898e 100644
--- a/sound/pci/ice1712/revo.c
+++ b/sound/pci/ice1712/revo.c
@@ -98,6 +98,9 @@ static unsigned int revo51_num_stereo[] = {2, 1, 1, 2};
 static char *revo51_channel_names[] = {"PCM Playback Volume", "PCM Center Playback Volume",
 					"PCM LFE Playback Volume", "PCM Rear Playback Volume"};
 
+static unsigned int revo51_adc_num_stereo[] = {2};
+static char *revo51_adc_channel_names[] = {"PCM Capture Volume"};
+
 static struct snd_akm4xxx akm_revo_front __devinitdata = {
 	.type = SND_AK4381,
 	.num_dacs = 2,
@@ -159,7 +162,26 @@ static struct snd_ak4xxx_private akm_revo51_priv __devinitdata = {
 	.data_mask = VT1724_REVO_CDOUT,
 	.clk_mask = VT1724_REVO_CCLK,
 	.cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
-	.cs_addr = 0,
+	.cs_addr = VT1724_REVO_CS1 | VT1724_REVO_CS2,
+	.cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
+	.add_flags = VT1724_REVO_CCLK, /* high at init */
+	.mask_flags = 0,
+};
+
+static struct snd_akm4xxx akm_revo51_adc __devinitdata = {
+	.type = SND_AK5365,
+	.num_adcs = 2,
+	.num_stereo = revo51_adc_num_stereo,
+	.channel_names = revo51_adc_channel_names
+};
+
+static struct snd_ak4xxx_private akm_revo51_adc_priv __devinitdata = {
+	.caddr = 2,
+	.cif = 0,
+	.data_mask = VT1724_REVO_CDOUT,
+	.clk_mask = VT1724_REVO_CCLK,
+	.cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
+	.cs_addr = VT1724_REVO_CS0 | VT1724_REVO_CS2,
 	.cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
 	.add_flags = VT1724_REVO_CCLK, /* high at init */
 	.mask_flags = 0,
@@ -202,9 +224,13 @@ static int __devinit revo_init(struct snd_ice1712 *ice)
 		snd_ice1712_gpio_write_bits(ice, VT1724_REVO_MUTE, VT1724_REVO_MUTE);
 		break;
 	case VT1724_SUBDEVICE_REVOLUTION51:
-		ice->akm_codecs = 1;
+		ice->akm_codecs = 2;
 		if ((err = snd_ice1712_akm4xxx_init(ak, &akm_revo51, &akm_revo51_priv, ice)) < 0)
 			return err;
+		err = snd_ice1712_akm4xxx_init(ak + 1, &akm_revo51_adc,
+					       &akm_revo51_adc_priv, ice);
+		if (err < 0)
+			return err;
 		/* unmute all codecs - needed! */
 		snd_ice1712_gpio_write_bits(ice, VT1724_REVO_MUTE, VT1724_REVO_MUTE);
 		break;
diff --git a/sound/pci/ice1712/revo.h b/sound/pci/ice1712/revo.h
index dea52ea219dff..efbb86ec3289e 100644
--- a/sound/pci/ice1712/revo.h
+++ b/sound/pci/ice1712/revo.h
@@ -42,7 +42,7 @@ extern struct snd_ice1712_card_info snd_vt1724_revo_cards[];
 #define VT1724_REVO_CCLK	0x02
 #define VT1724_REVO_CDIN	0x04	/* not used */
 #define VT1724_REVO_CDOUT	0x08
-#define VT1724_REVO_CS0		0x10	/* not used */
+#define VT1724_REVO_CS0		0x10	/* AK5365 chipselect for Rev. 5.1 */
 #define VT1724_REVO_CS1		0x20	/* front AKM4381 chipselect */
 #define VT1724_REVO_CS2		0x40	/* surround AKM4355 chipselect */
 #define VT1724_REVO_MUTE	(1<<22)	/* 0 = all mute, 1 = normal operation */
-- 
GitLab


From 30ba6e207a915a6c70f22ccb3f9169d1cce88466 Mon Sep 17 00:00:00 2001
From: Jochen Voss <voss@seehuhn.de>
Date: Wed, 9 Aug 2006 14:26:26 +0200
Subject: [PATCH 0926/1063] [ALSA] Revolution 5.1 - complete the AK5365 support

Complete the AK5365 support.
This adds a boolean control to toggle the soft mute feature of the
AK5365 chip.

Signed-off-by: Jochen Voss <voss@seehuhn.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/i2c/other/ak4xxx-adda.c | 72 +++++++++++++++++++++++++++++++++++
 sound/pci/ice1712/revo.c      |  2 +-
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c
index 7d562f0842077..d76d8b078a817 100644
--- a/sound/i2c/other/ak4xxx-adda.c
+++ b/sound/i2c/other/ak4xxx-adda.c
@@ -472,6 +472,57 @@ static int snd_akm4xxx_deemphasis_put(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static int ak4xxx_switch_info(struct snd_kcontrol *kcontrol,
+			      struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	return 0;
+}
+
+static int ak4xxx_switch_get(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
+	int chip = AK_GET_CHIP(kcontrol->private_value);
+	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int shift = AK_GET_SHIFT(kcontrol->private_value);
+	int invert = AK_GET_INVERT(kcontrol->private_value);
+	unsigned char val = snd_akm4xxx_get(ak, chip, addr);
+
+	if (invert)
+		val = ! val;
+	ucontrol->value.integer.value[0] = (val & (1<<shift)) != 0;
+	return 0;
+}
+
+static int ak4xxx_switch_put(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
+	int chip = AK_GET_CHIP(kcontrol->private_value);
+	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int shift = AK_GET_SHIFT(kcontrol->private_value);
+	int invert = AK_GET_INVERT(kcontrol->private_value);
+	long flag = ucontrol->value.integer.value[0];
+	unsigned char val, oval;
+	int change;
+
+	if (invert)
+		flag = ! flag;
+	oval = snd_akm4xxx_get(ak, chip, addr);
+	if (flag)
+		val = oval | (1<<shift);
+	else
+		val = oval & ~(1<<shift);
+	change = (oval != val);
+	if (change)
+		snd_akm4xxx_write(ak, chip, addr, val);
+	return change;
+}
+
 /*
  * build AK4xxx controls
  */
@@ -621,6 +672,27 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
 					      SNDRV_CTL_ELEM_ACCESS_WRITE));
 		if (err < 0)
 			goto __error;
+
+		memset(ctl, 0, sizeof(*ctl));
+		if (ak->channel_names == NULL)
+			strcpy(ctl->id.name, "Capture Switch");
+		else
+			strcpy(ctl->id.name, ak->channel_names[1]);
+		ctl->id.index = ak->idx_offset * 2;
+		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		ctl->count = 1;
+		ctl->info = ak4xxx_switch_info;
+		ctl->get = ak4xxx_switch_get;
+		ctl->put = ak4xxx_switch_put;
+		/* register 2, bit 0 (SMUTE): 0 = normal operation, 1 = mute */
+		ctl->private_value =
+			AK_COMPOSE(0, 2, 0, 0) | AK_INVERT;
+		ctl->private_data = ak;
+		err = snd_ctl_add(ak->card,
+				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
+					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		if (err < 0)
+			goto __error;
 	}
 
 	if (ak->type == SND_AK4355 || ak->type == SND_AK4358)
diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c
index ef64be49a898e..1134a57f9e652 100644
--- a/sound/pci/ice1712/revo.c
+++ b/sound/pci/ice1712/revo.c
@@ -99,7 +99,7 @@ static char *revo51_channel_names[] = {"PCM Playback Volume", "PCM Center Playba
 					"PCM LFE Playback Volume", "PCM Rear Playback Volume"};
 
 static unsigned int revo51_adc_num_stereo[] = {2};
-static char *revo51_adc_channel_names[] = {"PCM Capture Volume"};
+static char *revo51_adc_channel_names[] = {"PCM Capture Volume","PCM Capture Switch"};
 
 static struct snd_akm4xxx akm_revo_front __devinitdata = {
 	.type = SND_AK4381,
-- 
GitLab


From cf93907b98c82c2157e5bbe766bee8f1c5bb87b2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 9 Aug 2006 14:33:27 +0200
Subject: [PATCH 0927/1063] [ALSA] Fix compile warnings in ak4xxx-adda.c

Fixed compile warnings in ak4xxx-adda.c regarding missing
enum cases in switch.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/i2c/other/ak4xxx-adda.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c
index d76d8b078a817..0aea536a3371b 100644
--- a/sound/i2c/other/ak4xxx-adda.c
+++ b/sound/i2c/other/ak4xxx-adda.c
@@ -137,6 +137,8 @@ void snd_akm4xxx_reset(struct snd_akm4xxx *ak, int state)
 	case SND_AK4381:
 		ak4381_reset(ak, state);
 		break;
+	default:
+		break;
 	}
 }
 
@@ -727,6 +729,9 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
 		case SND_AK4381:
 			ctl->private_value = AK_COMPOSE(idx, 1, 1, 0);
 			break;
+		default:
+			err = -EINVAL;
+			goto __error;
 		}
 		ctl->private_data = ak;
 		err = snd_ctl_add(ak->card,
-- 
GitLab


From f24e9f586b377749dff37554696cf3a105540c94 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 9 Aug 2006 14:51:14 +0200
Subject: [PATCH 0928/1063] [ALSA] Select I2C and I2C_POWERMAC in
 aoa/codecs/Kconfig

Added the missing selection of I2C and I2C_POWERMAC
for Onyx and TAS codecs in aoa/codecs/Kconfig.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/aoa/codecs/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/aoa/codecs/Kconfig b/sound/aoa/codecs/Kconfig
index 90cf58f68630b..d5fbd6016e937 100644
--- a/sound/aoa/codecs/Kconfig
+++ b/sound/aoa/codecs/Kconfig
@@ -1,6 +1,8 @@
 config SND_AOA_ONYX
 	tristate "support Onyx chip"
 	depends on SND_AOA
+	select I2C
+	select I2C_POWERMAC
 	---help---
 	This option enables support for the Onyx (pcm3052)
 	codec chip found in the latest Apple machines
@@ -18,6 +20,8 @@ config SND_AOA_ONYX
 config SND_AOA_TAS
 	tristate "support TAS chips"
 	depends on SND_AOA
+	select I2C
+	select I2C_POWERMAC
 	---help---
 	This option enables support for the tas chips
 	found in a lot of Apple Machines, especially
-- 
GitLab


From 22309c3e0c8911865cad0aa94f53a9afadaad7ee Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 9 Aug 2006 16:57:28 +0200
Subject: [PATCH 0929/1063] [ALSA] Added model for Uniwill laptop with ALC861

Added a new model 'uniwill-m31' for Uniwill laptops with ALC861
codec chip.  The patch is taken from ALSA bug#2035, and modified.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         |   1 +
 sound/pci/hda/patch_realtek.c                 | 137 ++++++++++++++++++
 2 files changed, 138 insertions(+)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index d0dbc3fb20c2f..74be228596ade 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -827,6 +827,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  3stack-dig	3-jack with SPDIF I/O
 	  6stack-dig	6-jack with SPDIF I/O
 	  3stack-660	3-jack (for ALC660)
+	  uniwill-m31	Uniwill M31 laptop
 	  auto		auto-config reading BIOS (default)
 
 	CMI9880
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a91757316765f..f857e963ff452 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -90,6 +90,7 @@ enum {
 	ALC660_3ST,
 	ALC861_3ST_DIG,
 	ALC861_6ST_DIG,
+	ALC861_UNIWILL_M31,
 	ALC861_AUTO,
 	ALC861_MODEL_LAST,
 };
@@ -6021,6 +6022,23 @@ static struct hda_channel_mode alc861_threestack_modes[2] = {
 	{ 2, alc861_threestack_ch2_init },
 	{ 6, alc861_threestack_ch6_init },
 };
+/* Set mic1 as input and unmute the mixer */
+static struct hda_verb alc861_uniwill_m31_ch2_init[] = {
+	{ 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+	{ 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8)) }, /*mic*/
+	{ } /* end */
+};
+/* Set mic1 as output and mute mixer */
+static struct hda_verb alc861_uniwill_m31_ch4_init[] = {
+	{ 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+	{ 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8)) }, /*mic*/
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc861_uniwill_m31_modes[2] = {
+	{ 2, alc861_uniwill_m31_ch2_init },
+	{ 4, alc861_uniwill_m31_ch4_init },
+};
 
 /* patch-ALC861 */
 
@@ -6099,6 +6117,47 @@ static struct snd_kcontrol_new alc861_3ST_mixer[] = {
 	},
 	{ } /* end */
 };			
+static struct snd_kcontrol_new alc861_uniwill_m31_mixer[] = {
+        /* output mixer control */
+	HDA_CODEC_MUTE("Front Playback Switch", 0x03, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Surround Playback Switch", 0x06, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x05, 1, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x05, 2, 0x0, HDA_OUTPUT),
+	/*HDA_CODEC_MUTE("Side Playback Switch", 0x04, 0x0, HDA_OUTPUT), */
+
+	/* Input mixer control */
+	/* HDA_CODEC_VOLUME("Input Playback Volume", 0x15, 0x0, HDA_OUTPUT),
+	   HDA_CODEC_MUTE("Input Playback Switch", 0x15, 0x0, HDA_OUTPUT), */
+	HDA_CODEC_VOLUME("CD Playback Volume", 0x15, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("CD Playback Switch", 0x15, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x15, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x15, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x15, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x15, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x10, 0x01, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1a, 0x03, HDA_INPUT),
+ 
+	/* Capture mixer control */
+	HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Capture Source",
+		.count = 1,
+		.info = alc_mux_enum_info,
+		.get = alc_mux_enum_get,
+		.put = alc_mux_enum_put,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Channel Mode",
+		.info = alc_ch_mode_info,
+		.get = alc_ch_mode_get,
+		.put = alc_ch_mode_put,
+                .private_value = ARRAY_SIZE(alc861_uniwill_m31_modes),
+	},
+	{ } /* end */
+};			
 	
 /*
  * generic initialization of ADC, input mixers and output mixers
@@ -6227,6 +6286,67 @@ static struct hda_verb alc861_threestack_init_verbs[] = {
         {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
 	{ }
 };
+
+static struct hda_verb alc861_uniwill_m31_init_verbs[] = {
+	/*
+	 * Unmute ADC0 and set the default input to mic-in
+	 */
+	/* port-A for surround (rear panel) */
+	{ 0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+	/* port-B for mic-in (rear panel) with vref */
+	{ 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+	/* port-C for line-in (rear panel) */
+	{ 0x0c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+	/* port-D for Front */
+	{ 0x0b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+	{ 0x0b, AC_VERB_SET_CONNECT_SEL, 0x00 },
+	/* port-E for HP out (front panel) */
+	{ 0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, // this has to be set to VREF80
+	/* route front PCM to HP */
+	{ 0x0f, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	/* port-F for mic-in (front panel) with vref */
+	{ 0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+	/* port-G for CLFE (rear panel) */
+	{ 0x1f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+	/* port-H for side (rear panel) */
+	{ 0x20, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+	/* CD-in */
+	{ 0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+	/* route front mic to ADC1*/
+	{0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+	{0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	/* Unmute DAC0~3 & spdif out*/
+	{0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	
+	/* Unmute Mixer 14 (mic) 1c (Line in)*/
+	{0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	
+	/* Unmute Stereo Mixer 15 */
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb00c          }, //Output 0~12 step
+
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)}, // hp used DAC 3 (Front)
+        {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+	{ }
+};
+
 /*
  * generic initialization of ADC, input mixers and output mixers
  */
@@ -6561,6 +6681,9 @@ static struct hda_board_config alc861_cfg_tbl[] = {
 	  .config = ALC660_3ST },
 	{ .modelname = "3stack-dig", .config = ALC861_3ST_DIG },
 	{ .modelname = "6stack-dig", .config = ALC861_6ST_DIG },
+	{ .modelname = "uniwill-m31", .config = ALC861_UNIWILL_M31},
+	{ .pci_subvendor = 0x1584, .pci_subdevice = 0x9072,
+	  .config = ALC861_UNIWILL_M31 },
 	{ .modelname = "auto", .config = ALC861_AUTO },
 	{}
 };
@@ -6615,6 +6738,20 @@ static struct alc_config_preset alc861_presets[] = {
 		.adc_nids = alc861_adc_nids,
 		.input_mux = &alc861_capture_source,
 	},
+	[ALC861_UNIWILL_M31] = {
+		.mixers = { alc861_uniwill_m31_mixer },
+		.init_verbs = { alc861_uniwill_m31_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc861_dac_nids),
+		.dac_nids = alc861_dac_nids,
+		.dig_out_nid = ALC861_DIGOUT_NID,
+		.num_channel_mode = ARRAY_SIZE(alc861_uniwill_m31_modes),
+		.channel_mode = alc861_uniwill_m31_modes,
+		.need_dac_fix = 1,
+		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
+		.adc_nids = alc861_adc_nids,
+		.input_mux = &alc861_capture_source,
+	},
+
 };	
 
 
-- 
GitLab


From fe25befde9723ba7d921c100bf00d7643323e5a7 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@suse.cz>
Date: Tue, 15 Aug 2006 14:39:07 +0200
Subject: [PATCH 0930/1063] [ALSA] ice1712 - fix 1600->16000Hz value typo

Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ice1712/ice1712.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c
index bf20858d9f193..9b8325d65d8de 100644
--- a/sound/pci/ice1712/ice1712.c
+++ b/sound/pci/ice1712/ice1712.c
@@ -1857,7 +1857,7 @@ static int snd_ice1712_pro_internal_clock_put(struct snd_kcontrol *kcontrol,
 {
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	static unsigned int xrate[13] = {
-		8000, 9600, 11025, 12000, 1600, 22050, 24000,
+		8000, 9600, 11025, 12000, 16000, 22050, 24000,
 		32000, 44100, 48000, 64000, 88200, 96000
 	};
 	unsigned char oval;
@@ -1924,7 +1924,7 @@ static int snd_ice1712_pro_internal_clock_default_get(struct snd_kcontrol *kcont
 {
 	int val;
 	static unsigned int xrate[13] = {
-		8000, 9600, 11025, 12000, 1600, 22050, 24000,
+		8000, 9600, 11025, 12000, 16000, 22050, 24000,
 		32000, 44100, 48000, 64000, 88200, 96000
 	};
 
@@ -1941,7 +1941,7 @@ static int snd_ice1712_pro_internal_clock_default_put(struct snd_kcontrol *kcont
 						      struct snd_ctl_elem_value *ucontrol)
 {
 	static unsigned int xrate[13] = {
-		8000, 9600, 11025, 12000, 1600, 22050, 24000,
+		8000, 9600, 11025, 12000, 16000, 22050, 24000,
 		32000, 44100, 48000, 64000, 88200, 96000
 	};
 	unsigned char oval;
-- 
GitLab


From 42fe7647911d0bcaf81aac46db73a3b24387df6d Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 16 Aug 2006 12:53:34 +0200
Subject: [PATCH 0931/1063] [ALSA] dbri driver cleanup

This is a small cleanup of the dbri driver for sparc machines.
It also contains a fix for the DBRI interrupt queue initialization.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index f3ae6e23610e5..652f433a3f0d0 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -104,17 +104,15 @@ static char *cmds[] = {
 
 #define dprintk(a, x...) if(dbri_debug & a) printk(KERN_DEBUG x)
 
-#define DBRI_CMD(cmd, intr, value) ((cmd << 28) |			\
-				    (1 << 27) | \
-				    value)
 #else
 #define dprintk(a, x...)
 
-#define DBRI_CMD(cmd, intr, value) ((cmd << 28) |			\
-				    (intr << 27) | \
-				    value)
 #endif				/* DBRI_DEBUG */
 
+#define DBRI_CMD(cmd, intr, value) ((cmd << 28) |	\
+				    (intr << 27) |	\
+				    value)
+
 /***************************************************************************
 	CS4215 specific definitions and structures
 ****************************************************************************/
@@ -690,7 +688,6 @@ static volatile s32 *dbri_cmdlock(struct snd_dbri * dbri, enum dbri_lock get)
 static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd)
 {
 	volatile s32 *ptr;
-	u32	reg;
 
 	for (ptr = &dbri->dma->cmd[0]; ptr < cmd; ptr++) {
 		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
@@ -709,9 +706,6 @@ static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd)
 
 	/* Set command pointer and signal it is valid. */
 	sbus_writel(dbri->dma_dvma, dbri->regs + REG8);
-	reg = sbus_readl(dbri->regs + REG0);
-	reg |= D_P;
-	sbus_writel(reg, dbri->regs + REG0);
 
 	/*spin_unlock(&dbri->lock); */
 }
@@ -752,7 +746,7 @@ static void dbri_initialize(struct snd_dbri * dbri)
 	 */
 	for (n = 0; n < DBRI_NO_INTS - 1; n++) {
 		dma_addr = dbri->dma_dvma;
-		dma_addr += dbri_dma_off(intr, ((n + 1) & DBRI_INT_BLK));
+		dma_addr += dbri_dma_off(intr, ((n + 1) * DBRI_INT_BLK));
 		dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr;
 	}
 	dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0);
-- 
GitLab


From 6fb982803522bc86ca61774c6edf317f77165453 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 16 Aug 2006 12:54:29 +0200
Subject: [PATCH 0932/1063] [ALSA] sparc dbri removal of DBRI_NO_INTS

This patch removes the DBRI_NO_INTS define and all code related to
handling more than one block of dbri irq statuses.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 31 ++++++++-----------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 652f433a3f0d0..4651ff5135137 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -238,12 +238,6 @@ static struct {
 #define REG9	0x24UL		/* Interrupt Queue Pointer */
 
 #define DBRI_NO_CMDS	64
-#define DBRI_NO_INTS	1	/* Note: the value of this define was
-				 * originally 2.  The ringbuffer to store
-				 * interrupts in dma is currently broken.
-				 * This is a temporary fix until the ringbuffer
-				 * is fixed.
-				 */
 #define DBRI_INT_BLK	64
 #define DBRI_NO_DESCS	64
 #define DBRI_NO_PIPES	32
@@ -268,7 +262,7 @@ struct dbri_mem {
  */
 struct dbri_dma {
 	volatile s32 cmd[DBRI_NO_CMDS];	/* Place for commands       */
-	volatile s32 intr[DBRI_NO_INTS * DBRI_INT_BLK];	/* Interrupt field  */
+	volatile s32 intr[DBRI_INT_BLK];	/* Interrupt field  */
 	struct dbri_mem desc[DBRI_NO_DESCS];	/* Xmit/receive descriptors */
 };
 
@@ -741,18 +735,6 @@ static void dbri_initialize(struct snd_dbri * dbri)
 	dprintk(D_GEN, "init: cmd: %p, int: %p\n",
 		&dbri->dma->cmd[0], &dbri->dma->intr[0]);
 
-	/*
-	 * Initialize the interrupt ringbuffer.
-	 */
-	for (n = 0; n < DBRI_NO_INTS - 1; n++) {
-		dma_addr = dbri->dma_dvma;
-		dma_addr += dbri_dma_off(intr, ((n + 1) * DBRI_INT_BLK));
-		dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr;
-	}
-	dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0);
-	dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr;
-	dbri->dbri_irqp = 1;
-
 	/* Initialize pipes */
 	for (n = 0; n < DBRI_NO_PIPES; n++)
 		dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1;
@@ -765,9 +747,14 @@ static void dbri_initialize(struct snd_dbri * dbri)
 	sbus_writel(tmp, dbri->regs + REG0);
 
 	/*
-	 * Set up the interrupt queue
+	 * Initialize the interrupt ringbuffer.
 	 */
 	dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0);
+	dbri->dma->intr[0] = dma_addr;
+	dbri->dbri_irqp = 1;
+	/*
+	 * Set up the interrupt queue
+	 */
 	*(cmd++) = DBRI_CMD(D_IIQ, 0, 0);
 	*(cmd++) = dma_addr;
 
@@ -1951,10 +1938,8 @@ static void dbri_process_interrupt_buffer(struct snd_dbri * dbri)
 	while ((x = dbri->dma->intr[dbri->dbri_irqp]) != 0) {
 		dbri->dma->intr[dbri->dbri_irqp] = 0;
 		dbri->dbri_irqp++;
-		if (dbri->dbri_irqp == (DBRI_NO_INTS * DBRI_INT_BLK))
+		if (dbri->dbri_irqp == DBRI_INT_BLK)
 			dbri->dbri_irqp = 1;
-		else if ((dbri->dbri_irqp & (DBRI_INT_BLK - 1)) == 0)
-			dbri->dbri_irqp++;
 
 		dbri_process_one_interrupt(dbri, x);
 	}
-- 
GitLab


From 5e4968e24ced93b7b130e7e1fc947a79f82776bf Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 16 Aug 2006 12:56:16 +0200
Subject: [PATCH 0933/1063] [ALSA] sound/pci/fm801: Use ARRAY_SIZE macro

Use ARRAY_SIZE macro instead of sizeof(x)/sizeof(x[0])

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/fm801.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index 88a3e9f3224ab..f3f2b2c99723a 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -321,10 +321,8 @@ static unsigned int channels[] = {
   2, 4, 6
 };
 
-#define CHANNELS sizeof(channels) / sizeof(channels[0])
-
 static struct snd_pcm_hw_constraint_list hw_constraints_channels = {
-	.count = CHANNELS,
+	.count = ARRAY_SIZE(channels),
 	.list = channels,
 	.mask = 0,
 };
-- 
GitLab


From 80b556f26b3830ad5bd6ff9f701675ac8afcb263 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 16 Aug 2006 12:56:53 +0200
Subject: [PATCH 0934/1063] [ALSA] emu10k1x: simplify around
 pci_register_driver()

Report errors to modprobe as a side effect.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/emu10k1/emu10k1x.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index bda8bdf59935b..da1610a571b8c 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -1626,12 +1626,7 @@ static struct pci_driver driver = {
 // initialization of the module
 static int __init alsa_card_emu10k1x_init(void)
 {
-	int err;
-
-	if ((err = pci_register_driver(&driver)) > 0)
-		return err;
-
-	return 0;
+	return pci_register_driver(&driver);
 }
 
 // clean up the module
-- 
GitLab


From d244bf897b2e7933112067ec8d8dc1d47b86145f Mon Sep 17 00:00:00 2001
From: Magnus Sandin <magnus@sandin.cx>
Date: Wed, 16 Aug 2006 15:25:23 +0200
Subject: [PATCH 0935/1063] [ALSA] Fix for LG K1 Express Laptop

Attached is the patch for the LG K1 Express (K1-2333V) laptop that
enables sound output.

Signed-off-by: Magnus Sandin <magnus@sandin.cx>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ac97_patch.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 5267b006c5c85..37c6be481c4a1 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -2208,7 +2208,8 @@ int patch_alc655(struct snd_ac97 * ac97)
 		val &= ~(1 << 1); /* Pin 47 is spdif input pin */
 	else { /* ALC655 */
 		if (ac97->subsystem_vendor == 0x1462 &&
-		    ac97->subsystem_device == 0x0131) /* MSI S270 laptop */
+		    (ac97->subsystem_device == 0x0131 || /* MSI S270 laptop */
+		     ac97->subsystem_device == 0x0161)) /* LG K1 Express */
 			val &= ~(1 << 1); /* Pin 47 is EAPD (for internal speaker) */
 		else
 			val |= (1 << 1); /* Pin 47 is spdif input pin */
-- 
GitLab


From 99ccc560b73ff7381153dc1391d18391373931d3 Mon Sep 17 00:00:00 2001
From: Guillaume Munch <diabo@free.fr>
Date: Wed, 16 Aug 2006 19:35:12 +0200
Subject: [PATCH 0936/1063] [ALSA] Add support for Sony Vaio AR 11B

This patch adds automatic detection for Sigmatel ID 7664,
the sound chip in the Sony Vaio AR 11B (European name).
- patch_stac7661 becomes patch_stac766x
- .id = 0x83847664 is added

Signed-off-by: Guillaume Munch <diabo@free.fr>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         |  6 ++--
 sound/pci/hda/patch_sigmatel.c                | 29 ++++++++++---------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 74be228596ade..d7e95f1445693 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -856,10 +856,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  3stack-dig	ditto with SPDIF
 	  laptop	3-jack with hp-jack automute
 	  laptop-dig	ditto with SPDIF
-	  auto		auto-confgi reading BIOS (default)
+	  auto		auto-config reading BIOS (default)
 
-	STAC7661(?)
-	  vaio		Setup for VAIO FE550G/SZ110
+	STAC7664/7661(?)
+	  vaio		Setup for VAIO FE550G/SZ110/AR11B
 
     If the default configuration doesn't work and one of the above
     matches with your device, report it together with the PCI
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index d572f030c3e95..7eaf755b014ba 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1449,10 +1449,10 @@ static int patch_stac9205(struct hda_codec *codec)
 }
 
 /*
- * STAC 7661(?) hack
+ * STAC 7661(?) and 7664 hack
  */
 
-/* static config for Sony VAIO FE550G */
+/* static config for Sony VAIO FE550G and Sony VAIO AR */
 static hda_nid_t vaio_dacs[] = { 0x2 };
 #define VAIO_HP_DAC	0x5
 static hda_nid_t vaio_adcs[] = { 0x8 /*,0x6*/ };
@@ -1552,7 +1552,7 @@ static struct snd_kcontrol_new vaio_mixer[] = {
 	{}
 };
 
-static struct hda_codec_ops stac7661_patch_ops = {
+static struct hda_codec_ops stac766x_patch_ops = {
 	.build_controls = stac92xx_build_controls,
 	.build_pcms = stac92xx_build_pcms,
 	.init = stac92xx_init,
@@ -1562,23 +1562,25 @@ static struct hda_codec_ops stac7661_patch_ops = {
 #endif
 };
 
-enum { STAC7661_VAIO };
+enum { STAC766x_VAIO };
 
-static struct hda_board_config stac7661_cfg_tbl[] = {
-	{ .modelname = "vaio", .config = STAC7661_VAIO },
+static struct hda_board_config stac766x_cfg_tbl[] = {
+	{ .modelname = "vaio", .config = STAC766x_VAIO },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81e6,
-	  .config = STAC7661_VAIO },
+	  .config = STAC766x_VAIO },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81ef,
-	  .config = STAC7661_VAIO },
+	  .config = STAC766x_VAIO },
+	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81fd,
+	  .config = STAC766x_VAIO },
 	{}
 };
 
-static int patch_stac7661(struct hda_codec *codec)
+static int patch_stac766x(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec;
 	int board_config;
 
-	board_config = snd_hda_check_board_config(codec, stac7661_cfg_tbl);
+	board_config = snd_hda_check_board_config(codec, stac766x_cfg_tbl);
 	if (board_config < 0)
 		/* unknown config, let generic-parser do its job... */
 		return snd_hda_parse_generic_codec(codec);
@@ -1589,7 +1591,7 @@ static int patch_stac7661(struct hda_codec *codec)
 
 	codec->spec = spec;
 	switch (board_config) {
-	case STAC7661_VAIO:
+	case STAC766x_VAIO:
 		spec->mixer = vaio_mixer;
 		spec->init = vaio_init;
 		spec->multiout.max_channels = 2;
@@ -1603,7 +1605,7 @@ static int patch_stac7661(struct hda_codec *codec)
 		break;
 	}
 
-	codec->patch_ops = stac7661_patch_ops;
+	codec->patch_ops = stac766x_patch_ops;
 	return 0;
 }
 
@@ -1635,7 +1637,7 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
  	{ .id = 0x83847627, .name = "STAC9271D", .patch = patch_stac927x },
  	{ .id = 0x83847628, .name = "STAC9274X5NH", .patch = patch_stac927x },
  	{ .id = 0x83847629, .name = "STAC9274D5NH", .patch = patch_stac927x },
- 	{ .id = 0x83847661, .name = "STAC7661", .patch = patch_stac7661 },
+ 	{ .id = 0x83847661, .name = "STAC7661", .patch = patch_stac766x },
  	{ .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 },
  	{ .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 },
  	{ .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 },
@@ -1644,5 +1646,6 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
  	{ .id = 0x838476a5, .name = "STAC9255D", .patch = patch_stac9205 },
  	{ .id = 0x838476a6, .name = "STAC9254", .patch = patch_stac9205 },
  	{ .id = 0x838476a7, .name = "STAC9254D", .patch = patch_stac9205 },
+ 	{ .id = 0x83847664, .name = "STAC7664", .patch = patch_stac766x },
 	{} /* terminator */
 };
-- 
GitLab


From 7cf0a95310f21f3c986288a483801b1d5694dee1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 17 Aug 2006 16:23:07 +0200
Subject: [PATCH 0937/1063] [ALSA] Fix compile errors with older gcc

Fixed compile errors with older gcc for initialization of a union.
sound/pci/ca0106/ca0106_mixer.c: At top level:
sound/pci/ca0106/ca0106_mixer.c:499: unknown field 'p' specified in initializer
sound/pci/ca0106/ca0106_mixer.c:499: warning: missing braces around initializer
sound/pci/ca0106/ca0106_mixer.c:499: warning: (near initialization for 'snd_ca0106_volume_ctls[0].tlv')

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ca0106/ca0106_mixer.c | 4 ++--
 sound/pci/emu10k1/p16v.c        | 2 +-
 sound/pci/hda/hda_local.h       | 2 +-
 sound/pci/hda/patch_analog.c    | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index 6d64438cecc92..9855f528ea781 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -478,7 +478,7 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol,
 	.info =	 snd_ca0106_volume_info,			\
 	.get =   snd_ca0106_volume_get,				\
 	.put =   snd_ca0106_volume_put,				\
-	.tlv.p = snd_ca0106_db_scale1,				\
+	.tlv = { .p = snd_ca0106_db_scale1 },			\
 	.private_value = ((chid) << 8) | (reg)			\
 }
 
@@ -490,7 +490,7 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol,
 	.info =  snd_ca0106_i2c_volume_info,			\
 	.get =   snd_ca0106_i2c_volume_get,			\
 	.put =   snd_ca0106_i2c_volume_put,			\
-	.tlv.p = snd_ca0106_db_scale2,				\
+	.tlv = { .p = snd_ca0106_db_scale2 },			\
 	.private_value = chid					\
 }
 
diff --git a/sound/pci/emu10k1/p16v.c b/sound/pci/emu10k1/p16v.c
index 1e44714b86236..4e0f95438f47e 100644
--- a/sound/pci/emu10k1/p16v.c
+++ b/sound/pci/emu10k1/p16v.c
@@ -794,7 +794,7 @@ static DECLARE_TLV_DB_SCALE(snd_p16v_db_scale1, -5175, 25, 1);
 	.info = snd_p16v_volume_info, \
 	.get = snd_p16v_volume_get, \
 	.put = snd_p16v_volume_put, \
-	.tlv.p = snd_p16v_db_scale1, \
+	.tlv = { .p = snd_p16v_db_scale1 },	\
 	.private_value = ((xreg) | ((xhl) << 8)) \
 }
 
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 0f0ae685a9c17..ff24266fe353b 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -36,7 +36,7 @@
 	  .info = snd_hda_mixer_amp_volume_info, \
 	  .get = snd_hda_mixer_amp_volume_get, \
 	  .put = snd_hda_mixer_amp_volume_put, \
-	  .tlv.c = snd_hda_mixer_amp_tlv, \
+	  .tlv = { .c = snd_hda_mixer_amp_tlv },		\
 	  .private_value = HDA_COMPOSE_AMP_VAL(nid, channel, xindex, direction) }
 /* stereo volume with index */
 #define HDA_CODEC_VOLUME_IDX(xname, xcidx, nid, xindex, direction) \
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 077f1ce01ee11..043256c67d1f6 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -507,7 +507,7 @@ static struct snd_kcontrol_new ad1986a_mixers[] = {
 		.info = ad1986a_pcm_amp_vol_info,
 		.get = ad1986a_pcm_amp_vol_get,
 		.put = ad1986a_pcm_amp_vol_put,
-		.tlv.c = ad1986a_pcm_amp_tlv,
+		.tlv = { .c = ad1986a_pcm_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT)
 	},
 	{
-- 
GitLab


From 5fc3a2b250716b34ca7c0128475bbedf795f1ac2 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Thu, 17 Aug 2006 16:58:45 +0200
Subject: [PATCH 0938/1063] [ALSA] sparc dbri: removal of unused struct members

It removes unused or rarely used members of defined structures.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 35 ++++++-----------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 4651ff5135137..66b4d45cf8bfd 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -34,7 +34,7 @@
  * (the second one is a monitor/tee pipe, valid only for serial input).
  *
  * The mmcodec is connected via the CHI bus and needs the data & some
- * parameters (volume, balance, output selection) timemultiplexed in 8 byte
+ * parameters (volume, output selection) timemultiplexed in 8 byte
  * chunks. It also has a control mode, which serves for audio format setting.
  *
  * Looking at the CS4215 data sheet it is easy to set up 2 or 4 codecs on
@@ -274,9 +274,7 @@ enum in_or_out { PIPEinput, PIPEoutput };
 
 struct dbri_pipe {
 	u32 sdp;		/* SDP command word */
-	enum in_or_out direction;
 	int nextpipe;		/* Next pipe in linked list */
-	int prevpipe;
 	int cycle;		/* Offset of timeslot (bits) */
 	int length;		/* Length of timeslot (bits) */
 	int first_desc;		/* Index of first descriptor */
@@ -300,13 +298,11 @@ struct dbri_streaminfo {
 	int pipe;		/* Data pipe used                 */
 	int left_gain;		/* mixer elements                 */
 	int right_gain;
-	int balance;
 };
 
 /* This structure holds the information for both chips (DBRI & CS4215) */
 struct snd_dbri {
 	struct snd_card *card;	/* ALSA card */
-	struct snd_pcm *pcm;
 
 	int regs_size, irq;	/* Needed for unload */
 	struct sbus_dev *sdev;	/* SBUS device info */
@@ -316,7 +312,6 @@ struct snd_dbri {
 	u32 dma_dvma;		/* DBRI visible DMA address */
 
 	void __iomem *regs;	/* dbri HW regs */
-	int dbri_version;	/* 'e' and up is OK */
 	int dbri_irqp;		/* intr queue pointer */
 	int wait_send;		/* sequence of command buffers send */
 	int wait_ackd;		/* sequence of command buffers acknowledged */
@@ -337,8 +332,6 @@ struct snd_dbri {
 
 #define DBRI_MAX_VOLUME		63	/* Output volume */
 #define DBRI_MAX_GAIN		15	/* Input gain */
-#define DBRI_RIGHT_BALANCE	255
-#define DBRI_MID_BALANCE	(DBRI_RIGHT_BALANCE >> 1)
 
 /* DBRI Reg0 - Status Control Register - defines. (Page 17) */
 #define D_P		(1<<15)	/* Program command & queue pointer valid */
@@ -841,10 +834,6 @@ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp)
 	dbri->pipes[pipe].sdp = sdp;
 	dbri->pipes[pipe].desc = -1;
 	dbri->pipes[pipe].first_desc = -1;
-	if (sdp & D_SDP_TO_SER)
-		dbri->pipes[pipe].direction = PIPEoutput;
-	else
-		dbri->pipes[pipe].direction = PIPEinput;
 
 	reset_pipe(dbri, pipe);
 }
@@ -1363,14 +1352,6 @@ static void cs4215_setdata(struct snd_dbri * dbri, int muted)
 		int left_gain = info->left_gain % 64;
 		int right_gain = info->right_gain % 64;
 
-		if (info->balance < DBRI_MID_BALANCE) {
-			right_gain *= info->balance;
-			right_gain /= DBRI_MID_BALANCE;
-		} else {
-			left_gain *= DBRI_RIGHT_BALANCE - info->balance;
-			left_gain /= DBRI_MID_BALANCE;
-		}
-
 		dbri->mm.data[0] &= ~0x3f;	/* Reset the volume bits */
 		dbri->mm.data[1] &= ~0x3f;
 		dbri->mm.data[0] |= (DBRI_MAX_VOLUME - left_gain);
@@ -2233,7 +2214,6 @@ static int __devinit snd_dbri_pcm(struct snd_dbri * dbri)
 	pcm->private_data = dbri;
 	pcm->info_flags = 0;
 	strcpy(pcm->name, dbri->card->shortname);
-	dbri->pcm = pcm;
 
 	if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm,
 			SNDRV_DMA_TYPE_CONTINUOUS,
@@ -2452,7 +2432,6 @@ static int __init snd_dbri_mixer(struct snd_dbri * dbri)
 	for (idx = DBRI_REC; idx < DBRI_NO_STREAMS; idx++) {
 		dbri->stream_info[idx].left_gain = 0;
 		dbri->stream_info[idx].right_gain = 0;
-		dbri->stream_info[idx].balance = DBRI_MID_BALANCE;
 	}
 
 	return 0;
@@ -2484,12 +2463,11 @@ static void dbri_debug_read(struct snd_info_entry * entry,
 			struct dbri_pipe *pptr = &dbri->pipes[pipe];
 			snd_iprintf(buffer,
 				    "Pipe %d: %s SDP=0x%x desc=%d, "
-				    "len=%d @ %d prev: %d next %d\n",
+				    "len=%d @ %d next %d\n",
 				    pipe,
-				    (pptr->direction ==
-				     PIPEinput ? "input" : "output"), pptr->sdp,
-				    pptr->desc, pptr->length, pptr->cycle,
-				    pptr->prevpipe, pptr->nextpipe);
+				   ((pptr->sdp & D_SDP_TO_SER) ? "output" : "input"),
+				    pptr->sdp, pptr->desc,
+				    pptr->length, pptr->cycle, pptr->nextpipe);
 		}
 	}
 }
@@ -2528,7 +2506,6 @@ static int __init snd_dbri_create(struct snd_card *card,
 	dbri->card = card;
 	dbri->sdev = sdev;
 	dbri->irq = irq->pri;
-	dbri->dbri_version = sdev->prom_name[9];
 
 	dbri->dma = sbus_alloc_consistent(sdev, sizeof(struct dbri_dma),
 					  &dbri->dma_dvma);
@@ -2648,7 +2625,7 @@ static int __init dbri_attach(int prom_node, struct sbus_dev *sdev)
 
 	printk(KERN_INFO "audio%d at %p (irq %d) is DBRI(%c)+CS4215(%d)\n",
 	       dev, dbri->regs,
-	       dbri->irq, dbri->dbri_version, dbri->mm.version);
+	       dbri->irq, sdev->prom_name[9], dbri->mm.version);
 	dev++;
 
 	return 0;
-- 
GitLab


From 16727d94adf9a1376775fd34d982778c7f3506df Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Thu, 17 Aug 2006 16:59:28 +0200
Subject: [PATCH 0939/1063] [ALSA] sparc dbri: removal of redundant volatile
 keywords

It removes redundant volatile keywords.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 66b4d45cf8bfd..405c603717b67 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -252,8 +252,8 @@ static struct {
 /* One transmit/receive descriptor */
 struct dbri_mem {
 	volatile __u32 word1;
-	volatile __u32 ba;	/* Transmit/Receive Buffer Address */
-	volatile __u32 nda;	/* Next Descriptor Address */
+	__u32 ba;	/* Transmit/Receive Buffer Address */
+	__u32 nda;	/* Next Descriptor Address */
 	volatile __u32 word4;
 };
 
@@ -308,7 +308,7 @@ struct snd_dbri {
 	struct sbus_dev *sdev;	/* SBUS device info */
 	spinlock_t lock;
 
-	volatile struct dbri_dma *dma;	/* Pointer to our DMA block */
+	struct dbri_dma *dma;	/* Pointer to our DMA block */
 	u32 dma_dvma;		/* DBRI visible DMA address */
 
 	void __iomem *regs;	/* dbri HW regs */
-- 
GitLab


From e05d696424f21b59eccff35d04938f0d6588cd94 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 17 Aug 2006 17:12:19 +0200
Subject: [PATCH 0940/1063] [ALSA] Fix some typos in snd-dummy driver

Fixed some typos in snd-dummy driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/drivers/dummy.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index ffeafaf2eccae..73b16134a4342 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -285,7 +285,7 @@ static struct snd_pcm_hardware snd_card_dummy_playback =
 	.channels_max =		USE_CHANNELS_MAX,
 	.buffer_bytes_max =	MAX_BUFFER_SIZE,
 	.period_bytes_min =	64,
-	.period_bytes_max =	MAX_BUFFER_SIZE,
+	.period_bytes_max =	MAX_PERIOD_SIZE,
 	.periods_min =		USE_PERIODS_MIN,
 	.periods_max =		USE_PERIODS_MAX,
 	.fifo_size =		0,
@@ -547,13 +547,13 @@ static struct snd_kcontrol_new snd_dummy_controls[] = {
 DUMMY_VOLUME("Master Volume", 0, MIXER_ADDR_MASTER),
 DUMMY_CAPSRC("Master Capture Switch", 0, MIXER_ADDR_MASTER),
 DUMMY_VOLUME("Synth Volume", 0, MIXER_ADDR_SYNTH),
-DUMMY_CAPSRC("Synth Capture Switch", 0, MIXER_ADDR_MASTER),
+DUMMY_CAPSRC("Synth Capture Switch", 0, MIXER_ADDR_SYNTH),
 DUMMY_VOLUME("Line Volume", 0, MIXER_ADDR_LINE),
-DUMMY_CAPSRC("Line Capture Switch", 0, MIXER_ADDR_MASTER),
+DUMMY_CAPSRC("Line Capture Switch", 0, MIXER_ADDR_LINE),
 DUMMY_VOLUME("Mic Volume", 0, MIXER_ADDR_MIC),
-DUMMY_CAPSRC("Mic Capture Switch", 0, MIXER_ADDR_MASTER),
+DUMMY_CAPSRC("Mic Capture Switch", 0, MIXER_ADDR_MIC),
 DUMMY_VOLUME("CD Volume", 0, MIXER_ADDR_CD),
-DUMMY_CAPSRC("CD Capture Switch", 0, MIXER_ADDR_MASTER)
+DUMMY_CAPSRC("CD Capture Switch", 0, MIXER_ADDR_CD)
 };
 
 static int __init snd_card_dummy_new_mixer(struct snd_dummy *dummy)
-- 
GitLab


From c256652466127872f1b2e510431dc25524ba40ba Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 17 Aug 2006 18:21:36 +0200
Subject: [PATCH 0941/1063] [ALSA] Add missing TLV callbacks for HD-audio
 codecs

Added missing TLV callbacks for HD-audio codec support.
Also cleaned up the tlv callback for ad1986a (no mutex is needed there).

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_analog.c   | 16 ++--------------
 sound/pci/hda/patch_realtek.c  |  1 +
 sound/pci/hda/patch_sigmatel.c |  1 +
 3 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 043256c67d1f6..71abc2aa61a60 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -452,19 +452,6 @@ static int ad1986a_pcm_amp_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl
 	return change;
 }
 
-static int ad1986a_pcm_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag,
-			       unsigned int size, unsigned int __user *_tlv)
-{
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	struct ad198x_spec *ad = codec->spec;
-
-	mutex_lock(&ad->amp_mutex);
-	snd_hda_mixer_amp_tlv(kcontrol, op_flag, size, _tlv);
-	mutex_unlock(&ad->amp_mutex);
-	return 0;
-}
-
-
 #define ad1986a_pcm_amp_sw_info		snd_hda_mixer_amp_switch_info
 
 static int ad1986a_pcm_amp_sw_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
@@ -507,7 +494,7 @@ static struct snd_kcontrol_new ad1986a_mixers[] = {
 		.info = ad1986a_pcm_amp_vol_info,
 		.get = ad1986a_pcm_amp_vol_get,
 		.put = ad1986a_pcm_amp_vol_put,
-		.tlv = { .c = ad1986a_pcm_amp_tlv },
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT)
 	},
 	{
@@ -654,6 +641,7 @@ static struct snd_kcontrol_new ad1986a_laptop_eapd_mixers[] = {
 		.info = snd_hda_mixer_amp_volume_info,
 		.get = snd_hda_mixer_amp_volume_get,
 		.put = ad1986a_laptop_master_vol_put,
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(0x1a, 3, 0, HDA_OUTPUT),
 	},
 	{
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index f857e963ff452..79d361260b277 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5540,6 +5540,7 @@ static struct snd_kcontrol_new alc262_fujitsu_mixer[] = {
 		.info = snd_hda_mixer_amp_volume_info,
 		.get = snd_hda_mixer_amp_volume_get,
 		.put = alc262_fujitsu_master_vol_put,
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(0x0c, 3, 0, HDA_OUTPUT),
 	},
 	{
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 7eaf755b014ba..887b52e96ec43 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1528,6 +1528,7 @@ static struct snd_kcontrol_new vaio_mixer[] = {
 		.info = snd_hda_mixer_amp_volume_info,
 		.get = snd_hda_mixer_amp_volume_get,
 		.put = vaio_master_vol_put,
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT),
 	},
 	{
-- 
GitLab


From adf75dcab1deb9625538f74906508c1f6136fd98 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Fri, 18 Aug 2006 09:03:45 +0200
Subject: [PATCH 0942/1063] [ALSA] riptide: fix compile errors with older gcc

Change the syntax of a union initialization that is not understood by
gcc 2.x.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/riptide/riptide.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index f435fcd6dca99..fe210c853442c 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -673,9 +673,13 @@ static struct lbuspath lbus_rec_path = {
 #define FIRMWARE_VERSIONS 1
 static union firmware_version firmware_versions[] = {
 	{
-	 .firmware.ASIC = 3,.firmware.CODEC = 2,
-	 .firmware.AUXDSP = 3,.firmware.PROG = 773,
-	 },
+		.firmware = {
+			.ASIC = 3,
+			.CODEC = 2,
+			.AUXDSP = 3,
+			.PROG = 773,
+		},
+	},
 };
 
 static u32 atoh(unsigned char *in, unsigned int len)
-- 
GitLab


From 2aaeee8bd1cf51b6ed7c751a8472cb77f3ddc642 Mon Sep 17 00:00:00 2001
From: Tobin Davis <tobinx.b.davis@intel.com>
Date: Mon, 21 Aug 2006 19:01:12 +0200
Subject: [PATCH 0943/1063] [ALSA] hda-codec - add missing device ids

This patch adds missing device ids for Intel 915 and D102GGC
motherboards.

Signed-off-by: Tobin Davis <tobinx.b.davis@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_realtek.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 79d361260b277..53aa57f5a1a11 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2143,7 +2143,10 @@ static struct hda_board_config alc880_cfg_tbl[] = {
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe20f, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe210, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe211, .config = ALC880_3ST },
+	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe212, .config = ALC880_3ST },
+	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe213, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe214, .config = ALC880_3ST },
+	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe234, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe302, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe303, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe304, .config = ALC880_3ST },
@@ -5058,6 +5061,8 @@ static struct hda_board_config alc883_cfg_tbl[] = {
 	{ .modelname = "3stack-6ch", .config = ALC883_3ST_6ch },
 	{ .pci_subvendor = 0x108e, .pci_subdevice = 0x534d,
 	  .config = ALC883_3ST_6ch },
+        { .pci_subvendor = 0x8086, .pci_subdevice = 0xd601,
+          .config = ALC883_3ST_6ch }, /* D102GGC */
 	{ .modelname = "6stack-dig", .config = ALC883_6ST_DIG },
 	{ .pci_subvendor = 0x1462, .pci_subdevice = 0x6668,
 	  .config = ALC883_6ST_DIG }, /* MSI  */
-- 
GitLab


From 68a6abd97f8b9aa072e36b1901531e7bb69b6efc Mon Sep 17 00:00:00 2001
From: Tobin Davis <tobinx.b.davis@intel.com>
Date: Mon, 21 Aug 2006 19:02:10 +0200
Subject: [PATCH 0944/1063] [ALSA] hda-codec - Fix headphone output for some
 Intel 945 systems

This patch enables headphone output at initialization for Intel
945 based systems that don't have proper detection circuitry.

Signed-off-by: Tobin Davis <tobinx.b.davis@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 887b52e96ec43..d709389c4f615 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1143,6 +1143,8 @@ static int stac92xx_init(struct hda_codec *codec)
 				STAC_UNSOL_ENABLE);
 		/* fake event to set up pins */
 		codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26);
+		/* enable the headphones by default.  If/when unsol_event detection works, this will be ignored */
+		stac92xx_auto_init_hp_out(codec);
 	} else {
 		stac92xx_auto_init_multi_out(codec);
 		stac92xx_auto_init_hp_out(codec);
-- 
GitLab


From 79cf0d376fbf1cdf8e9c7c70c3a7c7434a716879 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 22 Aug 2006 16:35:19 +0200
Subject: [PATCH 0945/1063] [ALSA] Fix missing selection of CONFIG_VIDEO_DEV
 from SND_FM801_TEA575X

Fixed the missing selection of CONFIG_VIDEO_DEV from SND_FM801_TEA575X.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index e49c0fe21b0d1..dffb6be768008 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -475,6 +475,7 @@ config SND_FM801_TEA575X
 	depends on SND_FM801_TEA575X_BOOL
 	default SND_FM801
 	select VIDEO_V4L1
+	select VIDEO_DEV
 
 config SND_HDA_INTEL
 	tristate "Intel HD Audio"
-- 
GitLab


From 948a4db217235ba51c41d8e7c2ffcf9432e57274 Mon Sep 17 00:00:00 2001
From: Tobin Davis <tobinx.b.davis@intel.com>
Date: Tue, 22 Aug 2006 19:43:46 +0200
Subject: [PATCH 0946/1063] [ALSA] hda-codec - add missing device ids for Intel
 945 boards

This patch adds missing device ids for Intel 945 motherboards.


Signed-off-by: Tobin Davis <tobinx.b.davis@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 66 ++++++++++++++++++++++++++++++----
 1 file changed, 60 insertions(+), 6 deletions(-)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index d709389c4f615..7b29288690cb3 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -396,19 +396,53 @@ static struct hda_board_config stac922x_cfg_tbl[] = {
 	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2668,	/* DFI LanParty */
 	  .config = STAC_REF },		/* SigmaTel reference board */
+         /* Intel 945G based systems */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0101,
 	  .config = STAC_D945GTP3 },	/* Intel D945GTP - 3 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0202,
-	  .config = STAC_D945GTP3 },	/* Intel D945GNT - 3 Stack, 9221 A1 */
+	  .config = STAC_D945GTP3 },	/* Intel D945GNT - 3 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x0b0b,
-	  .config = STAC_D945GTP3 },	/* Intel D945PSN - 3 Stack, 9221 A1 */
+	  .pci_subdevice = 0x0606,
+	  .config = STAC_D945GTP3 },	/* Intel D945GTP - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0601,
+	  .config = STAC_D945GTP3 },	/* Intel D945GTP - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0111,
+	  .config = STAC_D945GTP3 },	/* Intel D945GZP - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1115,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1116,
+	  .config = STAC_D945GTP3 },	/* Intel D945GBO - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1117,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1118,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1119,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x8826,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x5049,
+	  .config = STAC_D945GTP3 },	/* Intel D945GCZ - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x5055,
+	  .config = STAC_D945GTP3 },	/* Intel D945GCZ - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x5048,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPB - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0110,
+	  .config = STAC_D945GTP3 },	/* Intel D945GLR - 3 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x0707,
-	  .config = STAC_D945GTP5 },	/* Intel D945PSV - 5 Stack */
-       { .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0404,
 	  .config = STAC_D945GTP5 },	/* Intel D945GTP - 5 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
@@ -420,6 +454,26 @@ static struct hda_board_config stac922x_cfg_tbl[] = {
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0417,
 	  .config = STAC_D945GTP5 },	/* Intel D975XBK - 5 Stack */
+	  /* Intel 945P based systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0b0b,
+	  .config = STAC_D945GTP3 },	/* Intel D945PSN - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0112,
+	  .config = STAC_D945GTP3 },	/* Intel D945PLN - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0d0d,
+	  .config = STAC_D945GTP3 },	/* Intel D945PLM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0909,
+	  .config = STAC_D945GTP3 },	/* Intel D945PAW - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0505,
+	  .config = STAC_D945GTP3 },	/* Intel D945PLM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0707,
+	  .config = STAC_D945GTP5 },	/* Intel D945PSV - 5 Stack */
+	  /* other systems  */
 	{ .pci_subvendor = 0x8384,
 	  .pci_subdevice = 0x7680,
 	  .config = STAC_MACMINI },	/* Apple Mac Mini (early 2006) */
-- 
GitLab


From 81d3dbde76eedcd3ede8a73eb72790d67fa254a9 Mon Sep 17 00:00:00 2001
From: Tobin Davis <tobinx.b.davis@intel.com>
Date: Tue, 22 Aug 2006 19:44:45 +0200
Subject: [PATCH 0947/1063] [ALSA] hda-codec - Add support for new Intel boards
 with Stac9227 codec

This patch adds full 5.1 audio support for Intel boards
with the SigmaTel 9227 codec chip (946, 963, 965 series).


Signed-off-by: Tobin Davis <tobinx.b.davis@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 126 ++++++++++++++++++++++++---------
 1 file changed, 92 insertions(+), 34 deletions(-)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 7b29288690cb3..73ca566e9eb75 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -377,18 +377,11 @@ static unsigned int d945gtp5_pin_configs[10] = {
 	0x02a19320, 0x40000100,
 };
 
-static unsigned int d965_2112_pin_configs[10] = {
-	0x0221401f, 0x40000100, 0x40000100, 0x01014011,
-	0x01a19021, 0x01813024, 0x01452130, 0x40000100,
-	0x02a19320, 0x40000100,
-};
-
 static unsigned int *stac922x_brd_tbl[STAC_922X_MODELS] = {
 	[STAC_REF] =	ref922x_pin_configs,
 	[STAC_D945GTP3] = d945gtp3_pin_configs,
 	[STAC_D945GTP5] = d945gtp5_pin_configs,
 	[STAC_MACMINI] = d945gtp5_pin_configs,
-	[STAC_D965_2112] = d965_2112_pin_configs,
 };
 
 static struct hda_board_config stac922x_cfg_tbl[] = {
@@ -493,8 +486,16 @@ static unsigned int ref927x_pin_configs[14] = {
 	0x01c41030, 0x40000100,
 };
 
+static unsigned int d965_2112_pin_configs[14] = {
+	0x0221401f, 0x02a19120, 0x40000100, 0x01014011,
+	0x01a19021, 0x01813024, 0x40000100, 0x40000100,
+	0x40000100, 0x40000100, 0x40000100, 0x40000100,
+	0x40000100, 0x40000100
+};
+
 static unsigned int *stac927x_brd_tbl[] = {
-	ref927x_pin_configs,
+	[STAC_REF] =	ref927x_pin_configs,
+	[STAC_D965_2112] = d965_2112_pin_configs,
 };
 
 static struct hda_board_config stac927x_cfg_tbl[] = {
@@ -502,6 +503,66 @@ static struct hda_board_config stac927x_cfg_tbl[] = {
 	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2668,	/* DFI LanParty */
 	  .config = STAC_REF },		/* SigmaTel reference board */
+	/* SigmaTel 9227 reference board */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x284b,
+	  .config = STAC_D965_284B },
+	 /* Intel 946 based systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x3d01,
+	  .config = STAC_D965_2112 }, /* D946  configuration */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0xa301,
+	  .config = STAC_D965_2112 }, /* Intel D946GZT - 3 stack  */
+	/* 965 based systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2116,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2115,
+	  .config = STAC_D965_2112 }, /* Intel DQ965WC - 3 Stack  */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2114,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2113,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2112,
+	  .config = STAC_D965_2112 }, /* Intel DG965MS - 3 Stack  */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2111,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2110,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2009,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2008,
+	  .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stack  */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2007,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2006,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2005,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2004,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2003,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2002,
+	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2001,
+	  .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stackg */
 	{} /* terminator */
 };
 
@@ -1391,25 +1452,6 @@ static int patch_stac922x(struct hda_codec *codec)
 
 	spec->multiout.dac_nids = spec->dac_nids;
 	
-	switch (spec->board_config) {
-	case STAC_D965_2112:
-		spec->adc_nids = stac9227_adc_nids;
-		spec->mux_nids = stac9227_mux_nids;
-#if 0
-		spec->multiout.dac_nids = d965_2112_dac_nids;
-		spec->multiout.num_dacs = ARRAY_SIZE(d965_2112_dac_nids);
-#endif
-		spec->init = d965_2112_core_init;
-		spec->mixer = stac9227_mixer;
-		break;
-	case STAC_D965_284B:
-		spec->adc_nids = stac9227_adc_nids;
-		spec->mux_nids = stac9227_mux_nids;
-		spec->init = stac9227_core_init;
-		spec->mixer = stac9227_mixer;
-		break;
-	}
-
 	err = stac92xx_parse_auto_config(codec, 0x08, 0x09);
 	if (err < 0) {
 		stac92xx_free(codec);
@@ -1437,19 +1479,35 @@ static int patch_stac927x(struct hda_codec *codec)
 	spec->board_config = snd_hda_check_board_config(codec, stac927x_cfg_tbl);
 	if (spec->board_config < 0)
                 snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC927x, using BIOS defaults\n");
-	else {
+	else if (stac927x_brd_tbl[spec->board_config] != NULL) {
 		spec->num_pins = 14;
 		spec->pin_nids = stac927x_pin_nids;
 		spec->pin_configs = stac927x_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
 
-	spec->adc_nids = stac927x_adc_nids;
-	spec->mux_nids = stac927x_mux_nids;
-	spec->num_muxes = 3;
-
-	spec->init = stac927x_core_init;
-	spec->mixer = stac927x_mixer;
+	switch (spec->board_config) {
+	case STAC_D965_2112:
+		spec->adc_nids = stac927x_adc_nids;
+		spec->mux_nids = stac927x_mux_nids;
+		spec->num_muxes = 3;
+		spec->init = d965_2112_core_init;
+		spec->mixer = stac9227_mixer;
+		break;
+	case STAC_D965_284B:
+		spec->adc_nids = stac9227_adc_nids;
+		spec->mux_nids = stac9227_mux_nids;
+		spec->num_muxes = 2;
+		spec->init = stac9227_core_init;
+		spec->mixer = stac9227_mixer;
+		break;
+	default:
+		spec->adc_nids = stac927x_adc_nids;
+		spec->mux_nids = stac927x_mux_nids;
+		spec->num_muxes = 3;
+		spec->init = stac927x_core_init;
+		spec->mixer = stac927x_mixer;
+	}
 
 	spec->multiout.dac_nids = spec->dac_nids;
 
-- 
GitLab


From e96224ae974844d3f4e84f927ca4b17f1a2079a3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 21 Aug 2006 17:57:44 +0200
Subject: [PATCH 0948/1063] [ALSA] hda-intel - Switch to polling mode for
 CORB/RIRB communication

Automatically switch to polling mode for CORB/RIRB communication
if the IRQ-driven mode does not seem to be working well.  If
polling mode still doesn't work, switch to single_cmd mode as a
fallback.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_intel.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 79d63c99f0924..ce75e07aaa2a7 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -332,6 +332,7 @@ struct azx {
 	int position_fix;
 	unsigned int initialized: 1;
 	unsigned int single_cmd: 1;
+	unsigned int polling_mode: 1;
 };
 
 /* driver types */
@@ -518,8 +519,23 @@ static unsigned int azx_rirb_get_response(struct hda_codec *codec)
 	struct azx *chip = codec->bus->private_data;
 	int timeout = 50;
 
-	while (chip->rirb.cmds) {
+	for (;;) {
+		if (chip->polling_mode) {
+			spin_lock_irq(&chip->reg_lock);
+			azx_update_rirb(chip);
+			spin_unlock_irq(&chip->reg_lock);
+		}
+		if (! chip->rirb.cmds)
+			break;
 		if (! --timeout) {
+			if (! chip->polling_mode) {
+				snd_printk(KERN_WARNING "hda_intel: "
+					   "azx_get_response timeout, "
+					   "switching to polling mode...\n");
+				chip->polling_mode = 1;
+				timeout = 50;
+				continue;
+			}
 			snd_printk(KERN_ERR
 				   "hda_intel: azx_get_response timeout, "
 				   "switching to single_cmd mode...\n");
-- 
GitLab


From c27354460b1e0cbcd9dfc9232a76bd56c46dce89 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 21 Aug 2006 19:27:35 +0200
Subject: [PATCH 0949/1063] [ALSA] sparc dbri: removal of dri_desc struct

The structure is largely redundant.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 405c603717b67..0b8545ad3e9a0 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -250,6 +250,7 @@ static struct {
 #define DBRI_NO_STREAMS	2
 
 /* One transmit/receive descriptor */
+/* When ba != 0 descriptor is used */
 struct dbri_mem {
 	volatile __u32 word1;
 	__u32 ba;	/* Transmit/Receive Buffer Address */
@@ -282,12 +283,6 @@ struct dbri_pipe {
 	volatile __u32 *recv_fixed_ptr;	/* Ptr to receive fixed data */
 };
 
-struct dbri_desc {
-	int inuse;		/* Boolean flag */
-	int next;		/* Index of next desc, or -1 */
-	unsigned int len;
-};
-
 /* Per stream (playback or record) information */
 struct dbri_streaminfo {
 	struct snd_pcm_substream *substream;
@@ -317,7 +312,7 @@ struct snd_dbri {
 	int wait_ackd;		/* sequence of command buffers acknowledged */
 
 	struct dbri_pipe pipes[DBRI_NO_PIPES];	/* DBRI's 32 data pipes */
-	struct dbri_desc descs[DBRI_NO_DESCS];
+	int next_desc[DBRI_NO_DESCS];		/* Index of next desc, or -1 */
 
 	int chi_in_pipe;
 	int chi_out_pipe;
@@ -803,8 +798,8 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe)
 
 	desc = dbri->pipes[pipe].first_desc;
 	while (desc != -1) {
-		dbri->descs[desc].inuse = 0;
-		desc = dbri->descs[desc].next;
+		dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0;
+		desc = dbri->next_desc[desc];
 	}
 
 	dbri->pipes[pipe].desc = -1;
@@ -1093,7 +1088,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 		int mylen;
 
 		for (; desc < DBRI_NO_DESCS; desc++) {
-			if (!dbri->descs[desc].inuse)
+			if (!dbri->dma->desc[desc].ba)
 				break;
 		}
 		if (desc == DBRI_NO_DESCS) {
@@ -1110,19 +1105,16 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 			mylen = period;
 		}
 
-		dbri->descs[desc].inuse = 1;
-		dbri->descs[desc].next = -1;
+		dbri->next_desc[desc] = -1;
 		dbri->dma->desc[desc].ba = dvma_buffer;
 		dbri->dma->desc[desc].nda = 0;
 
 		if (streamno == DBRI_PLAY) {
-			dbri->descs[desc].len = mylen;
 			dbri->dma->desc[desc].word1 = DBRI_TD_CNT(mylen);
 			dbri->dma->desc[desc].word4 = 0;
 			if (first_desc != -1)
 				dbri->dma->desc[desc].word1 |= DBRI_TD_M;
 		} else {
-			dbri->descs[desc].len = 0;
 			dbri->dma->desc[desc].word1 = 0;
 			dbri->dma->desc[desc].word4 =
 			    DBRI_RD_B | DBRI_RD_BCNT(mylen);
@@ -1131,7 +1123,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 		if (first_desc == -1) {
 			first_desc = desc;
 		} else {
-			dbri->descs[last_desc].next = desc;
+			dbri->next_desc[last_desc] = desc;
 			dbri->dma->desc[last_desc].nda =
 			    dbri->dma_dvma + dbri_dma_off(desc, desc);
 		}
@@ -1154,7 +1146,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 	dbri->pipes[info->pipe].first_desc = first_desc;
 	dbri->pipes[info->pipe].desc = first_desc;
 
-	for (desc = first_desc; desc != -1; desc = dbri->descs[desc].next) {
+	for (desc = first_desc; desc != -1; desc = dbri->next_desc[desc]) {
 		dprintk(D_DESC, "DESC %d: %08x %08x %08x %08x\n",
 			desc,
 			dbri->dma->desc[desc].word1,
@@ -1747,6 +1739,7 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe)
 	struct dbri_streaminfo *info;
 	int td;
 	int status;
+	int len;
 
 	info = &dbri->stream_info[DBRI_PLAY];
 
@@ -1765,11 +1758,12 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe)
 		dprintk(D_INT, "TD %d, status 0x%02x\n", td, status);
 
 		dbri->dma->desc[td].word4 = 0;	/* Reset it for next time. */
-		info->offset += dbri->descs[td].len;
-		info->left -= dbri->descs[td].len;
+		len = DBRI_RD_CNT(dbri->dma->desc[td].word1);
+		info->offset += len;
+		info->left -= len;
 
 		/* On the last TD, transmit them all again. */
-		if (dbri->descs[td].next == -1) {
+		if (dbri->next_desc[td] == -1) {
 			if (info->left > 0) {
 				printk(KERN_WARNING
 				       "%d bytes left after last transfer.\n",
@@ -1779,7 +1773,7 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe)
 			tasklet_schedule(&xmit_descs_task);
 		}
 
-		td = dbri->descs[td].next;
+		td = dbri->next_desc[td];
 		dbri->pipes[pipe].desc = td;
 	}
 
@@ -1803,8 +1797,8 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe)
 		return;
 	}
 
-	dbri->descs[rd].inuse = 0;
-	dbri->pipes[pipe].desc = dbri->descs[rd].next;
+	dbri->dma->desc[rd].ba = 0;
+	dbri->pipes[pipe].desc = dbri->next_desc[rd];
 	status = dbri->dma->desc[rd].word1;
 	dbri->dma->desc[rd].word1 = 0;	/* Reset it for next time. */
 
@@ -1818,7 +1812,7 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe)
 		rd, DBRI_RD_STATUS(status), DBRI_RD_CNT(status));
 
 	/* On the last TD, transmit them all again. */
-	if (dbri->descs[rd].next == -1) {
+	if (dbri->next_desc[rd] == -1) {
 		if (info->left > info->size) {
 			printk(KERN_WARNING
 			       "%d bytes recorded in %d size buffer.\n",
-- 
GitLab


From 470f1f1a1c2597fab98339ab0966dbf602d604f0 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 21 Aug 2006 19:28:16 +0200
Subject: [PATCH 0950/1063] [ALSA] sparc dbri: more driver cleanup

A general cleanup and removal of redundant code.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 45 ++++++++++++++++-----------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 0b8545ad3e9a0..6fc37c9cb4fcc 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -241,9 +241,7 @@ static struct {
 #define DBRI_INT_BLK	64
 #define DBRI_NO_DESCS	64
 #define DBRI_NO_PIPES	32
-
-#define DBRI_MM_ONB	1
-#define DBRI_MM_SB	2
+#define DBRI_MAX_PIPE	(DBRI_NO_PIPES - 1)
 
 #define DBRI_REC	0
 #define DBRI_PLAY	1
@@ -650,10 +648,6 @@ static volatile s32 *dbri_cmdlock(struct snd_dbri * dbri, enum dbri_lock get)
 	/* Delay if previous commands are still being processed */
 	while ((--maxloops) > 0 && (dbri->wait_send != dbri->wait_ackd)) {
 		msleep_interruptible(1);
-		/* If dbri_cmdlock() got called from inside the
-		 * interrupt handler, this will do the processing.
-		 */
-		dbri_process_interrupt_buffer(dbri);
 	}
 	if (maxloops == 0) {
 		printk(KERN_ERR "DBRI: Chip never completed command buffer %d\n",
@@ -780,7 +774,7 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe)
 	int desc;
 	volatile int *cmd;
 
-	if (pipe < 0 || pipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: reset_pipe called with illegal pipe number\n");
 		return;
 	}
@@ -806,10 +800,9 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe)
 	dbri->pipes[pipe].first_desc = -1;
 }
 
-/* FIXME: direction as an argument? */
 static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp)
 {
-	if (pipe < 0 || pipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: setup_pipe called with illegal pipe number\n");
 		return;
 	}
@@ -843,7 +836,7 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe,
 	int prevpipe;
 	int nextpipe;
 
-	if (pipe < 0 || pipe > 31 || basepipe < 0 || basepipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE || basepipe < 0 || basepipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR 
 		    "DBRI: link_time_slot called with illegal pipe number\n");
 		return;
@@ -931,7 +924,8 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
 	volatile s32 *cmd;
 	int val;
 
-	if (pipe < 0 || pipe > 31 || prevpipe < 0 || prevpipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE 
+			|| prevpipe < 0 || prevpipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR 
 		    "DBRI: unlink_time_slot called with illegal pipe number\n");
 		return;
@@ -972,7 +966,7 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data)
 {
 	volatile s32 *cmd;
 
-	if (pipe < 16 || pipe > 31) {
+	if (pipe < 16 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: xmit_fixed: Illegal pipe number\n");
 		return;
 	}
@@ -1007,7 +1001,7 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data)
 
 static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr)
 {
-	if (pipe < 16 || pipe > 31) {
+	if (pipe < 16 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: recv_fixed called with illegal pipe number\n");
 		return;
 	}
@@ -1182,20 +1176,14 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla
 
 		/* Set CHI Anchor: Pipe 16 */
 
-		val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(16) | D_PIPE(16);
+		val = D_DTS_VO | D_DTS_VI | D_DTS_INS 
+			| D_DTS_PRVIN(16) | D_PIPE(16) | D_DTS_PRVOUT(16);
 		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
 		*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
-		*(cmd++) = 0;
-
-		val = D_DTS_VO | D_DTS_INS | D_DTS_PRVOUT(16) | D_PIPE(16);
-		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
-		*(cmd++) = 0;
 		*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
 
 		dbri->pipes[16].sdp = 1;
 		dbri->pipes[16].nextpipe = 16;
-		dbri->chi_in_pipe = 16;
-		dbri->chi_out_pipe = 16;
 
 #if 0
 		chi_initialized++;
@@ -1214,11 +1202,10 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla
 					 16, dbri->pipes[pipe].nextpipe);
 		}
 
-		dbri->chi_in_pipe = 16;
-		dbri->chi_out_pipe = 16;
-
 		cmd = dbri_cmdlock(dbri, GetLock);
 	}
+	dbri->chi_in_pipe = 16;
+	dbri->chi_out_pipe = 16;
 
 	if (master_or_slave == CHIslave) {
 		/* Setup DBRI for CHI Slave - receive clock, frame sync (FS)
@@ -1341,8 +1328,8 @@ static void cs4215_setdata(struct snd_dbri * dbri, int muted)
 	} else {
 		/* Start by setting the playback attenuation. */
 		struct dbri_streaminfo *info = &dbri->stream_info[DBRI_PLAY];
-		int left_gain = info->left_gain % 64;
-		int right_gain = info->right_gain % 64;
+		int left_gain = info->left_gain & 0x3f;
+		int right_gain = info->right_gain & 0x3f;
 
 		dbri->mm.data[0] &= ~0x3f;	/* Reset the volume bits */
 		dbri->mm.data[1] &= ~0x3f;
@@ -1351,8 +1338,8 @@ static void cs4215_setdata(struct snd_dbri * dbri, int muted)
 
 		/* Now set the recording gain. */
 		info = &dbri->stream_info[DBRI_REC];
-		left_gain = info->left_gain % 16;
-		right_gain = info->right_gain % 16;
+		left_gain = info->left_gain & 0xf;
+		right_gain = info->right_gain & 0xf;
 		dbri->mm.data[2] |= CS4215_LG(left_gain);
 		dbri->mm.data[3] |= CS4215_RG(right_gain);
 	}
-- 
GitLab


From d1fdf07e22efdb9fa53739c0f0fec1f6b24c2056 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 21 Aug 2006 19:29:18 +0200
Subject: [PATCH 0951/1063] [ALSA] sparc dbri: fixed setting of burst size
 after reset

Set the DBRI's burst size in the proper place. The size must be set
after each chip reset, so the setup is moved from dbri_initialize()
into dbri_reset().

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 6fc37c9cb4fcc..810f8b99a60eb 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -690,6 +690,7 @@ static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd)
 static void dbri_reset(struct snd_dbri * dbri)
 {
 	int i;
+	u32 tmp;
 
 	dprintk(D_GEN, "reset 0:%x 2:%x 8:%x 9:%x\n",
 		sbus_readl(dbri->regs + REG0),
@@ -699,13 +700,20 @@ static void dbri_reset(struct snd_dbri * dbri)
 	sbus_writel(D_R, dbri->regs + REG0);	/* Soft Reset */
 	for (i = 0; (sbus_readl(dbri->regs + REG0) & D_R) && i < 64; i++)
 		udelay(10);
+
+	/* A brute approach - DBRI falls back to working burst size by itself
+	 * On SS20 D_S does not work, so do not try so high. */
+	tmp = sbus_readl(dbri->regs + REG0);
+	tmp |= D_G | D_E;
+	tmp &= ~D_S;
+	sbus_writel(tmp, dbri->regs + REG0);
 }
 
 /* Lock must not be held before calling this */
 static void dbri_initialize(struct snd_dbri * dbri)
 {
 	volatile s32 *cmd;
-	u32 dma_addr, tmp;
+	u32 dma_addr;
 	unsigned long flags;
 	int n;
 
@@ -721,13 +729,6 @@ static void dbri_initialize(struct snd_dbri * dbri)
 	for (n = 0; n < DBRI_NO_PIPES; n++)
 		dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1;
 
-	/* A brute approach - DBRI falls back to working burst size by itself
-	 * On SS20 D_S does not work, so do not try so high. */
-	tmp = sbus_readl(dbri->regs + REG0);
-	tmp |= D_G | D_E;
-	tmp &= ~D_S;
-	sbus_writel(tmp, dbri->regs + REG0);
-
 	/*
 	 * Initialize the interrupt ringbuffer.
 	 */
-- 
GitLab


From 294a30dc8cf13c492913f2ed3a6540bdf6e84e39 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 21 Aug 2006 19:29:59 +0200
Subject: [PATCH 0952/1063] [ALSA] sparc dbri: simplifed linking time slot
 function

Simplified routines to link and unlink time slots.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 125 +++++++++++++++------------------------------
 1 file changed, 41 insertions(+), 84 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 810f8b99a60eb..5696f792e3d1a 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -274,7 +274,6 @@ enum in_or_out { PIPEinput, PIPEoutput };
 struct dbri_pipe {
 	u32 sdp;		/* SDP command word */
 	int nextpipe;		/* Next pipe in linked list */
-	int cycle;		/* Offset of timeslot (bits) */
 	int length;		/* Length of timeslot (bits) */
 	int first_desc;		/* Index of first descriptor */
 	int desc;		/* Index of active descriptor */
@@ -312,8 +311,6 @@ struct snd_dbri {
 	struct dbri_pipe pipes[DBRI_NO_PIPES];	/* DBRI's 32 data pipes */
 	int next_desc[DBRI_NO_DESCS];		/* Index of next desc, or -1 */
 
-	int chi_in_pipe;
-	int chi_out_pipe;
 	int chi_bpf;
 
 	struct cs4215 mm;	/* mmcodec special info */
@@ -827,92 +824,55 @@ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp)
 	reset_pipe(dbri, pipe);
 }
 
-/* FIXME: direction not needed */
 static void link_time_slot(struct snd_dbri * dbri, int pipe,
-			   enum in_or_out direction, int basepipe,
+			   int prevpipe, int nextpipe,
 			   int length, int cycle)
 {
 	volatile s32 *cmd;
 	int val;
-	int prevpipe;
-	int nextpipe;
 
-	if (pipe < 0 || pipe > DBRI_MAX_PIPE || basepipe < 0 || basepipe > DBRI_MAX_PIPE) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE 
+			|| prevpipe < 0 || prevpipe > DBRI_MAX_PIPE
+			|| nextpipe < 0 || nextpipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR 
 		    "DBRI: link_time_slot called with illegal pipe number\n");
 		return;
 	}
 
-	if (dbri->pipes[pipe].sdp == 0 || dbri->pipes[basepipe].sdp == 0) {
+	if (dbri->pipes[pipe].sdp == 0 
+			|| dbri->pipes[prevpipe].sdp == 0
+			|| dbri->pipes[nextpipe].sdp == 0) {
 		printk(KERN_ERR "DBRI: link_time_slot called on uninitialized pipe\n");
 		return;
 	}
 
-	/* Deal with CHI special case:
-	 * "If transmission on edges 0 or 1 is desired, then cycle n
-	 *  (where n = # of bit times per frame...) must be used."
-	 *                  - DBRI data sheet, page 11
-	 */
-	if (basepipe == 16 && direction == PIPEoutput && cycle == 0)
-		cycle = dbri->chi_bpf;
-
-	if (basepipe == pipe) {
-		prevpipe = pipe;
-		nextpipe = pipe;
-	} else {
-		/* We're not initializing a new linked list (basepipe != pipe),
-		 * so run through the linked list and find where this pipe
-		 * should be sloted in, based on its cycle.  CHI confuses
-		 * things a bit, since it has a single anchor for both its
-		 * transmit and receive lists.
-		 */
-		if (basepipe == 16) {
-			if (direction == PIPEinput) {
-				prevpipe = dbri->chi_in_pipe;
-			} else {
-				prevpipe = dbri->chi_out_pipe;
-			}
-		} else {
-			prevpipe = basepipe;
-		}
-
-		nextpipe = dbri->pipes[prevpipe].nextpipe;
-
-		while (dbri->pipes[nextpipe].cycle < cycle
-		       && dbri->pipes[nextpipe].nextpipe != basepipe) {
-			prevpipe = nextpipe;
-			nextpipe = dbri->pipes[nextpipe].nextpipe;
-		}
-	}
-
-	if (prevpipe == 16) {
-		if (direction == PIPEinput) {
-			dbri->chi_in_pipe = pipe;
-		} else {
-			dbri->chi_out_pipe = pipe;
-		}
-	} else {
-		dbri->pipes[prevpipe].nextpipe = pipe;
-	}
+	dbri->pipes[prevpipe].nextpipe = pipe;
 
 	dbri->pipes[pipe].nextpipe = nextpipe;
-	dbri->pipes[pipe].cycle = cycle;
 	dbri->pipes[pipe].length = length;
 
 	cmd = dbri_cmdlock(dbri, NoGetLock);
 
-	if (direction == PIPEinput) {
-		val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(prevpipe) | pipe;
+	if (dbri->pipes[pipe].sdp & D_SDP_TO_SER) {
+		/* Deal with CHI special case:
+		 * "If transmission on edges 0 or 1 is desired, then cycle n
+		 *  (where n = # of bit times per frame...) must be used."
+		 *                  - DBRI data sheet, page 11
+		 */
+		if (prevpipe == 16 && cycle == 0)
+			cycle = dbri->chi_bpf;
+
+		val = D_DTS_VO | D_DTS_INS | D_DTS_PRVOUT(prevpipe) | pipe;
 		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
+		*(cmd++) = 0;
 		*(cmd++) =
 		    D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe);
-		*(cmd++) = 0;
 	} else {
-		val = D_DTS_VO | D_DTS_INS | D_DTS_PRVOUT(prevpipe) | pipe;
+		val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(prevpipe) | pipe;
 		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
-		*(cmd++) = 0;
 		*(cmd++) =
 		    D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe);
+		*(cmd++) = 0;
 	}
 
 	dbri_cmdsend(dbri, cmd);
@@ -1192,21 +1152,18 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla
 	} else {
 		int pipe;
 
-		for (pipe = dbri->chi_in_pipe;
-		     pipe != 16; pipe = dbri->pipes[pipe].nextpipe) {
-			unlink_time_slot(dbri, pipe, PIPEinput,
-					 16, dbri->pipes[pipe].nextpipe);
-		}
-		for (pipe = dbri->chi_out_pipe;
-		     pipe != 16; pipe = dbri->pipes[pipe].nextpipe) {
-			unlink_time_slot(dbri, pipe, PIPEoutput,
-					 16, dbri->pipes[pipe].nextpipe);
-		}
-
-		cmd = dbri_cmdlock(dbri, GetLock);
+		for (pipe = 0; pipe < DBRI_NO_PIPES; pipe++ )
+			if ( pipe != 16 ) {
+				if (dbri->pipes[pipe].sdp & D_SDP_TO_SER)
+					unlink_time_slot(dbri, pipe, PIPEoutput,
+							 16, dbri->pipes[pipe].nextpipe);
+				else
+					unlink_time_slot(dbri, pipe, PIPEinput,
+							 16, dbri->pipes[pipe].nextpipe);
+			}
+  
+  		cmd = dbri_cmdlock(dbri, GetLock);
 	}
-	dbri->chi_in_pipe = 16;
-	dbri->chi_out_pipe = 16;
 
 	if (master_or_slave == CHIslave) {
 		/* Setup DBRI for CHI Slave - receive clock, frame sync (FS)
@@ -1397,10 +1354,10 @@ static void cs4215_open(struct snd_dbri * dbri)
 	 */
 	data_width = dbri->mm.channels * dbri->mm.precision;
 
-	link_time_slot(dbri, 20, PIPEoutput, 16, 32, dbri->mm.offset + 32);
-	link_time_slot(dbri, 4, PIPEoutput, 16, data_width, dbri->mm.offset);
-	link_time_slot(dbri, 6, PIPEinput, 16, data_width, dbri->mm.offset);
-	link_time_slot(dbri, 21, PIPEinput, 16, 16, dbri->mm.offset + 40);
+	link_time_slot(dbri, 4, 16, 16, data_width, dbri->mm.offset);
+	link_time_slot(dbri, 20, 4, 16, 32, dbri->mm.offset + 32);
+	link_time_slot(dbri, 6, 16, 16, data_width, dbri->mm.offset);
+	link_time_slot(dbri, 21, 6, 16, 16, dbri->mm.offset + 40);
 
 	/* FIXME: enable CHI after _setdata? */
 	tmp = sbus_readl(dbri->regs + REG0);
@@ -1466,9 +1423,9 @@ static int cs4215_setctrl(struct snd_dbri * dbri)
 	 * Pipe 19: Receive timeslot 7 (version). 
 	 */
 
-	link_time_slot(dbri, 17, PIPEoutput, 16, 32, dbri->mm.offset);
-	link_time_slot(dbri, 18, PIPEinput, 16, 8, dbri->mm.offset);
-	link_time_slot(dbri, 19, PIPEinput, 16, 8, dbri->mm.offset + 48);
+	link_time_slot(dbri, 17, 16, 16, 32, dbri->mm.offset);
+	link_time_slot(dbri, 18, 16, 16, 8, dbri->mm.offset);
+	link_time_slot(dbri, 19, 18, 16, 8, dbri->mm.offset + 48);
 
 	/* Wait for the chip to echo back CLB (Control Latch Bit) as zero */
 	dbri->mm.ctrl[0] &= ~CS4215_CLB;
@@ -2445,11 +2402,11 @@ static void dbri_debug_read(struct snd_info_entry * entry,
 			struct dbri_pipe *pptr = &dbri->pipes[pipe];
 			snd_iprintf(buffer,
 				    "Pipe %d: %s SDP=0x%x desc=%d, "
-				    "len=%d @ %d next %d\n",
+				    "len=%d next %d\n",
 				    pipe,
 				   ((pptr->sdp & D_SDP_TO_SER) ? "output" : "input"),
 				    pptr->sdp, pptr->desc,
-				    pptr->length, pptr->cycle, pptr->nextpipe);
+				    pptr->length, pptr->nextpipe);
 		}
 	}
 }
-- 
GitLab


From 1be54c824be9b5e163cd83dabdf0ad3ac81c72a8 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 21 Aug 2006 19:30:57 +0200
Subject: [PATCH 0953/1063] [ALSA] sparc dbri: ring buffered version

This is a complete rework of the low-level layer to work on ring
buffers for commands and data descriptors. This removes the annoying
noise caused by the delay in data buffer switching.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 385 ++++++++++++++++++++++-----------------------
 1 file changed, 192 insertions(+), 193 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 5696f792e3d1a..3fb2ede80eaf0 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2,6 +2,8 @@
  * Driver for DBRI sound chip found on Sparcs.
  * Copyright (C) 2004, 2005 Martin Habets (mhabets@users.sourceforge.net)
  *
+ * Converted to ring buffered version by Krzysztof Helt (krzysztof.h1@wp.pl)
+ *
  * Based entirely upon drivers/sbus/audio/dbri.c which is:
  * Copyright (C) 1997 Rudolf Koenig (rfkoenig@immd4.informatik.uni-erlangen.de)
  * Copyright (C) 1998, 1999 Brent Baccala (baccala@freesoft.org)
@@ -260,7 +262,7 @@ struct dbri_mem {
  * the CPU and the DBRI
  */
 struct dbri_dma {
-	volatile s32 cmd[DBRI_NO_CMDS];	/* Place for commands       */
+	s32 cmd[DBRI_NO_CMDS];			/* Place for commands */
 	volatile s32 intr[DBRI_INT_BLK];	/* Interrupt field  */
 	struct dbri_mem desc[DBRI_NO_DESCS];	/* Xmit/receive descriptors */
 };
@@ -284,7 +286,6 @@ struct dbri_pipe {
 struct dbri_streaminfo {
 	struct snd_pcm_substream *substream;
 	u32 dvma_buffer;	/* Device view of Alsa DMA buffer */
-	int left;		/* # of bytes left in DMA buffer  */
 	int size;		/* Size of DMA buffer             */
 	size_t offset;		/* offset in user buffer          */
 	int pipe;		/* Data pipe used                 */
@@ -305,11 +306,11 @@ struct snd_dbri {
 
 	void __iomem *regs;	/* dbri HW regs */
 	int dbri_irqp;		/* intr queue pointer */
-	int wait_send;		/* sequence of command buffers send */
-	int wait_ackd;		/* sequence of command buffers acknowledged */
 
 	struct dbri_pipe pipes[DBRI_NO_PIPES];	/* DBRI's 32 data pipes */
 	int next_desc[DBRI_NO_DESCS];		/* Index of next desc, or -1 */
+	spinlock_t cmdlock;	/* Protects cmd queue accesses */
+	s32 *cmdptr;		/* Pointer to the last queued cmd */
 
 	int chi_bpf;
 
@@ -544,7 +545,7 @@ struct snd_dbri {
 #define DBRI_TD_TBC	(1<<0)	/* Transmit buffer Complete */
 #define DBRI_TD_STATUS(v)       ((v)&0xff)	/* Transmit status */
 			/* Maximum buffer size per TD: almost 8Kb */
-#define DBRI_TD_MAXCNT	((1 << 13) - 1)
+#define DBRI_TD_MAXCNT	((1 << 13) - 4)
 
 /* Receive descriptor defines */
 #define DBRI_RD_F	(1<<31)	/* End of Frame */
@@ -608,79 +609,110 @@ The list is terminated with a WAIT command, which generates a
 CPU interrupt to signal completion.
 
 Since the DBRI can run in parallel with the CPU, several means of
-synchronization present themselves.  The method implemented here is close
-to the original scheme (Rudolf's), and uses 2 counters (wait_send and
-wait_ackd) to synchronize the command buffer between the CPU and the DBRI.
+synchronization present themselves. The method implemented here is only
+to use the dbri_cmdwait() to wait for execution of batch of sent commands.
 
-A more sophisticated scheme might involve a circular command buffer
-or an array of command buffers.  A routine could fill one with
-commands and link it onto a list.  When a interrupt signaled
-completion of the current command buffer, look on the list for
-the next one.
+A circular command buffer is used here. A new command is being added 
+while other can be executed. The scheme works by adding two WAIT commands
+after each sent batch of commands. When the next batch is prepared it is
+added after the WAIT commands then the WAITs are replaced with single JUMP
+command to the new batch. The the DBRI is forced to reread the last WAIT 
+command (replaced by the JUMP by then). If the DBRI is still executing 
+previous commands the request to reread the WAIT command is ignored.
 
 Every time a routine wants to write commands to the DBRI, it must
-first call dbri_cmdlock() and get an initial pointer into dbri->dma->cmd
-in return. dbri_cmdlock() will block if the previous commands have not
-been completed yet. After this the commands can be written to the buffer,
-and dbri_cmdsend() is called with the final pointer value to send them
-to the DBRI.
+first call dbri_cmdlock() and get pointer to a free space in 
+dbri->dma->cmd buffer. After this, the commands can be written to 
+the buffer, and dbri_cmdsend() is called with the final pointer value 
+to send them to the DBRI.
 
 */
 
 static void dbri_process_interrupt_buffer(struct snd_dbri * dbri);
 
-enum dbri_lock { NoGetLock, GetLock };
 #define MAXLOOPS 10
-
-static volatile s32 *dbri_cmdlock(struct snd_dbri * dbri, enum dbri_lock get)
+/*
+ * Wait for the current command string to execute
+ */
+static void dbri_cmdwait(struct snd_dbri *dbri)
 {
 	int maxloops = MAXLOOPS;
 
-#ifndef SMP
-	if ((get == GetLock) && spin_is_locked(&dbri->lock)) {
-		printk(KERN_ERR "DBRI: cmdlock called while in spinlock.");
-	}
-#endif
-
 	/* Delay if previous commands are still being processed */
-	while ((--maxloops) > 0 && (dbri->wait_send != dbri->wait_ackd)) {
+	while ((--maxloops) > 0 && (sbus_readl(dbri->regs + REG0) & D_P))
 		msleep_interruptible(1);
-	}
+
 	if (maxloops == 0) {
-		printk(KERN_ERR "DBRI: Chip never completed command buffer %d\n",
-			dbri->wait_send);
+		printk(KERN_ERR "DBRI: Chip never completed command buffer\n");
 	} else {
 		dprintk(D_CMD, "Chip completed command buffer (%d)\n",
 			MAXLOOPS - maxloops - 1);
 	}
+}
+/*
+ * Lock the command queue and returns pointer to a space for len cmd words
+ * It locks the cmdlock spinlock.
+ */
+static s32 *dbri_cmdlock(struct snd_dbri * dbri, int len)
+{
+	/* Space for 2 WAIT cmds (replaced later by 1 JUMP cmd) */
+	len += 2;
+	spin_lock(&dbri->cmdlock);
+	if (dbri->cmdptr - dbri->dma->cmd + len < DBRI_NO_CMDS - 2)
+		return dbri->cmdptr + 2;
+	else if (len < sbus_readl(dbri->regs + REG8) - dbri->dma_dvma)
+		return dbri->dma->cmd;
+	else
+		printk(KERN_ERR "DBRI: no space for commands.");
 
-	/*if (get == GetLock) spin_lock(&dbri->lock); */
-	return &dbri->dma->cmd[0];
+	return 0;
 }
 
-static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd)
+/*
+ * Send prepared cmd string. It works by writting a JMP cmd into
+ * the last WAIT cmd and force DBRI to reread the cmd.
+ * The JMP cmd points to the new cmd string.
+ * It also releases the cmdlock spinlock.
+ */
+static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 {
-	volatile s32 *ptr;
+	s32 *ptr;
+	s32 tmp, addr;
+	static int wait_id = 0;
 
-	for (ptr = &dbri->dma->cmd[0]; ptr < cmd; ptr++) {
-		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
-	}
+	wait_id++;
+	wait_id &= 0xffff;	/* restrict it to a 16 bit counter. */
+	*(cmd) = DBRI_CMD(D_WAIT, 1, wait_id);
+	*(cmd+1) = DBRI_CMD(D_WAIT, 1, wait_id);
 
-	if ((cmd - &dbri->dma->cmd[0]) >= DBRI_NO_CMDS - 1) {
-		printk(KERN_ERR "DBRI: Command buffer overflow! (bug in driver)\n");
-		/* Ignore the last part. */
-		cmd = &dbri->dma->cmd[DBRI_NO_CMDS - 3];
-	}
+	/* Replace the last command with JUMP */
+	addr = dbri->dma_dvma + (cmd - len - dbri->dma->cmd) * sizeof(s32);
+	*(dbri->cmdptr+1) = addr;
+	*(dbri->cmdptr) = DBRI_CMD(D_JUMP, 0, 0);
 
-	dbri->wait_send++;
-	dbri->wait_send &= 0xffff;	/* restrict it to a 16 bit counter. */
-	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
-	*(cmd++) = DBRI_CMD(D_WAIT, 1, dbri->wait_send);
+#ifdef DBRI_DEBUG
+	if (cmd > dbri->cmdptr )
+		for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++) {
+			dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+		}
+	else {
+		ptr = dbri->cmdptr;
+		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+		ptr = dbri->cmdptr+1;
+		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+		for (ptr = dbri->dma->cmd; ptr < cmd+2; ptr++) {
+			dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+		}
+	}
+#endif
 
-	/* Set command pointer and signal it is valid. */
-	sbus_writel(dbri->dma_dvma, dbri->regs + REG8);
+	/* Reread the last command */
+	tmp = sbus_readl(dbri->regs + REG0);
+	tmp |= D_P;
+	sbus_writel(tmp, dbri->regs + REG0);
 
-	/*spin_unlock(&dbri->lock); */
+	dbri->cmdptr = cmd;
+	spin_unlock(&dbri->cmdlock);
 }
 
 /* Lock must be held when calling this */
@@ -709,7 +741,7 @@ static void dbri_reset(struct snd_dbri * dbri)
 /* Lock must not be held before calling this */
 static void dbri_initialize(struct snd_dbri * dbri)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 	u32 dma_addr;
 	unsigned long flags;
 	int n;
@@ -718,14 +750,11 @@ static void dbri_initialize(struct snd_dbri * dbri)
 
 	dbri_reset(dbri);
 
-	cmd = dbri_cmdlock(dbri, NoGetLock);
-	dprintk(D_GEN, "init: cmd: %p, int: %p\n",
-		&dbri->dma->cmd[0], &dbri->dma->intr[0]);
-
 	/* Initialize pipes */
 	for (n = 0; n < DBRI_NO_PIPES; n++)
 		dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1;
 
+	spin_lock_init(&dbri->cmdlock);
 	/*
 	 * Initialize the interrupt ringbuffer.
 	 */
@@ -735,10 +764,19 @@ static void dbri_initialize(struct snd_dbri * dbri)
 	/*
 	 * Set up the interrupt queue
 	 */
+	spin_lock(&dbri->cmdlock);
+	cmd = dbri->cmdptr = dbri->dma->cmd;
 	*(cmd++) = DBRI_CMD(D_IIQ, 0, 0);
 	*(cmd++) = dma_addr;
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
+	dbri->cmdptr = cmd;
+	*(cmd++) = DBRI_CMD(D_WAIT, 1, 0);
+	*(cmd++) = DBRI_CMD(D_WAIT, 1, 0);
+	dma_addr = dbri->dma_dvma + dbri_dma_off(cmd, 0);
+	sbus_writel(dma_addr, dbri->regs + REG8);
+	spin_unlock(&dbri->cmdlock);
+	dbri_cmdwait(dbri);
 
-	dbri_cmdsend(dbri, cmd);
 	spin_unlock_irqrestore(&dbri->lock, flags);
 }
 
@@ -770,7 +808,7 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe)
 {
 	int sdp;
 	int desc;
-	volatile int *cmd;
+	s32 *cmd;
 
 	if (pipe < 0 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: reset_pipe called with illegal pipe number\n");
@@ -783,16 +821,18 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe)
 		return;
 	}
 
-	cmd = dbri_cmdlock(dbri, NoGetLock);
+	cmd = dbri_cmdlock(dbri, 3);
 	*(cmd++) = DBRI_CMD(D_SDP, 0, sdp | D_SDP_C | D_SDP_P);
 	*(cmd++) = 0;
-	dbri_cmdsend(dbri, cmd);
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
+	dbri_cmdsend(dbri, cmd, 3);
 
 	desc = dbri->pipes[pipe].first_desc;
-	while (desc != -1) {
-		dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0;
-		desc = dbri->next_desc[desc];
-	}
+	if ( desc >= 0)
+		do {
+			dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0;
+			desc = dbri->next_desc[desc];
+		} while (desc != -1 && desc != dbri->pipes[pipe].first_desc);
 
 	dbri->pipes[pipe].desc = -1;
 	dbri->pipes[pipe].first_desc = -1;
@@ -828,7 +868,7 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe,
 			   int prevpipe, int nextpipe,
 			   int length, int cycle)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 	int val;
 
 	if (pipe < 0 || pipe > DBRI_MAX_PIPE 
@@ -847,11 +887,10 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe,
 	}
 
 	dbri->pipes[prevpipe].nextpipe = pipe;
-
 	dbri->pipes[pipe].nextpipe = nextpipe;
 	dbri->pipes[pipe].length = length;
 
-	cmd = dbri_cmdlock(dbri, NoGetLock);
+	cmd = dbri_cmdlock(dbri, 4);
 
 	if (dbri->pipes[pipe].sdp & D_SDP_TO_SER) {
 		/* Deal with CHI special case:
@@ -874,25 +913,27 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe,
 		    D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe);
 		*(cmd++) = 0;
 	}
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	dbri_cmdsend(dbri, cmd, 4);
 }
 
 static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
 			     enum in_or_out direction, int prevpipe,
 			     int nextpipe)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 	int val;
 
 	if (pipe < 0 || pipe > DBRI_MAX_PIPE 
-			|| prevpipe < 0 || prevpipe > DBRI_MAX_PIPE) {
+			|| prevpipe < 0 || prevpipe > DBRI_MAX_PIPE
+			|| nextpipe < 0 || nextpipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR 
 		    "DBRI: unlink_time_slot called with illegal pipe number\n");
 		return;
 	}
 
-	cmd = dbri_cmdlock(dbri, NoGetLock);
+	cmd = dbri_cmdlock(dbri, 4);
 
 	if (direction == PIPEinput) {
 		val = D_DTS_VI | D_DTS_DEL | D_DTS_PRVIN(prevpipe) | pipe;
@@ -905,8 +946,9 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
 		*(cmd++) = 0;
 		*(cmd++) = D_TS_NEXT(nextpipe);
 	}
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	dbri_cmdsend(dbri, cmd, 4);
 }
 
 /* xmit_fixed() / recv_fixed()
@@ -925,7 +967,7 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
  */
 static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 
 	if (pipe < 16 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: xmit_fixed: Illegal pipe number\n");
@@ -952,12 +994,14 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data)
 	if (dbri->pipes[pipe].sdp & D_SDP_MSB)
 		data = reverse_bytes(data, dbri->pipes[pipe].length);
 
-	cmd = dbri_cmdlock(dbri, GetLock);
+	cmd = dbri_cmdlock(dbri, 3);
 
 	*(cmd++) = DBRI_CMD(D_SSP, 0, pipe);
 	*(cmd++) = data;
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	dbri_cmdsend(dbri, cmd, 3);
+	dbri_cmdwait(dbri);
 }
 
 static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr)
@@ -991,6 +1035,8 @@ static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr)
  * and work by building chains of descriptors which identify the
  * data buffers.  Buffers too large for a single descriptor will
  * be spread across multiple descriptors.
+ *
+ * All descriptors create a ring buffer.
  */
 static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period)
 {
@@ -1051,14 +1097,13 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 			return -1;
 		}
 
-		if (len > DBRI_TD_MAXCNT) {
-			mylen = DBRI_TD_MAXCNT;	/* 8KB - 1 */
-		} else {
+		if (len > DBRI_TD_MAXCNT)
+			mylen = DBRI_TD_MAXCNT;	/* 8KB - 4 */
+		else
 			mylen = len;
-		}
-		if (mylen > period) {
+
+		if (mylen > period)
 			mylen = period;
-		}
 
 		dbri->next_desc[desc] = -1;
 		dbri->dma->desc[desc].ba = dvma_buffer;
@@ -1067,17 +1112,17 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 		if (streamno == DBRI_PLAY) {
 			dbri->dma->desc[desc].word1 = DBRI_TD_CNT(mylen);
 			dbri->dma->desc[desc].word4 = 0;
-			if (first_desc != -1)
-				dbri->dma->desc[desc].word1 |= DBRI_TD_M;
+			dbri->dma->desc[desc].word1 |= 
+			    DBRI_TD_F | DBRI_TD_B;
 		} else {
 			dbri->dma->desc[desc].word1 = 0;
 			dbri->dma->desc[desc].word4 =
 			    DBRI_RD_B | DBRI_RD_BCNT(mylen);
 		}
 
-		if (first_desc == -1) {
+		if (first_desc == -1)
 			first_desc = desc;
-		} else {
+		else {
 			dbri->next_desc[last_desc] = desc;
 			dbri->dma->desc[last_desc].nda =
 			    dbri->dma_dvma + dbri_dma_off(desc, desc);
@@ -1093,21 +1138,28 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 		return -1;
 	}
 
-	dbri->dma->desc[last_desc].word1 &= ~DBRI_TD_M;
 	if (streamno == DBRI_PLAY) {
 		dbri->dma->desc[last_desc].word1 |=
-		    DBRI_TD_I | DBRI_TD_F | DBRI_TD_B;
+		    DBRI_TD_F | DBRI_TD_B;
+		dbri->dma->desc[last_desc].nda =
+		    dbri->dma_dvma + dbri_dma_off(desc, first_desc);
+		dbri->next_desc[last_desc] = first_desc;
 	}
 	dbri->pipes[info->pipe].first_desc = first_desc;
 	dbri->pipes[info->pipe].desc = first_desc;
 
-	for (desc = first_desc; desc != -1; desc = dbri->next_desc[desc]) {
+#ifdef DBRI_DEBUG
+	for (desc = first_desc; desc != -1; ) {
 		dprintk(D_DESC, "DESC %d: %08x %08x %08x %08x\n",
 			desc,
 			dbri->dma->desc[desc].word1,
 			dbri->dma->desc[desc].ba,
 			dbri->dma->desc[desc].nda, dbri->dma->desc[desc].word4);
+			desc = dbri->next_desc[desc];
+			if ( desc == first_desc )
+				break;
 	}
+#endif
 	return 0;
 }
 
@@ -1127,43 +1179,24 @@ enum master_or_slave { CHImaster, CHIslave };
 static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_slave,
 		      int bits_per_frame)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 	int val;
-	static int chi_initialized = 0;	/* FIXME: mutex? */
-
-	if (!chi_initialized) {
 
-		cmd = dbri_cmdlock(dbri, GetLock);
+	/* Set CHI Anchor: Pipe 16 */
 
-		/* Set CHI Anchor: Pipe 16 */
-
-		val = D_DTS_VO | D_DTS_VI | D_DTS_INS 
-			| D_DTS_PRVIN(16) | D_PIPE(16) | D_DTS_PRVOUT(16);
-		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
-		*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
-		*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
+	cmd = dbri_cmdlock(dbri, 4);
+	val = D_DTS_VO | D_DTS_VI | D_DTS_INS 
+		| D_DTS_PRVIN(16) | D_PIPE(16) | D_DTS_PRVOUT(16);
+	*(cmd++) = DBRI_CMD(D_DTS, 0, val);
+	*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
+	*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
+	dbri_cmdsend(dbri, cmd, 4);
 
-		dbri->pipes[16].sdp = 1;
-		dbri->pipes[16].nextpipe = 16;
+	dbri->pipes[16].sdp = 1;
+	dbri->pipes[16].nextpipe = 16;
 
-#if 0
-		chi_initialized++;
-#endif
-	} else {
-		int pipe;
-
-		for (pipe = 0; pipe < DBRI_NO_PIPES; pipe++ )
-			if ( pipe != 16 ) {
-				if (dbri->pipes[pipe].sdp & D_SDP_TO_SER)
-					unlink_time_slot(dbri, pipe, PIPEoutput,
-							 16, dbri->pipes[pipe].nextpipe);
-				else
-					unlink_time_slot(dbri, pipe, PIPEinput,
-							 16, dbri->pipes[pipe].nextpipe);
-			}
-  
-  		cmd = dbri_cmdlock(dbri, GetLock);
-	}
+	cmd = dbri_cmdlock(dbri, 4);
 
 	if (master_or_slave == CHIslave) {
 		/* Setup DBRI for CHI Slave - receive clock, frame sync (FS)
@@ -1202,8 +1235,9 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla
 
 	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 	*(cmd++) = DBRI_CMD(D_CDM, 0, D_CDM_XCE | D_CDM_XEN | D_CDM_REN);
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	dbri_cmdsend(dbri, cmd, 4);
 }
 
 /*
@@ -1240,6 +1274,8 @@ static void cs4215_setup_pipes(struct snd_dbri * dbri)
 	setup_pipe(dbri, 17, D_SDP_FIXED | D_SDP_TO_SER | D_SDP_MSB);
 	setup_pipe(dbri, 18, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB);
 	setup_pipe(dbri, 19, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB);
+
+	dbri_cmdwait(dbri);
 }
 
 static int cs4215_init_data(struct cs4215 *mm)
@@ -1271,7 +1307,7 @@ static int cs4215_init_data(struct cs4215 *mm)
 	mm->status = 0;
 	mm->version = 0xff;
 	mm->precision = 8;	/* For ULAW */
-	mm->channels = 2;
+	mm->channels = 1;
 
 	return 0;
 }
@@ -1554,7 +1590,6 @@ static int cs4215_init(struct snd_dbri * dbri)
 	}
 
 	cs4215_setup_pipes(dbri);
-
 	cs4215_init_data(&dbri->mm);
 
 	/* Enable capture of the status & version timeslots. */
@@ -1583,9 +1618,7 @@ buffer and calls dbri_process_one_interrupt() for each interrupt word.
 Complicated interrupts are handled by dedicated functions (which
 appear first in this file).  Any pending interrupts can be serviced by
 calling dbri_process_interrupt_buffer(), which works even if the CPU's
-interrupts are disabled.  This function is used by dbri_cmdlock()
-to make sure we're synced up with the chip before each command sequence,
-even if we're running cli'ed.
+interrupts are disabled.
 
 */
 
@@ -1594,11 +1627,10 @@ even if we're running cli'ed.
  * Transmit the current TD's for recording/playing, if needed.
  * For playback, ALSA has filled the DMA memory with new data (we hope).
  */
-static void xmit_descs(unsigned long data)
+static void xmit_descs(struct snd_dbri *dbri)
 {
-	struct snd_dbri *dbri = (struct snd_dbri *) data;
 	struct dbri_streaminfo *info;
-	volatile s32 *cmd;
+	s32 *cmd;
 	unsigned long flags;
 	int first_td;
 
@@ -1609,7 +1641,7 @@ static void xmit_descs(unsigned long data)
 	info = &dbri->stream_info[DBRI_REC];
 	spin_lock_irqsave(&dbri->lock, flags);
 
-	if ((info->left >= info->size) && (info->pipe >= 0)) {
+	if (info->pipe >= 0) {
 		first_td = dbri->pipes[info->pipe].first_desc;
 
 		dprintk(D_DESC, "xmit_descs rec @ TD %d\n", first_td);
@@ -1619,16 +1651,15 @@ static void xmit_descs(unsigned long data)
 			goto play;
 		}
 
-		cmd = dbri_cmdlock(dbri, NoGetLock);
+		cmd = dbri_cmdlock(dbri, 2);
 		*(cmd++) = DBRI_CMD(D_SDP, 0,
 				    dbri->pipes[info->pipe].sdp
 				    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
 		*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
-		dbri_cmdsend(dbri, cmd);
+		dbri_cmdsend(dbri, cmd, 2);
 
 		/* Reset our admin of the pipe & bytes read. */
 		dbri->pipes[info->pipe].desc = first_td;
-		info->left = 0;
 	}
 
 play:
@@ -1638,33 +1669,27 @@ static void xmit_descs(unsigned long data)
 	info = &dbri->stream_info[DBRI_PLAY];
 	spin_lock_irqsave(&dbri->lock, flags);
 
-	if ((info->left <= 0) && (info->pipe >= 0)) {
+	if (info->pipe >= 0) {
 		first_td = dbri->pipes[info->pipe].first_desc;
 
 		dprintk(D_DESC, "xmit_descs play @ TD %d\n", first_td);
 
 		/* Stream could be closed by the time we run. */
-		if (first_td < 0) {
-			spin_unlock_irqrestore(&dbri->lock, flags);
-			return;
-		}
-
-		cmd = dbri_cmdlock(dbri, NoGetLock);
-		*(cmd++) = DBRI_CMD(D_SDP, 0,
-				    dbri->pipes[info->pipe].sdp
-				    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
-		*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
-		dbri_cmdsend(dbri, cmd);
+		if (first_td >= 0) {
+			cmd = dbri_cmdlock(dbri, 2);
+			*(cmd++) = DBRI_CMD(D_SDP, 0,
+					    dbri->pipes[info->pipe].sdp
+					    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
+			*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
+			dbri_cmdsend(dbri, cmd, 2);
 
-		/* Reset our admin of the pipe & bytes written. */
-		dbri->pipes[info->pipe].desc = first_td;
-		info->left = info->size;
+			/* Reset our admin of the pipe & bytes written. */
+			dbri->pipes[info->pipe].desc = first_td;
+		}
 	}
 	spin_unlock_irqrestore(&dbri->lock, flags);
 }
 
-static DECLARE_TASKLET(xmit_descs_task, xmit_descs, 0);
-
 /* transmission_complete_intr()
  *
  * Called by main interrupt handler when DBRI signals transmission complete
@@ -1684,7 +1709,6 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe)
 	struct dbri_streaminfo *info;
 	int td;
 	int status;
-	int len;
 
 	info = &dbri->stream_info[DBRI_PLAY];
 
@@ -1703,20 +1727,7 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe)
 		dprintk(D_INT, "TD %d, status 0x%02x\n", td, status);
 
 		dbri->dma->desc[td].word4 = 0;	/* Reset it for next time. */
-		len = DBRI_RD_CNT(dbri->dma->desc[td].word1);
-		info->offset += len;
-		info->left -= len;
-
-		/* On the last TD, transmit them all again. */
-		if (dbri->next_desc[td] == -1) {
-			if (info->left > 0) {
-				printk(KERN_WARNING
-				       "%d bytes left after last transfer.\n",
-				       info->left);
-				info->left = 0;
-			}
-			tasklet_schedule(&xmit_descs_task);
-		}
+		info->offset += DBRI_RD_CNT(dbri->dma->desc[td].word1);
 
 		td = dbri->next_desc[td];
 		dbri->pipes[pipe].desc = td;
@@ -1749,7 +1760,6 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe)
 
 	info = &dbri->stream_info[DBRI_REC];
 	info->offset += DBRI_RD_CNT(status);
-	info->left += DBRI_RD_CNT(status);
 
 	/* FIXME: Check status */
 
@@ -1757,6 +1767,7 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe)
 		rd, DBRI_RD_STATUS(status), DBRI_RD_CNT(status));
 
 	/* On the last TD, transmit them all again. */
+#if 0
 	if (dbri->next_desc[rd] == -1) {
 		if (info->left > info->size) {
 			printk(KERN_WARNING
@@ -1765,6 +1776,7 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe)
 		}
 		tasklet_schedule(&xmit_descs_task);
 	}
+#endif
 
 	/* Notify ALSA */
 	if (spin_is_locked(&dbri->lock)) {
@@ -1793,16 +1805,11 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x)
 			channel, code, rval);
 	}
 
-	if (channel == D_INTR_CMD && command == D_WAIT) {
-		dbri->wait_ackd = val;
-		if (dbri->wait_send != val) {
-			printk(KERN_ERR "Processing wait command %d when %d was send.\n",
-			       val, dbri->wait_send);
-		}
-		return;
-	}
-
 	switch (code) {
+	case D_INTR_CMDI:
+		if (command != D_WAIT)
+			printk(KERN_ERR "DBRI: Command read interrupt\n");
+		break;
 	case D_INTR_BRDY:
 		reception_complete_intr(dbri, channel);
 		break;
@@ -1815,8 +1822,10 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x)
 		 * resend SDP command with clear pipe bit (C) set
 		 */
 		{
-			volatile s32 *cmd;
-
+	/* FIXME: do something useful in case of underrun */
+			printk(KERN_ERR "DBRI: Underrun error\n");
+#if 0
+			s32 *cmd;
 			int pipe = channel;
 			int td = dbri->pipes[pipe].desc;
 
@@ -1827,6 +1836,7 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x)
 					    | D_SDP_P | D_SDP_C | D_SDP_2SAME);
 			*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, td);
 			dbri_cmdsend(dbri, cmd);
+#endif
 		}
 		break;
 	case D_INTR_FXDT:
@@ -1847,9 +1857,7 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x)
 /* dbri_process_interrupt_buffer advances through the DBRI's interrupt
  * buffer until it finds a zero word (indicating nothing more to do
  * right now).  Non-zero words require processing and are handed off
- * to dbri_process_one_interrupt AFTER advancing the pointer.  This
- * order is important since we might recurse back into this function
- * and need to make sure the pointer has been advanced first.
+ * to dbri_process_one_interrupt AFTER advancing the pointer.
  */
 static void dbri_process_interrupt_buffer(struct snd_dbri * dbri)
 {
@@ -1919,8 +1927,6 @@ static irqreturn_t snd_dbri_interrupt(int irq, void *dev_id,
 
 	dbri_process_interrupt_buffer(dbri);
 
-	/* FIXME: Write 0 into regs to ACK interrupt */
-
 	spin_unlock(&dbri->lock);
 
 	return IRQ_HANDLED;
@@ -1962,7 +1968,6 @@ static int snd_dbri_open(struct snd_pcm_substream *substream)
 
 	spin_lock_irqsave(&dbri->lock, flags);
 	info->substream = substream;
-	info->left = 0;
 	info->offset = 0;
 	info->dvma_buffer = 0;
 	info->pipe = -1;
@@ -1980,7 +1985,6 @@ static int snd_dbri_close(struct snd_pcm_substream *substream)
 
 	dprintk(D_USR, "close audio output.\n");
 	info->substream = NULL;
-	info->left = 0;
 	info->offset = 0;
 
 	return 0;
@@ -2062,10 +2066,8 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream)
 	info->size = snd_pcm_lib_buffer_bytes(substream);
 	if (DBRI_STREAMNO(substream) == DBRI_PLAY)
 		info->pipe = 4;	/* Send pipe */
-	else {
+	else
 		info->pipe = 6;	/* Receive pipe */
-		info->left = info->size;	/* To trigger submittal */
-	}
 
 	spin_lock_irq(&dbri->lock);
 
@@ -2093,14 +2095,11 @@ static int snd_dbri_trigger(struct snd_pcm_substream *substream, int cmd)
 	case SNDRV_PCM_TRIGGER_START:
 		dprintk(D_USR, "start audio, period is %d bytes\n",
 			(int)snd_pcm_lib_period_bytes(substream));
-		/* Enable & schedule the tasklet that re-submits the TDs. */
-		xmit_descs_task.data = (unsigned long)dbri;
-		tasklet_schedule(&xmit_descs_task);
+		/* Re-submit the TDs. */
+		xmit_descs(dbri);
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 		dprintk(D_USR, "stop audio.\n");
-		/* Make the tasklet bail out immediately. */
-		xmit_descs_task.data = 0;
 		reset_pipe(dbri, info->pipe);
 		break;
 	default:
@@ -2118,8 +2117,8 @@ static snd_pcm_uframes_t snd_dbri_pointer(struct snd_pcm_substream *substream)
 
 	ret = bytes_to_frames(substream->runtime, info->offset)
 		% substream->runtime->buffer_size;
-	dprintk(D_USR, "I/O pointer: %ld frames, %d bytes left.\n",
-		ret, info->left);
+	dprintk(D_USR, "I/O pointer: %ld frames of %ld.\n",
+		ret, substream->runtime->buffer_size);
 	return ret;
 }
 
-- 
GitLab


From ab93c7ae54a81bcecb77608ca89eea140f1d45ad Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Aug 2006 11:37:36 +0200
Subject: [PATCH 0954/1063] [ALSA] sparc dbri: hardware constraints added

This patch adds ALSA hardware constraints so that stereo is possible
only with the 16-bit format. It contains small cleanups to the ring
buffered code as well.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 81 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 19 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 3fb2ede80eaf0..3e6ad507849dc 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -85,7 +85,7 @@ MODULE_PARM_DESC(id, "ID string for Sun DBRI soundcard.");
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Sun DBRI soundcard.");
 
-#define DBRI_DEBUG
+#undef DBRI_DEBUG
 
 #define D_INT	(1<<0)
 #define D_GEN	(1<<1)
@@ -160,7 +160,7 @@ static struct {
      /* {    NA, (1 << 4), (5 << 3) }, */
 	{ 48000, (1 << 4), (6 << 3) },
 	{  9600, (1 << 4), (7 << 3) },
-	{  5513, (2 << 4), (0 << 3) },	/* Actually 5512.5 */
+	{  5512, (2 << 4), (0 << 3) },	/* Actually 5512.5 */
 	{ 11025, (2 << 4), (1 << 3) },
 	{ 18900, (2 << 4), (2 << 3) },
 	{ 22050, (2 << 4), (3 << 3) },
@@ -628,8 +628,6 @@ to send them to the DBRI.
 
 */
 
-static void dbri_process_interrupt_buffer(struct snd_dbri * dbri);
-
 #define MAXLOOPS 10
 /*
  * Wait for the current command string to execute
@@ -669,15 +667,15 @@ static s32 *dbri_cmdlock(struct snd_dbri * dbri, int len)
 }
 
 /*
- * Send prepared cmd string. It works by writting a JMP cmd into
+ * Send prepared cmd string. It works by writting a JUMP cmd into
  * the last WAIT cmd and force DBRI to reread the cmd.
- * The JMP cmd points to the new cmd string.
+ * The JUMP cmd points to the new cmd string.
  * It also releases the cmdlock spinlock.
  */
 static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 {
-	s32 *ptr;
 	s32 tmp, addr;
+	unsigned long flags;
 	static int wait_id = 0;
 
 	wait_id++;
@@ -691,14 +689,17 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 	*(dbri->cmdptr) = DBRI_CMD(D_JUMP, 0, 0);
 
 #ifdef DBRI_DEBUG
-	if (cmd > dbri->cmdptr )
+	if (cmd > dbri->cmdptr) {
+		s32 *ptr;
+
 		for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++) {
 			dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
 		}
-	else {
-		ptr = dbri->cmdptr;
+	} else {
+		s32 *ptr = dbri->cmdptr;
+
 		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
-		ptr = dbri->cmdptr+1;
+		ptr++;
 		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
 		for (ptr = dbri->dma->cmd; ptr < cmd+2; ptr++) {
 			dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
@@ -706,10 +707,12 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 	}
 #endif
 
+	spin_lock_irqsave(&dbri->lock, flags);
 	/* Reread the last command */
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp |= D_P;
 	sbus_writel(tmp, dbri->regs + REG0);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	dbri->cmdptr = cmd;
 	spin_unlock(&dbri->cmdlock);
@@ -1549,8 +1552,7 @@ static int cs4215_prepare(struct snd_dbri * dbri, unsigned int rate,
 	    CS4215_BSEL_128 | CS4215_FREQ[freq_idx].xtal;
 
 	dbri->mm.channels = channels;
-	/* Stereo bit: 8 bit stereo not working yet. */
-	if ((channels > 1) && (dbri->mm.precision == 16))
+	if (channels == 2)
 		dbri->mm.ctrl[1] |= CS4215_DFR_STEREO;
 
 	ret = cs4215_setctrl(dbri);
@@ -1624,7 +1626,7 @@ interrupts are disabled.
 
 /* xmit_descs()
  *
- * Transmit the current TD's for recording/playing, if needed.
+ * Starts transmiting the current TD's for recording/playing.
  * For playback, ALSA has filled the DMA memory with new data (we hope).
  */
 static void xmit_descs(struct snd_dbri *dbri)
@@ -1699,9 +1701,9 @@ static void xmit_descs(struct snd_dbri *dbri)
  * them as available. Stops when the first descriptor is found without
  * TBC (Transmit Buffer Complete) set, or we've run through them all.
  *
- * The DMA buffers are not released, but re-used. Since the transmit buffer
- * descriptors are not clobbered, they can be re-submitted as is. This is
- * done by the xmit_descs() tasklet above since that could take longer.
+ * The DMA buffers are not released. They form a ring buffer and
+ * they are filled by ALSA while others are transmitted by DMA.
+ *
  */
 
 static void transmission_complete_intr(struct snd_dbri * dbri, int pipe)
@@ -1944,8 +1946,8 @@ static struct snd_pcm_hardware snd_dbri_pcm_hw = {
 				  SNDRV_PCM_FMTBIT_A_LAW |
 				  SNDRV_PCM_FMTBIT_U8 |
 				  SNDRV_PCM_FMTBIT_S16_BE,
-	.rates			= SNDRV_PCM_RATE_8000_48000,
-	.rate_min		= 8000,
+	.rates			= SNDRV_PCM_RATE_8000_48000 | SNDRV_PCM_RATE_5512,
+	.rate_min		= 5512,
 	.rate_max		= 48000,
 	.channels_min		= 1,
 	.channels_max		= 2,
@@ -1956,6 +1958,39 @@ static struct snd_pcm_hardware snd_dbri_pcm_hw = {
 	.periods_max		= 1024,
 };
 
+static int snd_hw_rule_format(struct snd_pcm_hw_params *params,
+			      struct snd_pcm_hw_rule *rule)
+{
+	struct snd_interval *c = hw_param_interval(params,
+				SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+	struct snd_mask fmt;
+
+	snd_mask_any(&fmt);
+	if (c->min > 1) {
+		fmt.bits[0] &= SNDRV_PCM_FMTBIT_S16_BE;
+		return snd_mask_refine(f, &fmt);
+	}
+	return 0;
+}
+
+static int snd_hw_rule_channels(struct snd_pcm_hw_params *params,
+				struct snd_pcm_hw_rule *rule)
+{
+	struct snd_interval *c = hw_param_interval(params,
+				SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+	struct snd_interval ch;
+
+	snd_interval_any(&ch);
+	if (!(f->bits[0] & SNDRV_PCM_FMTBIT_S16_BE)) {
+		ch.min = ch.max = 1;
+		ch.integer = 1;
+		return snd_interval_refine(c, &ch);
+	}
+	return 0;
+}
+
 static int snd_dbri_open(struct snd_pcm_substream *substream)
 {
 	struct snd_dbri *dbri = snd_pcm_substream_chip(substream);
@@ -1973,6 +2008,14 @@ static int snd_dbri_open(struct snd_pcm_substream *substream)
 	info->pipe = -1;
 	spin_unlock_irqrestore(&dbri->lock, flags);
 
+	snd_pcm_hw_rule_add(runtime,0,SNDRV_PCM_HW_PARAM_CHANNELS,
+			    snd_hw_rule_format, 0, SNDRV_PCM_HW_PARAM_FORMAT,
+			    -1);
+	snd_pcm_hw_rule_add(runtime,0,SNDRV_PCM_HW_PARAM_FORMAT,
+			    snd_hw_rule_channels, 0, 
+			    SNDRV_PCM_HW_PARAM_CHANNELS,
+			    -1);
+				
 	cs4215_open(dbri);
 
 	return 0;
-- 
GitLab


From 93f09c4cc111506db2ffa6220b7a3d7f73e41aa3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 21 Aug 2006 19:22:45 +0200
Subject: [PATCH 0955/1063] [ALSA] make
 sound/pci/emu10k1/emu10k1.c:snd_emu10k1_resume() static

This patch makes the needlessly global snd_emu10k1_resume() static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/emu10k1/emu10k1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c
index 289bcd99c19c8..493ec0816bb3f 100644
--- a/sound/pci/emu10k1/emu10k1.c
+++ b/sound/pci/emu10k1/emu10k1.c
@@ -232,7 +232,7 @@ static int snd_emu10k1_suspend(struct pci_dev *pci, pm_message_t state)
 	return 0;
 }
 
-int snd_emu10k1_resume(struct pci_dev *pci)
+static int snd_emu10k1_resume(struct pci_dev *pci)
 {
 	struct snd_card *card = pci_get_drvdata(pci);
 	struct snd_emu10k1 *emu = card->private_data;
-- 
GitLab


From 7376d013fc6d3a45d748e0ce758ca9412b01b9dd Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Mon, 21 Aug 2006 19:17:46 +0200
Subject: [PATCH 0956/1063] [ALSA] intel_hda: MSI support

Simple patch to enable Message Signaled Interrupts (MSI) for the HDA Intel
audio controller.  Tested with:
 Intel Corporation 82801FB/FBM/FR/FW/FRW (ICH6 Family) High Definition Audio Controller (rev 03)
MSI is better because it means audio doesn't end up sharing an IRQ with USB.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_intel.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index ce75e07aaa2a7..c9ae9f7789282 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -55,6 +55,7 @@ static char *model;
 static int position_fix;
 static int probe_mask = -1;
 static int single_cmd;
+static int disable_msi;
 
 module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for Intel HD audio interface.");
@@ -68,6 +69,8 @@ module_param(probe_mask, int, 0444);
 MODULE_PARM_DESC(probe_mask, "Bitmask to probe codecs (default = -1).");
 module_param(single_cmd, bool, 0444);
 MODULE_PARM_DESC(single_cmd, "Use single command to communicate with codecs (for debugging only).");
+module_param(disable_msi, int, 0);
+MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
 
 
 /* just for backward compatibility */
@@ -1418,8 +1421,10 @@ static int azx_free(struct azx *chip)
 		msleep(1);
 	}
 
-	if (chip->irq >= 0)
+	if (chip->irq >= 0) {
+		pci_disable_msi(chip->pci);
 		free_irq(chip->irq, (void*)chip);
+	}
 	if (chip->remap_addr)
 		iounmap(chip->remap_addr);
 
@@ -1502,6 +1507,9 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 		goto errout;
 	}
 
+	if (!disable_msi)
+		pci_enable_msi(pci);
+
 	if (request_irq(pci->irq, azx_interrupt, IRQF_DISABLED|IRQF_SHARED,
 			"HDA Intel", (void*)chip)) {
 		snd_printk(KERN_ERR SFX "unable to grab IRQ %d\n", pci->irq);
-- 
GitLab


From 2f3482fbbd5dac7d0e86fe5b7ac5c1e51d52b084 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 21 Aug 2006 18:44:31 +0200
Subject: [PATCH 0957/1063] [ALSA] Add TLV support to AC97 codec driver

Added TLV support to the AC97 codec driver to provide
dB range information.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ac97_codec.c | 54 ++++++++++++++++++++++++++++++++-----
 sound/pci/ac97/ac97_patch.c | 39 +++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index e5d062d640dfc..c47f43dbd664e 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include <sound/asoundef.h>
 #include <sound/initval.h>
@@ -1181,6 +1182,32 @@ static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg,
 	return 0;
 }
 
+/*
+ * set dB information
+ */
+static DECLARE_TLV_DB_SCALE(db_scale_4bit, -4500, 300, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit, -4650, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0);
+
+static unsigned int *find_db_scale(unsigned int maxval)
+{
+	switch (maxval) {
+	case 0x0f: return db_scale_4bit;
+	case 0x1f: return db_scale_5bit;
+	case 0x3f: return db_scale_6bit;
+	}
+	return NULL;
+}
+
+static void set_tlv_db_scale(struct snd_kcontrol *kctl, unsigned int *tlv)
+{	
+	kctl->tlv.p = tlv;
+	if (tlv)
+		kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+}
+
 /*
  * create a volume for normal stereo/mono controls
  */
@@ -1203,6 +1230,10 @@ static int snd_ac97_cvol_new(struct snd_card *card, char *name, int reg, unsigne
 		tmp.index = ac97->num;
 		kctl = snd_ctl_new1(&tmp, ac97);
 	}
+	if (reg >= AC97_PHONE && reg <= AC97_PCM)
+		set_tlv_db_scale(kctl, db_scale_5bit_12db_max);
+	else
+		set_tlv_db_scale(kctl, find_db_scale(lo_max));
 	err = snd_ctl_add(card, kctl);
 	if (err < 0)
 		return err;
@@ -1282,6 +1313,7 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 		snd_ac97_change_volume_params2(ac97, AC97_CENTER_LFE_MASTER, 0, &max);
 		kctl->private_value &= ~(0xff << 16);
 		kctl->private_value |= (int)max << 16;
+		set_tlv_db_scale(kctl, find_db_scale(max));
 		snd_ac97_write_cache(ac97, AC97_CENTER_LFE_MASTER, ac97->regs[AC97_CENTER_LFE_MASTER] | max);
 	}
 
@@ -1295,6 +1327,7 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 		snd_ac97_change_volume_params2(ac97, AC97_CENTER_LFE_MASTER, 8, &max);
 		kctl->private_value &= ~(0xff << 16);
 		kctl->private_value |= (int)max << 16;
+		set_tlv_db_scale(kctl, find_db_scale(max));
 		snd_ac97_write_cache(ac97, AC97_CENTER_LFE_MASTER, ac97->regs[AC97_CENTER_LFE_MASTER] | max << 8);
 	}
 
@@ -1342,8 +1375,9 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 		((ac97->flags & AC97_HAS_PC_BEEP) ||
 	    snd_ac97_try_volume_mix(ac97, AC97_PC_BEEP))) {
 		for (idx = 0; idx < 2; idx++)
-			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_pc_beep[idx], ac97))) < 0)
+			if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_pc_beep[idx], ac97))) < 0)
 				return err;
+		set_tlv_db_scale(kctl, db_scale_4bit);
 		snd_ac97_write_cache(ac97, AC97_PC_BEEP,
 				     snd_ac97_read(ac97, AC97_PC_BEEP) | 0x801e);
 	}
@@ -1410,22 +1444,26 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 		else
 			init_val = 0x9f1f;
 		for (idx = 0; idx < 2; idx++)
-			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_pcm[idx], ac97))) < 0)
+			if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_pcm[idx], ac97))) < 0)
 				return err;
+		set_tlv_db_scale(kctl, db_scale_5bit);
 		ac97->spec.ad18xx.pcmreg[0] = init_val;
 		if (ac97->scaps & AC97_SCAP_SURROUND_DAC) {
 			for (idx = 0; idx < 2; idx++)
-				if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_surround[idx], ac97))) < 0)
+				if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_surround[idx], ac97))) < 0)
 					return err;
+			set_tlv_db_scale(kctl, db_scale_5bit);
 			ac97->spec.ad18xx.pcmreg[1] = init_val;
 		}
 		if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) {
 			for (idx = 0; idx < 2; idx++)
-				if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_center[idx], ac97))) < 0)
+				if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_center[idx], ac97))) < 0)
 					return err;
+			set_tlv_db_scale(kctl, db_scale_5bit);
 			for (idx = 0; idx < 2; idx++)
-				if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_lfe[idx], ac97))) < 0)
+				if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_lfe[idx], ac97))) < 0)
 					return err;
+			set_tlv_db_scale(kctl, db_scale_5bit);
 			ac97->spec.ad18xx.pcmreg[2] = init_val;
 		}
 		snd_ac97_write_cache(ac97, AC97_PCM, init_val);
@@ -1453,16 +1491,18 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 			if (err < 0)
 				return err;
 		}
-		if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0)
+		if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0)
 			return err;
+		set_tlv_db_scale(kctl, db_scale_rec_gain);
 		snd_ac97_write_cache(ac97, AC97_REC_SEL, 0x0000);
 		snd_ac97_write_cache(ac97, AC97_REC_GAIN, 0x0000);
 	}
 	/* build MIC Capture controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_REC_GAIN_MIC)) {
 		for (idx = 0; idx < 2; idx++)
-			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_capture[idx], ac97))) < 0)
+			if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_mic_capture[idx], ac97))) < 0)
 				return err;
+		set_tlv_db_scale(kctl, db_scale_rec_gain);
 		snd_ac97_write_cache(ac97, AC97_REC_GAIN_MIC, 0x0000);
 	}
 
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 37c6be481c4a1..392f6ccace5d2 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -32,6 +32,7 @@
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include "ac97_patch.h"
 #include "ac97_id.h"
@@ -51,6 +52,20 @@ static int patch_build_controls(struct snd_ac97 * ac97, const struct snd_kcontro
 	return 0;
 }
 
+/* replace with a new TLV */
+static void reset_tlv(struct snd_ac97 *ac97, const char *name,
+		      unsigned int *tlv)
+{
+	struct snd_ctl_elem_id sid;
+	struct snd_kcontrol *kctl;
+	memset(&sid, 0, sizeof(sid));
+	strcpy(sid.name, name);
+	sid.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+	kctl = snd_ctl_find_id(ac97->bus->card, &sid);
+	if (kctl && kctl->tlv.p)
+		kctl->tlv.p = tlv;
+}
+
 /* set to the page, update bits and restore the page */
 static int ac97_update_bits_page(struct snd_ac97 *ac97, unsigned short reg, unsigned short mask, unsigned short value, unsigned short page)
 {
@@ -1522,12 +1537,16 @@ static const struct snd_kcontrol_new snd_ac97_controls_ad1885[] = {
 	AC97_SINGLE("Line Jack Sense", AC97_AD_JACK_SPDIF, 8, 1, 1), /* inverted */
 };
 
+static DECLARE_TLV_DB_SCALE(db_scale_6bit_6db_max, -8850, 150, 0);
+
 static int patch_ad1885_specific(struct snd_ac97 * ac97)
 {
 	int err;
 
 	if ((err = patch_build_controls(ac97, snd_ac97_controls_ad1885, ARRAY_SIZE(snd_ac97_controls_ad1885))) < 0)
 		return err;
+	reset_tlv(ac97, "Headphone Playback Volume",
+		  db_scale_6bit_6db_max);
 	return 0;
 }
 
@@ -1551,12 +1570,27 @@ int patch_ad1885(struct snd_ac97 * ac97)
 	return 0;
 }
 
+static int patch_ad1886_specific(struct snd_ac97 * ac97)
+{
+	reset_tlv(ac97, "Headphone Playback Volume",
+		  db_scale_6bit_6db_max);
+	return 0;
+}
+
+static struct snd_ac97_build_ops patch_ad1886_build_ops = {
+	.build_specific = &patch_ad1886_specific,
+#ifdef CONFIG_PM
+	.resume = ad18xx_resume
+#endif
+};
+
 int patch_ad1886(struct snd_ac97 * ac97)
 {
 	patch_ad1881(ac97);
 	/* Presario700 workaround */
 	/* for Jack Sense/SPDIF Register misetting causing */
 	snd_ac97_write_cache(ac97, AC97_AD_JACK_SPDIF, 0x0010);
+	ac97->build_ops = &patch_ad1886_build_ops;
 	return 0;
 }
 
@@ -2015,6 +2049,8 @@ static const struct snd_kcontrol_new snd_ac97_spdif_controls_alc650[] = {
 	/* AC97_SINGLE("IEC958 Input Monitor", AC97_ALC650_MULTICH, 13, 1, 0), */
 };
 
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_3db_max, -4350, 150, 0);
+
 static int patch_alc650_specific(struct snd_ac97 * ac97)
 {
 	int err;
@@ -2025,6 +2061,9 @@ static int patch_alc650_specific(struct snd_ac97 * ac97)
 		if ((err = patch_build_controls(ac97, snd_ac97_spdif_controls_alc650, ARRAY_SIZE(snd_ac97_spdif_controls_alc650))) < 0)
 			return err;
 	}
+	if (ac97->id != AC97_ID_ALC650F)
+		reset_tlv(ac97, "Master Playback Volume",
+			  db_scale_5bit_3db_max);
 	return 0;
 }
 
-- 
GitLab


From 7058c042001e111c601e1b031d9bcb8b5d392b74 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 21 Aug 2006 18:44:54 +0200
Subject: [PATCH 0958/1063] [ALSA] Added TLV support to VIA82xx driver

Added TLV support to the VIA82xx driver to provide
dB range information.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/via82xx.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
index e0e3bfd7a2dbc..6db3d4cc4d8dd 100644
--- a/sound/pci/via82xx.c
+++ b/sound/pci/via82xx.c
@@ -59,6 +59,7 @@
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include <sound/mpu401.h>
 #include <sound/initval.h>
@@ -1698,21 +1699,29 @@ static int snd_via8233_pcmdxs_volume_put(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dxs, -9450, 150, 1);
+
 static struct snd_kcontrol_new snd_via8233_pcmdxs_volume_control __devinitdata = {
 	.name = "PCM Playback Volume",
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.info = snd_via8233_dxs_volume_info,
 	.get = snd_via8233_pcmdxs_volume_get,
 	.put = snd_via8233_pcmdxs_volume_put,
+	.tlv = { .p = db_scale_dxs }
 };
 
 static struct snd_kcontrol_new snd_via8233_dxs_volume_control __devinitdata = {
 	.name = "VIA DXS Playback Volume",
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.count = 4,
 	.info = snd_via8233_dxs_volume_info,
 	.get = snd_via8233_dxs_volume_get,
 	.put = snd_via8233_dxs_volume_put,
+	.tlv = { .p = db_scale_dxs }
 };
 
 /*
-- 
GitLab


From 9107226d2ca8a15534da96313a1d370fb1eb8f9e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 12:04:34 +0200
Subject: [PATCH 0959/1063] [ALSA] Add dB scale information to ak4531 codec

Added dB scale information to the ak4531 codec driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ak4531_codec.c | 49 ++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/sound/pci/ac97/ak4531_codec.c b/sound/pci/ac97/ak4531_codec.c
index 94c26ec058820..c153cb79c518a 100644
--- a/sound/pci/ac97/ak4531_codec.c
+++ b/sound/pci/ac97/ak4531_codec.c
@@ -27,6 +27,7 @@
 
 #include <sound/core.h>
 #include <sound/ak4531_codec.h>
+#include <sound/tlv.h>
 
 MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>");
 MODULE_DESCRIPTION("Universal routines for AK4531 codec");
@@ -63,6 +64,14 @@ static void snd_ak4531_dump(struct snd_ak4531 *ak4531)
   .info = snd_ak4531_info_single, \
   .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \
   .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22) }
+#define AK4531_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv)    \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
+  .info = snd_ak4531_info_single, \
+  .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \
+  .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_ak4531_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -122,6 +131,14 @@ static int snd_ak4531_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_e
   .info = snd_ak4531_info_double, \
   .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \
   .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22) }
+#define AK4531_DOUBLE_TLV(xname, xindex, left_reg, right_reg, left_shift, right_shift, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
+  .info = snd_ak4531_info_double, \
+  .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \
+  .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_ak4531_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -250,50 +267,62 @@ static int snd_ak4531_put_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_master, -6200, 200, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_mono, -2800, 400, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_input, -5000, 200, 0);
+
 static struct snd_kcontrol_new snd_ak4531_controls[] = {
 
-AK4531_DOUBLE("Master Playback Switch", 0, AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1),
+AK4531_DOUBLE_TLV("Master Playback Switch", 0,
+		  AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1,
+		  db_scale_master),
 AK4531_DOUBLE("Master Playback Volume", 0, AK4531_LMASTER, AK4531_RMASTER, 0, 0, 0x1f, 1),
 
-AK4531_SINGLE("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1),
+AK4531_SINGLE_TLV("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1,
+		  db_scale_mono),
 AK4531_SINGLE("Master Mono Playback Volume", 0, AK4531_MONO_OUT, 0, 0x07, 1),
 
 AK4531_DOUBLE("PCM Switch", 0, AK4531_LVOICE, AK4531_RVOICE, 7, 7, 1, 1),
-AK4531_DOUBLE("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("PCM Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 3, 2, 1, 0),
 AK4531_DOUBLE("PCM Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 2, 2, 1, 0),
 
 AK4531_DOUBLE("PCM Switch", 1, AK4531_LFM, AK4531_RFM, 7, 7, 1, 1),
-AK4531_DOUBLE("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("PCM Playback Switch", 1, AK4531_OUT_SW1, AK4531_OUT_SW1, 6, 5, 1, 0),
 AK4531_INPUT_SW("PCM Capture Route", 1, AK4531_LIN_SW1, AK4531_RIN_SW1, 6, 5),
 
 AK4531_DOUBLE("CD Switch", 0, AK4531_LCD, AK4531_RCD, 7, 7, 1, 1),
-AK4531_DOUBLE("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("CD Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 2, 1, 1, 0),
 AK4531_INPUT_SW("CD Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 2, 1),
 
 AK4531_DOUBLE("Line Switch", 0, AK4531_LLINE, AK4531_RLINE, 7, 7, 1, 1),
-AK4531_DOUBLE("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("Line Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 4, 3, 1, 0),
 AK4531_INPUT_SW("Line Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 4, 3),
 
 AK4531_DOUBLE("Aux Switch", 0, AK4531_LAUXA, AK4531_RAUXA, 7, 7, 1, 1),
-AK4531_DOUBLE("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("Aux Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 5, 4, 1, 0),
 AK4531_INPUT_SW("Aux Capture Route", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 4, 3),
 
 AK4531_SINGLE("Mono Switch", 0, AK4531_MONO1, 7, 1, 1),
-AK4531_SINGLE("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1),
+AK4531_SINGLE_TLV("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1, db_scale_input),
 AK4531_SINGLE("Mono Playback Switch", 0, AK4531_OUT_SW2, 0, 1, 0),
 AK4531_DOUBLE("Mono Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 0, 0, 1, 0),
 
 AK4531_SINGLE("Mono Switch", 1, AK4531_MONO2, 7, 1, 1),
-AK4531_SINGLE("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1),
+AK4531_SINGLE_TLV("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1, db_scale_input),
 AK4531_SINGLE("Mono Playback Switch", 1, AK4531_OUT_SW2, 1, 1, 0),
 AK4531_DOUBLE("Mono Capture Switch", 1, AK4531_LIN_SW2, AK4531_RIN_SW2, 1, 1, 1, 0),
 
-AK4531_SINGLE("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1),
+AK4531_SINGLE_TLV("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1, db_scale_input),
 AK4531_SINGLE("Mic Switch", 0, AK4531_MIC, 7, 1, 1),
 AK4531_SINGLE("Mic Playback Switch", 0, AK4531_OUT_SW1, 0, 1, 0),
 AK4531_DOUBLE("Mic Capture Switch", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 0, 0, 1, 0),
-- 
GitLab


From 9f6ab25063f04597e02968ae8393e8f4703c1563 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 12:14:25 +0200
Subject: [PATCH 0960/1063] [ALSA] Add dB scale information to cs4281 driver

Added dB scale information to the cs4281 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/cs4281.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c
index 9631456ec3de8..1990430a21c1d 100644
--- a/sound/pci/cs4281.c
+++ b/sound/pci/cs4281.c
@@ -33,6 +33,7 @@
 #include <sound/pcm.h>
 #include <sound/rawmidi.h>
 #include <sound/ac97_codec.h>
+#include <sound/tlv.h>
 #include <sound/opl3.h>
 #include <sound/initval.h>
 
@@ -1054,6 +1055,8 @@ static int snd_cs4281_put_volume(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dsp, -4650, 150, 0);
+
 static struct snd_kcontrol_new snd_cs4281_fm_vol = 
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1062,6 +1065,7 @@ static struct snd_kcontrol_new snd_cs4281_fm_vol =
 	.get = snd_cs4281_get_volume,
 	.put = snd_cs4281_put_volume, 
 	.private_value = ((BA0_FMLVC << 16) | BA0_FMRVC),
+	.tlv = { .p = db_scale_dsp },
 };
 
 static struct snd_kcontrol_new snd_cs4281_pcm_vol = 
@@ -1072,6 +1076,7 @@ static struct snd_kcontrol_new snd_cs4281_pcm_vol =
 	.get = snd_cs4281_get_volume,
 	.put = snd_cs4281_put_volume, 
 	.private_value = ((BA0_PPLVC << 16) | BA0_PPRVC),
+	.tlv = { .p = db_scale_dsp },
 };
 
 static void snd_cs4281_mixer_free_ac97_bus(struct snd_ac97_bus *bus)
-- 
GitLab


From 666c70ffd1c4be795de988f26a8ab13524d4ed47 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 12:32:06 +0200
Subject: [PATCH 0961/1063] [ALSA] Add dB scale information to fm801 driver

Added dB scale information to the fm801 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/fm801.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index f3f2b2c99723a..bdfda1997d5b2 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -29,6 +29,7 @@
 #include <linux/moduleparam.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include <sound/mpu401.h>
 #include <sound/opl3.h>
@@ -1053,6 +1054,13 @@ static int snd_fm801_put_single(struct snd_kcontrol *kcontrol,
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_fm801_info_double, \
   .get = snd_fm801_get_double, .put = snd_fm801_put_double, \
   .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24) }
+#define FM801_DOUBLE_TLV(xname, reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .info = snd_fm801_info_double, \
+  .get = snd_fm801_get_double, .put = snd_fm801_put_double, \
+  .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_fm801_info_double(struct snd_kcontrol *kcontrol,
 				 struct snd_ctl_elem_info *uinfo)
@@ -1149,14 +1157,19 @@ static int snd_fm801_put_mux(struct snd_kcontrol *kcontrol,
 	return snd_fm801_update_bits(chip, FM801_REC_SRC, 7, val);
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dsp, -3450, 150, 0);
+
 #define FM801_CONTROLS ARRAY_SIZE(snd_fm801_controls)
 
 static struct snd_kcontrol_new snd_fm801_controls[] __devinitdata = {
-FM801_DOUBLE("Wave Playback Volume", FM801_PCM_VOL, 0, 8, 31, 1),
+FM801_DOUBLE_TLV("Wave Playback Volume", FM801_PCM_VOL, 0, 8, 31, 1,
+		 db_scale_dsp),
 FM801_SINGLE("Wave Playback Switch", FM801_PCM_VOL, 15, 1, 1),
-FM801_DOUBLE("I2S Playback Volume", FM801_I2S_VOL, 0, 8, 31, 1),
+FM801_DOUBLE_TLV("I2S Playback Volume", FM801_I2S_VOL, 0, 8, 31, 1,
+		 db_scale_dsp),
 FM801_SINGLE("I2S Playback Switch", FM801_I2S_VOL, 15, 1, 1),
-FM801_DOUBLE("FM Playback Volume", FM801_FM_VOL, 0, 8, 31, 1),
+FM801_DOUBLE_TLV("FM Playback Volume", FM801_FM_VOL, 0, 8, 31, 1,
+		 db_scale_dsp),
 FM801_SINGLE("FM Playback Switch", FM801_FM_VOL, 15, 1, 1),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-- 
GitLab


From a0aef8edfc9d6d682dba557fe42599297cbc329a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 13:01:37 +0200
Subject: [PATCH 0962/1063] [ALSA] Add dB scale information to trident driver

Added dB scale information to the trident driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/trident/trident_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sound/pci/trident/trident_main.c b/sound/pci/trident/trident_main.c
index 4930cc6b054d6..ebbe12d78d8c6 100644
--- a/sound/pci/trident/trident_main.c
+++ b/sound/pci/trident/trident_main.c
@@ -40,6 +40,7 @@
 #include <sound/core.h>
 #include <sound/info.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/trident.h>
 #include <sound/asoundef.h>
 
@@ -2627,6 +2628,8 @@ static int snd_trident_vol_control_get(struct snd_kcontrol *kcontrol,
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_gvol, -6375, 25, 0);
+
 static int snd_trident_vol_control_put(struct snd_kcontrol *kcontrol,
 				       struct snd_ctl_elem_value *ucontrol)
 {
@@ -2653,6 +2656,7 @@ static struct snd_kcontrol_new snd_trident_vol_music_control __devinitdata =
 	.get =		snd_trident_vol_control_get,
 	.put =		snd_trident_vol_control_put,
 	.private_value = 16,
+	.tlv = { .p = db_scale_gvol },
 };
 
 static struct snd_kcontrol_new snd_trident_vol_wave_control __devinitdata =
@@ -2663,6 +2667,7 @@ static struct snd_kcontrol_new snd_trident_vol_wave_control __devinitdata =
 	.get =		snd_trident_vol_control_get,
 	.put =		snd_trident_vol_control_put,
 	.private_value = 0,
+	.tlv = { .p = db_scale_gvol },
 };
 
 /*---------------------------------------------------------------------------
@@ -2730,6 +2735,7 @@ static struct snd_kcontrol_new snd_trident_pcm_vol_control __devinitdata =
 	.info =		snd_trident_pcm_vol_control_info,
 	.get =		snd_trident_pcm_vol_control_get,
 	.put =		snd_trident_pcm_vol_control_put,
+	/* FIXME: no tlv yet */
 };
 
 /*---------------------------------------------------------------------------
@@ -2839,6 +2845,8 @@ static int snd_trident_pcm_rvol_control_put(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_crvol, -3175, 25, 1);
+
 static struct snd_kcontrol_new snd_trident_pcm_rvol_control __devinitdata =
 {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -2848,6 +2856,7 @@ static struct snd_kcontrol_new snd_trident_pcm_rvol_control __devinitdata =
 	.info =		snd_trident_pcm_rvol_control_info,
 	.get =		snd_trident_pcm_rvol_control_get,
 	.put =		snd_trident_pcm_rvol_control_put,
+	.tlv = { .p = db_scale_crvol },
 };
 
 /*---------------------------------------------------------------------------
@@ -2903,6 +2912,7 @@ static struct snd_kcontrol_new snd_trident_pcm_cvol_control __devinitdata =
 	.info =		snd_trident_pcm_cvol_control_info,
 	.get =		snd_trident_pcm_cvol_control_get,
 	.put =		snd_trident_pcm_cvol_control_put,
+	.tlv = { .p = db_scale_crvol },
 };
 
 static void snd_trident_notify_pcm_change1(struct snd_card *card,
-- 
GitLab


From fb567a8e4f077b7b084c0558706339c35a4fb186 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 13:07:19 +0200
Subject: [PATCH 0963/1063] [ALSA] Add dB scale information to dummy driver

Added dB scale information to the dummy driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/drivers/dummy.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index 73b16134a4342..42001efa9f3ec 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -29,6 +29,7 @@
 #include <linux/moduleparam.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/pcm.h>
 #include <sound/rawmidi.h>
 #include <sound/initval.h>
@@ -443,10 +444,13 @@ static int __init snd_card_dummy_pcm(struct snd_dummy *dummy, int device, int su
 }
 
 #define DUMMY_VOLUME(xname, xindex, addr) \
-{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
   .info = snd_dummy_volume_info, \
   .get = snd_dummy_volume_get, .put = snd_dummy_volume_put, \
-  .private_value = addr }
+  .private_value = addr, \
+  .tlv = { .p = db_scale_dummy } }
 
 static int snd_dummy_volume_info(struct snd_kcontrol *kcontrol,
 				 struct snd_ctl_elem_info *uinfo)
@@ -497,6 +501,8 @@ static int snd_dummy_volume_put(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dummy, -4500, 30, 0);
+
 #define DUMMY_CAPSRC(xname, xindex, addr) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
   .info = snd_dummy_capsrc_info, \
-- 
GitLab


From 0e7febf15851fb438b9518654340d1f704d202e5 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 22 Aug 2006 13:16:01 +0200
Subject: [PATCH 0964/1063] [ALSA] Add dB scale information to ad1816a driver

Added dB scale information to the ad1816a driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/isa/ad1816a/ad1816a_lib.c | 55 ++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/sound/isa/ad1816a/ad1816a_lib.c b/sound/isa/ad1816a/ad1816a_lib.c
index 8fcf2c151823c..fd9b61eda0f37 100644
--- a/sound/isa/ad1816a/ad1816a_lib.c
+++ b/sound/isa/ad1816a/ad1816a_lib.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/ioport.h>
 #include <sound/core.h>
+#include <sound/tlv.h>
 #include <sound/ad1816a.h>
 
 #include <asm/io.h>
@@ -765,6 +766,13 @@ static int snd_ad1816a_put_mux(struct snd_kcontrol *kcontrol, struct snd_ctl_ele
 	return change;
 }
 
+#define AD1816A_SINGLE_TLV(xname, reg, shift, mask, invert, xtlv)	\
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .info = snd_ad1816a_info_single, \
+  .get = snd_ad1816a_get_single, .put = snd_ad1816a_put_single, \
+  .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }
 #define AD1816A_SINGLE(xname, reg, shift, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_ad1816a_info_single, \
   .get = snd_ad1816a_get_single, .put = snd_ad1816a_put_single, \
@@ -822,6 +830,14 @@ static int snd_ad1816a_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_
 	return change;
 }
 
+#define AD1816A_DOUBLE_TLV(xname, reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .info = snd_ad1816a_info_double,		\
+  .get = snd_ad1816a_get_double, .put = snd_ad1816a_put_double, \
+  .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }
+
 #define AD1816A_DOUBLE(xname, reg, shift_left, shift_right, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_ad1816a_info_double, \
   .get = snd_ad1816a_get_double, .put = snd_ad1816a_put_double, \
@@ -890,28 +906,44 @@ static int snd_ad1816a_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_4bit, -4500, 300, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit, -4650, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0);
+
 static struct snd_kcontrol_new snd_ad1816a_controls[] __devinitdata = {
 AD1816A_DOUBLE("Master Playback Switch", AD1816A_MASTER_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Master Playback Volume", AD1816A_MASTER_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Master Playback Volume", AD1816A_MASTER_ATT, 8, 0, 31, 1,
+		   db_scale_5bit),
 AD1816A_DOUBLE("PCM Playback Switch", AD1816A_VOICE_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("PCM Playback Volume", AD1816A_VOICE_ATT, 8, 0, 63, 1),
+AD1816A_DOUBLE_TLV("PCM Playback Volume", AD1816A_VOICE_ATT, 8, 0, 63, 1,
+		   db_scale_6bit),
 AD1816A_DOUBLE("Line Playback Switch", AD1816A_LINE_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Line Playback Volume", AD1816A_LINE_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Line Playback Volume", AD1816A_LINE_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_DOUBLE("CD Playback Switch", AD1816A_CD_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("CD Playback Volume", AD1816A_CD_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("CD Playback Volume", AD1816A_CD_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_DOUBLE("Synth Playback Switch", AD1816A_SYNTH_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Synth Playback Volume", AD1816A_SYNTH_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Synth Playback Volume", AD1816A_SYNTH_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_DOUBLE("FM Playback Switch", AD1816A_FM_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("FM Playback Volume", AD1816A_FM_ATT, 8, 0, 63, 1),
+AD1816A_DOUBLE_TLV("FM Playback Volume", AD1816A_FM_ATT, 8, 0, 63, 1,
+		   db_scale_6bit),
 AD1816A_SINGLE("Mic Playback Switch", AD1816A_MIC_GAIN_ATT, 15, 1, 1),
-AD1816A_SINGLE("Mic Playback Volume", AD1816A_MIC_GAIN_ATT, 8, 31, 1),
+AD1816A_SINGLE_TLV("Mic Playback Volume", AD1816A_MIC_GAIN_ATT, 8, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_SINGLE("Mic Boost", AD1816A_MIC_GAIN_ATT, 14, 1, 0),
 AD1816A_DOUBLE("Video Playback Switch", AD1816A_VID_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Video Playback Volume", AD1816A_VID_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Video Playback Volume", AD1816A_VID_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_SINGLE("Phone Capture Switch", AD1816A_PHONE_IN_GAIN_ATT, 15, 1, 1),
-AD1816A_SINGLE("Phone Capture Volume", AD1816A_PHONE_IN_GAIN_ATT, 0, 15, 1),
+AD1816A_SINGLE_TLV("Phone Capture Volume", AD1816A_PHONE_IN_GAIN_ATT, 0, 15, 1,
+		   db_scale_4bit),
 AD1816A_SINGLE("Phone Playback Switch", AD1816A_PHONE_OUT_ATT, 7, 1, 1),
-AD1816A_SINGLE("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1),
+AD1816A_SINGLE_TLV("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1,
+		   db_scale_5bit),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Capture Source",
@@ -920,7 +952,8 @@ AD1816A_SINGLE("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1),
 	.put = snd_ad1816a_put_mux,
 },
 AD1816A_DOUBLE("Capture Switch", AD1816A_ADC_PGA, 15, 7, 1, 1),
-AD1816A_DOUBLE("Capture Volume", AD1816A_ADC_PGA, 8, 0, 15, 0),
+AD1816A_DOUBLE_TLV("Capture Volume", AD1816A_ADC_PGA, 8, 0, 15, 0,
+		   db_scale_rec_gain),
 AD1816A_SINGLE("3D Control - Switch", AD1816A_3D_PHAT_CTRL, 15, 1, 1),
 AD1816A_SINGLE("3D Control - Level", AD1816A_3D_PHAT_CTRL, 0, 15, 0),
 };
-- 
GitLab


From eac06a10d2b814dfacc36a8fff35ef07bf4eec8e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 22 Aug 2006 13:16:25 +0200
Subject: [PATCH 0965/1063] [ALSA] Add dB scale information to ad1848 driver

Added the dB scale information to ad1848 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/ad1848.h        | 22 +++++++++++-----
 sound/isa/ad1848/ad1848_lib.c | 49 ++++++++++++++++++++++++-----------
 2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/include/sound/ad1848.h b/include/sound/ad1848.h
index 57af1fe7b3099..c8de6f83338f6 100644
--- a/include/sound/ad1848.h
+++ b/include/sound/ad1848.h
@@ -179,14 +179,13 @@ enum { AD1848_MIX_SINGLE, AD1848_MIX_DOUBLE, AD1848_MIX_CAPTURE };
 #define AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert) \
 	((left_reg) | ((right_reg) << 8) | ((shift_left) << 16) | ((shift_right) << 19) | ((mask) << 24) | ((invert) << 22))
 
-int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int type, unsigned long value);
-
 /* for ease of use */
 struct ad1848_mix_elem {
 	const char *name;
 	int index;
 	int type;
 	unsigned long private_value;
+	unsigned int *tlv;
 };
 
 #define AD1848_SINGLE(xname, xindex, reg, shift, mask, invert) \
@@ -195,15 +194,26 @@ struct ad1848_mix_elem {
   .type = AD1848_MIX_SINGLE, \
   .private_value = AD1848_MIXVAL_SINGLE(reg, shift, mask, invert) }
 
+#define AD1848_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \
+{ .name = xname, \
+  .index = xindex, \
+  .type = AD1848_MIX_SINGLE, \
+  .private_value = AD1848_MIXVAL_SINGLE(reg, shift, mask, invert), \
+  .tlv = xtlv }
+
 #define AD1848_DOUBLE(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert) \
 { .name = xname, \
   .index = xindex, \
   .type = AD1848_MIX_DOUBLE, \
   .private_value = AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert) }
 
-static inline int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip, const struct ad1848_mix_elem *c)
-{
-	return snd_ad1848_add_ctl(chip, c->name, c->index, c->type, c->private_value);
-}
+#define AD1848_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .name = xname, \
+  .index = xindex, \
+  .type = AD1848_MIX_DOUBLE, \
+  .private_value = AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert), \
+  .tlv = xtlv }
+
+int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip, const struct ad1848_mix_elem *c);
 
 #endif /* __SOUND_AD1848_H */
diff --git a/sound/isa/ad1848/ad1848_lib.c b/sound/isa/ad1848/ad1848_lib.c
index e711f87d5fd1a..a6fbd5d1d62f2 100644
--- a/sound/isa/ad1848/ad1848_lib.c
+++ b/sound/isa/ad1848/ad1848_lib.c
@@ -29,6 +29,7 @@
 #include <sound/core.h>
 #include <sound/ad1848.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/pcm_params.h>
 
 #include <asm/io.h>
@@ -118,6 +119,8 @@ void snd_ad1848_out(struct snd_ad1848 *chip,
 #endif
 }
 
+EXPORT_SYMBOL(snd_ad1848_out);
+
 static void snd_ad1848_dout(struct snd_ad1848 *chip,
 			    unsigned char reg, unsigned char value)
 {
@@ -941,6 +944,8 @@ int snd_ad1848_create(struct snd_card *card,
 	return 0;
 }
 
+EXPORT_SYMBOL(snd_ad1848_create);
+
 static struct snd_pcm_ops snd_ad1848_playback_ops = {
 	.open =		snd_ad1848_playback_open,
 	.close =	snd_ad1848_playback_close,
@@ -988,12 +993,16 @@ int snd_ad1848_pcm(struct snd_ad1848 *chip, int device, struct snd_pcm **rpcm)
 	return 0;
 }
 
+EXPORT_SYMBOL(snd_ad1848_pcm);
+
 const struct snd_pcm_ops *snd_ad1848_get_pcm_ops(int direction)
 {
 	return direction == SNDRV_PCM_STREAM_PLAYBACK ?
 		&snd_ad1848_playback_ops : &snd_ad1848_capture_ops;
 }
 
+EXPORT_SYMBOL(snd_ad1848_get_pcm_ops);
+
 /*
  *  MIXER part
  */
@@ -1171,7 +1180,8 @@ static int snd_ad1848_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_e
 
 /*
  */
-int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int type, unsigned long value)
+int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip,
+			    const struct ad1848_mix_elem *c)
 {
 	static struct snd_kcontrol_new newctls[] = {
 		[AD1848_MIX_SINGLE] = {
@@ -1196,32 +1206,46 @@ int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int
 	struct snd_kcontrol *ctl;
 	int err;
 
-	ctl = snd_ctl_new1(&newctls[type], chip);
+	ctl = snd_ctl_new1(&newctls[c->type], chip);
 	if (! ctl)
 		return -ENOMEM;
-	strlcpy(ctl->id.name, name, sizeof(ctl->id.name));
-	ctl->id.index = index;
-	ctl->private_value = value;
+	strlcpy(ctl->id.name, c->name, sizeof(ctl->id.name));
+	ctl->id.index = c->index;
+	ctl->private_value = c->private_value;
+	if (c->tlv) {
+		ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+		ctl->tlv.p = c->tlv;
+	}
 	if ((err = snd_ctl_add(chip->card, ctl)) < 0)
 		return err;
 	return 0;
 }
 
+EXPORT_SYMBOL(snd_ad1848_add_ctl_elem);
+
+static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0);
 
 static struct ad1848_mix_elem snd_ad1848_controls[] = {
 AD1848_DOUBLE("PCM Playback Switch", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 7, 7, 1, 1),
-AD1848_DOUBLE("PCM Playback Volume", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 0, 0, 63, 1),
+AD1848_DOUBLE_TLV("PCM Playback Volume", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 0, 0, 63, 1,
+		  db_scale_6bit),
 AD1848_DOUBLE("Aux Playback Switch", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 7, 7, 1, 1),
-AD1848_DOUBLE("Aux Playback Volume", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 0, 0, 31, 1),
+AD1848_DOUBLE_TLV("Aux Playback Volume", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 0, 0, 31, 1,
+		  db_scale_5bit_12db_max),
 AD1848_DOUBLE("Aux Playback Switch", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 7, 7, 1, 1),
-AD1848_DOUBLE("Aux Playback Volume", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 0, 0, 31, 1),
-AD1848_DOUBLE("Capture Volume", 0, AD1848_LEFT_INPUT, AD1848_RIGHT_INPUT, 0, 0, 15, 0),
+AD1848_DOUBLE_TLV("Aux Playback Volume", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 0, 0, 31, 1,
+		  db_scale_5bit_12db_max),
+AD1848_DOUBLE_TLV("Capture Volume", 0, AD1848_LEFT_INPUT, AD1848_RIGHT_INPUT, 0, 0, 15, 0,
+		  db_scale_rec_gain),
 {
 	.name = "Capture Source",
 	.type = AD1848_MIX_CAPTURE,
 },
 AD1848_SINGLE("Loopback Capture Switch", 0, AD1848_LOOPBACK, 0, 1, 0),
-AD1848_SINGLE("Loopback Capture Volume", 0, AD1848_LOOPBACK, 1, 63, 0)
+AD1848_SINGLE_TLV("Loopback Capture Volume", 0, AD1848_LOOPBACK, 1, 63, 0,
+		  db_scale_6bit),
 };
                                         
 int snd_ad1848_mixer(struct snd_ad1848 *chip)
@@ -1245,12 +1269,7 @@ int snd_ad1848_mixer(struct snd_ad1848 *chip)
 	return 0;
 }
 
-EXPORT_SYMBOL(snd_ad1848_out);
-EXPORT_SYMBOL(snd_ad1848_create);
-EXPORT_SYMBOL(snd_ad1848_pcm);
-EXPORT_SYMBOL(snd_ad1848_get_pcm_ops);
 EXPORT_SYMBOL(snd_ad1848_mixer);
-EXPORT_SYMBOL(snd_ad1848_add_ctl);
 
 /*
  *  INIT part
-- 
GitLab


From a1c7a7d890634eaec106e5fb3a7d9c92b8f85b0d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 22 Aug 2006 13:16:39 +0200
Subject: [PATCH 0966/1063] [ALSA] Add dB scale information to opl3sa2 driver

Added the dB scale information to opl3sa2 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/isa/opl3sa2.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c
index 4031b61b797fe..da92bf6c392b8 100644
--- a/sound/isa/opl3sa2.c
+++ b/sound/isa/opl3sa2.c
@@ -33,6 +33,7 @@
 #include <sound/mpu401.h>
 #include <sound/opl3.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 #include <asm/io.h>
 
@@ -337,6 +338,14 @@ static irqreturn_t snd_opl3sa2_interrupt(int irq, void *dev_id, struct pt_regs *
   .info = snd_opl3sa2_info_single, \
   .get = snd_opl3sa2_get_single, .put = snd_opl3sa2_put_single, \
   .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24) }
+#define OPL3SA2_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
+  .info = snd_opl3sa2_info_single, \
+  .get = snd_opl3sa2_get_single, .put = snd_opl3sa2_put_single, \
+  .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_opl3sa2_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -395,6 +404,14 @@ static int snd_opl3sa2_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_
   .info = snd_opl3sa2_info_double, \
   .get = snd_opl3sa2_get_double, .put = snd_opl3sa2_put_double, \
   .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22) }
+#define OPL3SA2_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
+  .info = snd_opl3sa2_info_double, \
+  .get = snd_opl3sa2_get_double, .put = snd_opl3sa2_put_double, \
+  .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_opl3sa2_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -469,11 +486,16 @@ static int snd_opl3sa2_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_master, -3000, 200, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+
 static struct snd_kcontrol_new snd_opl3sa2_controls[] = {
 OPL3SA2_DOUBLE("Master Playback Switch", 0, 0x07, 0x08, 7, 7, 1, 1),
-OPL3SA2_DOUBLE("Master Playback Volume", 0, 0x07, 0x08, 0, 0, 15, 1),
+OPL3SA2_DOUBLE_TLV("Master Playback Volume", 0, 0x07, 0x08, 0, 0, 15, 1,
+		   db_scale_master),
 OPL3SA2_SINGLE("Mic Playback Switch", 0, 0x09, 7, 1, 1),
-OPL3SA2_SINGLE("Mic Playback Volume", 0, 0x09, 0, 31, 1)
+OPL3SA2_SINGLE_TLV("Mic Playback Volume", 0, 0x09, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 };
 
 static struct snd_kcontrol_new snd_opl3sa2_tone_controls[] = {
-- 
GitLab


From bab282b912baf372d8f705357946ef691b621899 Mon Sep 17 00:00:00 2001
From: Vladimir Avdonin <vldmrrr@yahoo.com>
Date: Tue, 22 Aug 2006 13:31:58 +0200
Subject: [PATCH 0967/1063] [ALSA] hda-codec - Fix for Acer laptops with ALC883
 codec

Patch enables the internal speaker on Acer laptops with ALC883.

Signed-off-by: Vladimir Avdonin <vldmrrr@yahoo.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         |  1 +
 sound/pci/hda/patch_realtek.c                 | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index d7e95f1445693..504ebbceafbc1 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -820,6 +820,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  3stack-6ch    3-jack 6-channel
 	  3stack-6ch-dig 3-jack 6-channel with SPDIF I/O
 	  6stack-dig-demo  6-jack digital for Intel demo board
+	  acer		Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
 	  auto		auto-config reading BIOS (default)
 
 	ALC861/660
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 53aa57f5a1a11..65903812b307a 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -111,6 +111,7 @@ enum {
 	ALC883_3ST_6ch,
 	ALC883_6ST_DIG,
 	ALC888_DEMO_BOARD,
+	ALC883_ACER,
 	ALC883_AUTO,
 	ALC883_MODEL_LAST,
 };
@@ -5069,6 +5070,9 @@ static struct hda_board_config alc883_cfg_tbl[] = {
 	{ .pci_subvendor = 0x105b, .pci_subdevice = 0x6668,
 	  .config = ALC883_6ST_DIG }, /* Foxconn */
 	{ .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD },
+	{ .modelname = "acer", .config = ALC883_ACER },
+	{ .pci_subvendor = 0x1025, .pci_subdevice = 0/*0x0102*/,
+	  .config = ALC883_ACER },
 	{ .modelname = "auto", .config = ALC883_AUTO },
 	{}
 };
@@ -5139,6 +5143,23 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc883_sixstack_modes,
 		.input_mux = &alc883_capture_source,
 	},
+	[ALC883_ACER] = {
+		.mixers = { alc883_base_mixer,
+			    alc883_chmode_mixer },
+		/* On TravelMate laptops, GPIO 0 enables the internal speaker
+		 * and the headphone jack.  Turn this on and rely on the
+		 * standard mute methods whenever the user wants to turn
+		 * these outputs off.
+		 */
+		.init_verbs = { alc883_init_verbs, alc880_gpio1_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids),
+		.adc_nids = alc883_adc_nids,
+		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
+		.channel_mode = alc883_3ST_2ch_modes,
+		.input_mux = &alc883_capture_source,
+	},
 };
 
 
-- 
GitLab


From 7b89190cf6ecd5075c272b4ec12f65a4ce45a762 Mon Sep 17 00:00:00 2001
From: Magnus Sandin <magnus@sandin.cx>
Date: Tue, 22 Aug 2006 13:33:12 +0200
Subject: [PATCH 0968/1063] [ALSA] ac97 - Enable S/PDIF on ASUS P5P800-VM mobo

This patch forces building the S/PDIF controls when the subsystem ID
matches the ASUS P5P800-VM motherboard, even if the AC97_EI_SPDIF bit
is not set.

Signed-off-by: Magnus Sandin <magnus@sandin.cx>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ac97_codec.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index c47f43dbd664e..a79e91850ba36 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -1573,6 +1573,12 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97)
 	}
 
 	/* build S/PDIF controls */
+
+	/* Hack for ASUS P5P800-VM, which does not indicate S/PDIF capability */
+	if (ac97->subsystem_vendor == 0x1043 &&
+	    ac97->subsystem_device == 0x810f)
+		ac97->ext_id |= AC97_EI_SPDIF;
+
 	if ((ac97->ext_id & AC97_EI_SPDIF) && !(ac97->scaps & AC97_SCAP_NO_SPDIF)) {
 		if (ac97->build_ops->build_spdif) {
 			if ((err = ac97->build_ops->build_spdif(ac97)) < 0)
-- 
GitLab


From 6d8590650eb81d2c869c7adf4b469071cec11eee Mon Sep 17 00:00:00 2001
From: Guillaume Munch <diabo@free.fr>
Date: Tue, 22 Aug 2006 17:15:47 +0200
Subject: [PATCH 0969/1063] [ALSA] hda-codec - Support for SigmaTel 9872

- AR11M and AR11S use the same chip, hence we claim to support the AR series.
- Added commentary about STAC9225s which shares the same id as CXD9872RD.
- Added entry for 7662 but won't work automatically until pci_subdevice
  is known.
- 'vaio' model now corresponds to CXD9872RD_VAIO for backward compat.
- Replaced STAC766x_VAIO with CXD9872RD_VAIO, STAC9872AK_VAIO, STAC9872K_VAIO
  and CXD9872AKD_VAIO
- Added 'vaio-ar' model for potential future modifications.

Signed-off-by: Guillaume Munch <diabo@free.fr>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         |   5 +-
 sound/pci/hda/patch_sigmatel.c                | 109 +++++++++++++++---
 2 files changed, 97 insertions(+), 17 deletions(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 504ebbceafbc1..48d3bdf2a7cd8 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -859,8 +859,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  laptop-dig	ditto with SPDIF
 	  auto		auto-config reading BIOS (default)
 
-	STAC7664/7661(?)
-	  vaio		Setup for VAIO FE550G/SZ110/AR11B
+	STAC9872
+	  vaio		Setup for VAIO FE550G/SZ110
+	  vaio-ar Setup for VAIO AR
 
     If the default configuration doesn't work and one of the above
     matches with your device, report it together with the PCI
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 73ca566e9eb75..139d73e18a3c9 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1563,7 +1563,7 @@ static int patch_stac9205(struct hda_codec *codec)
 }
 
 /*
- * STAC 7661(?) and 7664 hack
+ * STAC9872 hack
  */
 
 /* static config for Sony VAIO FE550G and Sony VAIO AR */
@@ -1597,6 +1597,23 @@ static struct hda_verb vaio_init[] = {
 	{}
 };
 
+static struct hda_verb vaio_ar_init[] = {
+	{0x0a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP }, /* HP <- 0x2 */
+	{0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT }, /* Speaker <- 0x5 */
+	{0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 }, /* Mic? (<- 0x2) */
+	{0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN }, /* CD */
+/*	{0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },*/ /* Optical Out */
+	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 }, /* Mic? */
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x2}, /* mic-sel: 0a,0d,14,02 */
+	{0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* HP */
+	{0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* Speaker */
+/*	{0x10, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},*/ /* Optical Out */
+	{0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, /* capture sw/vol -> 0x8 */
+	{0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, /* CD-in -> 0x6 */
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Mic-in -> 0x9 */
+	{}
+};
+
 /* bind volumes of both NID 0x02 and 0x05 */
 static int vaio_master_vol_put(struct snd_kcontrol *kcontrol,
 			       struct snd_ctl_elem_value *ucontrol)
@@ -1667,7 +1684,40 @@ static struct snd_kcontrol_new vaio_mixer[] = {
 	{}
 };
 
-static struct hda_codec_ops stac766x_patch_ops = {
+static struct snd_kcontrol_new vaio_ar_mixer[] = {
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Master Playback Volume",
+		.info = snd_hda_mixer_amp_volume_info,
+		.get = snd_hda_mixer_amp_volume_get,
+		.put = vaio_master_vol_put,
+		.private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT),
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Master Playback Switch",
+		.info = snd_hda_mixer_amp_switch_info,
+		.get = snd_hda_mixer_amp_switch_get,
+		.put = vaio_master_sw_put,
+		.private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT),
+	},
+	/* HDA_CODEC_VOLUME("CD Capture Volume", 0x07, 0, HDA_INPUT), */
+	HDA_CODEC_VOLUME("Capture Volume", 0x09, 0, HDA_INPUT),
+	HDA_CODEC_MUTE("Capture Switch", 0x09, 0, HDA_INPUT),
+	/*HDA_CODEC_MUTE("Optical Out Switch", 0x10, 0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Optical Out Volume", 0x10, 0, HDA_OUTPUT),*/
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Capture Source",
+		.count = 1,
+		.info = stac92xx_mux_enum_info,
+		.get = stac92xx_mux_enum_get,
+		.put = stac92xx_mux_enum_put,
+	},
+	{}
+};
+
+static struct hda_codec_ops stac9872_patch_ops = {
 	.build_controls = stac92xx_build_controls,
 	.build_pcms = stac92xx_build_pcms,
 	.init = stac92xx_init,
@@ -1677,25 +1727,34 @@ static struct hda_codec_ops stac766x_patch_ops = {
 #endif
 };
 
-enum { STAC766x_VAIO };
-
-static struct hda_board_config stac766x_cfg_tbl[] = {
-	{ .modelname = "vaio", .config = STAC766x_VAIO },
+enum { /* FE and SZ series. id=0x83847661 and subsys=0x104D0700 or 104D1000. */
+       CXD9872RD_VAIO,
+       /* Unknown. id=0x83847662 and subsys=0x104D1200 or 104D1000. */
+       STAC9872AK_VAIO, 
+       /* Unknown. id=0x83847661 and subsys=0x104D1200. */
+       STAC9872K_VAIO,
+       /* AR Series. id=0x83847664 and subsys=104D1300 */
+       CXD9872AKD_VAIO 
+     };
+
+static struct hda_board_config stac9872_cfg_tbl[] = {
+	{ .modelname = "vaio", .config = CXD9872RD_VAIO },
+	{ .modelname = "vaio-ar", .config = CXD9872AKD_VAIO },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81e6,
-	  .config = STAC766x_VAIO },
+	  .config = CXD9872RD_VAIO },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81ef,
-	  .config = STAC766x_VAIO },
+	  .config = CXD9872RD_VAIO },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81fd,
-	  .config = STAC766x_VAIO },
+	  .config = CXD9872AKD_VAIO },
 	{}
 };
 
-static int patch_stac766x(struct hda_codec *codec)
+static int patch_stac9872(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec;
 	int board_config;
 
-	board_config = snd_hda_check_board_config(codec, stac766x_cfg_tbl);
+	board_config = snd_hda_check_board_config(codec, stac9872_cfg_tbl);
 	if (board_config < 0)
 		/* unknown config, let generic-parser do its job... */
 		return snd_hda_parse_generic_codec(codec);
@@ -1706,7 +1765,9 @@ static int patch_stac766x(struct hda_codec *codec)
 
 	codec->spec = spec;
 	switch (board_config) {
-	case STAC766x_VAIO:
+	case CXD9872RD_VAIO:
+	case STAC9872AK_VAIO:
+	case STAC9872K_VAIO:
 		spec->mixer = vaio_mixer;
 		spec->init = vaio_init;
 		spec->multiout.max_channels = 2;
@@ -1718,9 +1779,22 @@ static int patch_stac766x(struct hda_codec *codec)
 		spec->input_mux = &vaio_mux;
 		spec->mux_nids = vaio_mux_nids;
 		break;
+	
+	case CXD9872AKD_VAIO:
+		spec->mixer = vaio_ar_mixer;
+		spec->init = vaio_ar_init;
+		spec->multiout.max_channels = 2;
+		spec->multiout.num_dacs = ARRAY_SIZE(vaio_dacs);
+		spec->multiout.dac_nids = vaio_dacs;
+		spec->multiout.hp_nid = VAIO_HP_DAC;
+		spec->num_adcs = ARRAY_SIZE(vaio_adcs);
+		spec->adc_nids = vaio_adcs;
+		spec->input_mux = &vaio_mux;
+		spec->mux_nids = vaio_mux_nids;
+		break;
 	}
 
-	codec->patch_ops = stac766x_patch_ops;
+	codec->patch_ops = stac9872_patch_ops;
 	return 0;
 }
 
@@ -1752,7 +1826,13 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
  	{ .id = 0x83847627, .name = "STAC9271D", .patch = patch_stac927x },
  	{ .id = 0x83847628, .name = "STAC9274X5NH", .patch = patch_stac927x },
  	{ .id = 0x83847629, .name = "STAC9274D5NH", .patch = patch_stac927x },
- 	{ .id = 0x83847661, .name = "STAC7661", .patch = patch_stac766x },
+ 	/* The following does not take into account .id=0x83847661 when subsys =
+ 	 * 104D0C00 which is STAC9225s. Because of this, some SZ Notebooks are
+ 	 * currently not fully supported.
+ 	 */
+ 	{ .id = 0x83847661, .name = "CXD9872RD/K", .patch = patch_stac9872 },
+ 	{ .id = 0x83847662, .name = "STAC9872AK", .patch = patch_stac9872 },
+ 	{ .id = 0x83847664, .name = "CXD9872AKD", .patch = patch_stac9872 },
  	{ .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 },
  	{ .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 },
  	{ .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 },
@@ -1761,6 +1841,5 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
  	{ .id = 0x838476a5, .name = "STAC9255D", .patch = patch_stac9205 },
  	{ .id = 0x838476a6, .name = "STAC9254", .patch = patch_stac9205 },
  	{ .id = 0x838476a7, .name = "STAC9254D", .patch = patch_stac9205 },
- 	{ .id = 0x83847664, .name = "STAC7664", .patch = patch_stac766x },
 	{} /* terminator */
 };
-- 
GitLab


From 11b44bbde52b6c50ed8c9ba579d7ee9ff5b48cd8 Mon Sep 17 00:00:00 2001
From: Richard Fish <bigfish@asmallpond.org>
Date: Wed, 23 Aug 2006 18:31:34 +0200
Subject: [PATCH 0970/1063] [ALSA] hda-codec - restore HDA sigmatel pin configs
 on resume

This patch restores the Intel HDA Sigmatel codec pin configuration on
resume.  Most of it is dedicated to saving the BIOS pin configuration
if necessary, so that even unrecognized chips can be resumed correctly.

Signed-off-by: Richard Fish <bigfish@asmallpond.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 100 ++++++++++++++++++++++++++-------
 1 file changed, 79 insertions(+), 21 deletions(-)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 139d73e18a3c9..239ae3fad0540 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -73,6 +73,7 @@ struct sigmatel_spec {
 	hda_nid_t *pin_nids;
 	unsigned int num_pins;
 	unsigned int *pin_configs;
+	unsigned int *bios_pin_configs;
 
 	/* codec specific stuff */
 	struct hda_verb *init;
@@ -584,13 +585,42 @@ static struct hda_board_config stac9205_cfg_tbl[] = {
 	{} /* terminator */
 };
 
+static int stac92xx_save_bios_config_regs(struct hda_codec *codec)
+{
+	int i;
+	struct sigmatel_spec *spec = codec->spec;
+	
+	if (! spec->bios_pin_configs) {
+		spec->bios_pin_configs = kcalloc(spec->num_pins,
+		                                 sizeof(*spec->bios_pin_configs), GFP_KERNEL);
+		if (! spec->bios_pin_configs)
+			return -ENOMEM;
+	}
+	
+	for (i = 0; i < spec->num_pins; i++) {
+		hda_nid_t nid = spec->pin_nids[i];
+		unsigned int pin_cfg;
+		
+		pin_cfg = snd_hda_codec_read(codec, nid, 0, 
+			AC_VERB_GET_CONFIG_DEFAULT, 0x00);	
+		snd_printdd(KERN_INFO "hda_codec: pin nid %2.2x bios pin config %8.8x\n",
+					nid, pin_cfg);
+		spec->bios_pin_configs[i] = pin_cfg;
+	}
+	
+	return 0;
+}
+
 static void stac92xx_set_config_regs(struct hda_codec *codec)
 {
 	int i;
 	struct sigmatel_spec *spec = codec->spec;
 	unsigned int pin_cfg;
 
-	for (i=0; i < spec->num_pins; i++) {
+	if (! spec->pin_nids || ! spec->pin_configs)
+		return;
+
+	for (i = 0; i < spec->num_pins; i++) {
 		snd_hda_codec_write(codec, spec->pin_nids[i], 0,
 				    AC_VERB_SET_CONFIG_DEFAULT_BYTES_0,
 				    spec->pin_configs[i] & 0x000000ff);
@@ -1302,6 +1332,9 @@ static void stac92xx_free(struct hda_codec *codec)
 		kfree(spec->kctl_alloc);
 	}
 
+	if (spec->bios_pin_configs)
+		kfree(spec->bios_pin_configs);
+
 	kfree(spec);
 }
 
@@ -1359,6 +1392,7 @@ static int stac92xx_resume(struct hda_codec *codec)
 	int i;
 
 	stac92xx_init(codec);
+	stac92xx_set_config_regs(codec);
 	for (i = 0; i < spec->num_mixers; i++)
 		snd_hda_resume_ctls(codec, spec->mixers[i]);
 	if (spec->multiout.dig_out_nid)
@@ -1391,12 +1425,18 @@ static int patch_stac9200(struct hda_codec *codec)
 		return -ENOMEM;
 
 	codec->spec = spec;
+	spec->num_pins = 8;
+	spec->pin_nids = stac9200_pin_nids;
 	spec->board_config = snd_hda_check_board_config(codec, stac9200_cfg_tbl);
-	if (spec->board_config < 0)
-                snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9200, using BIOS defaults\n");
-	else {
-		spec->num_pins = 8;
-		spec->pin_nids = stac9200_pin_nids;
+	if (spec->board_config < 0) {
+		snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9200, using BIOS defaults\n");
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0) {
+			stac92xx_free(codec);
+			return err;
+		}
+		spec->pin_configs = spec->bios_pin_configs;
+	} else {
 		spec->pin_configs = stac9200_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
@@ -1432,13 +1472,19 @@ static int patch_stac922x(struct hda_codec *codec)
 		return -ENOMEM;
 
 	codec->spec = spec;
+	spec->num_pins = 10;
+	spec->pin_nids = stac922x_pin_nids;
 	spec->board_config = snd_hda_check_board_config(codec, stac922x_cfg_tbl);
-	if (spec->board_config < 0)
-                snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC922x, "
-			    "using BIOS defaults\n");
-	else if (stac922x_brd_tbl[spec->board_config] != NULL) {
-		spec->num_pins = 10;
-		spec->pin_nids = stac922x_pin_nids;
+	if (spec->board_config < 0) {
+		snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC922x, "
+			"using BIOS defaults\n");
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0) {
+			stac92xx_free(codec);
+			return err;
+		}
+		spec->pin_configs = spec->bios_pin_configs;
+	} else if (stac922x_brd_tbl[spec->board_config] != NULL) {
 		spec->pin_configs = stac922x_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
@@ -1476,12 +1522,18 @@ static int patch_stac927x(struct hda_codec *codec)
 		return -ENOMEM;
 
 	codec->spec = spec;
+	spec->num_pins = 14;
+	spec->pin_nids = stac927x_pin_nids;
 	spec->board_config = snd_hda_check_board_config(codec, stac927x_cfg_tbl);
-	if (spec->board_config < 0)
+	if (spec->board_config < 0) {
                 snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC927x, using BIOS defaults\n");
-	else if (stac927x_brd_tbl[spec->board_config] != NULL) {
-		spec->num_pins = 14;
-		spec->pin_nids = stac927x_pin_nids;
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0) {
+			stac92xx_free(codec);
+			return err;
+		}
+		spec->pin_configs = spec->bios_pin_configs;
+	} else if (stac927x_brd_tbl[spec->board_config] != NULL) {
 		spec->pin_configs = stac927x_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
@@ -1532,12 +1584,18 @@ static int patch_stac9205(struct hda_codec *codec)
 		return -ENOMEM;
 
 	codec->spec = spec;
+	spec->num_pins = 14;
+	spec->pin_nids = stac9205_pin_nids;
 	spec->board_config = snd_hda_check_board_config(codec, stac9205_cfg_tbl);
-	if (spec->board_config < 0)
-                snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9205, using BIOS defaults\n");
-	else {
-		spec->num_pins = 14;
-		spec->pin_nids = stac9205_pin_nids;
+	if (spec->board_config < 0) {
+		snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9205, using BIOS defaults\n");
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0) {
+			stac92xx_free(codec);
+			return err;
+		}
+		spec->pin_configs = spec->bios_pin_configs;
+	} else {
 		spec->pin_configs = stac9205_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
-- 
GitLab


From 071c73ad5fce436ee00c9422b7ca0c5d629451fb Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 18:34:06 +0200
Subject: [PATCH 0971/1063] [ALSA] hda-codec - Fix mic capture with generic
 parser

Fixed the mic capture with the generic parser of the hda-codec driver:
- Use VREF80 for mic pins if available.
- Handle multiple inputs correctly on the audio-input widget node.
Confirmed on a Conexant codec chip.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_generic.c | 128 +++++++++++++++++++++++++-----------
 1 file changed, 90 insertions(+), 38 deletions(-)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 85ad164ada59e..dedfc5b1083ac 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -461,14 +461,19 @@ static const char *get_input_type(struct hda_gnode *node, unsigned int *pinctl)
 			return "Front Line";
 		return "Line";
 	case AC_JACK_CD:
+#if 0
 		if (pinctl)
 			*pinctl |= AC_PINCTL_VREF_GRD;
+#endif
 		return "CD";
 	case AC_JACK_AUX:
 		if ((location & 0x0f) == AC_JACK_LOC_FRONT)
 			return "Front Aux";
 		return "Aux";
 	case AC_JACK_MIC_IN:
+		if (node->pin_caps &
+		    (AC_PINCAP_VREF_80 << AC_PINCAP_VREF_SHIFT))
+			*pinctl |= AC_PINCTL_VREF_80;
 		if ((location & 0x0f) == AC_JACK_LOC_FRONT)
 			return "Front Mic";
 		return "Mic";
@@ -556,6 +561,29 @@ static int parse_adc_sub_nodes(struct hda_codec *codec, struct hda_gspec *spec,
 	return 1; /* found */
 }
 
+/* add a capture source element */
+static void add_cap_src(struct hda_gspec *spec, int idx)
+{
+	struct hda_input_mux_item *csrc;
+	char *buf;
+	int num, ocap;
+
+	num = spec->input_mux.num_items;
+	csrc = &spec->input_mux.items[num];
+	buf = spec->cap_labels[num];
+	for (ocap = 0; ocap < num; ocap++) {
+		if (! strcmp(buf, spec->cap_labels[ocap])) {
+			/* same label already exists,
+			 * put the index number to be unique
+			 */
+			sprintf(buf, "%s %d", spec->cap_labels[ocap], num);
+			break;
+		}
+	}
+	csrc->index = idx;
+	spec->input_mux.num_items++;
+}
+
 /*
  * parse input
  */
@@ -576,28 +604,26 @@ static int parse_input_path(struct hda_codec *codec, struct hda_gnode *adc_node)
 	 * if it reaches to a proper input PIN, add the path as the
 	 * input path.
 	 */
+	/* first, check the direct connections to PIN widgets */
 	for (i = 0; i < adc_node->nconns; i++) {
 		node = hda_get_node(spec, adc_node->conn_list[i]);
-		if (! node)
-			continue;
-		err = parse_adc_sub_nodes(codec, spec, node);
-		if (err < 0)
-			return err;
-		else if (err > 0) {
-			struct hda_input_mux_item *csrc = &spec->input_mux.items[spec->input_mux.num_items];
-			char *buf = spec->cap_labels[spec->input_mux.num_items];
-			int ocap;
-			for (ocap = 0; ocap < spec->input_mux.num_items; ocap++) {
-				if (! strcmp(buf, spec->cap_labels[ocap])) {
-					/* same label already exists,
-					 * put the index number to be unique
-					 */
-					sprintf(buf, "%s %d", spec->cap_labels[ocap],
-						spec->input_mux.num_items);
-				}
-			}
-			csrc->index = i;
-			spec->input_mux.num_items++;
+		if (node && node->type == AC_WID_PIN) {
+			err = parse_adc_sub_nodes(codec, spec, node);
+			if (err < 0)
+				return err;
+			else if (err > 0)
+				add_cap_src(spec, i);
+		}
+	}
+	/* ... then check the rests, more complicated connections */
+	for (i = 0; i < adc_node->nconns; i++) {
+		node = hda_get_node(spec, adc_node->conn_list[i]);
+		if (node && node->type != AC_WID_PIN) {
+			err = parse_adc_sub_nodes(codec, spec, node);
+			if (err < 0)
+				return err;
+			else if (err > 0)
+				add_cap_src(spec, i);
 		}
 	}
 
@@ -647,9 +673,6 @@ static int parse_input(struct hda_codec *codec)
 /*
  * create mixer controls if possible
  */
-#define DIR_OUT		0x1
-#define DIR_IN		0x2
-
 static int create_mixer(struct hda_codec *codec, struct hda_gnode *node,
 			unsigned int index, const char *type, const char *dir_sfx)
 {
@@ -743,28 +766,57 @@ static int build_input_controls(struct hda_codec *codec)
 {
 	struct hda_gspec *spec = codec->spec;
 	struct hda_gnode *adc_node = spec->adc_node;
-	int err;
-
-	if (! adc_node)
+	int i, err;
+	static struct snd_kcontrol_new cap_sel = {
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Capture Source",
+		.info = capture_source_info,
+		.get = capture_source_get,
+		.put = capture_source_put,
+	};
+
+	if (! adc_node || ! spec->input_mux.num_items)
 		return 0; /* not found */
 
+	spec->cur_cap_src = 0;
+	select_input_connection(codec, adc_node,
+				spec->input_mux.items[0].index);
+
 	/* create capture volume and switch controls if the ADC has an amp */
-	err = create_mixer(codec, adc_node, 0, NULL, "Capture");
+	/* do we have only a single item? */
+	if (spec->input_mux.num_items == 1) {
+		err = create_mixer(codec, adc_node,
+				   spec->input_mux.items[0].index,
+				   NULL, "Capture");
+		if (err < 0)
+			return err;
+		return 0;
+	}
 
 	/* create input MUX if multiple sources are available */
-	if (spec->input_mux.num_items > 1) {
-		static struct snd_kcontrol_new cap_sel = {
-			.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-			.name = "Capture Source",
-			.info = capture_source_info,
-			.get = capture_source_get,
-			.put = capture_source_put,
-		};
-		if ((err = snd_ctl_add(codec->bus->card, snd_ctl_new1(&cap_sel, codec))) < 0)
+	if ((err = snd_ctl_add(codec->bus->card,
+			       snd_ctl_new1(&cap_sel, codec))) < 0)
+		return err;
+
+	/* no volume control? */
+	if (! (adc_node->wid_caps & AC_WCAP_IN_AMP) ||
+	    ! (adc_node->amp_in_caps & AC_AMPCAP_NUM_STEPS))
+		return 0;
+
+	for (i = 0; i < spec->input_mux.num_items; i++) {
+		struct snd_kcontrol_new knew;
+		char name[32];
+		sprintf(name, "%s Capture Volume",
+			spec->input_mux.items[i].label);
+		knew = (struct snd_kcontrol_new)
+			HDA_CODEC_VOLUME(name, adc_node->nid,
+					 spec->input_mux.items[i].index,
+					 HDA_INPUT);
+		if ((err = snd_ctl_add(codec->bus->card,
+				       snd_ctl_new1(&knew, codec))) < 0)
 			return err;
-		spec->cur_cap_src = 0;
-		select_input_connection(codec, adc_node, spec->input_mux.items[0].index);
 	}
+
 	return 0;
 }
 
-- 
GitLab


From 3479307f8ca3cbf4181b8bf7d8c824156a9e63b7 Mon Sep 17 00:00:00 2001
From: Jochen Voss <voss@seehuhn.de>
Date: Wed, 23 Aug 2006 18:35:35 +0200
Subject: [PATCH 0972/1063] [ALSA] Fix volume control for the AK4358 DAC

Fix volume control for the AK4358 DAC.
The attenuation control registers of the AK4358 use only 7 bits for the
volume; the MSB is used to enable attenuation output.  Without this
patch there are 256 volume levels, the lower 128 of which are mute.

Signed-off-by: Jochen Voss <voss@seehuhn.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/i2c/other/ak4xxx-adda.c | 42 ++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c
index 0aea536a3371b..89fc3cbc23561 100644
--- a/sound/i2c/other/ak4xxx-adda.c
+++ b/sound/i2c/other/ak4xxx-adda.c
@@ -284,11 +284,13 @@ EXPORT_SYMBOL(snd_akm4xxx_init);
 
 #define AK_GET_CHIP(val)		(((val) >> 8) & 0xff)
 #define AK_GET_ADDR(val)		((val) & 0xff)
-#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x7f)
+#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x3f)
+#define AK_GET_NEEDSMSB(val)		(((val) >> 22) & 1)
 #define AK_GET_INVERT(val)		(((val) >> 23) & 1)
 #define AK_GET_MASK(val)		(((val) >> 24) & 0xff)
 #define AK_COMPOSE(chip,addr,shift,mask) \
 	(((chip) << 8) | (addr) | ((shift) << 16) | ((mask) << 24))
+#define AK_NEEDSMSB 			(1<<22)
 #define AK_INVERT 			(1<<23)
 
 static int snd_akm4xxx_volume_info(struct snd_kcontrol *kcontrol,
@@ -309,10 +311,13 @@ static int snd_akm4xxx_volume_get(struct snd_kcontrol *kcontrol,
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
 	int invert = AK_GET_INVERT(kcontrol->private_value);
 	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
 	unsigned char val = snd_akm4xxx_get(ak, chip, addr);
-	
+
+	if (needsmsb)
+		val &= 0x7f;
 	ucontrol->value.integer.value[0] = invert ? mask - val : val;
 	return 0;
 }
@@ -323,6 +328,7 @@ static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol,
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
 	int invert = AK_GET_INVERT(kcontrol->private_value);
 	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
 	unsigned char nval = ucontrol->value.integer.value[0] % (mask+1);
@@ -330,6 +336,8 @@ static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol,
 
 	if (invert)
 		nval = mask - nval;
+	if (needsmsb)
+		nval |= 0x80;
 	change = snd_akm4xxx_get(ak, chip, addr) != nval;
 	if (change)
 		snd_akm4xxx_write(ak, chip, addr, nval);
@@ -354,13 +362,19 @@ static int snd_akm4xxx_stereo_volume_get(struct snd_kcontrol *kcontrol,
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
 	int invert = AK_GET_INVERT(kcontrol->private_value);
 	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char val = snd_akm4xxx_get(ak, chip, addr);
-	
+	unsigned char val;
+
+	val = snd_akm4xxx_get(ak, chip, addr);
+	if (needsmsb)
+		val &= 0x7f;
 	ucontrol->value.integer.value[0] = invert ? mask - val : val;
 
 	val = snd_akm4xxx_get(ak, chip, addr+1);
+	if (needsmsb)
+		val &= 0x7f;
 	ucontrol->value.integer.value[1] = invert ? mask - val : val;
 
 	return 0;
@@ -372,6 +386,7 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol,
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
 	int invert = AK_GET_INVERT(kcontrol->private_value);
 	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
 	unsigned char nval = ucontrol->value.integer.value[0] % (mask+1);
@@ -379,6 +394,8 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol,
 
 	if (invert)
 		nval = mask - nval;
+	if (needsmsb)
+		nval |= 0x80;
 	change0 = snd_akm4xxx_get(ak, chip, addr) != nval;
 	if (change0)
 		snd_akm4xxx_write(ak, chip, addr, nval);
@@ -386,6 +403,8 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol,
 	nval = ucontrol->value.integer.value[1] % (mask+1);
 	if (invert)
 		nval = mask - nval;
+	if (needsmsb)
+		nval |= 0x80;
 	change1 = snd_akm4xxx_get(ak, chip, addr+1) != nval;
 	if (change1)
 		snd_akm4xxx_write(ak, chip, addr+1, nval);
@@ -585,16 +604,13 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
 			/* register 4-9, chip #0 only */
 			ctl->private_value = AK_COMPOSE(0, idx + 4, 0, 255);
 			break;
-		case SND_AK4358:
-			if (idx >= 6)
-				/* register 4-9, chip #0 only */
-				ctl->private_value =
-					AK_COMPOSE(0, idx + 5, 0, 255);
-			else
-				/* register 4-9, chip #0 only */
-				ctl->private_value =
-					AK_COMPOSE(0, idx + 4, 0, 255);
+		case SND_AK4358: {
+			/* register 4-9 and 11-12, chip #0 only */
+			int  addr = idx < 6 ? idx + 4 : idx + 5;
+			ctl->private_value =
+				AK_COMPOSE(0, addr, 0, 127) | AK_NEEDSMSB;
 			break;
+		}
 		case SND_AK4381:
 			/* register 3 & 4 */
 			ctl->private_value =
-- 
GitLab


From c6ff77f71fe692fa48fe02dbfe74a01f3d5e55e2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 19:53:02 +0200
Subject: [PATCH 0973/1063] [ALSA] Add dB scale information to pcxhr driver

Added the dB scale information to pcxhr driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/pcxhr/pcxhr_mixer.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/sound/pci/pcxhr/pcxhr_mixer.c b/sound/pci/pcxhr/pcxhr_mixer.c
index 94e63a1e90d9b..b133ad9e095e2 100644
--- a/sound/pci/pcxhr/pcxhr_mixer.c
+++ b/sound/pci/pcxhr/pcxhr_mixer.c
@@ -31,6 +31,7 @@
 #include "pcxhr_hwdep.h"
 #include "pcxhr_core.h"
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/asoundef.h>
 #include "pcxhr_mixer.h"
 
@@ -43,6 +44,9 @@
 #define PCXHR_ANALOG_PLAYBACK_LEVEL_MAX  128	/*    0.0 dB */
 #define PCXHR_ANALOG_PLAYBACK_ZERO_LEVEL 104	/*  -24.0 dB ( 0.0 dB - fix level +24.0 dB ) */
 
+static DECLARE_TLV_DB_SCALE(db_scale_analog_capture, -9600, 50, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_analog_playback, -12800, 100, 0);
+
 static int pcxhr_update_analog_audio_level(struct snd_pcxhr *chip, int is_capture, int channel)
 {
 	int err, vol;
@@ -130,10 +134,13 @@ static int pcxhr_analog_vol_put(struct snd_kcontrol *kcontrol,
 
 static struct snd_kcontrol_new pcxhr_control_analog_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	.info =		pcxhr_analog_vol_info,
 	.get =		pcxhr_analog_vol_get,
 	.put =		pcxhr_analog_vol_put,
+	/* tlv will be filled later */
 };
 
 /* shared */
@@ -188,6 +195,7 @@ static struct snd_kcontrol_new pcxhr_control_output_switch = {
 #define PCXHR_DIGITAL_LEVEL_MAX		0x1ff	/* +18 dB */
 #define PCXHR_DIGITAL_ZERO_LEVEL	0x1b7	/*  0 dB */
 
+static DECLARE_TLV_DB_SCALE(db_scale_digital, -10950, 50, 0);
 
 #define MORE_THAN_ONE_STREAM_LEVEL	0x000001
 #define VALID_STREAM_PAN_LEVEL_MASK	0x800000
@@ -343,11 +351,14 @@ static int pcxhr_pcm_vol_put(struct snd_kcontrol *kcontrol,
 static struct snd_kcontrol_new snd_pcxhr_pcm_vol =
 {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	/* count will be filled later */
 	.info =		pcxhr_digital_vol_info,		/* shared */
 	.get =		pcxhr_pcm_vol_get,
 	.put =		pcxhr_pcm_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 
@@ -433,10 +444,13 @@ static int pcxhr_monitor_vol_put(struct snd_kcontrol *kcontrol,
 
 static struct snd_kcontrol_new pcxhr_control_monitor_vol = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =         "Monitoring Volume",
 	.info =		pcxhr_digital_vol_info,		/* shared */
 	.get =		pcxhr_monitor_vol_get,
 	.put =		pcxhr_monitor_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 /*
@@ -928,6 +942,7 @@ int pcxhr_create_mixer(struct pcxhr_mgr *mgr)
 			temp = pcxhr_control_analog_level;
 			temp.name = "Master Playback Volume";
 			temp.private_value = 0; /* playback */
+			temp.tlv.p = db_scale_analog_playback;
 			if ((err = snd_ctl_add(chip->card, snd_ctl_new1(&temp, chip))) < 0)
 				return err;
 			/* output mute controls */
@@ -963,6 +978,7 @@ int pcxhr_create_mixer(struct pcxhr_mgr *mgr)
 			temp = pcxhr_control_analog_level;
 			temp.name = "Master Capture Volume";
 			temp.private_value = 1; /* capture */
+			temp.tlv.p = db_scale_analog_capture;
 			if ((err = snd_ctl_add(chip->card, snd_ctl_new1(&temp, chip))) < 0)
 				return err;
 
-- 
GitLab


From 1186ed8c7dc9c0185e783beddf241509cc224f1a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2006 19:53:28 +0200
Subject: [PATCH 0974/1063] [ALSA] Add dB scale information to vxpocket and
 vx222 drivers

Added the dB scale information to vxpocket and vx222 drivers.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/vx_core.h     |  1 +
 sound/drivers/vx/vx_mixer.c | 17 +++++++++++++++--
 sound/pci/vx222/vx222.c     |  7 +++++++
 sound/pci/vx222/vx222_ops.c |  9 +++++++++
 sound/pcmcia/vx/vxpocket.c  |  5 +++++
 5 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/include/sound/vx_core.h b/include/sound/vx_core.h
index 9821a6194caae..dbca14170615b 100644
--- a/include/sound/vx_core.h
+++ b/include/sound/vx_core.h
@@ -128,6 +128,7 @@ struct snd_vx_hardware {
 	unsigned int num_ins;
 	unsigned int num_outs;
 	unsigned int output_level_max;
+	unsigned int *output_level_db_scale;
 };
 
 /* hwdep id string */
diff --git a/sound/drivers/vx/vx_mixer.c b/sound/drivers/vx/vx_mixer.c
index c1d7fcdd1973e..1613ed844ac62 100644
--- a/sound/drivers/vx/vx_mixer.c
+++ b/sound/drivers/vx/vx_mixer.c
@@ -23,6 +23,7 @@
 #include <sound/driver.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/vx_core.h>
 #include "vx_cmd.h"
 
@@ -455,10 +456,13 @@ static int vx_output_level_put(struct snd_kcontrol *kcontrol, struct snd_ctl_ele
 
 static struct snd_kcontrol_new vx_control_output_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Master Playback Volume",
 	.info =		vx_output_level_info,
 	.get =		vx_output_level_get,
 	.put =		vx_output_level_put,
+	/* tlv will be filled later */
 };
 
 /*
@@ -712,12 +716,17 @@ static int vx_monitor_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_audio_gain, -10975, 25, 0);
+
 static struct snd_kcontrol_new vx_control_audio_gain = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	.info =         vx_audio_gain_info,
 	.get =          vx_audio_gain_get,
-	.put =          vx_audio_gain_put
+	.put =          vx_audio_gain_put,
+	.tlv = { .p = db_scale_audio_gain },
 };
 static struct snd_kcontrol_new vx_control_output_switch = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -729,9 +738,12 @@ static struct snd_kcontrol_new vx_control_output_switch = {
 static struct snd_kcontrol_new vx_control_monitor_gain = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name =         "Monitoring Volume",
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.info =         vx_audio_gain_info,	/* shared */
 	.get =          vx_audio_monitor_get,
-	.put =          vx_audio_monitor_put
+	.put =          vx_audio_monitor_put,
+	.tlv = { .p = db_scale_audio_gain },
 };
 static struct snd_kcontrol_new vx_control_monitor_switch = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -918,6 +930,7 @@ int snd_vx_mixer_new(struct vx_core *chip)
 	for (i = 0; i < chip->hw->num_outs; i++) {
 		temp = vx_control_output_level;
 		temp.index = i;
+		temp.tlv.p = chip->hw->output_level_db_scale;
 		if ((err = snd_ctl_add(card, snd_ctl_new1(&temp, chip))) < 0)
 			return err;
 	}
diff --git a/sound/pci/vx222/vx222.c b/sound/pci/vx222/vx222.c
index 9c03c6b4e4900..e7cd8acab59ae 100644
--- a/sound/pci/vx222/vx222.c
+++ b/sound/pci/vx222/vx222.c
@@ -26,6 +26,7 @@
 #include <linux/moduleparam.h>
 #include <sound/core.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 #include "vx222.h"
 
 #define CARD_NAME "VX222"
@@ -72,6 +73,9 @@ MODULE_DEVICE_TABLE(pci, snd_vx222_ids);
 /*
  */
 
+static DECLARE_TLV_DB_SCALE(db_scale_old_vol, -11350, 50, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_akm, -7350, 50, 0);
+
 static struct snd_vx_hardware vx222_old_hw = {
 
 	.name = "VX222/Old",
@@ -81,6 +85,7 @@ static struct snd_vx_hardware vx222_old_hw = {
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX_ANALOG_OUT_LEVEL_MAX,
+	.output_level_db_scale = db_scale_old_vol,
 };
 
 static struct snd_vx_hardware vx222_v2_hw = {
@@ -92,6 +97,7 @@ static struct snd_vx_hardware vx222_v2_hw = {
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX2_AKM_LEVEL_MAX,
+	.output_level_db_scale = db_scale_akm,
 };
 
 static struct snd_vx_hardware vx222_mic_hw = {
@@ -103,6 +109,7 @@ static struct snd_vx_hardware vx222_mic_hw = {
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX2_AKM_LEVEL_MAX,
+	.output_level_db_scale = db_scale_akm,
 };
 
 
diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c
index 9b6d345b83a66..5e51950e05f94 100644
--- a/sound/pci/vx222/vx222_ops.c
+++ b/sound/pci/vx222/vx222_ops.c
@@ -28,6 +28,7 @@
 
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <asm/io.h>
 #include "vx222.h"
 
@@ -845,6 +846,8 @@ static void vx2_set_input_level(struct snd_vx222 *chip)
 
 #define MIC_LEVEL_MAX	0xff
 
+static DECLARE_TLV_DB_SCALE(db_scale_mic, -6450, 50, 0);
+
 /*
  * controls API for input levels
  */
@@ -922,18 +925,24 @@ static int vx_mic_level_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_v
 
 static struct snd_kcontrol_new vx_control_input_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Capture Volume",
 	.info =		vx_input_level_info,
 	.get =		vx_input_level_get,
 	.put =		vx_input_level_put,
+	.tlv = { .p = db_scale_mic },
 };
 
 static struct snd_kcontrol_new vx_control_mic_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Mic Capture Volume",
 	.info =		vx_mic_level_info,
 	.get =		vx_mic_level_get,
 	.put =		vx_mic_level_put,
+	.tlv = { .p = db_scale_mic },
 };
 
 /*
diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c
index 76c85cffb40e6..3089fcca800ec 100644
--- a/sound/pcmcia/vx/vxpocket.c
+++ b/sound/pcmcia/vx/vxpocket.c
@@ -27,6 +27,7 @@
 #include <pcmcia/ciscode.h>
 #include <pcmcia/cisreg.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 /*
  */
@@ -90,6 +91,8 @@ static int snd_vxpocket_dev_free(struct snd_device *device)
  * Only output levels can be modified
  */
 
+static DECLARE_TLV_DB_SCALE(db_scale_old_vol, -11350, 50, 0);
+
 static struct snd_vx_hardware vxpocket_hw = {
 	.name = "VXPocket",
 	.type = VX_TYPE_VXPOCKET,
@@ -99,6 +102,7 @@ static struct snd_vx_hardware vxpocket_hw = {
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX_ANALOG_OUT_LEVEL_MAX,
+	.output_level_db_scale = db_scale_old_vol,
 };	
 
 /* VX-pocket 440
@@ -120,6 +124,7 @@ static struct snd_vx_hardware vxp440_hw = {
 	.num_ins = 2,
 	.num_outs = 2,
 	.output_level_max = VX_ANALOG_OUT_LEVEL_MAX,
+	.output_level_db_scale = db_scale_old_vol,
 };	
 
 
-- 
GitLab


From 86148e84c218e49b54521e8dae7bb78eb66c4281 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 24 Aug 2006 12:36:36 +0200
Subject: [PATCH 0975/1063] [ALSA] Fix errors with user TLV_WRITE

Fixed the errors at checking info.access field during user TLV_WRITE
call.  It should have been zero-initialized.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/control.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/core/control.c b/sound/core/control.c
index ac1442682eace..3030aaa6d2c5c 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1048,6 +1048,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 	if (ue == NULL)
 		return -ENOMEM;
 	ue->info = *info;
+	ue->info.access = 0;
 	ue->elem_data = (char *)ue + sizeof(*ue);
 	ue->elem_data_size = private_size;
 	kctl.private_free = snd_ctl_elem_user_free;
-- 
GitLab


From 18c1c3f694105ab2a6f43e054e23f9a751b2f869 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 25 Aug 2006 11:39:34 +0200
Subject: [PATCH 0976/1063] [ALSA] Return error if no user TLV is defined

Return an error to the user TLV_READ ioctl if no TLV is defined.
(Until now, nothing was written and the call returned successfully.)

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/control.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/core/control.c b/sound/core/control.c
index 3030aaa6d2c5c..6973a9686b679 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -951,6 +951,8 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol,
 		ue->tlv_data = new_data;
 		ue->tlv_data_size = size;
 	} else {
+		if (! ue->tlv_data_size || ! ue->tlv_data)
+			return -ENXIO;
 		if (size < ue->tlv_data_size)
 			return -ENOSPC;
 		if (copy_to_user(tlv, ue->tlv_data, ue->tlv_data_size))
-- 
GitLab


From 2c7782b420ee137057eeec7c24a565ac85fc1988 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 25 Aug 2006 13:11:26 +0200
Subject: [PATCH 0977/1063] [ALSA] hda-codec - Use model=ref for some Dell
 laptops

Force to choose model=ref for some Dell laptops with STAC9200 codec
chip for fixing the silent mic recording problem (possibly due to
a BIOS bug).  Reference: ALSA bug#2038
So far, applied to Inspiron 630m, Latitude D620 and 120L.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 239ae3fad0540..8d5ad7c0db072 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -582,6 +582,13 @@ static struct hda_board_config stac9205_cfg_tbl[] = {
 	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2668,	/* DFI LanParty */
 	  .config = STAC_REF },		/* SigmaTel reference board */
+	/* Dell laptops have BIOS problem */
+	{ .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01b5,
+	  .config = STAC_REF },	/* Dell Inspiron 630m */
+	{ .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01c2,
+	  .config = STAC_REF },	/* Dell Latitude D620 */
+	{ .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01cb,
+	  .config = STAC_REF },	/* Dell Latitude 120L */
 	{} /* terminator */
 };
 
-- 
GitLab


From aaad3653a5f073ce9eaef4efd387cf7fc3a53d18 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 28 Aug 2006 12:59:23 +0200
Subject: [PATCH 0978/1063] [ALSA] sparc dbri: recording is back

This patch fixes sound recording after the driver conversion to the
ring-buffered version. It also contains small cleanups to the
driver.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 65 ++++++++++++++--------------------------------
 1 file changed, 20 insertions(+), 45 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 3e6ad507849dc..cdca8e4a96e4b 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -107,7 +107,7 @@ static char *cmds[] = {
 #define dprintk(a, x...) if(dbri_debug & a) printk(KERN_DEBUG x)
 
 #else
-#define dprintk(a, x...)
+#define dprintk(a, x...) do { } while (0)
 
 #endif				/* DBRI_DEBUG */
 
@@ -610,10 +610,10 @@ CPU interrupt to signal completion.
 
 Since the DBRI can run in parallel with the CPU, several means of
 synchronization present themselves. The method implemented here is only
-to use the dbri_cmdwait() to wait for execution of batch of sent commands.
+use of the dbri_cmdwait() to wait for execution of batch of sent commands.
 
 A circular command buffer is used here. A new command is being added 
-while other can be executed. The scheme works by adding two WAIT commands
+while another can be executed. The scheme works by adding two WAIT commands
 after each sent batch of commands. When the next batch is prepared it is
 added after the WAIT commands then the WAITs are replaced with single JUMP
 command to the new batch. The the DBRI is forced to reread the last WAIT 
@@ -628,7 +628,7 @@ to send them to the DBRI.
 
 */
 
-#define MAXLOOPS 10
+#define MAXLOOPS 20
 /*
  * Wait for the current command string to execute
  */
@@ -692,9 +692,8 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 	if (cmd > dbri->cmdptr) {
 		s32 *ptr;
 
-		for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++) {
+		for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++)
 			dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
-		}
 	} else {
 		s32 *ptr = dbri->cmdptr;
 
@@ -1141,13 +1140,9 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 		return -1;
 	}
 
-	if (streamno == DBRI_PLAY) {
-		dbri->dma->desc[last_desc].word1 |=
-		    DBRI_TD_F | DBRI_TD_B;
-		dbri->dma->desc[last_desc].nda =
-		    dbri->dma_dvma + dbri_dma_off(desc, first_desc);
-		dbri->next_desc[last_desc] = first_desc;
-	}
+	dbri->dma->desc[last_desc].nda =
+	    dbri->dma_dvma + dbri_dma_off(desc, first_desc);
+	dbri->next_desc[last_desc] = first_desc;
 	dbri->pipes[info->pipe].first_desc = first_desc;
 	dbri->pipes[info->pipe].desc = first_desc;
 
@@ -1639,7 +1634,6 @@ static void xmit_descs(struct snd_dbri *dbri)
 	if (dbri == NULL)
 		return;		/* Disabled */
 
-	/* First check the recording stream for buffer overflow */
 	info = &dbri->stream_info[DBRI_REC];
 	spin_lock_irqsave(&dbri->lock, flags);
 
@@ -1649,27 +1643,20 @@ static void xmit_descs(struct snd_dbri *dbri)
 		dprintk(D_DESC, "xmit_descs rec @ TD %d\n", first_td);
 
 		/* Stream could be closed by the time we run. */
-		if (first_td < 0) {
-			goto play;
-		}
-
-		cmd = dbri_cmdlock(dbri, 2);
-		*(cmd++) = DBRI_CMD(D_SDP, 0,
-				    dbri->pipes[info->pipe].sdp
-				    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
-		*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
-		dbri_cmdsend(dbri, cmd, 2);
+		if (first_td >= 0) {
+			cmd = dbri_cmdlock(dbri, 2);
+			*(cmd++) = DBRI_CMD(D_SDP, 0,
+					    dbri->pipes[info->pipe].sdp
+					    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
+			*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
+			dbri_cmdsend(dbri, cmd, 2);
 
-		/* Reset our admin of the pipe & bytes read. */
-		dbri->pipes[info->pipe].desc = first_td;
+			/* Reset our admin of the pipe. */
+			dbri->pipes[info->pipe].desc = first_td;
+		}
 	}
 
-play:
-	spin_unlock_irqrestore(&dbri->lock, flags);
-
-	/* Now check the playback stream for buffer underflow */
 	info = &dbri->stream_info[DBRI_PLAY];
-	spin_lock_irqsave(&dbri->lock, flags);
 
 	if (info->pipe >= 0) {
 		first_td = dbri->pipes[info->pipe].first_desc;
@@ -1685,7 +1672,7 @@ static void xmit_descs(struct snd_dbri *dbri)
 			*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
 			dbri_cmdsend(dbri, cmd, 2);
 
-			/* Reset our admin of the pipe & bytes written. */
+			/* Reset our admin of the pipe. */
 			dbri->pipes[info->pipe].desc = first_td;
 		}
 	}
@@ -1755,7 +1742,6 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe)
 		return;
 	}
 
-	dbri->dma->desc[rd].ba = 0;
 	dbri->pipes[pipe].desc = dbri->next_desc[rd];
 	status = dbri->dma->desc[rd].word1;
 	dbri->dma->desc[rd].word1 = 0;	/* Reset it for next time. */
@@ -1768,18 +1754,6 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe)
 	dprintk(D_INT, "Recv RD %d, status 0x%02x, len %d\n",
 		rd, DBRI_RD_STATUS(status), DBRI_RD_CNT(status));
 
-	/* On the last TD, transmit them all again. */
-#if 0
-	if (dbri->next_desc[rd] == -1) {
-		if (info->left > info->size) {
-			printk(KERN_WARNING
-			       "%d bytes recorded in %d size buffer.\n",
-			       info->left, info->size);
-		}
-		tasklet_schedule(&xmit_descs_task);
-	}
-#endif
-
 	/* Notify ALSA */
 	if (spin_is_locked(&dbri->lock)) {
 		spin_unlock(&dbri->lock);
@@ -2113,6 +2087,7 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream)
 		info->pipe = 6;	/* Receive pipe */
 
 	spin_lock_irq(&dbri->lock);
+	info->offset = 0;
 
 	/* Setup the all the transmit/receive desciptors to cover the
 	 * whole DMA buffer.
-- 
GitLab


From 99dabfe716002c54b4dffa545460dc74bc632c22 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 28 Aug 2006 13:00:45 +0200
Subject: [PATCH 0979/1063] [ALSA] dbri sparc: fixes TS leak

This patch fixes time slot leak in the dbri driver.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index cdca8e4a96e4b..6b090fb66a8d5 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -1044,7 +1044,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 {
 	struct dbri_streaminfo *info = &dbri->stream_info[streamno];
 	__u32 dvma_buffer;
-	int desc = 0;
+	int desc;
 	int len;
 	int first_desc = -1;
 	int last_desc = -1;
@@ -1087,6 +1087,18 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period
 		len &= ~3;
 	}
 
+	/* Free descriptors if pipe has any */
+	desc = dbri->pipes[info->pipe].first_desc;
+	if ( desc >= 0)
+		do {
+			dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0;
+			desc = dbri->next_desc[desc];
+		} while (desc != -1 && desc != dbri->pipes[info->pipe].first_desc);
+
+	dbri->pipes[info->pipe].desc = -1;
+	dbri->pipes[info->pipe].first_desc = -1;
+
+	desc = 0;
 	while (len > 0) {
 		int mylen;
 
@@ -2054,6 +2066,7 @@ static int snd_dbri_hw_free(struct snd_pcm_substream *substream)
 	struct snd_dbri *dbri = snd_pcm_substream_chip(substream);
 	struct dbri_streaminfo *info = DBRI_STREAM(dbri, substream);
 	int direction;
+
 	dprintk(D_USR, "hw_free.\n");
 
 	/* hw_free can get called multiple times. Only unmap the DMA once.
@@ -2068,7 +2081,10 @@ static int snd_dbri_hw_free(struct snd_pcm_substream *substream)
 				  substream->runtime->buffer_size, direction);
 		info->dvma_buffer = 0;
 	}
-	info->pipe = -1;
+	if (info->pipe != -1) {
+		reset_pipe(dbri, info->pipe);
+		info->pipe = -1;
+	}
 
 	return snd_pcm_lib_free_pages(substream);
 }
-- 
GitLab


From 1f14d167f0233342eab53bb1a429ddad1e848de4 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Mon, 28 Aug 2006 13:01:31 +0200
Subject: [PATCH 0980/1063] [ALSA] sparc dbri: OSS layer fix

This patch removes setting of incorrect stop_threshold value
inside the driver. After the change, playback through the OSS
layer works correctly.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 6b090fb66a8d5..82d5e8072f2b6 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2111,8 +2111,6 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream)
 	ret = setup_descs(dbri, DBRI_STREAMNO(substream),
 			  snd_pcm_lib_period_bytes(substream));
 
-	runtime->stop_threshold = DBRI_TD_MAXCNT / runtime->channels;
-
 	spin_unlock_irq(&dbri->lock);
 
 	dprintk(D_USR, "prepare audio output. %d bytes\n", info->size);
-- 
GitLab


From 063a40d9111ce7558f2fdfa4f85acfc47eb27353 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 28 Aug 2006 13:20:13 +0200
Subject: [PATCH 0981/1063] [ALSA] Add the definition of linear volume TLV

Added the definition of linear volume TLV type.
Some DSP chips and codecs (e.g. AK codec) use linear volume control.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/tlv.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/sound/tlv.h b/include/sound/tlv.h
index b826e1df1da63..7905841643df3 100644
--- a/include/sound/tlv.h
+++ b/include/sound/tlv.h
@@ -33,6 +33,7 @@
 
 #define SNDRV_CTL_TLVT_CONTAINER 0	/* one level down - group of TLVs */
 #define SNDRV_CTL_TLVT_DB_SCALE	1       /* dB scale */
+#define SNDRV_CTL_TLVT_DB_LINEAR 2	/* linear volume */
 
 #define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
 unsigned int name[] = { \
@@ -40,4 +41,13 @@ unsigned int name[] = { \
         (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) \
 }
 
+/* linear volume between min_dB and max_dB (.01dB unit) */
+#define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB)	\
+unsigned int name[] = { \
+        SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \
+        (min_dB), (max_dB)				\
+}
+
+#define TLV_DB_GAIN_MUTE	-9999999
+
 #endif /* __SOUND_TLV_H */
-- 
GitLab


From 33925186d843e7004288cd3d87843c5a1dbf55a4 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 28 Aug 2006 13:29:42 +0200
Subject: [PATCH 0982/1063] [ALSA] ymfpci - Add TLV entries for native volume
 controls

Added the linear volume TLV entries for YMFPCI native volume controls.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ymfpci/ymfpci_main.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
index a55b5fd7da64e..24f6fc52f898f 100644
--- a/sound/pci/ymfpci/ymfpci_main.c
+++ b/sound/pci/ymfpci/ymfpci_main.c
@@ -36,6 +36,7 @@
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 #include <sound/ymfpci.h>
 #include <sound/asoundef.h>
 #include <sound/mpu401.h>
@@ -1477,11 +1478,15 @@ static int snd_ymfpci_put_single(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static DECLARE_TLV_DB_LINEAR(db_scale_native, TLV_DB_GAIN_MUTE, 0);
+
 #define YMFPCI_DOUBLE(xname, xindex, reg) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
   .info = snd_ymfpci_info_double, \
   .get = snd_ymfpci_get_double, .put = snd_ymfpci_put_double, \
-  .private_value = reg }
+  .private_value = reg, \
+  .tlv = { .p = db_scale_native } }
 
 static int snd_ymfpci_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
-- 
GitLab


From 9f458e7fb5b92385d348fb6039ba7211a6d6ba6e Mon Sep 17 00:00:00 2001
From: Andrey Liakhovets <liakh@dol.ru>
Date: Mon, 28 Aug 2006 16:52:41 +0200
Subject: [PATCH 0983/1063] [ALSA] ac97 - Fix VIA EPIA sound problem

Fix the bad sound quality on VIA EPIA system using VIA VT1617A
(ALSA bug#2381).

Signed-off-by: Andrey Liakhovets <liakh@dol.ru>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ac97_patch.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 392f6ccace5d2..bdd7f89234f6f 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -2799,6 +2799,10 @@ int patch_vt1616(struct snd_ac97 * ac97)
  */
 int patch_vt1617a(struct snd_ac97 * ac97)
 {
+	/* bring analog power consumption to normal, like WinXP driver
+	 * for EPIA SP
+	 */
+	snd_ac97_write_cache(ac97, 0x5c, 0x20);
 	ac97->ext_id |= AC97_EI_SPDIF;	/* force the detection of spdif */
 	ac97->rates[AC97_RATES_SPDIF] = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000;
 	return 0;
-- 
GitLab


From a79eee8d3d8a80c37d235e1181d67c3705c7bbfe Mon Sep 17 00:00:00 2001
From: Luke Ross <luke@lukeross.name>
Date: Tue, 29 Aug 2006 10:46:32 +0200
Subject: [PATCH 0984/1063] [ALSA] Support for non-standard rates in USB audio
 driver

There's at least one USB audio chipset out there which supports only one
non-standard rate (ID 0e6a:0310 supports 46875Hz). There are a few other
patches for this card which are unsatisfactory because they attempt to
map this rate to 44.1k leading to sound distortion.
The patch below uses SNDRV_PCM_RATE_KNOT to properly support the
non-standard rates where they are available.

Signed-off-by: Luke Ross <luke@lukeross.name>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/usb/usbaudio.c | 46 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 087f9b64d8a0c..664dd4c21e66e 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -123,6 +123,7 @@ struct audioformat {
 	unsigned int rate_min, rate_max;	/* min/max rates */
 	unsigned int nr_rates;		/* number of rate table entries */
 	unsigned int *rate_table;	/* rate table */
+	unsigned int needs_knot;	/* any unusual rates? */
 };
 
 struct snd_usb_substream;
@@ -1759,6 +1760,9 @@ static int check_hw_params_convention(struct snd_usb_substream *subs)
 		}
 		channels[f->format] |= (1 << f->channels);
 		rates[f->format] |= f->rates;
+		/* needs knot? */
+		if (f->needs_knot)
+			goto __out;
 	}
 	/* check whether channels and rates match for all formats */
 	cmaster = rmaster = 0;
@@ -1799,6 +1803,38 @@ static int check_hw_params_convention(struct snd_usb_substream *subs)
 	return err;
 }
 
+/*
+ *  If the device supports unusual bit rates, does the request meet these?
+ */
+static int snd_usb_pcm_check_knot(struct snd_pcm_runtime *runtime,
+				  struct snd_usb_substream *subs)
+{
+	struct list_head *p;
+	struct snd_pcm_hw_constraint_list constraints_rates;
+	int err;
+
+	list_for_each(p, &subs->fmt_list) {
+		struct audioformat *fp;
+		fp = list_entry(p, struct audioformat, list);
+
+		if (!fp->needs_knot)
+			continue;
+
+		constraints_rates.count = fp->nr_rates;
+		constraints_rates.list = fp->rate_table;
+		constraints_rates.mask = 0;
+
+		err = snd_pcm_hw_constraint_list(runtime, 0,
+			SNDRV_PCM_HW_PARAM_RATE,
+			&constraints_rates);
+
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 
 /*
  * set up the runtime hardware information.
@@ -1861,6 +1897,8 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre
 					       SNDRV_PCM_HW_PARAM_CHANNELS,
 					       -1)) < 0)
 			return err;
+		if ((err = snd_usb_pcm_check_knot(runtime, subs)) < 0)
+			return err;
 	}
 	return 0;
 }
@@ -2406,6 +2444,7 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform
 				    unsigned char *fmt, int offset)
 {
 	int nr_rates = fmt[offset];
+	int found;
 	if (fmt[0] < offset + 1 + 3 * (nr_rates ? nr_rates : 2)) {
 		snd_printk(KERN_ERR "%d:%u:%d : invalid FORMAT_TYPE desc\n",
 				   chip->dev->devnum, fp->iface, fp->altsetting);
@@ -2428,6 +2467,7 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform
 			return -1;
 		}
 
+		fp->needs_knot = 0;
 		fp->nr_rates = nr_rates;
 		fp->rate_min = fp->rate_max = combine_triple(&fmt[8]);
 		for (r = 0, idx = offset + 1; r < nr_rates; r++, idx += 3) {
@@ -2436,13 +2476,19 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform
 				fp->rate_min = rate;
 			else if (rate > fp->rate_max)
 				fp->rate_max = rate;
+			found = 0;
 			for (c = 0; c < (int)ARRAY_SIZE(conv_rates); c++) {
 				if (rate == conv_rates[c]) {
+					found = 1;
 					fp->rates |= (1 << c);
 					break;
 				}
 			}
+			if (!found)
+				fp->needs_knot = 1;
 		}
+		if (fp->needs_knot)
+			fp->rates |= SNDRV_PCM_RATE_KNOT;
 	} else {
 		/* continuous rates */
 		fp->rates = SNDRV_PCM_RATE_CONTINUOUS;
-- 
GitLab


From d0ae48471570c680333cbe28c143bbab887a4ec2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 29 Aug 2006 18:15:15 +0200
Subject: [PATCH 0985/1063] [ALSA] Add missing dB scale information to vxpocket
 driver

Added the missing dB scale information for Mic volume
to vxpocket driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pcmcia/vx/vxp_mixer.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/pcmcia/vx/vxp_mixer.c b/sound/pcmcia/vx/vxp_mixer.c
index e237f6c2018f0..bced7b623b12e 100644
--- a/sound/pcmcia/vx/vxp_mixer.c
+++ b/sound/pcmcia/vx/vxp_mixer.c
@@ -23,6 +23,7 @@
 #include <sound/driver.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include "vxpocket.h"
 
 #define MIC_LEVEL_MIN	0
@@ -63,12 +64,17 @@ static int vx_mic_level_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_v
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_mic, -21, 3, 0);
+
 static struct snd_kcontrol_new vx_control_mic_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Mic Capture Volume",
 	.info =		vx_mic_level_info,
 	.get =		vx_mic_level_get,
 	.put =		vx_mic_level_put,
+	.tlv = { .p = db_scale_mic },
 };
 
 /*
-- 
GitLab


From 723b2b0d36fa7cea81a962af2d40d88520d5a5f1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 30 Aug 2006 16:49:54 +0200
Subject: [PATCH 0986/1063] [ALSA] Clean up and add TLV support to AK4xxx i2c
 driver

- Clean up the code in AK4xxx-ADDA i2c code.
- Fix capture gain controls for AK5365
- Changed the static table for DAC/ADC mixer labels to use
  structs
- Implemented TLV entries for each AK codec
  The volumes in AK4524, AK4528 and AK5365 are corrected with
  a table to be suitable for dB conversion.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/ak4xxx-adda.h   |  40 ++-
 sound/i2c/other/ak4xxx-adda.c | 523 +++++++++++++++++++---------------
 sound/pci/ice1712/revo.c      |  47 +--
 3 files changed, 354 insertions(+), 256 deletions(-)

diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h
index 65ddfa3cac1fb..026e4072a9a15 100644
--- a/include/sound/ak4xxx-adda.h
+++ b/include/sound/ak4xxx-adda.h
@@ -39,14 +39,26 @@ struct snd_ak4xxx_ops {
 
 #define AK4XXX_IMAGE_SIZE	(AK4XXX_MAX_CHIPS * 16)	/* 64 bytes */
 
+/* DAC label and channels */
+struct snd_akm4xxx_dac_channel {
+	char *name;		/* mixer volume name */
+	unsigned int num_channels;
+};
+
+/* ADC labels and channels */
+struct snd_akm4xxx_adc_channel {
+	char *name;		/* capture gain volume label */
+	char *gain_name;	/* IPGA */
+	char *switch_name;	/* capture switch */
+	unsigned int num_channels;
+};
+
 struct snd_akm4xxx {
 	struct snd_card *card;
 	unsigned int num_adcs;			/* AK4524 or AK4528 ADCs */
 	unsigned int num_dacs;			/* AK4524 or AK4528 DACs */
 	unsigned char images[AK4XXX_IMAGE_SIZE]; /* saved register image */
-	unsigned char ipga_gain[AK4XXX_MAX_CHIPS][2]; /* saved register image
-						       * for IPGA (AK4528)
-						       */
+	unsigned char volumes[AK4XXX_IMAGE_SIZE]; /* saved volume values */
 	unsigned long private_value[AK4XXX_MAX_CHIPS];	/* helper for driver */
 	void *private_data[AK4XXX_MAX_CHIPS];		/* helper for driver */
 	/* template should fill the following fields */
@@ -56,10 +68,11 @@ struct snd_akm4xxx {
 		SND_AK4355, SND_AK4358, SND_AK4381,
 		SND_AK5365
 	} type;
-	unsigned int *num_stereo;	/* array of combined counts
-					 * for the mixer
-					 */
-	char **channel_names;		/* array of mixer channel names */
+
+	/* (array) information of combined codecs */
+	struct snd_akm4xxx_dac_channel *dac_info;
+	struct snd_akm4xxx_adc_channel *adc_info;
+
 	struct snd_ak4xxx_ops ops;
 };
 
@@ -73,9 +86,18 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak);
 	(ak)->images[(chip) * 16 + (reg)]
 #define snd_akm4xxx_set(ak,chip,reg,val) \
 	((ak)->images[(chip) * 16 + (reg)] = (val))
+#define snd_akm4xxx_get_vol(ak,chip,reg) \
+	(ak)->volumes[(chip) * 16 + (reg)]
+#define snd_akm4xxx_set_vol(ak,chip,reg,val) \
+	((ak)->volumes[(chip) * 16 + (reg)] = (val))
+
+/* Warning: IPGA is tricky - we assume the addr + 4 is unused
+ *   so far, it's OK for all AK codecs with IPGA:
+ *   AK4524, AK4528 and AK5365
+ */
 #define snd_akm4xxx_get_ipga(ak,chip,reg) \
-	(ak)->ipga_gain[chip][(reg)-4]
+	snd_akm4xxx_get_vol(ak, chip, (reg) + 4)
 #define snd_akm4xxx_set_ipga(ak,chip,reg,val) \
-	((ak)->ipga_gain[chip][(reg)-4] = (val))
+	snd_akm4xxx_set_vol(ak, chip, (reg) + 4, val)
 
 #endif /* __SOUND_AK4XXX_ADDA_H */
diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c
index 89fc3cbc23561..c34cb4684607c 100644
--- a/sound/i2c/other/ak4xxx-adda.c
+++ b/sound/i2c/other/ak4xxx-adda.c
@@ -28,12 +28,14 @@
 #include <linux/init.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/ak4xxx-adda.h>
 
 MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>, Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("Routines for control of AK452x / AK43xx  AD/DA converters");
 MODULE_LICENSE("GPL");
 
+/* write the given register and save the data to the cache */
 void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg,
 		       unsigned char val)
 {
@@ -41,15 +43,10 @@ void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg,
 	ak->ops.write(ak, chip, reg, val);
 
 	/* save the data */
-	if (ak->type == SND_AK4524 || ak->type == SND_AK4528) {
-		if ((reg != 0x04 && reg != 0x05) || (val & 0x80) == 0)
-			snd_akm4xxx_set(ak, chip, reg, val);
-		else
-			snd_akm4xxx_set_ipga(ak, chip, reg, val);
-	} else {
-		/* AK4529, or else */
+	/* don't overwrite with IPGA data */
+	if ((ak->type != SND_AK4524 && ak->type != SND_AK5365) ||
+	    (reg != 0x04 && reg != 0x05) || (val & 0x80) == 0)
 		snd_akm4xxx_set(ak, chip, reg, val);
-	}
 	ak->ops.unlock(ak, chip);
 }
 
@@ -78,7 +75,7 @@ static void ak4524_reset(struct snd_akm4xxx *ak, int state)
 		/* IPGA */
 		for (reg = 0x04; reg < 0x06; reg++)
 			snd_akm4xxx_write(ak, chip, reg,
-					  snd_akm4xxx_get_ipga(ak, chip, reg));
+					  snd_akm4xxx_get_ipga(ak, chip, reg) | 0x80);
 	}
 }
 
@@ -144,6 +141,42 @@ void snd_akm4xxx_reset(struct snd_akm4xxx *ak, int state)
 
 EXPORT_SYMBOL(snd_akm4xxx_reset);
 
+
+/*
+ * Volume conversion table for non-linear volumes
+ * from -63.5dB (mute) to 0dB step 0.5dB
+ *
+ * Used for AK4524 input/output attenuation, AK4528, and
+ * AK5365 input attenuation
+ */
+static unsigned char vol_cvt_datt[128] = {
+	0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04,
+	0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06,
+	0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x09, 0x0a,
+	0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f,
+	0x10, 0x10, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14,
+	0x15, 0x16, 0x17, 0x17, 0x18, 0x19, 0x1a, 0x1c,
+	0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x23,
+	0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, 0x2b, 0x2d,
+	0x2e, 0x30, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35,
+	0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3e, 0x3f, 0x40,
+	0x41, 0x42, 0x43, 0x44, 0x46, 0x47, 0x48, 0x4a,
+	0x4b, 0x4d, 0x4e, 0x50, 0x51, 0x52, 0x53, 0x54,
+	0x55, 0x56, 0x58, 0x59, 0x5b, 0x5c, 0x5e, 0x5f,
+	0x60, 0x61, 0x62, 0x64, 0x65, 0x66, 0x67, 0x69,
+	0x6a, 0x6c, 0x6d, 0x6f, 0x70, 0x71, 0x72, 0x73,
+	0x75, 0x76, 0x77, 0x79, 0x7a, 0x7c, 0x7d, 0x7f,
+};
+
+/*
+ * dB tables
+ */
+static DECLARE_TLV_DB_SCALE(db_scale_vol_datt, -6350, 50, 1);
+static DECLARE_TLV_DB_SCALE(db_scale_8bit, -12750, 50, 1);
+static DECLARE_TLV_DB_SCALE(db_scale_7bit, -6350, 50, 1);
+static DECLARE_TLV_DB_LINEAR(db_scale_linear, TLV_DB_GAIN_MUTE, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_ipga, 0, 50, 0);
+
 /*
  * initialize all the ak4xxx chips
  */
@@ -240,6 +273,9 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak)
 	int chip, num_chips;
 	unsigned char *ptr, reg, data, *inits;
 
+	memset(ak->images, 0, sizeof(ak->images));
+	memset(ak->volumes, 0, sizeof(ak->volumes));
+
 	switch (ak->type) {
 	case SND_AK4524:
 		inits = inits_ak4524;
@@ -265,6 +301,9 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak)
 		inits = inits_ak4381;
 		num_chips = ak->num_dacs / 2;
 		break;
+	case SND_AK5365:
+		/* FIXME: any init sequence? */
+		return;
 	default:
 		snd_BUG();
 		return;
@@ -282,16 +321,21 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak)
 
 EXPORT_SYMBOL(snd_akm4xxx_init);
 
+/*
+ * Mixer callbacks
+ */
+#define AK_VOL_CVT 			(1<<21)	/* need dB conversion */
+#define AK_NEEDSMSB 			(1<<22)	/* need MSB update bit */
+#define AK_INVERT 			(1<<23)	/* data is inverted */
 #define AK_GET_CHIP(val)		(((val) >> 8) & 0xff)
 #define AK_GET_ADDR(val)		((val) & 0xff)
-#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x3f)
+#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x1f)
+#define AK_GET_VOL_CVT(val)		(((val) >> 21) & 1)
 #define AK_GET_NEEDSMSB(val)		(((val) >> 22) & 1)
 #define AK_GET_INVERT(val)		(((val) >> 23) & 1)
 #define AK_GET_MASK(val)		(((val) >> 24) & 0xff)
 #define AK_COMPOSE(chip,addr,shift,mask) \
 	(((chip) << 8) | (addr) | ((shift) << 16) | ((mask) << 24))
-#define AK_NEEDSMSB 			(1<<22)
-#define AK_INVERT 			(1<<23)
 
 static int snd_akm4xxx_volume_info(struct snd_kcontrol *kcontrol,
 				   struct snd_ctl_elem_info *uinfo)
@@ -311,37 +355,37 @@ static int snd_akm4xxx_volume_get(struct snd_kcontrol *kcontrol,
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
-	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char val = snd_akm4xxx_get(ak, chip, addr);
 
-	if (needsmsb)
-		val &= 0x7f;
-	ucontrol->value.integer.value[0] = invert ? mask - val : val;
+	ucontrol->value.integer.value[0] = snd_akm4xxx_get_vol(ak, chip, addr);
 	return 0;
 }
 
-static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol,
-				  struct snd_ctl_elem_value *ucontrol)
+static int put_ak_reg(struct snd_kcontrol *kcontrol, int addr,
+		      unsigned char nval)
 {
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
-	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
 	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char nval = ucontrol->value.integer.value[0] % (mask+1);
-	int change;
+	int chip = AK_GET_CHIP(kcontrol->private_value);
 
-	if (invert)
+	if (snd_akm4xxx_get_vol(ak, chip, addr) == nval)
+		return 0;
+
+	snd_akm4xxx_set_vol(ak, chip, addr, nval);
+	if (AK_GET_VOL_CVT(kcontrol->private_value))
+		nval = vol_cvt_datt[nval];
+	if (AK_GET_INVERT(kcontrol->private_value))
 		nval = mask - nval;
-	if (needsmsb)
+	if (AK_GET_NEEDSMSB(kcontrol->private_value))
 		nval |= 0x80;
-	change = snd_akm4xxx_get(ak, chip, addr) != nval;
-	if (change)
-		snd_akm4xxx_write(ak, chip, addr, nval);
-	return change;
+	snd_akm4xxx_write(ak, chip, addr, nval);
+	return 1;
+}
+
+static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	return put_ak_reg(kcontrol, AK_GET_ADDR(kcontrol->private_value),
+			  ucontrol->value.integer.value[0]);
 }
 
 static int snd_akm4xxx_stereo_volume_info(struct snd_kcontrol *kcontrol,
@@ -362,66 +406,25 @@ static int snd_akm4xxx_stereo_volume_get(struct snd_kcontrol *kcontrol,
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
-	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char val;
-
-	val = snd_akm4xxx_get(ak, chip, addr);
-	if (needsmsb)
-		val &= 0x7f;
-	ucontrol->value.integer.value[0] = invert ? mask - val : val;
-
-	val = snd_akm4xxx_get(ak, chip, addr+1);
-	if (needsmsb)
-		val &= 0x7f;
-	ucontrol->value.integer.value[1] = invert ? mask - val : val;
 
+	ucontrol->value.integer.value[0] = snd_akm4xxx_get_vol(ak, chip, addr);
+	ucontrol->value.integer.value[1] = snd_akm4xxx_get_vol(ak, chip, addr+1);
 	return 0;
 }
 
 static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol,
 					 struct snd_ctl_elem_value *ucontrol)
 {
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
-	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char nval = ucontrol->value.integer.value[0] % (mask+1);
-	int change0, change1;
-
-	if (invert)
-		nval = mask - nval;
-	if (needsmsb)
-		nval |= 0x80;
-	change0 = snd_akm4xxx_get(ak, chip, addr) != nval;
-	if (change0)
-		snd_akm4xxx_write(ak, chip, addr, nval);
-
-	nval = ucontrol->value.integer.value[1] % (mask+1);
-	if (invert)
-		nval = mask - nval;
-	if (needsmsb)
-		nval |= 0x80;
-	change1 = snd_akm4xxx_get(ak, chip, addr+1) != nval;
-	if (change1)
-		snd_akm4xxx_write(ak, chip, addr+1, nval);
-
+	int change;
 
-	return change0 || change1;
+	change = put_ak_reg(kcontrol, addr, ucontrol->value.integer.value[0]);
+	change |= put_ak_reg(kcontrol, addr + 1,
+			     ucontrol->value.integer.value[1]);
+	return change;
 }
 
-static int snd_akm4xxx_ipga_gain_info(struct snd_kcontrol *kcontrol,
-				      struct snd_ctl_elem_info *uinfo)
-{
-	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
-	uinfo->count = 1;
-	uinfo->value.integer.min = 0;
-	uinfo->value.integer.max = 36;
-	return 0;
-}
+#define snd_akm4xxx_ipga_gain_info	snd_akm4xxx_volume_info
 
 static int snd_akm4xxx_ipga_gain_get(struct snd_kcontrol *kcontrol,
 				     struct snd_ctl_elem_value *ucontrol)
@@ -429,21 +432,57 @@ static int snd_akm4xxx_ipga_gain_get(struct snd_kcontrol *kcontrol,
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
+
 	ucontrol->value.integer.value[0] =
-		snd_akm4xxx_get_ipga(ak, chip, addr) & 0x7f;
+		snd_akm4xxx_get_ipga(ak, chip, addr);
 	return 0;
 }
 
+static int put_ak_ipga(struct snd_kcontrol *kcontrol, int addr,
+		       unsigned char nval)
+{
+	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
+	int chip = AK_GET_CHIP(kcontrol->private_value);
+
+	if (snd_akm4xxx_get_ipga(ak, chip, addr) == nval)
+		return 0;
+	snd_akm4xxx_set_ipga(ak, chip, addr, nval);
+	snd_akm4xxx_write(ak, chip, addr, nval | 0x80); /* need MSB */
+	return 1;
+}
+
 static int snd_akm4xxx_ipga_gain_put(struct snd_kcontrol *kcontrol,
 				     struct snd_ctl_elem_value *ucontrol)
+{
+	return put_ak_ipga(kcontrol, AK_GET_ADDR(kcontrol->private_value),
+			   ucontrol->value.integer.value[0]);
+}
+
+#define snd_akm4xxx_stereo_gain_info	snd_akm4xxx_stereo_volume_info
+
+static int snd_akm4xxx_stereo_gain_get(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
-	unsigned char nval = (ucontrol->value.integer.value[0] % 37) | 0x80;
-	int change = snd_akm4xxx_get_ipga(ak, chip, addr) != nval;
-	if (change)
-		snd_akm4xxx_write(ak, chip, addr, nval);
+
+	ucontrol->value.integer.value[0] =
+		snd_akm4xxx_get_ipga(ak, chip, addr);
+	ucontrol->value.integer.value[1] =
+		snd_akm4xxx_get_ipga(ak, chip, addr + 1);
+	return 0;
+}
+
+static int snd_akm4xxx_stereo_gain_put(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
+{
+	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int change;
+
+	change = put_ak_ipga(kcontrol, addr, ucontrol->value.integer.value[0]);
+	change |= put_ak_ipga(kcontrol, addr + 1,
+			      ucontrol->value.integer.value[1]);
 	return change;
 }
 
@@ -548,221 +587,247 @@ static int ak4xxx_switch_put(struct snd_kcontrol *kcontrol,
  * build AK4xxx controls
  */
 
-int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
+static int build_dac_controls(struct snd_akm4xxx *ak)
 {
-	unsigned int idx, num_emphs;
-	struct snd_kcontrol *ctl;
-	int err;
-	int mixer_ch = 0;
-	int num_stereo;
-
-	ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
-	if (! ctl)
-		return -ENOMEM;
+	int idx, err, mixer_ch, num_stereo;
+	struct snd_kcontrol_new knew;
 
+	mixer_ch = 0;
 	for (idx = 0; idx < ak->num_dacs; ) {
-		memset(ctl, 0, sizeof(*ctl));
-		if (ak->channel_names == NULL) {
-			strcpy(ctl->id.name, "DAC Volume");
+		memset(&knew, 0, sizeof(knew));
+		if (! ak->dac_info || ! ak->dac_info[mixer_ch].name) {
+			knew.name = "DAC Volume";
+			knew.index = mixer_ch + ak->idx_offset * 2;
 			num_stereo = 1;
-			ctl->id.index = mixer_ch + ak->idx_offset * 2;
 		} else {
-			strcpy(ctl->id.name, ak->channel_names[mixer_ch]);
-			num_stereo = ak->num_stereo[mixer_ch];
-			ctl->id.index = 0;
+			knew.name = ak->dac_info[mixer_ch].name;
+			num_stereo = ak->dac_info[mixer_ch].num_channels;
 		}
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
+		knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		knew.count = 1;
+		knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ;
 		if (num_stereo == 2) {
-			ctl->info = snd_akm4xxx_stereo_volume_info;
-			ctl->get = snd_akm4xxx_stereo_volume_get;
-			ctl->put = snd_akm4xxx_stereo_volume_put;
+			knew.info = snd_akm4xxx_stereo_volume_info;
+			knew.get = snd_akm4xxx_stereo_volume_get;
+			knew.put = snd_akm4xxx_stereo_volume_put;
 		} else {
-			ctl->info = snd_akm4xxx_volume_info;
-			ctl->get = snd_akm4xxx_volume_get;
-			ctl->put = snd_akm4xxx_volume_put;
+			knew.info = snd_akm4xxx_volume_info;
+			knew.get = snd_akm4xxx_volume_get;
+			knew.put = snd_akm4xxx_volume_put;
 		}
 		switch (ak->type) {
 		case SND_AK4524:
 			/* register 6 & 7 */
-			ctl->private_value =
-				AK_COMPOSE(idx/2, (idx%2) + 6, 0, 127);
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 6, 0, 127) |
+				AK_VOL_CVT;
+			knew.tlv.p = db_scale_vol_datt;
 			break;
 		case SND_AK4528:
 			/* register 4 & 5 */
-			ctl->private_value =
-				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127);
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127) |
+				AK_VOL_CVT;
+			knew.tlv.p = db_scale_vol_datt;
 			break;
 		case SND_AK4529: {
 			/* registers 2-7 and b,c */
 			int val = idx < 6 ? idx + 2 : (idx - 6) + 0xb;
-			ctl->private_value =
+			knew.private_value =
 				AK_COMPOSE(0, val, 0, 255) | AK_INVERT;
+			knew.tlv.p = db_scale_8bit;
 			break;
 		}
 		case SND_AK4355:
 			/* register 4-9, chip #0 only */
-			ctl->private_value = AK_COMPOSE(0, idx + 4, 0, 255);
+			knew.private_value = AK_COMPOSE(0, idx + 4, 0, 255);
+			knew.tlv.p = db_scale_8bit;
 			break;
 		case SND_AK4358: {
 			/* register 4-9 and 11-12, chip #0 only */
 			int  addr = idx < 6 ? idx + 4 : idx + 5;
-			ctl->private_value =
+			knew.private_value =
 				AK_COMPOSE(0, addr, 0, 127) | AK_NEEDSMSB;
+			knew.tlv.p = db_scale_7bit;
 			break;
 		}
 		case SND_AK4381:
 			/* register 3 & 4 */
-			ctl->private_value =
+			knew.private_value =
 				AK_COMPOSE(idx/2, (idx%2) + 3, 0, 255);
+			knew.tlv.p = db_scale_linear;
 			break;
 		default:
-			err = -EINVAL;
-			goto __error;
+			return -EINVAL;
 		}
 
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
-			goto __error;
+			return err;
 
 		idx += num_stereo;
 		mixer_ch++;
 	}
-	for (idx = 0; idx < ak->num_adcs && ak->type == SND_AK4524; ++idx) {
-		memset(ctl, 0, sizeof(*ctl));
-		strcpy(ctl->id.name, "ADC Volume");
-		ctl->id.index = idx + ak->idx_offset * 2;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = snd_akm4xxx_volume_info;
-		ctl->get = snd_akm4xxx_volume_get;
-		ctl->put = snd_akm4xxx_volume_put;
-		/* register 4 & 5 */
-		ctl->private_value =
-			AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127);
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
-		if (err < 0)
-			goto __error;
-
-		memset(ctl, 0, sizeof(*ctl));
-		strcpy(ctl->id.name, "IPGA Analog Capture Volume");
-		ctl->id.index = idx + ak->idx_offset * 2;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = snd_akm4xxx_ipga_gain_info;
-		ctl->get = snd_akm4xxx_ipga_gain_get;
-		ctl->put = snd_akm4xxx_ipga_gain_put;
+	return 0;
+}
+
+static int build_adc_controls(struct snd_akm4xxx *ak)
+{
+	int idx, err, mixer_ch, num_stereo;
+	struct snd_kcontrol_new knew;
+
+	mixer_ch = 0;
+	for (idx = 0; idx < ak->num_adcs;) {
+		memset(&knew, 0, sizeof(knew));
+		if (! ak->adc_info || ! ak->adc_info[mixer_ch].name) {
+			knew.name = "ADC Volume";
+			knew.index = mixer_ch + ak->idx_offset * 2;
+			num_stereo = 1;
+		} else {
+			knew.name = ak->adc_info[mixer_ch].name;
+			num_stereo = ak->adc_info[mixer_ch].num_channels;
+		}
+		knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		knew.count = 1;
+		knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+		if (num_stereo == 2) {
+			knew.info = snd_akm4xxx_stereo_volume_info;
+			knew.get = snd_akm4xxx_stereo_volume_get;
+			knew.put = snd_akm4xxx_stereo_volume_put;
+		} else {
+			knew.info = snd_akm4xxx_volume_info;
+			knew.get = snd_akm4xxx_volume_get;
+			knew.put = snd_akm4xxx_volume_put;
+		}
 		/* register 4 & 5 */
-		ctl->private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0, 0);
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		knew.private_value =
+			AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127) |
+			AK_VOL_CVT;
+		knew.tlv.p = db_scale_vol_datt;
+		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
-			goto __error;
-	}
+			return err;
 
-	if (ak->type == SND_AK5365) {
-		memset(ctl, 0, sizeof(*ctl));
-		if (ak->channel_names == NULL)
-			strcpy(ctl->id.name, "Capture Volume");
+		if (! ak->adc_info || ! ak->adc_info[mixer_ch].gain_name)
+			knew.name = "IPGA Analog Capture Volume";
 		else
-			strcpy(ctl->id.name, ak->channel_names[0]);
-		ctl->id.index = ak->idx_offset * 2;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = snd_akm4xxx_stereo_volume_info;
-		ctl->get = snd_akm4xxx_stereo_volume_get;
-		ctl->put = snd_akm4xxx_stereo_volume_put;
-		/* Registers 4 & 5 (see AK5365 data sheet, pages 34 and 35):
-		 * valid values are from 0x00 (mute) to 0x98 (+12dB).  */
-		ctl->private_value =
-			AK_COMPOSE(0, 4, 0, 0x98);
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+			knew.name = ak->adc_info[mixer_ch].gain_name;
+		if (num_stereo == 2) {
+			knew.info = snd_akm4xxx_stereo_gain_info;
+			knew.get = snd_akm4xxx_stereo_gain_get;
+			knew.put = snd_akm4xxx_stereo_gain_put;
+		} else {
+			knew.info = snd_akm4xxx_ipga_gain_info;
+			knew.get = snd_akm4xxx_ipga_gain_get;
+			knew.put = snd_akm4xxx_ipga_gain_put;
+		}
+		/* register 4 & 5 */
+		if (ak->type == SND_AK4524)
+			knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0,
+							24);
+		else /* AK5365 */
+			knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0,
+							36);
+		knew.tlv.p = db_scale_ipga;
+		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
-			goto __error;
+			return err;
+
+		if (ak->type == SND_AK5365 && (idx % 2) == 0) {
+			if (! ak->adc_info || 
+			    ! ak->adc_info[mixer_ch].switch_name)
+				knew.name = "Capture Switch";
+			else
+				knew.name = ak->adc_info[mixer_ch].switch_name;
+			knew.info = ak4xxx_switch_info;
+			knew.get = ak4xxx_switch_get;
+			knew.put = ak4xxx_switch_put;
+			knew.access = 0;
+			/* register 2, bit 0 (SMUTE): 0 = normal operation,
+			   1 = mute */
+			knew.private_value =
+				AK_COMPOSE(idx/2, 2, 0, 0) | AK_INVERT;
+			err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
+			if (err < 0)
+				return err;
+		}
 
-		memset(ctl, 0, sizeof(*ctl));
-		if (ak->channel_names == NULL)
-			strcpy(ctl->id.name, "Capture Switch");
-		else
-			strcpy(ctl->id.name, ak->channel_names[1]);
-		ctl->id.index = ak->idx_offset * 2;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = ak4xxx_switch_info;
-		ctl->get = ak4xxx_switch_get;
-		ctl->put = ak4xxx_switch_put;
-		/* register 2, bit 0 (SMUTE): 0 = normal operation, 1 = mute */
-		ctl->private_value =
-			AK_COMPOSE(0, 2, 0, 0) | AK_INVERT;
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
-		if (err < 0)
-			goto __error;
+		idx += num_stereo;
+		mixer_ch++;
 	}
+	return 0;
+}
+
+static int build_deemphasis(struct snd_akm4xxx *ak, int num_emphs)
+{
+	int idx, err;
+	struct snd_kcontrol_new knew;
 
-	if (ak->type == SND_AK4355 || ak->type == SND_AK4358)
-		num_emphs = 1;
-	else
-		num_emphs = ak->num_dacs / 2;
 	for (idx = 0; idx < num_emphs; idx++) {
-		memset(ctl, 0, sizeof(*ctl));
-		strcpy(ctl->id.name, "Deemphasis");
-		ctl->id.index = idx + ak->idx_offset;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = snd_akm4xxx_deemphasis_info;
-		ctl->get = snd_akm4xxx_deemphasis_get;
-		ctl->put = snd_akm4xxx_deemphasis_put;
+		memset(&knew, 0, sizeof(knew));
+		knew.name = "Deemphasis";
+		knew.index = idx + ak->idx_offset;
+		knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		knew.count = 1;
+		knew.info = snd_akm4xxx_deemphasis_info;
+		knew.get = snd_akm4xxx_deemphasis_get;
+		knew.put = snd_akm4xxx_deemphasis_put;
 		switch (ak->type) {
 		case SND_AK4524:
 		case SND_AK4528:
 			/* register 3 */
-			ctl->private_value = AK_COMPOSE(idx, 3, 0, 0);
+			knew.private_value = AK_COMPOSE(idx, 3, 0, 0);
 			break;
 		case SND_AK4529: {
 			int shift = idx == 3 ? 6 : (2 - idx) * 2;
 			/* register 8 with shift */
-			ctl->private_value = AK_COMPOSE(0, 8, shift, 0);
+			knew.private_value = AK_COMPOSE(0, 8, shift, 0);
 			break;
 		}
 		case SND_AK4355:
 		case SND_AK4358:
-			ctl->private_value = AK_COMPOSE(idx, 3, 0, 0);
+			knew.private_value = AK_COMPOSE(idx, 3, 0, 0);
 			break;
 		case SND_AK4381:
-			ctl->private_value = AK_COMPOSE(idx, 1, 1, 0);
+			knew.private_value = AK_COMPOSE(idx, 1, 1, 0);
 			break;
 		default:
-			err = -EINVAL;
-			goto __error;
+			return -EINVAL;
 		}
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
-			goto __error;
+			return err;
 	}
-	err = 0;
-
- __error:
-	kfree(ctl);
-	return err;
+	return 0;
 }
 
+int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
+{
+	int err, num_emphs;
+
+	err = build_dac_controls(ak);
+	if (err < 0)
+		return err;
+
+	if (ak->type == SND_AK4524 || ak->type == SND_AK5365) {
+		err = build_adc_controls(ak);
+		if (err < 0)
+			return err;
+	}
+
+	if (ak->type == SND_AK4355 || ak->type == SND_AK4358)
+		num_emphs = 1;
+	else
+		num_emphs = ak->num_dacs / 2;
+	err = build_deemphasis(ak, num_emphs);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+	
 EXPORT_SYMBOL(snd_akm4xxx_build_controls);
 
 static int __init alsa_akm4xxx_module_init(void)
diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c
index 1134a57f9e652..c9eefa9bbfff2 100644
--- a/sound/pci/ice1712/revo.c
+++ b/sound/pci/ice1712/revo.c
@@ -87,19 +87,34 @@ static void revo_set_rate_val(struct snd_akm4xxx *ak, unsigned int rate)
  * initialize the chips on M-Audio Revolution cards
  */
 
-static unsigned int revo71_num_stereo_front[] = {2};
-static char *revo71_channel_names_front[] = {"PCM Playback Volume"};
+#define AK_DAC(xname,xch) { .name = xname, .num_channels = xch }
 
-static unsigned int revo71_num_stereo_surround[] = {1, 1, 2, 2};
-static char *revo71_channel_names_surround[] = {"PCM Center Playback Volume", "PCM LFE Playback Volume",
-						"PCM Side Playback Volume", "PCM Rear Playback Volume"};
+static struct snd_akm4xxx_dac_channel revo71_front[] = {
+	AK_DAC("PCM Playback Volume", 2)
+};
+
+static struct snd_akm4xxx_dac_channel revo71_surround[] = {
+	AK_DAC("PCM Center Playback Volume", 1),
+	AK_DAC("PCM LFE Playback Volume", 1),
+	AK_DAC("PCM Side Playback Volume", 2),
+	AK_DAC("PCM Rear Playback Volume", 2),
+};
 
-static unsigned int revo51_num_stereo[] = {2, 1, 1, 2};
-static char *revo51_channel_names[] = {"PCM Playback Volume", "PCM Center Playback Volume",
-					"PCM LFE Playback Volume", "PCM Rear Playback Volume"};
+static struct snd_akm4xxx_dac_channel revo51_dac[] = {
+	AK_DAC("PCM Playback Volume", 2),
+	AK_DAC("PCM Center Playback Volume", 1),
+	AK_DAC("PCM LFE Playback Volume", 1),
+	AK_DAC("PCM Rear Playback Volume", 2),
+};
 
-static unsigned int revo51_adc_num_stereo[] = {2};
-static char *revo51_adc_channel_names[] = {"PCM Capture Volume","PCM Capture Switch"};
+static struct snd_akm4xxx_adc_channel revo51_adc[] = {
+	{
+		.name = "PCM Capture Volume",
+		.gain_name = "PCM Capture Gain Volume",
+		.switch_name = "PCM Capture Switch",
+		.num_channels = 2
+	},
+};
 
 static struct snd_akm4xxx akm_revo_front __devinitdata = {
 	.type = SND_AK4381,
@@ -107,8 +122,7 @@ static struct snd_akm4xxx akm_revo_front __devinitdata = {
 	.ops = {
 		.set_rate_val = revo_set_rate_val
 	},
-	.num_stereo = revo71_num_stereo_front,
-	.channel_names = revo71_channel_names_front
+	.dac_info = revo71_front,
 };
 
 static struct snd_ak4xxx_private akm_revo_front_priv __devinitdata = {
@@ -130,8 +144,7 @@ static struct snd_akm4xxx akm_revo_surround __devinitdata = {
 	.ops = {
 		.set_rate_val = revo_set_rate_val
 	},
-	.num_stereo = revo71_num_stereo_surround,
-	.channel_names = revo71_channel_names_surround
+	.dac_info = revo71_surround,
 };
 
 static struct snd_ak4xxx_private akm_revo_surround_priv __devinitdata = {
@@ -152,8 +165,7 @@ static struct snd_akm4xxx akm_revo51 __devinitdata = {
 	.ops = {
 		.set_rate_val = revo_set_rate_val
 	},
-	.num_stereo = revo51_num_stereo,
-	.channel_names = revo51_channel_names
+	.dac_info = revo51_dac,
 };
 
 static struct snd_ak4xxx_private akm_revo51_priv __devinitdata = {
@@ -171,8 +183,7 @@ static struct snd_ak4xxx_private akm_revo51_priv __devinitdata = {
 static struct snd_akm4xxx akm_revo51_adc __devinitdata = {
 	.type = SND_AK5365,
 	.num_adcs = 2,
-	.num_stereo = revo51_adc_num_stereo,
-	.channel_names = revo51_adc_channel_names
+	.adc_info = revo51_adc,
 };
 
 static struct snd_ak4xxx_private akm_revo51_adc_priv __devinitdata = {
-- 
GitLab


From 680ef792a1afdb3bf38e4a0296cce996a5b95317 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 30 Aug 2006 16:56:30 +0200
Subject: [PATCH 0987/1063] [ALSA] Add dB scale information to ice1712 driver

Added the dB scale information for the native digital volumes of the ice1712 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ice1712/ice1712.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c
index 9b8325d65d8de..dc69392eafa37 100644
--- a/sound/pci/ice1712/ice1712.c
+++ b/sound/pci/ice1712/ice1712.c
@@ -62,6 +62,7 @@
 #include <sound/cs8427.h>
 #include <sound/info.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 #include <sound/asoundef.h>
 
@@ -1377,6 +1378,7 @@ static int snd_ice1712_pro_mixer_volume_put(struct snd_kcontrol *kcontrol, struc
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_playback, -14400, 150, 0);
 
 static struct snd_kcontrol_new snd_ice1712_multi_playback_ctrls[] __devinitdata = {
 	{
@@ -1390,12 +1392,15 @@ static struct snd_kcontrol_new snd_ice1712_multi_playback_ctrls[] __devinitdata
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Multi Playback Volume",
 		.info = snd_ice1712_pro_mixer_volume_info,
 		.get = snd_ice1712_pro_mixer_volume_get,
 		.put = snd_ice1712_pro_mixer_volume_put,
 		.private_value = 0,
 		.count = 10,
+		.tlv = { .p = db_scale_playback }
 	},
 };
 
@@ -1420,11 +1425,14 @@ static struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_switch __devinitd
 
 static struct snd_kcontrol_new snd_ice1712_multi_capture_analog_volume __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name = "H/W Multi Capture Volume",
 	.info = snd_ice1712_pro_mixer_volume_info,
 	.get = snd_ice1712_pro_mixer_volume_get,
 	.put = snd_ice1712_pro_mixer_volume_put,
 	.private_value = 10,
+	.tlv = { .p = db_scale_playback }
 };
 
 static struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_volume __devinitdata = {
-- 
GitLab


From f640c3205aca4fe231beccc9e719c946cf3fee7a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 30 Aug 2006 16:57:37 +0200
Subject: [PATCH 0988/1063] [ALSA] Add dB scale information to ice1724 driver

Added the dB scale information to each board's support code in the ice1724 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ice1712/aureon.c     | 104 ++++++++++++++++++++++++++-------
 sound/pci/ice1712/phase.c      |  39 ++++++++++---
 sound/pci/ice1712/pontis.c     |   9 +++
 sound/pci/ice1712/prodigy192.c |  14 +++++
 4 files changed, 139 insertions(+), 27 deletions(-)

diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c
index 9492f3d2455b4..9e76cebd2d228 100644
--- a/sound/pci/ice1712/aureon.c
+++ b/sound/pci/ice1712/aureon.c
@@ -60,6 +60,7 @@
 #include "ice1712.h"
 #include "envy24ht.h"
 #include "aureon.h"
+#include <sound/tlv.h>
 
 /* WM8770 registers */
 #define WM_DAC_ATTEN		0x00	/* DAC1-8 analog attenuation */
@@ -660,6 +661,12 @@ static int aureon_ac97_mmute_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_wm_dac, -12700, 100, 1);
+static DECLARE_TLV_DB_SCALE(db_scale_wm_pcm, -6400, 50, 1);
+static DECLARE_TLV_DB_SCALE(db_scale_wm_adc, -1200, 100, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_ac97_master, -4650, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_ac97_gain, -3450, 150, 0);
+
 /*
  * Logarithmic volume values for WM8770
  * Computed as 20 * Log10(255 / x)
@@ -1409,10 +1416,13 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Master Playback Volume",
 		.info = wm_master_vol_info,
 		.get = wm_master_vol_get,
-		.put = wm_master_vol_put
+		.put = wm_master_vol_put,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1424,11 +1434,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Front Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 0
+		.private_value = (2 << 8) | 0,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1440,11 +1453,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Rear Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 2
+		.private_value = (2 << 8) | 2,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1456,11 +1472,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Center Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 4
+		.private_value = (1 << 8) | 4,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1472,11 +1491,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "LFE Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 5
+		.private_value = (1 << 8) | 5,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1488,11 +1510,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Side Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 6
+		.private_value = (2 << 8) | 6,
+		.tlv = { .p = db_scale_wm_dac }
 	}
 };
 
@@ -1506,10 +1531,13 @@ static struct snd_kcontrol_new wm_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "PCM Playback Volume",
 		.info = wm_pcm_vol_info,
 		.get = wm_pcm_vol_get,
-		.put = wm_pcm_vol_put
+		.put = wm_pcm_vol_put,
+		.tlv = { .p = db_scale_wm_pcm }
  	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1520,10 +1548,13 @@ static struct snd_kcontrol_new wm_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Capture Volume",
 		.info = wm_adc_vol_info,
 		.get = wm_adc_vol_get,
-		.put = wm_adc_vol_put
+		.put = wm_adc_vol_put,
+		.tlv = { .p = db_scale_wm_adc }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1567,11 +1598,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "AC97 Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MASTER|AUREON_AC97_STEREO
+ 		.private_value = AC97_MASTER|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_master }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1583,11 +1617,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "CD Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_CD|AUREON_AC97_STEREO
+ 		.private_value = AC97_CD|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1599,11 +1636,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Aux Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_AUX|AUREON_AC97_STEREO
+ 		.private_value = AC97_AUX|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1615,11 +1655,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Line Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_LINE|AUREON_AC97_STEREO
+ 		.private_value = AC97_LINE|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1631,11 +1674,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Mic Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MIC
+ 		.private_value = AC97_MIC,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1657,11 +1703,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "AC97 Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MASTER|AUREON_AC97_STEREO
+ 		.private_value = AC97_MASTER|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_master }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1673,11 +1722,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "CD Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_AUX|AUREON_AC97_STEREO
+ 		.private_value = AC97_AUX|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1685,15 +1737,18 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = {
  		.info = aureon_ac97_mute_info,
  		.get = aureon_ac97_mute_get,
  		.put = aureon_ac97_mute_put,
- 		.private_value = AC97_CD,
+ 		.private_value = AC97_CD
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Phono Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_CD|AUREON_AC97_STEREO
+ 		.private_value = AC97_CD|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1705,11 +1760,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Line Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_LINE|AUREON_AC97_STEREO
+ 		.private_value = AC97_LINE|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1721,11 +1779,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Mic Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MIC
+ 		.private_value = AC97_MIC,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1744,11 +1805,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = {
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Aux Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_VIDEO|AUREON_AC97_STEREO
+ 		.private_value = AC97_VIDEO|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
diff --git a/sound/pci/ice1712/phase.c b/sound/pci/ice1712/phase.c
index 502da1c8b5f71..e08d73f4ff85d 100644
--- a/sound/pci/ice1712/phase.c
+++ b/sound/pci/ice1712/phase.c
@@ -46,6 +46,7 @@
 #include "ice1712.h"
 #include "envy24ht.h"
 #include "phase.h"
+#include <sound/tlv.h>
 
 /* WM8770 registers */
 #define WM_DAC_ATTEN		0x00	/* DAC1-8 analog attenuation */
@@ -696,6 +697,9 @@ static int phase28_oversampling_put(struct snd_kcontrol *kcontrol, struct snd_ct
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_wm_dac, -12700, 100, 1);
+static DECLARE_TLV_DB_SCALE(db_scale_wm_pcm, -6400, 50, 1);
+
 static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -706,10 +710,13 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Master Playback Volume",
 		.info = wm_master_vol_info,
 		.get = wm_master_vol_get,
-		.put = wm_master_vol_put
+		.put = wm_master_vol_put,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -721,11 +728,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Front Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 0
+		.private_value = (2 << 8) | 0,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -737,11 +747,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Rear Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 2
+		.private_value = (2 << 8) | 2,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -753,11 +766,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Center Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 4
+		.private_value = (1 << 8) | 4,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -769,11 +785,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "LFE Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 5
+		.private_value = (1 << 8) | 5,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -785,11 +804,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Side Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 6
+		.private_value = (2 << 8) | 6,
+		.tlv = { .p = db_scale_wm_dac }
 	}
 };
 
@@ -803,10 +825,13 @@ static struct snd_kcontrol_new wm_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "PCM Playback Volume",
 		.info = wm_pcm_vol_info,
 		.get = wm_pcm_vol_get,
-		.put = wm_pcm_vol_put
+		.put = wm_pcm_vol_put,
+		.tlv = { .p = db_scale_wm_pcm }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
diff --git a/sound/pci/ice1712/pontis.c b/sound/pci/ice1712/pontis.c
index 0efcad9260a5f..6c74c2d2e7f39 100644
--- a/sound/pci/ice1712/pontis.c
+++ b/sound/pci/ice1712/pontis.c
@@ -31,6 +31,7 @@
 
 #include <sound/core.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 
 #include "ice1712.h"
 #include "envy24ht.h"
@@ -564,6 +565,8 @@ static int pontis_gpio_data_put(struct snd_kcontrol *kcontrol, struct snd_ctl_el
 	return changed;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_volume, -6400, 50, 1);
+
 /*
  * mixers
  */
@@ -571,17 +574,23 @@ static int pontis_gpio_data_put(struct snd_kcontrol *kcontrol, struct snd_ctl_el
 static struct snd_kcontrol_new pontis_controls[] __devinitdata = {
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "PCM Playback Volume",
 		.info = wm_dac_vol_info,
 		.get = wm_dac_vol_get,
 		.put = wm_dac_vol_put,
+		.tlv = { .p = db_scale_volume },
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Capture Volume",
 		.info = wm_adc_vol_info,
 		.get = wm_adc_vol_get,
 		.put = wm_adc_vol_put,
+		.tlv = { .p = db_scale_volume },
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
diff --git a/sound/pci/ice1712/prodigy192.c b/sound/pci/ice1712/prodigy192.c
index fdb5cb8fac97c..41b2605daa3ae 100644
--- a/sound/pci/ice1712/prodigy192.c
+++ b/sound/pci/ice1712/prodigy192.c
@@ -35,6 +35,7 @@
 #include "envy24ht.h"
 #include "prodigy192.h"
 #include "stac946x.h"
+#include <sound/tlv.h>
 
 static inline void stac9460_put(struct snd_ice1712 *ice, int reg, unsigned char val)
 {
@@ -356,6 +357,9 @@ static int aureon_oversampling_put(struct snd_kcontrol *kcontrol, struct snd_ctl
 }
 #endif
 
+static DECLARE_TLV_DB_SCALE(db_scale_dac, -19125, 75, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_adc, 0, 150, 0);
+
 /*
  * mixers
  */
@@ -368,14 +372,18 @@ static struct snd_kcontrol_new stac_controls[] __devinitdata = {
 		.get = stac9460_dac_mute_get,
 		.put = stac9460_dac_mute_put,
 		.private_value = 1,
+		.tlv = { .p = db_scale_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Master Playback Volume",
 		.info = stac9460_dac_vol_info,
 		.get = stac9460_dac_vol_get,
 		.put = stac9460_dac_vol_put,
 		.private_value = 1,
+		.tlv = { .p = db_scale_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -387,11 +395,14 @@ static struct snd_kcontrol_new stac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "DAC Volume",
 		.count = 6,
 		.info = stac9460_dac_vol_info,
 		.get = stac9460_dac_vol_get,
 		.put = stac9460_dac_vol_put,
+		.tlv = { .p = db_scale_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -404,11 +415,14 @@ static struct snd_kcontrol_new stac_controls[] __devinitdata = {
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "ADC Volume",
 		.count = 1,
 		.info = stac9460_adc_vol_info,
 		.get = stac9460_adc_vol_get,
 		.put = stac9460_adc_vol_put,
+		.tlv = { .p = db_scale_adc }
 	},
 #if 0
 	{
-- 
GitLab


From 929861c669a443cf667ec0d80ac73a567ed4543c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 31 Aug 2006 16:55:40 +0200
Subject: [PATCH 0989/1063] [ALSA] hda-intel - Remove volatile

Removed volatile from the position buffer pointer.
Also, use synchronize_irq() instead of unreliable msleep(1)
in the driver remove callback.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_intel.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index c9ae9f7789282..d56ea2125aa86 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -255,7 +255,7 @@ enum {
 struct azx_dev {
 	u32 *bdl;			/* virtual address of the BDL */
 	dma_addr_t bdl_addr;		/* physical address of the BDL */
-	volatile u32 *posbuf;			/* position buffer pointer */
+	u32 *posbuf;			/* position buffer pointer */
 
 	unsigned int bufsize;		/* size of the play buffer in bytes */
 	unsigned int fragsize;		/* size of each period in bytes */
@@ -1197,7 +1197,7 @@ static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream)
 	if (chip->position_fix == POS_FIX_POSBUF ||
 	    chip->position_fix == POS_FIX_AUTO) {
 		/* use the position buffer */
-		pos = *azx_dev->posbuf;
+		pos = le32_to_cpu(*azx_dev->posbuf);
 		if (chip->position_fix == POS_FIX_AUTO &&
 		    azx_dev->period_intr == 1 && ! pos) {
 			printk(KERN_WARNING
@@ -1345,7 +1345,7 @@ static int __devinit azx_init_stream(struct azx *chip)
 		struct azx_dev *azx_dev = &chip->azx_dev[i];
 		azx_dev->bdl = (u32 *)(chip->bdl.area + off);
 		azx_dev->bdl_addr = chip->bdl.addr + off;
-		azx_dev->posbuf = (volatile u32 *)(chip->posbuf.area + i * 8);
+		azx_dev->posbuf = (u32 __iomem *)(chip->posbuf.area + i * 8);
 		/* offset: SDI0=0x80, SDI1=0xa0, ... SDO3=0x160 */
 		azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80);
 		/* int mask: SDI0=0x01, SDI1=0x02, ... SDO3=0x80 */
@@ -1417,8 +1417,7 @@ static int azx_free(struct azx *chip)
 		azx_writel(chip, DPLBASE, 0);
 		azx_writel(chip, DPUBASE, 0);
 
-		/* wait a little for interrupts to finish */
-		msleep(1);
+		synchronize_irq(chip->irq);
 	}
 
 	if (chip->irq >= 0) {
-- 
GitLab


From 927fc866025857c109219d4ed62d8c3cbc02713a Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@suse.cz>
Date: Thu, 31 Aug 2006 17:03:43 +0200
Subject: [PATCH 0990/1063] [ALSA] sound/pci/hda/intel_hda: small cleanups

Cleanup whitespace.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_intel.c | 52 ++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index d56ea2125aa86..cc50d13ee90c1 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -274,8 +274,8 @@ struct azx_dev {
 	/* for sanity check of position buffer */
 	unsigned int period_intr;
 
-	unsigned int opened: 1;
-	unsigned int running: 1;
+	unsigned int opened :1;
+	unsigned int running :1;
 };
 
 /* CORB/RIRB */
@@ -333,9 +333,9 @@ struct azx {
 
 	/* flags */
 	int position_fix;
-	unsigned int initialized: 1;
-	unsigned int single_cmd: 1;
-	unsigned int polling_mode: 1;
+	unsigned int initialized :1;
+	unsigned int single_cmd :1;
+	unsigned int polling_mode :1;
 };
 
 /* driver types */
@@ -661,14 +661,14 @@ static int azx_reset(struct azx *chip)
 	azx_writeb(chip, GCTL, azx_readb(chip, GCTL) | ICH6_GCTL_RESET);
 
 	count = 50;
-	while (! azx_readb(chip, GCTL) && --count)
+	while (!azx_readb(chip, GCTL) && --count)
 		msleep(1);
 
-	/* Brent Chartrand said to wait >= 540us for codecs to intialize */
+	/* Brent Chartrand said to wait >= 540us for codecs to initialize */
 	msleep(1);
 
 	/* check to see if controller is ready */
-	if (! azx_readb(chip, GCTL)) {
+	if (!azx_readb(chip, GCTL)) {
 		snd_printd("azx_reset: controller not ready!\n");
 		return -EBUSY;
 	}
@@ -677,7 +677,7 @@ static int azx_reset(struct azx *chip)
 	azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UREN);
 
 	/* detect codecs */
-	if (! chip->codec_mask) {
+	if (!chip->codec_mask) {
 		chip->codec_mask = azx_readw(chip, STATESTS);
 		snd_printdd("codec_mask = 0x%x\n", chip->codec_mask);
 	}
@@ -785,7 +785,7 @@ static void azx_init_chip(struct azx *chip)
 	azx_int_enable(chip);
 
 	/* initialize the codec command I/O */
-	if (! chip->single_cmd)
+	if (!chip->single_cmd)
 		azx_init_cmd_io(chip);
 
 	/* program the position buffer */
@@ -813,7 +813,7 @@ static void azx_init_chip(struct azx *chip)
 /*
  * interrupt handler
  */
-static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs)
+static irqreturn_t azx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	struct azx *chip = dev_id;
 	struct azx_dev *azx_dev;
@@ -1018,8 +1018,9 @@ static struct snd_pcm_hardware azx_pcm_hw = {
 	.info =			(SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED |
 				 SNDRV_PCM_INFO_BLOCK_TRANSFER |
 				 SNDRV_PCM_INFO_MMAP_VALID |
-				 SNDRV_PCM_INFO_PAUSE /*|*/
-				 /*SNDRV_PCM_INFO_RESUME*/),
+				 /* No full-resume yet implemented */
+				 /* SNDRV_PCM_INFO_RESUME |*/
+				 SNDRV_PCM_INFO_PAUSE),
 	.formats =		SNDRV_PCM_FMTBIT_S16_LE,
 	.rates =		SNDRV_PCM_RATE_48000,
 	.rate_min =		48000,
@@ -1454,19 +1455,19 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 				struct azx **rchip)
 {
 	struct azx *chip;
-	int err = 0;
+	int err;
 	static struct snd_device_ops ops = {
 		.dev_free = azx_dev_free,
 	};
 
 	*rchip = NULL;
 	
-	if ((err = pci_enable_device(pci)) < 0)
+	err = pci_enable_device(pci);
+	if (err < 0)
 		return err;
 
 	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-	
-	if (NULL == chip) {
+	if (!chip) {
 		snd_printk(KERN_ERR SFX "cannot allocate chip\n");
 		pci_disable_device(pci);
 		return -ENOMEM;
@@ -1492,13 +1493,14 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	}
 #endif
 
-	if ((err = pci_request_regions(pci, "ICH HD audio")) < 0) {
+	err = pci_request_regions(pci, "ICH HD audio");
+	if (err < 0) {
 		kfree(chip);
 		pci_disable_device(pci);
 		return err;
 	}
 
-	chip->addr = pci_resource_start(pci,0);
+	chip->addr = pci_resource_start(pci, 0);
 	chip->remap_addr = ioremap_nocache(chip->addr, pci_resource_len(pci,0));
 	if (chip->remap_addr == NULL) {
 		snd_printk(KERN_ERR SFX "ioremap error\n");
@@ -1542,7 +1544,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	}
 	chip->num_streams = chip->playback_streams + chip->capture_streams;
 	chip->azx_dev = kcalloc(chip->num_streams, sizeof(*chip->azx_dev), GFP_KERNEL);
-	if (! chip->azx_dev) {
+	if (!chip->azx_dev) {
 		snd_printk(KERN_ERR "cannot malloc azx_dev\n");
 		goto errout;
 	}
@@ -1573,7 +1575,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	chip->initialized = 1;
 
 	/* codec detection */
-	if (! chip->codec_mask) {
+	if (!chip->codec_mask) {
 		snd_printk(KERN_ERR SFX "no codecs found!\n");
 		err = -ENODEV;
 		goto errout;
@@ -1600,16 +1602,16 @@ static int __devinit azx_probe(struct pci_dev *pci, const struct pci_device_id *
 {
 	struct snd_card *card;
 	struct azx *chip;
-	int err = 0;
+	int err;
 
 	card = snd_card_new(index, id, THIS_MODULE, 0);
-	if (NULL == card) {
+	if (!card) {
 		snd_printk(KERN_ERR SFX "Error creating card!\n");
 		return -ENOMEM;
 	}
 
-	if ((err = azx_create(card, pci, pci_id->driver_data,
-			      &chip)) < 0) {
+	err = azx_create(card, pci, pci_id->driver_data, &chip);
+	if (err < 0) {
 		snd_card_free(card);
 		return err;
 	}
-- 
GitLab


From 2fd53a7e9b1392f9cc3002a24f3c13b2796e70c3 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Fri, 1 Sep 2006 17:15:36 +0200
Subject: [PATCH 0991/1063] [ALSA] [PPC,SOUND] Fix audio gpio state detection

When booting with line out or headphones plugged in, you won't hear anything.
The problem is that after reset all channels are muted, but the actual
value of the gpio port doesn't exactly match the active_val settings as
expected by check_audio_gpio.  For example, the line_mute port is set to
7, but check_audio_gpio would expect 0xd or 0xf, thus its return value
indicates that it is not active, even though it is.  AFAICS only looking
at the low bit is enough to determine whether the port is active.

Signed-off-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/ppc/tumbler.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 6ae2d5b9aa4a0..cdff53e4a17e6 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -190,7 +190,7 @@ static int check_audio_gpio(struct pmac_gpio *gp)
 
 	ret = do_gpio_read(gp);
 
-	return (ret & 0xd) == (gp->active_val & 0xd);
+	return (ret & 0x1) == (gp->active_val & 0x1);
 }
 
 static int read_audio_gpio(struct pmac_gpio *gp)
@@ -198,7 +198,8 @@ static int read_audio_gpio(struct pmac_gpio *gp)
 	int ret;
 	if (! gp->addr)
 		return 0;
-	ret = ((do_gpio_read(gp) & 0x02) !=0);
+	ret = do_gpio_read(gp);
+	ret = (ret & 0x02) !=0;
 	return ret == gp->active_state;
 }
 
-- 
GitLab


From 35a49934a7180fd80fb0bb3777d125dd939df50e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 1 Sep 2006 17:09:44 +0200
Subject: [PATCH 0992/1063] [ALSA] Add dB scale information to mixart driver

Added the dB scale information to mixart driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/mixart/mixart_mixer.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sound/pci/mixart/mixart_mixer.c b/sound/pci/mixart/mixart_mixer.c
index ed47b732c1034..13de0f71d4b74 100644
--- a/sound/pci/mixart/mixart_mixer.c
+++ b/sound/pci/mixart/mixart_mixer.c
@@ -31,6 +31,7 @@
 #include "mixart_core.h"
 #include "mixart_hwdep.h"
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include "mixart_mixer.h"
 
 static u32 mixart_analog_level[256] = {
@@ -388,12 +389,17 @@ static int mixart_analog_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e
 	return changed;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_analog, -9600, 50, 0);
+
 static struct snd_kcontrol_new mixart_control_analog_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	.info =		mixart_analog_vol_info,
 	.get =		mixart_analog_vol_get,
 	.put =		mixart_analog_vol_put,
+	.tlv = { .p = db_scale_analog },
 };
 
 /* shared */
@@ -866,14 +872,19 @@ static int mixart_pcm_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem
 	return changed;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_digital, -10950, 50, 0);
+
 static struct snd_kcontrol_new snd_mixart_pcm_vol =
 {
 	.iface =        SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	/* count will be filled later */
 	.info =         mixart_digital_vol_info,		/* shared */
 	.get =          mixart_pcm_vol_get,
 	.put =          mixart_pcm_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 
@@ -984,10 +995,13 @@ static int mixart_monitor_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_
 
 static struct snd_kcontrol_new mixart_control_monitor_vol = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =         "Monitoring Volume",
 	.info =		mixart_digital_vol_info,		/* shared */
 	.get =		mixart_monitor_vol_get,
 	.put =		mixart_monitor_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 /*
-- 
GitLab


From 93ed150375187ae7917ed1e3b9b830b9d4065bad Mon Sep 17 00:00:00 2001
From: Tobin Davis <tdavis@dsl-only.net>
Date: Fri, 1 Sep 2006 21:03:12 +0200
Subject: [PATCH 0993/1063] [ALSA] hda-codec - Add 5 stack audio support for
 Intel 965 systems

This patch renames the 965_2112 function ids to 965_3ST, and
adds functional support for 965_5ST (5 stack 7.1 surround).

Signed-off-by: Tobin Davis <tdavis@dsl-only.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 .../sound/alsa/ALSA-Configuration.txt         |  10 ++
 sound/pci/hda/patch_sigmatel.c                | 152 ++++++++++--------
 2 files changed, 91 insertions(+), 71 deletions(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 48d3bdf2a7cd8..a788dd7bc790b 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -859,6 +859,16 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  laptop-dig	ditto with SPDIF
 	  auto		auto-config reading BIOS (default)
 
+	STAC9200/9205/9220/9221/9254
+	  ref		Reference board
+	  3stack	D945 3stack
+	  5stack	D945 5stack + SPDIF
+
+	STAC9227/9228/9229/927x
+	  ref		Reference board
+	  3stack	D965 3stack
+	  5stack	D965 5stack + SPDIF
+
 	STAC9872
 	  vaio		Setup for VAIO FE550G/SZ110
 	  vaio-ar Setup for VAIO AR
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 8d5ad7c0db072..87169032be1f2 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -42,9 +42,10 @@
 #define STAC_D945GTP3		1
 #define STAC_D945GTP5		2
 #define STAC_MACMINI		3
-#define STAC_D965_2112		4
-#define STAC_D965_284B		5
-#define STAC_922X_MODELS	6	/* number of 922x models */
+#define STAC_922X_MODELS	4	/* number of 922x models */
+#define STAC_D965_3ST		4
+#define STAC_D965_5ST		5
+#define STAC_927X_MODELS	6	/* number of 922x models */
 
 struct sigmatel_spec {
 	struct snd_kcontrol_new *mixers[4];
@@ -111,24 +112,10 @@ static hda_nid_t stac922x_adc_nids[2] = {
         0x06, 0x07,
 };
 
-static hda_nid_t stac9227_adc_nids[2] = {
-        0x07, 0x08,
-};
-
-#if 0
-static hda_nid_t d965_2112_dac_nids[3] = {
-        0x02, 0x03, 0x05,
-};
-#endif
-
 static hda_nid_t stac922x_mux_nids[2] = {
         0x12, 0x13,
 };
 
-static hda_nid_t stac9227_mux_nids[2] = {
-        0x15, 0x16,
-};
-
 static hda_nid_t stac927x_adc_nids[3] = {
         0x07, 0x08, 0x09
 };
@@ -146,7 +133,8 @@ static hda_nid_t stac9205_mux_nids[2] = {
 };
 
 static hda_nid_t stac9200_pin_nids[8] = {
-	0x08, 0x09, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
+	0x08, 0x09, 0x0d, 0x0e, 
+	0x0f, 0x10, 0x11, 0x12,
 };
 
 static hda_nid_t stac922x_pin_nids[10] = {
@@ -206,17 +194,9 @@ static struct hda_verb stac922x_core_init[] = {
 	{}
 };
 
-static struct hda_verb stac9227_core_init[] = {
-	/* set master volume and direct control */	
-	{ 0x16, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
-	/* unmute node 0x1b */
-	{ 0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-	{}
-};
-
-static struct hda_verb d965_2112_core_init[] = {
+static struct hda_verb d965_core_init[] = {
 	/* set master volume and direct control */	
-	{ 0x16, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
+	{ 0x24, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
 	/* unmute node 0x1b */
 	{ 0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
 	/* select node 0x03 as DAC */	
@@ -386,6 +366,8 @@ static unsigned int *stac922x_brd_tbl[STAC_922X_MODELS] = {
 };
 
 static struct hda_board_config stac922x_cfg_tbl[] = {
+	{ .modelname = "5stack", .config = STAC_D945GTP5 },
+	{ .modelname = "3stack", .config = STAC_D945GTP3 },
 	{ .modelname = "ref",
 	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2668,	/* DFI LanParty */
@@ -471,99 +453,127 @@ static struct hda_board_config stac922x_cfg_tbl[] = {
 	{ .pci_subvendor = 0x8384,
 	  .pci_subdevice = 0x7680,
 	  .config = STAC_MACMINI },	/* Apple Mac Mini (early 2006) */
-	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x2112,
-	  .config = STAC_D965_2112 },
-	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x284b,
-	  .config = STAC_D965_284B },
 	{} /* terminator */
 };
 
 static unsigned int ref927x_pin_configs[14] = {
-	0x01813122, 0x01a19021, 0x01014010, 0x01016011,
-	0x01012012, 0x01011014, 0x40000100, 0x40000100, 
-	0x40000100, 0x40000100, 0x40000100, 0x01441030,
-	0x01c41030, 0x40000100,
+	0x02214020, 0x02a19080, 0x0181304e, 0x01014010,
+	0x01a19040, 0x01011012, 0x01016011, 0x0101201f, 
+	0x183301f0, 0x18a001f0, 0x18a001f0, 0x01442070,
+	0x01c42190, 0x40000100,
 };
 
-static unsigned int d965_2112_pin_configs[14] = {
+static unsigned int d965_3st_pin_configs[14] = {
 	0x0221401f, 0x02a19120, 0x40000100, 0x01014011,
 	0x01a19021, 0x01813024, 0x40000100, 0x40000100,
 	0x40000100, 0x40000100, 0x40000100, 0x40000100,
 	0x40000100, 0x40000100
 };
 
-static unsigned int *stac927x_brd_tbl[] = {
+static unsigned int d965_5st_pin_configs[14] = {
+	0x02214020, 0x02a19080, 0x0181304e, 0x01014010,
+	0x01a19040, 0x01011012, 0x01016011, 0x40000100,
+	0x40000100, 0x40000100, 0x40000100, 0x01442070,
+	0x40000100, 0x40000100
+};
+
+static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = {
 	[STAC_REF] =	ref927x_pin_configs,
-	[STAC_D965_2112] = d965_2112_pin_configs,
+	[STAC_D965_3ST] = d965_3st_pin_configs,
+	[STAC_D965_5ST] = d965_5st_pin_configs,
 };
 
 static struct hda_board_config stac927x_cfg_tbl[] = {
+	{ .modelname = "5stack", .config = STAC_D965_5ST },
+	{ .modelname = "3stack", .config = STAC_D965_3ST },
 	{ .modelname = "ref",
 	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2668,	/* DFI LanParty */
 	  .config = STAC_REF },		/* SigmaTel reference board */
-	/* SigmaTel 9227 reference board */
-	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x284b,
-	  .config = STAC_D965_284B },
 	 /* Intel 946 based systems */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x3d01,
-	  .config = STAC_D965_2112 }, /* D946  configuration */
+	  .config = STAC_D965_3ST }, /* D946  configuration */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0xa301,
-	  .config = STAC_D965_2112 }, /* Intel D946GZT - 3 stack  */
-	/* 965 based systems */
+	  .config = STAC_D965_3ST }, /* Intel D946GZT - 3 stack  */
+	/* 965 based 3 stack systems */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2116,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2115,
-	  .config = STAC_D965_2112 }, /* Intel DQ965WC - 3 Stack  */
+	  .config = STAC_D965_3ST }, /* Intel DQ965WC - 3 Stack  */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2114,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2113,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2112,
-	  .config = STAC_D965_2112 }, /* Intel DG965MS - 3 Stack  */
+	  .config = STAC_D965_3ST }, /* Intel DG965MS - 3 Stack  */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2111,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2110,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2009,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2008,
-	  .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stack  */
+	  .config = STAC_D965_3ST }, /* Intel DQ965GF - 3 Stack  */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2007,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2006,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2005,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2004,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2003,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2002,
-	  .config = STAC_D965_2112 }, /* Intel D965 3Stack config */
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2001,
-	  .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stackg */
+	  .config = STAC_D965_3ST }, /* Intel DQ965GF - 3 Stack */
+	/* 965 based 5 stack systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2301,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2302,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2303,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2304,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2305,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2501,
+	  .config = STAC_D965_5ST }, /* Intel DG965MQ - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2502,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2503,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2504,
+	  .config = STAC_D965_5ST }, /* Intel DQ965GF - 5 Stack */
 	{} /* terminator */
 };
 
@@ -1546,18 +1556,18 @@ static int patch_stac927x(struct hda_codec *codec)
 	}
 
 	switch (spec->board_config) {
-	case STAC_D965_2112:
+	case STAC_D965_3ST:
 		spec->adc_nids = stac927x_adc_nids;
 		spec->mux_nids = stac927x_mux_nids;
 		spec->num_muxes = 3;
-		spec->init = d965_2112_core_init;
+		spec->init = d965_core_init;
 		spec->mixer = stac9227_mixer;
 		break;
-	case STAC_D965_284B:
-		spec->adc_nids = stac9227_adc_nids;
-		spec->mux_nids = stac9227_mux_nids;
-		spec->num_muxes = 2;
-		spec->init = stac9227_core_init;
+	case STAC_D965_5ST:
+		spec->adc_nids = stac927x_adc_nids;
+		spec->mux_nids = stac927x_mux_nids;
+		spec->num_muxes = 3;
+		spec->init = d965_core_init;
 		spec->mixer = stac9227_mixer;
 		break;
 	default:
-- 
GitLab


From bd25b7cae1e763b292f359170e16bccd01c7ee5c Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Mon, 4 Sep 2006 12:28:24 +0200
Subject: [PATCH 0994/1063] [ALSA] ac97: Fix AD1819 volume range

AD1819 volume registers can hold extra bits which do not affect the
actual volume. Add a res_table to the codec patch to fix the problem.
PCM, line and mic volume were tested.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ac97_patch.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index bdd7f89234f6f..9be4ceb6838e5 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -1395,6 +1395,17 @@ static void ad1888_resume(struct snd_ac97 *ac97)
 
 #endif
 
+static const struct snd_ac97_res_table ad1819_restbl[] = {
+	{ AC97_PHONE, 0x9f1f },
+	{ AC97_MIC, 0x9f1f },
+	{ AC97_LINE, 0x9f1f },
+	{ AC97_CD, 0x9f1f },
+	{ AC97_VIDEO, 0x9f1f },
+	{ AC97_AUX, 0x9f1f },
+	{ AC97_PCM, 0x9f1f },
+	{ } /* terminator */
+};
+
 int patch_ad1819(struct snd_ac97 * ac97)
 {
 	unsigned short scfg;
@@ -1402,6 +1413,7 @@ int patch_ad1819(struct snd_ac97 * ac97)
 	// patch for Analog Devices
 	scfg = snd_ac97_read(ac97, AC97_AD_SERIAL_CFG);
 	snd_ac97_write_cache(ac97, AC97_AD_SERIAL_CFG, scfg | 0x7000); /* select all codecs */
+	ac97->res_table = ad1819_restbl;
 	return 0;
 }
 
-- 
GitLab


From 679e28eef835cbd30de78c2f80bf488cba1b7e40 Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Mon, 4 Sep 2006 12:28:51 +0200
Subject: [PATCH 0995/1063] [ALSA] es1968: Fix hw volume

Fix maestro2 hardware volume control. Tested on a Dell Inspiron 7000.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/es1968.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c
index 3c5ab7c2e72df..f3c40385c87d4 100644
--- a/sound/pci/es1968.c
+++ b/sound/pci/es1968.c
@@ -1905,7 +1905,7 @@ static void es1968_update_hw_volume(unsigned long private_data)
 	/* Figure out which volume control button was pushed,
 	   based on differences from the default register
 	   values. */
-	x = inb(chip->io_port + 0x1c);
+	x = inb(chip->io_port + 0x1c) & 0xee;
 	/* Reset the volume control registers. */
 	outb(0x88, chip->io_port + 0x1c);
 	outb(0x88, chip->io_port + 0x1d);
@@ -1921,7 +1921,8 @@ static void es1968_update_hw_volume(unsigned long private_data)
 	/* FIXME: we can't call snd_ac97_* functions since here is in tasklet. */
 	spin_lock_irqsave(&chip->ac97_lock, flags);
 	val = chip->ac97->regs[AC97_MASTER];
-	if (x & 1) {
+	switch (x) {
+	case 0x88:
 		/* mute */
 		val ^= 0x8000;
 		chip->ac97->regs[AC97_MASTER] = val;
@@ -1929,26 +1930,31 @@ static void es1968_update_hw_volume(unsigned long private_data)
 		outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX);
 		snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
 			       &chip->master_switch->id);
-	} else {
-		val &= 0x7fff;
-		if (((x>>1) & 7) > 4) {
-			/* volume up */
-			if ((val & 0xff) > 0)
-				val--;
-			if ((val & 0xff00) > 0)
-				val -= 0x0100;
-		} else {
-			/* volume down */
-			if ((val & 0xff) < 0x1f)
-				val++;
-			if ((val & 0xff00) < 0x1f00)
-				val += 0x0100;
-		}
+		break;
+	case 0xaa:
+		/* volume up */
+		if ((val & 0x7f) > 0)
+			val--;
+		if ((val & 0x7f00) > 0)
+			val -= 0x0100;
+		chip->ac97->regs[AC97_MASTER] = val;
+		outw(val, chip->io_port + ESM_AC97_DATA);
+		outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX);
+		snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
+			       &chip->master_volume->id);
+		break;
+	case 0x66:
+		/* volume down */
+		if ((val & 0x7f) < 0x1f)
+			val++;
+		if ((val & 0x7f00) < 0x1f00)
+			val += 0x0100;
 		chip->ac97->regs[AC97_MASTER] = val;
 		outw(val, chip->io_port + ESM_AC97_DATA);
 		outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX);
 		snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
 			       &chip->master_volume->id);
+		break;
 	}
 	spin_unlock_irqrestore(&chip->ac97_lock, flags);
 }
-- 
GitLab


From ea543f1ee61bbfdf6cac4b79d66c7840d5b00037 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Tue, 5 Sep 2006 20:25:05 +0200
Subject: [PATCH 0996/1063] [ALSA] sparc dbri: SMP fixes

The dbri driver hangs when used in a kernel compiled with SMP
support due to improper locking. This patch fixes it.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/sparc/dbri.c | 65 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 17 deletions(-)

diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 82d5e8072f2b6..e4935fca12dff 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -635,10 +635,16 @@ to send them to the DBRI.
 static void dbri_cmdwait(struct snd_dbri *dbri)
 {
 	int maxloops = MAXLOOPS;
+	unsigned long flags;
 
 	/* Delay if previous commands are still being processed */
-	while ((--maxloops) > 0 && (sbus_readl(dbri->regs + REG0) & D_P))
+	spin_lock_irqsave(&dbri->lock, flags);
+	while ((--maxloops) > 0 && (sbus_readl(dbri->regs + REG0) & D_P)) {
+		spin_unlock_irqrestore(&dbri->lock, flags);
 		msleep_interruptible(1);
+		spin_lock_irqsave(&dbri->lock, flags);
+	}
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	if (maxloops == 0) {
 		printk(KERN_ERR "DBRI: Chip never completed command buffer\n");
@@ -671,11 +677,12 @@ static s32 *dbri_cmdlock(struct snd_dbri * dbri, int len)
  * the last WAIT cmd and force DBRI to reread the cmd.
  * The JUMP cmd points to the new cmd string.
  * It also releases the cmdlock spinlock.
+ *
+ * Lock must not be held before calling this.
  */
 static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 {
 	s32 tmp, addr;
-	unsigned long flags;
 	static int wait_id = 0;
 
 	wait_id++;
@@ -706,12 +713,10 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 	}
 #endif
 
-	spin_lock_irqsave(&dbri->lock, flags);
 	/* Reread the last command */
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp |= D_P;
 	sbus_writel(tmp, dbri->regs + REG0);
-	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	dbri->cmdptr = cmd;
 	spin_unlock(&dbri->cmdlock);
@@ -777,9 +782,9 @@ static void dbri_initialize(struct snd_dbri * dbri)
 	dma_addr = dbri->dma_dvma + dbri_dma_off(cmd, 0);
 	sbus_writel(dma_addr, dbri->regs + REG8);
 	spin_unlock(&dbri->cmdlock);
-	dbri_cmdwait(dbri);
 
 	spin_unlock_irqrestore(&dbri->lock, flags);
+	dbri_cmdwait(dbri);
 }
 
 /*
@@ -840,6 +845,9 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe)
 	dbri->pipes[pipe].first_desc = -1;
 }
 
+/*
+ * Lock must be held before calling this.
+ */
 static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp)
 {
 	if (pipe < 0 || pipe > DBRI_MAX_PIPE) {
@@ -866,6 +874,9 @@ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp)
 	reset_pipe(dbri, pipe);
 }
 
+/*
+ * Lock must be held before calling this.
+ */
 static void link_time_slot(struct snd_dbri * dbri, int pipe,
 			   int prevpipe, int nextpipe,
 			   int length, int cycle)
@@ -920,6 +931,10 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe,
 	dbri_cmdsend(dbri, cmd, 4);
 }
 
+#if 0
+/*
+ * Lock must be held before calling this.
+ */
 static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
 			     enum in_or_out direction, int prevpipe,
 			     int nextpipe)
@@ -952,6 +967,7 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
 
 	dbri_cmdsend(dbri, cmd, 4);
 }
+#endif
 
 /* xmit_fixed() / recv_fixed()
  *
@@ -965,11 +981,14 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
  * the actual time slot is.  The interrupt handler takes care of bit
  * ordering and alignment.  An 8-bit time slot will always end up
  * in the low-order 8 bits, filled either MSB-first or LSB-first,
- * depending on the settings passed to setup_pipe()
+ * depending on the settings passed to setup_pipe().
+ *
+ * Lock must not be held before calling it.
  */
 static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data)
 {
 	s32 *cmd;
+	unsigned long flags;
 
 	if (pipe < 16 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: xmit_fixed: Illegal pipe number\n");
@@ -1002,8 +1021,11 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data)
 	*(cmd++) = data;
 	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
+	spin_lock_irqsave(&dbri->lock, flags);
 	dbri_cmdsend(dbri, cmd, 3);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 	dbri_cmdwait(dbri);
+
 }
 
 static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr)
@@ -1039,6 +1061,8 @@ static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr)
  * be spread across multiple descriptors.
  *
  * All descriptors create a ring buffer.
+ *
+ * Lock must be held before calling this.
  */
 static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period)
 {
@@ -1186,6 +1210,9 @@ multiplexed serial interface which the DBRI can operate in either master
 
 enum master_or_slave { CHImaster, CHIslave };
 
+/*
+ * Lock must not be held before calling it.
+ */
 static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_slave,
 		      int bits_per_frame)
 {
@@ -1258,9 +1285,14 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla
 In the standard SPARC audio configuration, the CS4215 codec is attached
 to the DBRI via the CHI interface and few of the DBRI's PIO pins.
 
+ * Lock must not be held before calling it.
+
 */
 static void cs4215_setup_pipes(struct snd_dbri * dbri)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&dbri->lock, flags);
 	/*
 	 * Data mode:
 	 * Pipe  4: Send timeslots 1-4 (audio data)
@@ -1284,6 +1316,7 @@ static void cs4215_setup_pipes(struct snd_dbri * dbri)
 	setup_pipe(dbri, 17, D_SDP_FIXED | D_SDP_TO_SER | D_SDP_MSB);
 	setup_pipe(dbri, 18, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB);
 	setup_pipe(dbri, 19, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	dbri_cmdwait(dbri);
 }
@@ -1358,6 +1391,7 @@ static void cs4215_open(struct snd_dbri * dbri)
 {
 	int data_width;
 	u32 tmp;
+	unsigned long flags;
 
 	dprintk(D_MM, "cs4215_open: %d channels, %d bits\n",
 		dbri->mm.channels, dbri->mm.precision);
@@ -1382,6 +1416,7 @@ static void cs4215_open(struct snd_dbri * dbri)
 	 * bits.  The CS4215, it seems, observes TSIN (the delayed signal)
 	 * even if it's the CHI master.  Don't ask me...
 	 */
+	spin_lock_irqsave(&dbri->lock, flags);
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp &= ~(D_C);		/* Disable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
@@ -1409,6 +1444,7 @@ static void cs4215_open(struct snd_dbri * dbri)
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp |= D_C;		/* Enable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	cs4215_setdata(dbri, 0);
 }
@@ -1420,6 +1456,7 @@ static int cs4215_setctrl(struct snd_dbri * dbri)
 {
 	int i, val;
 	u32 tmp;
+	unsigned long flags;
 
 	/* FIXME - let the CPU do something useful during these delays */
 
@@ -1456,6 +1493,7 @@ static int cs4215_setctrl(struct snd_dbri * dbri)
 	 * done in hardware by a TI 248 that delays the DBRI->4215
 	 * frame sync signal by eight clock cycles.  Anybody know why?
 	 */
+	spin_lock_irqsave(&dbri->lock, flags);
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp &= ~D_C;		/* Disable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
@@ -1472,14 +1510,17 @@ static int cs4215_setctrl(struct snd_dbri * dbri)
 	link_time_slot(dbri, 17, 16, 16, 32, dbri->mm.offset);
 	link_time_slot(dbri, 18, 16, 16, 8, dbri->mm.offset);
 	link_time_slot(dbri, 19, 18, 16, 8, dbri->mm.offset + 48);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	/* Wait for the chip to echo back CLB (Control Latch Bit) as zero */
 	dbri->mm.ctrl[0] &= ~CS4215_CLB;
 	xmit_fixed(dbri, 17, *(int *)dbri->mm.ctrl);
 
+	spin_lock_irqsave(&dbri->lock, flags);
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp |= D_C;		/* Enable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	for (i = 10; ((dbri->mm.status & 0xe4) != 0x20); --i) {
 		msleep_interruptible(1);
@@ -1688,6 +1729,7 @@ static void xmit_descs(struct snd_dbri *dbri)
 			dbri->pipes[info->pipe].desc = first_td;
 		}
 	}
+
 	spin_unlock_irqrestore(&dbri->lock, flags);
 }
 
@@ -2093,7 +2135,6 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream)
 {
 	struct snd_dbri *dbri = snd_pcm_substream_chip(substream);
 	struct dbri_streaminfo *info = DBRI_STREAM(dbri, substream);
-	struct snd_pcm_runtime *runtime = substream->runtime;
 	int ret;
 
 	info->size = snd_pcm_lib_buffer_bytes(substream);
@@ -2232,7 +2273,6 @@ static int snd_cs4215_put_volume(struct snd_kcontrol *kcontrol,
 {
 	struct snd_dbri *dbri = snd_kcontrol_chip(kcontrol);
 	struct dbri_streaminfo *info = &dbri->stream_info[kcontrol->private_value];
-	unsigned long flags;
 	int changed = 0;
 
 	if (info->left_gain != ucontrol->value.integer.value[0]) {
@@ -2247,13 +2287,9 @@ static int snd_cs4215_put_volume(struct snd_kcontrol *kcontrol,
 		/* First mute outputs, and wait 1/8000 sec (125 us)
 		 * to make sure this takes.  This avoids clicking noises.
 		 */
-		spin_lock_irqsave(&dbri->lock, flags);
-
 		cs4215_setdata(dbri, 1);
 		udelay(125);
 		cs4215_setdata(dbri, 0);
-
-		spin_unlock_irqrestore(&dbri->lock, flags);
 	}
 	return changed;
 }
@@ -2300,7 +2336,6 @@ static int snd_cs4215_put_single(struct snd_kcontrol *kcontrol,
 				 struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_dbri *dbri = snd_kcontrol_chip(kcontrol);
-	unsigned long flags;
 	int elem = kcontrol->private_value & 0xff;
 	int shift = (kcontrol->private_value >> 8) & 0xff;
 	int mask = (kcontrol->private_value >> 16) & 0xff;
@@ -2333,13 +2368,9 @@ static int snd_cs4215_put_single(struct snd_kcontrol *kcontrol,
 		/* First mute outputs, and wait 1/8000 sec (125 us)
 		 * to make sure this takes.  This avoids clicking noises.
 		 */
-		spin_lock_irqsave(&dbri->lock, flags);
-
 		cs4215_setdata(dbri, 1);
 		udelay(125);
 		cs4215_setdata(dbri, 0);
-
-		spin_unlock_irqrestore(&dbri->lock, flags);
 	}
 	return changed;
 }
-- 
GitLab


From 311e70a4741c736795da082da7290164d9cf3726 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 Sep 2006 12:13:37 +0200
Subject: [PATCH 0997/1063] [ALSA] hdsp - Fix auto-updating of firmware

Fixed the auto-updating of firmware if the breakout box was switched
off/on.  The firmware binary itself was already cached but it wasn't
loaded properly.  Also, request_firmware() is issued if the box had
firmware at module loading time but it was erased later.
The auto-update is triggered at each PCM action (open, prepare, etc)
and at opening proc files.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/rme9652/hdsp.c | 48 +++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index e5a52da77b85d..d3e07de433b0e 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -726,22 +726,36 @@ static int hdsp_get_iobox_version (struct hdsp *hdsp)
 }
 
 
-static int hdsp_check_for_firmware (struct hdsp *hdsp, int show_err)
+#ifdef HDSP_FW_LOADER
+static int __devinit hdsp_request_fw_loader(struct hdsp *hdsp);
+#endif
+
+static int hdsp_check_for_firmware (struct hdsp *hdsp, int load_on_demand)
 {
-	if (hdsp->io_type == H9652 || hdsp->io_type == H9632) return 0;
+	if (hdsp->io_type == H9652 || hdsp->io_type == H9632)
+		return 0;
 	if ((hdsp_read (hdsp, HDSP_statusRegister) & HDSP_DllError) != 0) {
-		snd_printk(KERN_ERR "Hammerfall-DSP: firmware not present.\n");
 		hdsp->state &= ~HDSP_FirmwareLoaded;
-		if (! show_err)
+		if (! load_on_demand)
 			return -EIO;
+		snd_printk(KERN_ERR "Hammerfall-DSP: firmware not present.\n");
 		/* try to load firmware */
-		if (hdsp->state & HDSP_FirmwareCached) {
-			if (snd_hdsp_load_firmware_from_cache(hdsp) != 0)
-				snd_printk(KERN_ERR "Hammerfall-DSP: Firmware loading from cache failed, please upload manually.\n");
-		} else {
-			snd_printk(KERN_ERR "Hammerfall-DSP: No firmware loaded nor cached, please upload firmware.\n");
+		if (! (hdsp->state & HDSP_FirmwareCached)) {
+#ifdef HDSP_FW_LOADER
+			if (! hdsp_request_fw_loader(hdsp))
+				return 0;
+#endif
+			snd_printk(KERN_ERR
+				   "Hammerfall-DSP: No firmware loaded nor "
+				   "cached, please upload firmware.\n");
+			return -EIO;
+		}
+		if (snd_hdsp_load_firmware_from_cache(hdsp) != 0) {
+			snd_printk(KERN_ERR
+				   "Hammerfall-DSP: Firmware loading from "
+				   "cache failed, please upload manually.\n");
+			return -EIO;
 		}
-		return -EIO;
 	}
 	return 0;
 }
@@ -3181,8 +3195,16 @@ snd_hdsp_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer)
 				return;
 			}
 		} else {
-			snd_iprintf(buffer, "No firmware loaded nor cached, please upload firmware.\n");
-			return;
+			int err = -EINVAL;
+#ifdef HDSP_FW_LOADER
+			err = hdsp_request_fw_loader(hdsp);
+#endif
+			if (err < 0) {
+				snd_iprintf(buffer,
+					    "No firmware loaded nor cached, "
+					    "please upload firmware.\n");
+				return;
+			}
 		}
 	}
 	
@@ -3851,7 +3873,7 @@ static int snd_hdsp_trigger(struct snd_pcm_substream *substream, int cmd)
 	if (hdsp_check_for_iobox (hdsp))
 		return -EIO;
 
-	if (hdsp_check_for_firmware(hdsp, 1))
+	if (hdsp_check_for_firmware(hdsp, 0)) /* no auto-loading in trigger */
 		return -EIO;
 
 	spin_lock(&hdsp->lock);
-- 
GitLab


From 55a29af5ed5d914f017e6a7c613a4d7cc34f82d9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 Sep 2006 12:15:34 +0200
Subject: [PATCH 0998/1063] [ALSA] Add definition of TLV dB range compound

Added the definition of TLV dB range compound.  It contains one or
more dB-range or linear-volume TLV entries with min/max ranges.
Used for volume controls with non-linear curves.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/tlv.h | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/include/sound/tlv.h b/include/sound/tlv.h
index 7905841643df3..d93a96b91875a 100644
--- a/include/sound/tlv.h
+++ b/include/sound/tlv.h
@@ -34,19 +34,26 @@
 #define SNDRV_CTL_TLVT_CONTAINER 0	/* one level down - group of TLVs */
 #define SNDRV_CTL_TLVT_DB_SCALE	1       /* dB scale */
 #define SNDRV_CTL_TLVT_DB_LINEAR 2	/* linear volume */
+#define SNDRV_CTL_TLVT_DB_RANGE 3	/* dB range container */
 
+#define TLV_DB_SCALE_ITEM(min, step, mute)			\
+	SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int),	\
+	(min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0)
 #define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
-unsigned int name[] = { \
-        SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int), \
-        (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) \
-}
+	unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) }
 
 /* linear volume between min_dB and max_dB (.01dB unit) */
+#define TLV_DB_LINEAR_ITEM(min_dB, max_dB)		    \
+	SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \
+	(min_dB), (max_dB)
 #define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB)	\
-unsigned int name[] = { \
-        SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \
-        (min_dB), (max_dB)				\
-}
+	unsigned int name[] = { TLV_DB_LINEAR_ITEM(min_dB, max_dB) }
+
+/* dB range container */
+/* Each item is: <min> <max> <TLV> */
+/* The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR */
+#define TLV_DB_RANGE_HEAD(num)			\
+	SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int)
 
 #define TLV_DB_GAIN_MUTE	-9999999
 
-- 
GitLab


From 0b59397268ed418e139db3806f7956ffcb18b33d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 Sep 2006 13:35:27 +0200
Subject: [PATCH 0999/1063] [ALSA] Add dB information to es1938 driver

Added the dB information to ESS Solo (es1938) driver.
The new compound dB range TLVs are used for non-linear native
volume controls.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/es1938.c | 103 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 86 insertions(+), 17 deletions(-)

diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c
index cc0f34f68185d..3784088bea84e 100644
--- a/sound/pci/es1938.c
+++ b/sound/pci/es1938.c
@@ -62,6 +62,7 @@
 #include <sound/opl3.h>
 #include <sound/mpu401.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 #include <asm/io.h>
 
@@ -1164,6 +1165,14 @@ static int snd_es1938_reg_read(struct es1938 *chip, unsigned char reg)
 		return snd_es1938_read(chip, reg);
 }
 
+#define ES1938_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv)    \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ,\
+  .name = xname, .index = xindex, \
+  .info = snd_es1938_info_single, \
+  .get = snd_es1938_get_single, .put = snd_es1938_put_single, \
+  .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = xtlv } }
 #define ES1938_SINGLE(xname, xindex, reg, shift, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
   .info = snd_es1938_info_single, \
@@ -1217,6 +1226,14 @@ static int snd_es1938_put_single(struct snd_kcontrol *kcontrol,
 	return snd_es1938_reg_bits(chip, reg, mask, val) != val;
 }
 
+#define ES1938_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ,\
+  .name = xname, .index = xindex, \
+  .info = snd_es1938_info_double, \
+  .get = snd_es1938_get_double, .put = snd_es1938_put_double, \
+  .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = xtlv } }
 #define ES1938_DOUBLE(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
   .info = snd_es1938_info_double, \
@@ -1297,8 +1314,41 @@ static int snd_es1938_put_double(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+static unsigned int db_scale_master[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 54, TLV_DB_SCALE_ITEM(-3600, 50, 1),
+	54, 63, TLV_DB_SCALE_ITEM(-900, 100, 0),
+};
+
+static unsigned int db_scale_audio1[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-3300, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(-900, 150, 0),
+};
+
+static unsigned int db_scale_audio2[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-3450, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(-1050, 150, 0),
+};
+
+static unsigned int db_scale_mic[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-2400, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(0, 150, 0),
+};
+
+static unsigned int db_scale_line[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-3150, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(-750, 150, 0),
+};
+
+static DECLARE_TLV_DB_SCALE(db_scale_capture, 0, 150, 0);
+
 static struct snd_kcontrol_new snd_es1938_controls[] = {
-ES1938_DOUBLE("Master Playback Volume", 0, 0x60, 0x62, 0, 0, 63, 0),
+ES1938_DOUBLE_TLV("Master Playback Volume", 0, 0x60, 0x62, 0, 0, 63, 0,
+		  db_scale_master),
 ES1938_DOUBLE("Master Playback Switch", 0, 0x60, 0x62, 6, 6, 1, 1),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1309,19 +1359,28 @@ ES1938_DOUBLE("Master Playback Switch", 0, 0x60, 0x62, 6, 6, 1, 1),
 },
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name = "Hardware Master Playback Switch",
 	.access = SNDRV_CTL_ELEM_ACCESS_READ,
 	.info = snd_es1938_info_hw_switch,
 	.get = snd_es1938_get_hw_switch,
+	.tlv = { .p = db_scale_master },
 },
 ES1938_SINGLE("Hardware Volume Split", 0, 0x64, 7, 1, 0),
-ES1938_DOUBLE("Line Playback Volume", 0, 0x3e, 0x3e, 4, 0, 15, 0),
+ES1938_DOUBLE_TLV("Line Playback Volume", 0, 0x3e, 0x3e, 4, 0, 15, 0,
+		  db_scale_line),
 ES1938_DOUBLE("CD Playback Volume", 0, 0x38, 0x38, 4, 0, 15, 0),
-ES1938_DOUBLE("FM Playback Volume", 0, 0x36, 0x36, 4, 0, 15, 0),
-ES1938_DOUBLE("Mono Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0),
-ES1938_DOUBLE("Mic Playback Volume", 0, 0x1a, 0x1a, 4, 0, 15, 0),
-ES1938_DOUBLE("Aux Playback Volume", 0, 0x3a, 0x3a, 4, 0, 15, 0),
-ES1938_DOUBLE("Capture Volume", 0, 0xb4, 0xb4, 4, 0, 15, 0),
+ES1938_DOUBLE_TLV("FM Playback Volume", 0, 0x36, 0x36, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Mono Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("Mic Playback Volume", 0, 0x1a, 0x1a, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Aux Playback Volume", 0, 0x3a, 0x3a, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("Capture Volume", 0, 0xb4, 0xb4, 4, 0, 15, 0,
+		  db_scale_capture),
 ES1938_SINGLE("PC Speaker Volume", 0, 0x3c, 0, 7, 0),
 ES1938_SINGLE("Record Monitor", 0, 0xa8, 3, 1, 0),
 ES1938_SINGLE("Capture Switch", 0, 0x1c, 4, 1, 1),
@@ -1332,16 +1391,26 @@ ES1938_SINGLE("Capture Switch", 0, 0x1c, 4, 1, 1),
 	.get = snd_es1938_get_mux,
 	.put = snd_es1938_put_mux,
 },
-ES1938_DOUBLE("Mono Input Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0),
-ES1938_DOUBLE("PCM Capture Volume", 0, 0x69, 0x69, 4, 0, 15, 0),
-ES1938_DOUBLE("Mic Capture Volume", 0, 0x68, 0x68, 4, 0, 15, 0),
-ES1938_DOUBLE("Line Capture Volume", 0, 0x6e, 0x6e, 4, 0, 15, 0),
-ES1938_DOUBLE("FM Capture Volume", 0, 0x6b, 0x6b, 4, 0, 15, 0),
-ES1938_DOUBLE("Mono Capture Volume", 0, 0x6f, 0x6f, 4, 0, 15, 0),
-ES1938_DOUBLE("CD Capture Volume", 0, 0x6a, 0x6a, 4, 0, 15, 0),
-ES1938_DOUBLE("Aux Capture Volume", 0, 0x6c, 0x6c, 4, 0, 15, 0),
-ES1938_DOUBLE("PCM Playback Volume", 0, 0x7c, 0x7c, 4, 0, 15, 0),
-ES1938_DOUBLE("PCM Playback Volume", 1, 0x14, 0x14, 4, 0, 15, 0),
+ES1938_DOUBLE_TLV("Mono Input Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("PCM Capture Volume", 0, 0x69, 0x69, 4, 0, 15, 0,
+		  db_scale_audio2),
+ES1938_DOUBLE_TLV("Mic Capture Volume", 0, 0x68, 0x68, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Line Capture Volume", 0, 0x6e, 0x6e, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("FM Capture Volume", 0, 0x6b, 0x6b, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Mono Capture Volume", 0, 0x6f, 0x6f, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("CD Capture Volume", 0, 0x6a, 0x6a, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("Aux Capture Volume", 0, 0x6c, 0x6c, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("PCM Playback Volume", 0, 0x7c, 0x7c, 4, 0, 15, 0,
+		  db_scale_audio2),
+ES1938_DOUBLE_TLV("PCM Playback Volume", 1, 0x14, 0x14, 4, 0, 15, 0,
+		  db_scale_audio1),
 ES1938_SINGLE("3D Control - Level", 0, 0x52, 0, 63, 0),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-- 
GitLab


From 160ea0dc6b86e2c0c4d325c06bf402bfdde7c1c7 Mon Sep 17 00:00:00 2001
From: Richard Fish <bigfish@asmallpond.org>
Date: Wed, 6 Sep 2006 13:58:25 +0200
Subject: [PATCH 1000/1063] [ALSA] [snd-intel-hda] enable center/LFE speaker on
 some laptops

This patch adds LFE mixer controls for laptops with a stac9200 and a mono
speaker pin with amplifier.

Signed-off-by: Richard Fish <bigfish@asmallpond.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 63 ++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 87169032be1f2..bcbbe111ab951 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1223,6 +1223,66 @@ static int stac9200_auto_create_hp_ctls(struct hda_codec *codec,
 	return 0;
 }
 
+/* add playback controls for LFE output */
+static int stac9200_auto_create_lfe_ctls(struct hda_codec *codec,
+					struct auto_pin_cfg *cfg)
+{
+	struct sigmatel_spec *spec = codec->spec;
+	int err;
+	hda_nid_t lfe_pin = 0x0;
+	int i;
+
+	/*
+	 * search speaker outs and line outs for a mono speaker pin
+	 * with an amp.  If one is found, add LFE controls
+	 * for it.
+	 */
+	for (i = 0; i < spec->autocfg.speaker_outs && lfe_pin == 0x0; i++) {
+		hda_nid_t pin = spec->autocfg.speaker_pins[i];
+		unsigned long wcaps = get_wcaps(codec, pin);
+		wcaps &= (AC_WCAP_STEREO | AC_WCAP_OUT_AMP);
+		if (wcaps == AC_WCAP_OUT_AMP)
+			/* found a mono speaker with an amp, must be lfe */
+			lfe_pin = pin;
+	}
+
+	/* if speaker_outs is 0, then speakers may be in line_outs */
+	if (lfe_pin == 0 && spec->autocfg.speaker_outs == 0) {
+		for (i = 0; i < spec->autocfg.line_outs && lfe_pin == 0x0; i++) {
+			hda_nid_t pin = spec->autocfg.line_out_pins[i];
+			unsigned long cfg;
+			cfg = snd_hda_codec_read(codec, pin, 0,
+						 AC_VERB_GET_CONFIG_DEFAULT,
+						 0x00);
+			if (get_defcfg_device(cfg) == AC_JACK_SPEAKER) {
+				unsigned long wcaps = get_wcaps(codec, pin);
+				wcaps &= (AC_WCAP_STEREO | AC_WCAP_OUT_AMP);
+				if (wcaps == AC_WCAP_OUT_AMP)
+					/* found a mono speaker with an amp,
+					   must be lfe */
+					lfe_pin = pin;
+			}
+		}
+	}
+
+	if (lfe_pin) {
+		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL,
+					   "LFE Playback Volume",
+					   HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0,
+							       HDA_OUTPUT));
+		if (err < 0)
+			return err;
+		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE,
+					   "LFE Playback Switch",
+					   HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0,
+							       HDA_OUTPUT));
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 static int stac9200_parse_auto_config(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -1237,6 +1297,9 @@ static int stac9200_parse_auto_config(struct hda_codec *codec)
 	if ((err = stac9200_auto_create_hp_ctls(codec, &spec->autocfg)) < 0)
 		return err;
 
+	if ((err = stac9200_auto_create_lfe_ctls(codec, &spec->autocfg)) < 0)
+		return err;
+
 	if (spec->autocfg.dig_out_pin)
 		spec->multiout.dig_out_nid = 0x05;
 	if (spec->autocfg.dig_in_pin)
-- 
GitLab


From a7da6ce564a80952d9c0b210deca5a8cd3474a31 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 Sep 2006 14:03:14 +0200
Subject: [PATCH 1001/1063] [ALSA] hda-codec - Add independent headphone volume
 control

This patch adds support for an independent 'Headphone' volume
control to the generic codec parser.  Some codecs (e.g. Conexant)
have separate connections to the headphone, so an independent amp
adjustment is needed.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_generic.c | 71 +++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 19 deletions(-)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index dedfc5b1083ac..97e9af130b710 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -46,11 +46,18 @@ struct hda_gnode {
 };
 
 /* patch-specific record */
+
+#define MAX_PCM_VOLS	2
+struct pcm_vol {
+	struct hda_gnode *node;	/* Node for PCM volume */
+	unsigned int index;	/* connection of PCM volume */
+};
+
 struct hda_gspec {
 	struct hda_gnode *dac_node[2];	/* DAC node */
 	struct hda_gnode *out_pin_node[2];	/* Output pin (Line-Out) node */
-	struct hda_gnode *pcm_vol_node[2];	/* Node for PCM volume */
-	unsigned int pcm_vol_index[2];	/* connection of PCM volume */
+	struct pcm_vol pcm_vol[MAX_PCM_VOLS];	/* PCM volumes */
+	unsigned int pcm_vol_nodes;	/* number of PCM volumes */
 
 	struct hda_gnode *adc_node;	/* ADC node */
 	struct hda_gnode *cap_vol_node;	/* Node for capture volume */
@@ -285,9 +292,11 @@ static int parse_output_path(struct hda_codec *codec, struct hda_gspec *spec,
 			return node == spec->dac_node[dac_idx];
 		}
 		spec->dac_node[dac_idx] = node;
-		if (node->wid_caps & AC_WCAP_OUT_AMP) {
-			spec->pcm_vol_node[dac_idx] = node;
-			spec->pcm_vol_index[dac_idx] = 0;
+		if ((node->wid_caps & AC_WCAP_OUT_AMP) &&
+		    spec->pcm_vol_nodes < MAX_PCM_VOLS) {
+			spec->pcm_vol[spec->pcm_vol_nodes].node = node;
+			spec->pcm_vol[spec->pcm_vol_nodes].index = 0;
+			spec->pcm_vol_nodes++;
 		}
 		return 1; /* found */
 	}
@@ -307,13 +316,16 @@ static int parse_output_path(struct hda_codec *codec, struct hda_gspec *spec,
 				select_input_connection(codec, node, i);
 			unmute_input(codec, node, i);
 			unmute_output(codec, node);
-			if (! spec->pcm_vol_node[dac_idx]) {
-				if (node->wid_caps & AC_WCAP_IN_AMP) {
-					spec->pcm_vol_node[dac_idx] = node;
-					spec->pcm_vol_index[dac_idx] = i;
-				} else if (node->wid_caps & AC_WCAP_OUT_AMP) {
-					spec->pcm_vol_node[dac_idx] = node;
-					spec->pcm_vol_index[dac_idx] = 0;
+			if (spec->dac_node[dac_idx] &&
+			    spec->pcm_vol_nodes < MAX_PCM_VOLS &&
+			    !(spec->dac_node[dac_idx]->wid_caps &
+			      AC_WCAP_OUT_AMP)) {
+				if ((node->wid_caps & AC_WCAP_IN_AMP) ||
+				    (node->wid_caps & AC_WCAP_OUT_AMP)) {
+					int n = spec->pcm_vol_nodes;
+					spec->pcm_vol[n].node = node;
+					spec->pcm_vol[n].index = i;
+					spec->pcm_vol_nodes++;
 				}
 			}
 			return 1;
@@ -370,7 +382,9 @@ static struct hda_gnode *parse_output_jack(struct hda_codec *codec,
 			/* set PIN-Out enable */
 			snd_hda_codec_write(codec, node->nid, 0,
 					    AC_VERB_SET_PIN_WIDGET_CONTROL,
-					    AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN);
+					    AC_PINCTL_OUT_EN |
+					    ((node->pin_caps & AC_PINCAP_HP_DRV) ?
+					     AC_PINCTL_HP_EN : 0));
 			return node;
 		}
 	}
@@ -745,22 +759,41 @@ static int check_existing_control(struct hda_codec *codec, const char *type, con
 /*
  * build output mixer controls
  */
-static int build_output_controls(struct hda_codec *codec)
+static int create_output_mixers(struct hda_codec *codec, const char **names)
 {
 	struct hda_gspec *spec = codec->spec;
-	static const char *types[2] = { "Master", "Headphone" };
 	int i, err;
 
-	for (i = 0; i < 2 && spec->pcm_vol_node[i]; i++) {
-		err = create_mixer(codec, spec->pcm_vol_node[i],
-				   spec->pcm_vol_index[i],
-				   types[i], "Playback");
+	for (i = 0; i < spec->pcm_vol_nodes; i++) {
+		err = create_mixer(codec, spec->pcm_vol[i].node,
+				   spec->pcm_vol[i].index,
+				   names[i], "Playback");
 		if (err < 0)
 			return err;
 	}
 	return 0;
 }
 
+static int build_output_controls(struct hda_codec *codec)
+{
+	struct hda_gspec *spec = codec->spec;
+	static const char *types_speaker[] = { "Speaker", "Headphone" };
+	static const char *types_line[] = { "Front", "Headphone" };
+
+	switch (spec->pcm_vol_nodes) {
+	case 1:
+		return create_mixer(codec, spec->pcm_vol[0].node,
+				    spec->pcm_vol[0].index,
+				    "Master", "Playback");
+	case 2:
+		if (defcfg_type(spec->out_pin_node[0]) == AC_JACK_SPEAKER)
+			return create_output_mixers(codec, types_speaker);
+		else
+			return create_output_mixers(codec, types_line);
+	}
+	return 0;
+}
+
 /* create capture volume/switch */
 static int build_input_controls(struct hda_codec *codec)
 {
-- 
GitLab


From 9d19f48cfe2570562c2c6226780a7ca627b0f1f1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 Sep 2006 14:27:46 +0200
Subject: [PATCH 1002/1063] [ALSA] Add pcm_class attribute to PCM sysfs entry

This patch adds a new attribute, pcm_class, to each PCM sysfs entry.
It's useful for detecting what kind of PCM stream it is; for example,
HAL can check whether it's a modem or not.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/core.h |  4 ++++
 sound/core/pcm.c     | 24 +++++++++++++++++++
 sound/core/sound.c   | 56 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/include/sound/core.h b/include/sound/core.h
index 1359c532b68e3..b056ea925ecf3 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -26,6 +26,7 @@
 #include <linux/mutex.h>		/* struct mutex */
 #include <linux/rwsem.h>		/* struct rw_semaphore */
 #include <linux/pm.h>			/* pm_message_t */
+#include <linux/device.h>
 
 /* forward declarations */
 #ifdef CONFIG_PCI
@@ -186,6 +187,7 @@ struct snd_minor {
 	int device;			/* device number */
 	const struct file_operations *f_ops;	/* file operations */
 	void *private_data;		/* private data for f_ops->open */
+	struct class_device *class_dev;	/* class device for sysfs */
 };
 
 /* sound.c */
@@ -200,6 +202,8 @@ int snd_register_device(int type, struct snd_card *card, int dev,
 			const char *name);
 int snd_unregister_device(int type, struct snd_card *card, int dev);
 void *snd_lookup_minor_data(unsigned int minor, int type);
+int snd_add_device_sysfs_file(int type, struct snd_card *card, int dev,
+			      const struct class_device_attribute *attr);
 
 #ifdef CONFIG_SND_OSSEMUL
 int snd_register_oss_device(int type, struct snd_card *card, int dev,
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index ed3b09469560b..bf8f412988b8e 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -907,6 +907,28 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream)
 	substream->pstr->substream_opened--;
 }
 
+static ssize_t show_pcm_class(struct class_device *class_device, char *buf)
+{
+	struct snd_pcm *pcm;
+	const char *str;
+	static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = {
+		[SNDRV_PCM_CLASS_GENERIC] = "generic",
+		[SNDRV_PCM_CLASS_MULTI] = "multi",
+		[SNDRV_PCM_CLASS_MODEM] = "modem",
+		[SNDRV_PCM_CLASS_DIGITIZER] = "digitizer",
+	};
+
+	if (! (pcm = class_get_devdata(class_device)) ||
+	    pcm->dev_class > SNDRV_PCM_CLASS_LAST)
+		str = "none";
+	else
+		str = strs[pcm->dev_class];
+        return snprintf(buf, PAGE_SIZE, "%s\n", str);
+}
+
+static struct class_device_attribute pcm_attrs =
+	__ATTR(pcm_class, S_IRUGO, show_pcm_class, NULL);
+
 static int snd_pcm_dev_register(struct snd_device *device)
 {
 	int cidx, err;
@@ -945,6 +967,8 @@ static int snd_pcm_dev_register(struct snd_device *device)
 			mutex_unlock(&register_mutex);
 			return err;
 		}
+		snd_add_device_sysfs_file(devtype, pcm->card, pcm->device,
+					  &pcm_attrs);
 		for (substream = pcm->streams[cidx].substream; substream; substream = substream->next)
 			snd_pcm_timer_init(substream);
 	}
diff --git a/sound/core/sound.c b/sound/core/sound.c
index b4430db3fa4c6..efa476c5210ac 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -268,7 +268,11 @@ int snd_register_device(int type, struct snd_card *card, int dev,
 	snd_minors[minor] = preg;
 	if (card)
 		device = card->dev;
-	class_device_create(sound_class, NULL, MKDEV(major, minor), device, "%s", name);
+	preg->class_dev = class_device_create(sound_class, NULL,
+					      MKDEV(major, minor),
+					      device, "%s", name);
+	if (preg->class_dev)
+		class_set_devdata(preg->class_dev, private_data);
 
 	mutex_unlock(&sound_mutex);
 	return 0;
@@ -276,6 +280,24 @@ int snd_register_device(int type, struct snd_card *card, int dev,
 
 EXPORT_SYMBOL(snd_register_device);
 
+/* find the matching minor record
+ * return the index of snd_minor, or -1 if not found
+ */
+static int find_snd_minor(int type, struct snd_card *card, int dev)
+{
+	int cardnum, minor;
+	struct snd_minor *mptr;
+
+	cardnum = card ? card->number : -1;
+	for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor)
+		if ((mptr = snd_minors[minor]) != NULL &&
+		    mptr->type == type &&
+		    mptr->card == cardnum &&
+		    mptr->device == dev)
+			return minor;
+	return -1;
+}
+
 /**
  * snd_unregister_device - unregister the device on the given card
  * @type: the device type, SNDRV_DEVICE_TYPE_XXX
@@ -289,32 +311,42 @@ EXPORT_SYMBOL(snd_register_device);
  */
 int snd_unregister_device(int type, struct snd_card *card, int dev)
 {
-	int cardnum, minor;
-	struct snd_minor *mptr;
+	int minor;
 
-	cardnum = card ? card->number : -1;
 	mutex_lock(&sound_mutex);
-	for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor)
-		if ((mptr = snd_minors[minor]) != NULL &&
-		    mptr->type == type &&
-		    mptr->card == cardnum &&
-		    mptr->device == dev)
-			break;
-	if (minor == ARRAY_SIZE(snd_minors)) {
+	minor = find_snd_minor(type, card, dev);
+	if (minor < 0) {
 		mutex_unlock(&sound_mutex);
 		return -EINVAL;
 	}
 
 	class_device_destroy(sound_class, MKDEV(major, minor));
 
+	kfree(snd_minors[minor]);
 	snd_minors[minor] = NULL;
 	mutex_unlock(&sound_mutex);
-	kfree(mptr);
 	return 0;
 }
 
 EXPORT_SYMBOL(snd_unregister_device);
 
+int snd_add_device_sysfs_file(int type, struct snd_card *card, int dev,
+			      const struct class_device_attribute *attr)
+{
+	int minor, ret = -EINVAL;
+	struct class_device *cdev;
+
+	mutex_lock(&sound_mutex);
+	minor = find_snd_minor(type, card, dev);
+	if (minor >= 0 && (cdev = snd_minors[minor]->class_dev) != NULL)
+		ret = class_device_create_file(cdev, attr);
+	mutex_unlock(&sound_mutex);
+	return ret;
+
+}
+
+EXPORT_SYMBOL(snd_add_device_sysfs_file);
+
 #ifdef CONFIG_PROC_FS
 /*
  *  INFO PART
-- 
GitLab


From cd417d4fe89638a2848980cb389b9781d4913173 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 Sep 2006 16:03:11 +0200
Subject: [PATCH 1003/1063] [ALSA] hda-codec - Add support for LG LW25 laptop

Added the support for LG LW25 laptop with ALC880 codec.
It's the same codec model as LG LW20 (model=lg-lw).

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 Documentation/sound/alsa/ALSA-Configuration.txt | 2 +-
 sound/pci/hda/patch_realtek.c                   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index a788dd7bc790b..1b749947233c4 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -785,7 +785,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 	  uniwill	3-jack
 	  F1734		2-jack
 	  lg		LG laptop (m1 express dual)
-	  lg-lw		LG LW20 laptop
+	  lg-lw		LG LW20/LW25 laptop
 	  tcl		TCL S700
 	  clevo		Clevo laptops (m520G, m665n)
 	  test		for testing/debugging purpose, almost all controls can be
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 65903812b307a..d037051b66b40 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2270,6 +2270,7 @@ static struct hda_board_config alc880_cfg_tbl[] = {
 
 	{ .modelname = "lg-lw", .config = ALC880_LG_LW },
 	{ .pci_subvendor = 0x1854, .pci_subdevice = 0x0018, .config = ALC880_LG_LW },
+	{ .pci_subvendor = 0x1854, .pci_subdevice = 0x0077, .config = ALC880_LG_LW },
 
 #ifdef CONFIG_SND_DEBUG
 	{ .modelname = "test", .config = ALC880_TEST },
-- 
GitLab


From dafbbb1fdbf103b24d0f7aa645625b6bd558c896 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 7 Sep 2006 12:40:00 +0200
Subject: [PATCH 1004/1063] [ALSA] hda-intel - Fix pci_disable_msi() call

Fix the ordering so that pci_disable_msi() is called after free_irq().
(Otherwise pci_disable_msi() bugs you.)
Also, add a description of the disable_msi option to the documentation.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 Documentation/sound/alsa/ALSA-Configuration.txt | 1 +
 sound/pci/hda/hda_intel.c                       | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 1b749947233c4..e6b57dd46a4f3 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -758,6 +758,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
     single_cmd  - Use single immediate commands to communicate with
 		codecs (for debugging only)
+    disable_msi - Disable Message Signaled Interrupt (MSI)
 
     This module supports one card and autoprobe.
 
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index cc50d13ee90c1..bfd74a526b856 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1422,8 +1422,9 @@ static int azx_free(struct azx *chip)
 	}
 
 	if (chip->irq >= 0) {
-		pci_disable_msi(chip->pci);
 		free_irq(chip->irq, (void*)chip);
+		if (!disable_msi)
+			pci_disable_msi(chip->pci);
 	}
 	if (chip->remap_addr)
 		iounmap(chip->remap_addr);
-- 
GitLab


From e08a007d1041e0bc3df6b855043d8efde91851aa Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 7 Sep 2006 17:52:14 +0200
Subject: [PATCH 1005/1063] [ALSA] hda-codec - Fix SPDIF device number of ALC
 codecs

Assign the SPDIF always to the secondary device (dev#1) to keep
the same configuration.  Move the optional capture device to the
third device (dev#2).
hda_intel now just ignores the NULL entries in the pcm arrays
from codecs.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_intel.c     | 10 +++++++--
 sound/pci/hda/patch_realtek.c | 38 ++++++++++++++++++-----------------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index bfd74a526b856..6309e0c67e6ae 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1242,7 +1242,12 @@ static int __devinit create_codec_pcm(struct azx *chip, struct hda_codec *codec,
 	struct snd_pcm *pcm;
 	struct azx_pcm *apcm;
 
-	snd_assert(cpcm->stream[0].substreams || cpcm->stream[1].substreams, return -EINVAL);
+	/* if no substreams are defined for both playback and capture,
+	 * it's just a placeholder.  ignore it.
+	 */
+	if (!cpcm->stream[0].substreams && !cpcm->stream[1].substreams)
+		return 0;
+
 	snd_assert(cpcm->name, return -EINVAL);
 
 	err = snd_pcm_new(chip->card, cpcm->name, pcm_dev,
@@ -1268,7 +1273,8 @@ static int __devinit create_codec_pcm(struct azx *chip, struct hda_codec *codec,
 					      snd_dma_pci_data(chip->pci),
 					      1024 * 64, 1024 * 128);
 	chip->pcm[pcm_dev] = pcm;
-	chip->pcm_devs = pcm_dev + 1;
+	if (chip->pcm_devs < pcm_dev + 1)
+		chip->pcm_devs = pcm_dev + 1;
 
 	return 0;
 }
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index d037051b66b40..ba9e050e20124 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1796,25 +1796,9 @@ static int alc_build_pcms(struct hda_codec *codec)
 		}
 	}
 
-	/* If the use of more than one ADC is requested for the current
-	 * model, configure a second analog capture-only PCM.
-	 */
-	if (spec->num_adc_nids > 1) {
-		codec->num_pcms++;
-		info++;
-		info->name = spec->stream_name_analog;
-		/* No playback stream for second PCM */
-		info->stream[SNDRV_PCM_STREAM_PLAYBACK] = alc_pcm_null_playback;
-		info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = 0;
-		if (spec->stream_analog_capture) {
-			snd_assert(spec->adc_nids, return -EINVAL);
-			info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture);
-			info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[1];
-		}
-	}
-
+	/* SPDIF for stream index #1 */
 	if (spec->multiout.dig_out_nid || spec->dig_in_nid) {
-		codec->num_pcms++;
+		codec->num_pcms = 2;
 		info++;
 		info->name = spec->stream_name_digital;
 		if (spec->multiout.dig_out_nid &&
@@ -1829,6 +1813,24 @@ static int alc_build_pcms(struct hda_codec *codec)
 		}
 	}
 
+	/* If the use of more than one ADC is requested for the current
+	 * model, configure a second analog capture-only PCM.
+	 */
+	/* Additional Analaog capture for index #2 */
+	if (spec->num_adc_nids > 1 && spec->stream_analog_capture &&
+	    spec->adc_nids) {
+		codec->num_pcms = 3;
+		info++;
+		info->name = spec->stream_name_analog;
+		/* No playback stream for second PCM */
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK] = alc_pcm_null_playback;
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = 0;
+		if (spec->stream_analog_capture) {
+			info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture);
+			info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[1];
+		}
+	}
+
 	return 0;
 }
 
-- 
GitLab


From 8f88820ee49359ea33af42845456ce9dbf54d39a Mon Sep 17 00:00:00 2001
From: Liam Girdwood <liam.girdwood@wolfsonmicro.com>
Date: Thu, 7 Sep 2006 18:07:46 +0200
Subject: [PATCH 1006/1063] [ALSA] Fix WM9705 AC97 patch build error

This patch fixes a build error (introduced by me) in ac97_patch.c wrt
WM9705 touchscreen.
 o Removed the spurious '3D' characters after the |= operator (0x3D is
the ASCII code for '=')

Signed-off-by: Liam Girdwood <liam.girdwood@wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/ac97/ac97_patch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 9be4ceb6838e5..dc28b111a06dd 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -481,7 +481,7 @@ int patch_wolfson05(struct snd_ac97 * ac97)
 	ac97->build_ops = &patch_wolfson_wm9705_ops;
 #ifdef CONFIG_TOUCHSCREEN_WM9705
 	/* WM9705 touchscreen uses AUX and VIDEO for touch */
-	ac97->flags |=3D AC97_HAS_NO_VIDEO | AC97_HAS_NO_AUX;
+	ac97->flags |= AC97_HAS_NO_VIDEO | AC97_HAS_NO_AUX;
 #endif
 	return 0;
 }
-- 
GitLab


From 854b66e44260320c21ebe4b8a18e189f2e45b5be Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 Sep 2006 12:27:38 +0200
Subject: [PATCH 1007/1063] [ALSA] ak4xxx - Remove bogus IPGA controls

Remove the IPGA volume controls and merge the IPGA range into the ADC
volume controls.  These two volumes are not really independent; they are
simply joined end to end as the ranges 0-0x7f and 0x80-max, so it doesn't
make sense to provide two controls.
Since both 0x7f and 0x80 specify 0dB, a hack is needed for the IPGA range:
skip 0x80 (increment by one) for such controls.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 include/sound/ak4xxx-adda.h   |  10 ---
 sound/i2c/other/ak4xxx-adda.c | 128 +++++-----------------------------
 sound/pci/ice1712/revo.c      |   1 -
 3 files changed, 18 insertions(+), 121 deletions(-)

diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h
index 026e4072a9a15..d0deca669b92f 100644
--- a/include/sound/ak4xxx-adda.h
+++ b/include/sound/ak4xxx-adda.h
@@ -48,7 +48,6 @@ struct snd_akm4xxx_dac_channel {
 /* ADC labels and channels */
 struct snd_akm4xxx_adc_channel {
 	char *name;		/* capture gain volume label */
-	char *gain_name;	/* IPGA */
 	char *switch_name;	/* capture switch */
 	unsigned int num_channels;
 };
@@ -91,13 +90,4 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak);
 #define snd_akm4xxx_set_vol(ak,chip,reg,val) \
 	((ak)->volumes[(chip) * 16 + (reg)] = (val))
 
-/* Warning: IPGA is tricky - we assume the addr + 4 is unused
- *   so far, it's OK for all AK codecs with IPGA:
- *   AK4524, AK4528 and EK5365
- */
-#define snd_akm4xxx_get_ipga(ak,chip,reg) \
-	snd_akm4xxx_get_vol(ak, chip, (reg) + 4)
-#define snd_akm4xxx_set_ipga(ak,chip,reg,val) \
-	snd_akm4xxx_set_vol(ak, chip, (reg) + 4, val)
-
 #endif /* __SOUND_AK4XXX_ADDA_H */
diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c
index c34cb4684607c..5da49e2eb3504 100644
--- a/sound/i2c/other/ak4xxx-adda.c
+++ b/sound/i2c/other/ak4xxx-adda.c
@@ -43,10 +43,7 @@ void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg,
 	ak->ops.write(ak, chip, reg, val);
 
 	/* save the data */
-	/* don't overwrite with IPGA data */
-	if ((ak->type != SND_AK4524 && ak->type != SND_AK5365) ||
-	    (reg != 0x04 && reg != 0x05) || (val & 0x80) == 0)
-		snd_akm4xxx_set(ak, chip, reg, val);
+	snd_akm4xxx_set(ak, chip, reg, val);
 	ak->ops.unlock(ak, chip);
 }
 
@@ -70,12 +67,6 @@ static void ak4524_reset(struct snd_akm4xxx *ak, int state)
 		for (reg = 0x04; reg < maxreg; reg++)
 			snd_akm4xxx_write(ak, chip, reg,
 					  snd_akm4xxx_get(ak, chip, reg));
-		if (ak->type == SND_AK4528)
-			continue;
-		/* IPGA */
-		for (reg = 0x04; reg < 0x06; reg++)
-			snd_akm4xxx_write(ak, chip, reg,
-					  snd_akm4xxx_get_ipga(ak, chip, reg) | 0x80);
 	}
 }
 
@@ -175,7 +166,6 @@ static DECLARE_TLV_DB_SCALE(db_scale_vol_datt, -6350, 50, 1);
 static DECLARE_TLV_DB_SCALE(db_scale_8bit, -12750, 50, 1);
 static DECLARE_TLV_DB_SCALE(db_scale_7bit, -6350, 50, 1);
 static DECLARE_TLV_DB_LINEAR(db_scale_linear, TLV_DB_GAIN_MUTE, 0);
-static DECLARE_TLV_DB_SCALE(db_scale_ipga, 0, 50, 0);
 
 /*
  * initialize all the ak4xxx chips
@@ -190,8 +180,6 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak)
 		0x01, 0x03, /* 1: ADC/DAC enable */
 		0x04, 0x00, /* 4: ADC left muted */
 		0x05, 0x00, /* 5: ADC right muted */
-		0x04, 0x80, /* 4: ADC IPGA gain 0dB */
-		0x05, 0x80, /* 5: ADC IPGA gain 0dB */
 		0x06, 0x00, /* 6: DAC left muted */
 		0x07, 0x00, /* 7: DAC right muted */
 		0xff, 0xff
@@ -324,13 +312,15 @@ EXPORT_SYMBOL(snd_akm4xxx_init);
 /*
  * Mixer callbacks
  */
+#define AK_IPGA 			(1<<20)	/* including IPGA */
 #define AK_VOL_CVT 			(1<<21)	/* need dB conversion */
 #define AK_NEEDSMSB 			(1<<22)	/* need MSB update bit */
 #define AK_INVERT 			(1<<23)	/* data is inverted */
 #define AK_GET_CHIP(val)		(((val) >> 8) & 0xff)
 #define AK_GET_ADDR(val)		((val) & 0xff)
-#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x1f)
+#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x0f)
 #define AK_GET_VOL_CVT(val)		(((val) >> 21) & 1)
+#define AK_GET_IPGA(val)		(((val) >> 20) & 1)
 #define AK_GET_NEEDSMSB(val)		(((val) >> 22) & 1)
 #define AK_GET_INVERT(val)		(((val) >> 23) & 1)
 #define AK_GET_MASK(val)		(((val) >> 24) & 0xff)
@@ -371,8 +361,10 @@ static int put_ak_reg(struct snd_kcontrol *kcontrol, int addr,
 		return 0;
 
 	snd_akm4xxx_set_vol(ak, chip, addr, nval);
-	if (AK_GET_VOL_CVT(kcontrol->private_value))
+	if (AK_GET_VOL_CVT(kcontrol->private_value) && nval < 128)
 		nval = vol_cvt_datt[nval];
+	if (AK_GET_IPGA(kcontrol->private_value) && nval >= 128)
+		nval++; /* need to correct + 1 since both 127 and 128 are 0dB */
 	if (AK_GET_INVERT(kcontrol->private_value))
 		nval = mask - nval;
 	if (AK_GET_NEEDSMSB(kcontrol->private_value))
@@ -424,68 +416,6 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
-#define snd_akm4xxx_ipga_gain_info	snd_akm4xxx_volume_info
-
-static int snd_akm4xxx_ipga_gain_get(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
-	int addr = AK_GET_ADDR(kcontrol->private_value);
-
-	ucontrol->value.integer.value[0] =
-		snd_akm4xxx_get_ipga(ak, chip, addr);
-	return 0;
-}
-
-static int put_ak_ipga(struct snd_kcontrol *kcontrol, int addr,
-		       unsigned char nval)
-{
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
-
-	if (snd_akm4xxx_get_ipga(ak, chip, addr) == nval)
-		return 0;
-	snd_akm4xxx_set_ipga(ak, chip, addr, nval);
-	snd_akm4xxx_write(ak, chip, addr, nval | 0x80); /* need MSB */
-	return 1;
-}
-
-static int snd_akm4xxx_ipga_gain_put(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
-{
-	return put_ak_ipga(kcontrol, AK_GET_ADDR(kcontrol->private_value),
-			   ucontrol->value.integer.value[0]);
-}
-
-#define snd_akm4xxx_stereo_gain_info	snd_akm4xxx_stereo_volume_info
-
-static int snd_akm4xxx_stereo_gain_get(struct snd_kcontrol *kcontrol,
-				       struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
-	int addr = AK_GET_ADDR(kcontrol->private_value);
-
-	ucontrol->value.integer.value[0] =
-		snd_akm4xxx_get_ipga(ak, chip, addr);
-	ucontrol->value.integer.value[1] =
-		snd_akm4xxx_get_ipga(ak, chip, addr + 1);
-	return 0;
-}
-
-static int snd_akm4xxx_stereo_gain_put(struct snd_kcontrol *kcontrol,
-				       struct snd_ctl_elem_value *ucontrol)
-{
-	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int change;
-
-	change = put_ak_ipga(kcontrol, addr, ucontrol->value.integer.value[0]);
-	change |= put_ak_ipga(kcontrol, addr + 1,
-			      ucontrol->value.integer.value[1]);
-	return change;
-}
-
 static int snd_akm4xxx_deemphasis_info(struct snd_kcontrol *kcontrol,
 				       struct snd_ctl_elem_info *uinfo)
 {
@@ -702,35 +632,15 @@ static int build_adc_controls(struct snd_akm4xxx *ak)
 			knew.put = snd_akm4xxx_volume_put;
 		}
 		/* register 4 & 5 */
-		knew.private_value =
-			AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127) |
-			AK_VOL_CVT;
-		knew.tlv.p = db_scale_vol_datt;
-		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
-		if (err < 0)
-			return err;
-
-		if (! ak->adc_info || ! ak->adc_info[mixer_ch].gain_name)
-			knew.name = "IPGA Analog Capture Volume";
+		if (ak->type == SND_AK5365)
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 151) |
+				AK_VOL_CVT | AK_IPGA;
 		else
-			knew.name = ak->adc_info[mixer_ch].gain_name;
-		if (num_stereo == 2) {
-			knew.info = snd_akm4xxx_stereo_gain_info;
-			knew.get = snd_akm4xxx_stereo_gain_get;
-			knew.put = snd_akm4xxx_stereo_gain_put;
-		} else {
-			knew.info = snd_akm4xxx_ipga_gain_info;
-			knew.get = snd_akm4xxx_ipga_gain_get;
-			knew.put = snd_akm4xxx_ipga_gain_put;
-		}
-		/* register 4 & 5 */
-		if (ak->type == SND_AK4524)
-			knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0,
-							24);
-		else /* AK5365 */
-			knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0,
-							36);
-		knew.tlv.p = db_scale_ipga;
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 163) |
+				AK_VOL_CVT | AK_IPGA;
+		knew.tlv.p = db_scale_vol_datt;
 		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
 			return err;
@@ -811,11 +721,9 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
 	if (err < 0)
 		return err;
 
-	if (ak->type == SND_AK4524 || ak->type == SND_AK5365) {
-		err = build_adc_controls(ak);
-		if (err < 0)
-			return err;
-	}
+	err = build_adc_controls(ak);
+	if (err < 0)
+		return err;
 
 	if (ak->type == SND_AK4355 || ak->type == SND_AK4358)
 		num_emphs = 1;
diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c
index c9eefa9bbfff2..bf98ea34feb04 100644
--- a/sound/pci/ice1712/revo.c
+++ b/sound/pci/ice1712/revo.c
@@ -110,7 +110,6 @@ static struct snd_akm4xxx_dac_channel revo51_dac[] = {
 static struct snd_akm4xxx_adc_channel revo51_adc[] = {
 	{
 		.name = "PCM Capture Volume",
-		.gain_name = "PCM Capture Gain Volume",
 		.switch_name = "PCM Capture Switch",
 		.num_channels = 2
 	},
-- 
GitLab


From 43001c9515cf87935c50e84b3e27b1f3b3776b5d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 Sep 2006 12:30:03 +0200
Subject: [PATCH 1008/1063] [ALSA] hda-intel - Fix suspend/resume with MSI

Fixed suspend/resume with MSI enablement.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_intel.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 6309e0c67e6ae..4d2df771112e0 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1381,6 +1381,10 @@ static int azx_suspend(struct pci_dev *pci, pm_message_t state)
 		snd_pcm_suspend_all(chip->pcm[i]);
 	snd_hda_suspend(chip->bus, state);
 	azx_free_cmd_io(chip);
+	if (chip->irq >= 0)
+		free_irq(chip->irq, chip);
+	if (!disable_msi)
+		pci_disable_msi(chip->pci);
 	pci_disable_device(pci);
 	pci_save_state(pci);
 	return 0;
@@ -1393,6 +1397,12 @@ static int azx_resume(struct pci_dev *pci)
 
 	pci_restore_state(pci);
 	pci_enable_device(pci);
+	if (!disable_msi)
+		pci_enable_msi(pci);
+	/* FIXME: need proper error handling */
+	request_irq(pci->irq, azx_interrupt, IRQF_DISABLED|IRQF_SHARED,
+		    "HDA Intel", chip);
+	chip->irq = pci->irq;
 	pci_set_master(pci);
 	azx_init_chip(chip);
 	snd_hda_resume(chip->bus);
-- 
GitLab


From 307192065c55dbc70159037c1e3006a9f761192b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 17 Sep 2006 21:59:25 +0200
Subject: [PATCH 1009/1063] [ALSA] aoa: add locking to tas codec

Looks like I completely forgot to do this. This patch adds locking to
the tas codec so two userspace programs can't hit the controls at the
same time. Tested on my powerbook, but I obviously can't find any
problems even without it since it doesn't do SMP.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/aoa/codecs/snd-aoa-codec-tas.c | 96 ++++++++++++++++++++++++----
 1 file changed, 83 insertions(+), 13 deletions(-)

diff --git a/sound/aoa/codecs/snd-aoa-codec-tas.c b/sound/aoa/codecs/snd-aoa-codec-tas.c
index 16c0b6b0a8052..2ef55a17917c6 100644
--- a/sound/aoa/codecs/snd-aoa-codec-tas.c
+++ b/sound/aoa/codecs/snd-aoa-codec-tas.c
@@ -66,6 +66,8 @@
 #include <asm/prom.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+
 MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("tas codec driver for snd-aoa");
@@ -91,6 +93,10 @@ struct tas {
 	u8			bass, treble;
 	u8			acr;
 	int			drc_range;
+	/* protects hardware access against concurrency from
+	 * userspace when hitting controls and during
+	 * codec init/suspend/resume */
+	struct mutex		mtx;
 };
 
 static int tas_reset_init(struct tas *tas);
@@ -231,8 +237,10 @@ static int tas_snd_vol_get(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->cached_volume_l;
 	ucontrol->value.integer.value[1] = tas->cached_volume_r;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -241,14 +249,18 @@ static int tas_snd_vol_put(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	if (tas->cached_volume_l == ucontrol->value.integer.value[0]
-	 && tas->cached_volume_r == ucontrol->value.integer.value[1])
+	 && tas->cached_volume_r == ucontrol->value.integer.value[1]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->cached_volume_l = ucontrol->value.integer.value[0];
 	tas->cached_volume_r = ucontrol->value.integer.value[1];
 	if (tas->hw_enabled)
 		tas_set_volume(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -276,8 +288,10 @@ static int tas_snd_mute_get(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = !tas->mute_l;
 	ucontrol->value.integer.value[1] = !tas->mute_r;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -286,14 +300,18 @@ static int tas_snd_mute_put(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	if (tas->mute_l == !ucontrol->value.integer.value[0]
-	 && tas->mute_r == !ucontrol->value.integer.value[1])
+	 && tas->mute_r == !ucontrol->value.integer.value[1]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->mute_l = !ucontrol->value.integer.value[0];
 	tas->mute_r = !ucontrol->value.integer.value[1];
 	if (tas->hw_enabled)
 		tas_set_volume(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -322,8 +340,10 @@ static int tas_snd_mixer_get(struct snd_kcontrol *kcontrol,
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 	int idx = kcontrol->private_value;
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->mixer_l[idx];
 	ucontrol->value.integer.value[1] = tas->mixer_r[idx];
+	mutex_unlock(&tas->mtx);
 
 	return 0;
 }
@@ -334,15 +354,19 @@ static int tas_snd_mixer_put(struct snd_kcontrol *kcontrol,
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 	int idx = kcontrol->private_value;
 
+	mutex_lock(&tas->mtx);
 	if (tas->mixer_l[idx] == ucontrol->value.integer.value[0]
-	 && tas->mixer_r[idx] == ucontrol->value.integer.value[1])
+	 && tas->mixer_r[idx] == ucontrol->value.integer.value[1]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->mixer_l[idx] = ucontrol->value.integer.value[0];
 	tas->mixer_r[idx] = ucontrol->value.integer.value[1];
 
 	if (tas->hw_enabled)
 		tas_set_mixer(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -375,7 +399,9 @@ static int tas_snd_drc_range_get(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->drc_range;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -384,12 +410,16 @@ static int tas_snd_drc_range_put(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->drc_range == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->drc_range == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->drc_range = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas3004_set_drc(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -417,7 +447,9 @@ static int tas_snd_drc_switch_get(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->drc_enabled;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -426,12 +458,16 @@ static int tas_snd_drc_switch_put(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->drc_enabled == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->drc_enabled == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->drc_enabled = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas3004_set_drc(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -463,7 +499,9 @@ static int tas_snd_capture_source_get(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.enumerated.item[0] = !!(tas->acr & TAS_ACR_INPUT_B);
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -471,15 +509,21 @@ static int tas_snd_capture_source_put(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
-	int oldacr = tas->acr;
+	int oldacr;
+
+	mutex_lock(&tas->mtx);
+	oldacr = tas->acr;
 
 	tas->acr &= ~TAS_ACR_INPUT_B;
 	if (ucontrol->value.enumerated.item[0])
 		tas->acr |= TAS_ACR_INPUT_B;
-	if (oldacr == tas->acr)
+	if (oldacr == tas->acr) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 	if (tas->hw_enabled)
 		tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -518,7 +562,9 @@ static int tas_snd_treble_get(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->treble;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -527,12 +573,16 @@ static int tas_snd_treble_put(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->treble == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->treble == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->treble = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas_set_treble(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -560,7 +610,9 @@ static int tas_snd_bass_get(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->bass;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -569,12 +621,16 @@ static int tas_snd_bass_put(struct snd_kcontrol *kcontrol,
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->bass == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->bass == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->bass = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas_set_bass(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -628,16 +684,16 @@ static int tas_reset_init(struct tas *tas)
 
 	tmp = TAS_MCS_SCLK64 | TAS_MCS_SPORT_MODE_I2S | TAS_MCS_SPORT_WL_24BIT;
 	if (tas_write_reg(tas, TAS_REG_MCS, 1, &tmp))
-		return -ENODEV;
+		goto outerr;
 
 	tas->acr |= TAS_ACR_ANALOG_PDOWN | TAS_ACR_B_MONAUREAL |
 		TAS_ACR_B_MON_SEL_RIGHT;
 	if (tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr))
-		return -ENODEV;
+		goto outerr;
 
 	tmp = 0;
 	if (tas_write_reg(tas, TAS_REG_MCS2, 1, &tmp))
-		return -ENODEV;
+		goto outerr;
 
 	tas3004_set_drc(tas);
 
@@ -649,9 +705,11 @@ static int tas_reset_init(struct tas *tas)
 
 	tas->acr &= ~TAS_ACR_ANALOG_PDOWN;
 	if (tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr))
-		return -ENODEV;
+		goto outerr;
 
 	return 0;
+ outerr:
+	return -ENODEV;
 }
 
 static int tas_switch_clock(struct codec_info_item *cii, enum clock_switch clock)
@@ -666,11 +724,13 @@ static int tas_switch_clock(struct codec_info_item *cii, enum clock_switch clock
 		break;
 	case CLOCK_SWITCH_SLAVE:
 		/* Clocks are back, re-init the codec */
+		mutex_lock(&tas->mtx);
 		tas_reset_init(tas);
 		tas_set_volume(tas);
 		tas_set_mixer(tas);
 		tas->hw_enabled = 1;
 		tas->codec.gpio->methods->all_amps_restore(tas->codec.gpio);
+		mutex_unlock(&tas->mtx);
 		break;
 	default:
 		/* doesn't happen as of now */
@@ -684,19 +744,23 @@ static int tas_switch_clock(struct codec_info_item *cii, enum clock_switch clock
  * our i2c device is suspended, and then take note of that! */
 static int tas_suspend(struct tas *tas)
 {
+	mutex_lock(&tas->mtx);
 	tas->hw_enabled = 0;
 	tas->acr |= TAS_ACR_ANALOG_PDOWN;
 	tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr);
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
 static int tas_resume(struct tas *tas)
 {
 	/* reset codec */
+	mutex_lock(&tas->mtx);
 	tas_reset_init(tas);
 	tas_set_volume(tas);
 	tas_set_mixer(tas);
 	tas->hw_enabled = 1;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -739,11 +803,14 @@ static int tas_init_codec(struct aoa_codec *codec)
 		return -EINVAL;
 	}
 
+	mutex_lock(&tas->mtx);
 	if (tas_reset_init(tas)) {
 		printk(KERN_ERR PFX "tas failed to initialise\n");
+		mutex_unlock(&tas->mtx);
 		return -ENXIO;
 	}
 	tas->hw_enabled = 1;
+	mutex_unlock(&tas->mtx);
 
 	if (tas->codec.soundbus_dev->attach_codec(tas->codec.soundbus_dev,
 						   aoa_get_card(),
@@ -822,6 +889,7 @@ static int tas_create(struct i2c_adapter *adapter,
 	if (!tas)
 		return -ENOMEM;
 
+	mutex_init(&tas->mtx);
 	tas->i2c.driver = &tas_driver;
 	tas->i2c.adapter = adapter;
 	tas->i2c.addr = addr;
@@ -850,6 +918,7 @@ static int tas_create(struct i2c_adapter *adapter,
  detach:
 	i2c_detach_client(&tas->i2c);
  fail:
+	mutex_destroy(&tas->mtx);
 	kfree(tas);
 	return -EINVAL;
 }
@@ -908,6 +977,7 @@ static int tas_i2c_detach(struct i2c_client *client)
 	/* power down codec chip */
 	tas_write_reg(tas, TAS_REG_ACR, 1, &tmp);
 
+	mutex_destroy(&tas->mtx);
 	kfree(tas);
 	return 0;
 }
-- 
GitLab


From 783eaf4671a4f5a95102aedb5a45e1f8adab945c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 17 Sep 2006 22:00:51 +0200
Subject: [PATCH 1010/1063] [ALSA] powermac - Fix Oops when conflicting with
 aoa driver

Fixed an Oops when conflicting with the aoa driver due to the lack of
i2c initialization.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/ppc/keywest.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c
index 59482a4cd4464..272ae38e9b188 100644
--- a/sound/ppc/keywest.c
+++ b/sound/ppc/keywest.c
@@ -117,6 +117,9 @@ int __init snd_pmac_tumbler_post_init(void)
 {
 	int err;
 	
+	if (!keywest_ctx || !keywest_ctx->client)
+		return -ENXIO;
+
 	if ((err = keywest_ctx->init_client(keywest_ctx)) < 0) {
 		snd_printk(KERN_ERR "tumbler: %i :cannot initialize the MCS\n", err);
 		return err;
-- 
GitLab


From 2b1181ed83ee8b0afbf9ba3e4f789f00375b2a17 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 17 Sep 2006 22:02:22 +0200
Subject: [PATCH 1011/1063] [ALSA] Add missing compat ioctls for ALSA control
 API

Added the missing 32bit-compat ioctl entries for ALSA control API
(especially for recent additions of TLV stuff).

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/control_compat.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index 3c0161bb5ba45..ab48962c48ce9 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -407,6 +407,10 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns
 	case SNDRV_CTL_IOCTL_POWER_STATE:
 	case SNDRV_CTL_IOCTL_ELEM_LOCK:
 	case SNDRV_CTL_IOCTL_ELEM_UNLOCK:
+	case SNDRV_CTL_IOCTL_ELEM_REMOVE:
+	case SNDRV_CTL_IOCTL_TLV_READ:
+	case SNDRV_CTL_IOCTL_TLV_WRITE:
+	case SNDRV_CTL_IOCTL_TLV_COMMAND:
 		return snd_ctl_ioctl(file, cmd, (unsigned long)argp);
 	case SNDRV_CTL_IOCTL_ELEM_LIST32:
 		return snd_ctl_elem_list_compat(ctl->card, argp);
-- 
GitLab


From 5720fddd62367bb44335ec83f6371ce91e9ead12 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 17 Sep 2006 22:04:17 +0200
Subject: [PATCH 1012/1063] [ALSA] hda-codec - Add device id for Motorola
 si3054-compatible codec

Added the device id for Motorola si3054-compatible modem codec
on a Gateway laptop.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_si3054.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_si3054.c b/sound/pci/hda/patch_si3054.c
index 250242cd6c70b..76ec3d75fa9ea 100644
--- a/sound/pci/hda/patch_si3054.c
+++ b/sound/pci/hda/patch_si3054.c
@@ -298,6 +298,7 @@ struct hda_codec_preset snd_hda_preset_si3054[] = {
  	{ .id = 0x163c3055, .name = "Si3054", .patch = patch_si3054 },
  	{ .id = 0x163c3155, .name = "Si3054", .patch = patch_si3054 },
  	{ .id = 0x11c13026, .name = "Si3054", .patch = patch_si3054 },
+ 	{ .id = 0x10573057, .name = "Si3054", .patch = patch_si3054 },
 	{}
 };
 
-- 
GitLab


From a922625126cc9bf593d801879a965b9f0eae6958 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 17 Sep 2006 22:05:54 +0200
Subject: [PATCH 1013/1063] [ALSA] hda-codec - Add vendor ids for Motorola and
 Conexant

Added string entries for Motorola and Conexant vendor ids.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_codec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index ff29d0f16903a..e69db04b7eb8f 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -51,8 +51,10 @@ struct hda_vendor_id {
 /* codec vendor labels */
 static struct hda_vendor_id hda_vendor_ids[] = {
 	{ 0x10ec, "Realtek" },
+	{ 0x1057, "Motorola" },
 	{ 0x11d4, "Analog Devices" },
 	{ 0x13f6, "C-Media" },
+	{ 0x14f1, "Conexant" },
 	{ 0x434d, "C-Media" },
 	{ 0x8384, "SigmaTel" },
 	{} /* terminator */
-- 
GitLab


From 33ef765131bcf82bc5fca3f25d8313fa4df93ce0 Mon Sep 17 00:00:00 2001
From: Nicolas Graziano <nicolas.graziano@wanadoo.fr>
Date: Tue, 19 Sep 2006 14:23:14 +0200
Subject: [PATCH 1014/1063] [ALSA] hda_intel prefer 24bit instead of 20bit

If I understand the hda_intel code correctly, for formats > 20bit it only
advertises the SNDRV_PCM_FMTBIT_S32_LE format and plays it at 32 bit, 20 bit
or 24 bit. But if both 20bit and 24bit are available, it currently prefers
the 20bit format. This patch makes it prefer the 24bit format over 20bit.

Signed-off-by: Nicolas Graziano <nicolas.graziano@wanadoo.fr>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_codec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index e69db04b7eb8f..8b2c080c85aed 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1505,10 +1505,10 @@ int snd_hda_query_supported_pcm(struct hda_codec *codec, hda_nid_t nid,
 				formats |= SNDRV_PCM_FMTBIT_S32_LE;
 				if (val & AC_SUPPCM_BITS_32)
 					bps = 32;
-				else if (val & AC_SUPPCM_BITS_20)
-					bps = 20;
 				else if (val & AC_SUPPCM_BITS_24)
 					bps = 24;
+				else if (val & AC_SUPPCM_BITS_20)
+					bps = 20;
 			}
 		}
 		else if (streams == AC_SUPFMT_FLOAT32) { /* should be exclusive */
-- 
GitLab


From eb06ed8f4c2440558ebf465e8baeac6367d90201 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 20 Sep 2006 17:10:27 +0200
Subject: [PATCH 1015/1063] [ALSA] hda-codec - Support multiple headphone pins

Some machines have multiple headphone pins (usually on the laptop
and on the docking station) while the current hda-codec driver
assumes a single headphone pin.  Now it supports multiple hp pins
(at least for detection).
The sigmatel 92xx code supports these new multiple hp pins.
It detects all hp pins for auto-muting, too.
Also, the driver now checks speaker pins in addition.  In some cases,
line-out, speaker and hp pins all coexist.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_codec.c      |  23 ++--
 sound/pci/hda/hda_local.h      |   3 +-
 sound/pci/hda/patch_analog.c   |   4 +-
 sound/pci/hda/patch_realtek.c  |  18 +--
 sound/pci/hda/patch_sigmatel.c | 202 ++++++++++++++++++++++-----------
 5 files changed, 164 insertions(+), 86 deletions(-)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 8b2c080c85aed..07360996caaac 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -2012,7 +2012,7 @@ static int is_in_nid_list(hda_nid_t nid, hda_nid_t *list)
  * in the order of front, rear, CLFE, side, ...
  *
  * If more extra outputs (speaker and headphone) are found, the pins are
- * assisnged to hp_pin and speaker_pins[], respectively.  If no line-out jack
+ * assisnged to hp_pins[] and speaker_pins[], respectively.  If no line-out jack
  * is detected, one of speaker of HP pins is assigned as the primary
  * output, i.e. to line_out_pins[0].  So, line_outs is always positive
  * if any analog output exists.
@@ -2074,7 +2074,10 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c
 			cfg->speaker_outs++;
 			break;
 		case AC_JACK_HP_OUT:
-			cfg->hp_pin = nid;
+			if (cfg->hp_outs >= ARRAY_SIZE(cfg->hp_pins))
+				continue;
+			cfg->hp_pins[cfg->hp_outs] = nid;
+			cfg->hp_outs++;
 			break;
 		case AC_JACK_MIC_IN:
 			if (loc == AC_JACK_LOC_FRONT)
@@ -2147,8 +2150,10 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c
 		   cfg->speaker_outs, cfg->speaker_pins[0],
 		   cfg->speaker_pins[1], cfg->speaker_pins[2],
 		   cfg->speaker_pins[3], cfg->speaker_pins[4]);
-	snd_printd("   hp=0x%x, dig_out=0x%x, din_in=0x%x\n",
-		   cfg->hp_pin, cfg->dig_out_pin, cfg->dig_in_pin);
+	snd_printd("   hp_outs=%d (0x%x/0x%x/0x%x/0x%x/0x%x)\n",
+		   cfg->hp_outs, cfg->hp_pins[0],
+		   cfg->hp_pins[1], cfg->hp_pins[2],
+		   cfg->hp_pins[3], cfg->hp_pins[4]);
 	snd_printd("   inputs: mic=0x%x, fmic=0x%x, line=0x%x, fline=0x%x,"
 		   " cd=0x%x, aux=0x%x\n",
 		   cfg->input_pins[AUTO_PIN_MIC],
@@ -2169,10 +2174,12 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c
 			       sizeof(cfg->speaker_pins));
 			cfg->speaker_outs = 0;
 			memset(cfg->speaker_pins, 0, sizeof(cfg->speaker_pins));
-		} else if (cfg->hp_pin) {
-			cfg->line_outs = 1;
-			cfg->line_out_pins[0] = cfg->hp_pin;
-			cfg->hp_pin = 0;
+		} else if (cfg->hp_outs) {
+			cfg->line_outs = cfg->hp_outs;
+			memcpy(cfg->line_out_pins, cfg->hp_pins,
+			       sizeof(cfg->hp_pins));
+			cfg->hp_outs = 0;
+			memset(cfg->hp_pins, 0, sizeof(cfg->hp_pins));
 		}
 	}
 
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index ff24266fe353b..f9416c36396ec 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -229,7 +229,8 @@ struct auto_pin_cfg {
 	hda_nid_t line_out_pins[5]; /* sorted in the order of Front/Surr/CLFE/Side */
 	int speaker_outs;
 	hda_nid_t speaker_pins[5];
-	hda_nid_t hp_pin;
+	int hp_outs;
+	hda_nid_t hp_pins[5];
 	hda_nid_t input_pins[AUTO_PIN_LAST];
 	hda_nid_t dig_out_pin;
 	hda_nid_t dig_in_pin;
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 71abc2aa61a60..511df07fa2a3f 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -2471,7 +2471,7 @@ static void ad1988_auto_init_extra_out(struct hda_codec *codec)
 	pin = spec->autocfg.speaker_pins[0];
 	if (pin) /* connect to front */
 		ad1988_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0);
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		ad1988_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
 }
@@ -2523,7 +2523,7 @@ static int ad1988_parse_auto_config(struct hda_codec *codec)
 	    (err = ad1988_auto_create_extra_out(codec,
 						spec->autocfg.speaker_pins[0],
 						"Speaker")) < 0 ||
-	    (err = ad1988_auto_create_extra_out(codec, spec->autocfg.hp_pin,
+	    (err = ad1988_auto_create_extra_out(codec, spec->autocfg.hp_pins[0],
 						"Headphone")) < 0 ||
 	    (err = ad1988_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
 		return err;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ba9e050e20124..d08d2e399c8f1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2753,7 +2753,7 @@ static void alc880_auto_init_extra_out(struct hda_codec *codec)
 	pin = spec->autocfg.speaker_pins[0];
 	if (pin) /* connect to front */
 		alc880_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0);
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		alc880_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
 }
@@ -2794,7 +2794,7 @@ static int alc880_parse_auto_config(struct hda_codec *codec)
 	    (err = alc880_auto_create_extra_out(spec,
 						spec->autocfg.speaker_pins[0],
 						"Speaker")) < 0 ||
-	    (err = alc880_auto_create_extra_out(spec, spec->autocfg.hp_pin,
+	    (err = alc880_auto_create_extra_out(spec, spec->autocfg.hp_pins[0],
 						"Headphone")) < 0 ||
 	    (err = alc880_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
 		return err;
@@ -3736,7 +3736,7 @@ static int alc260_auto_create_multi_out_ctls(struct alc_spec *spec,
 			return err;
 	}
 
-	nid = cfg->hp_pin;
+	nid = cfg->hp_pins[0];
 	if (nid) {
 		err = alc260_add_playback_controls(spec, nid, "Headphone");
 		if (err < 0)
@@ -3806,7 +3806,7 @@ static void alc260_auto_init_multi_out(struct hda_codec *codec)
 	if (nid)
 		alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0);
 
-	nid = spec->autocfg.hp_pin;
+	nid = spec->autocfg.hp_pins[0];
 	if (nid)
 		alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0);
 }	
@@ -4526,7 +4526,7 @@ static void alc882_auto_init_hp_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t pin;
 
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		alc882_auto_set_output_and_unmute(codec, pin, PIN_HP, 0); /* use dac 0 */
 }
@@ -5207,7 +5207,7 @@ static void alc883_auto_init_hp_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t pin;
 
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		/* use dac 0 */
 		alc883_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
@@ -5630,7 +5630,7 @@ static int alc262_auto_create_multi_out_ctls(struct alc_spec *spec, const struct
 				return err;
 		}
 	}
-	nid = cfg->hp_pin;
+	nid = cfg->hp_pins[0];
 	if (nid) {
 		/* spec->multiout.hp_nid = 2; */
 		if (nid == 0x16) {
@@ -6630,7 +6630,7 @@ static void alc861_auto_init_hp_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t pin;
 
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		alc861_auto_set_output_and_unmute(codec, pin, PIN_HP, spec->multiout.dac_nids[0]);
 }
@@ -6665,7 +6665,7 @@ static int alc861_parse_auto_config(struct hda_codec *codec)
 
 	if ((err = alc861_auto_fill_dac_nids(spec, &spec->autocfg)) < 0 ||
 	    (err = alc861_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0 ||
-	    (err = alc861_auto_create_hp_ctls(spec, spec->autocfg.hp_pin)) < 0 ||
+	    (err = alc861_auto_create_hp_ctls(spec, spec->autocfg.hp_pins[0])) < 0 ||
 	    (err = alc861_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
 		return err;
 
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index bcbbe111ab951..7cc064265204c 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1011,11 +1011,29 @@ static int stac92xx_auto_fill_dac_nids(struct hda_codec *codec,
 	return 0;
 }
 
+/* create volume control/switch for the given prefx type */
+static int create_controls(struct sigmatel_spec *spec, const char *pfx, hda_nid_t nid, int chs)
+{
+	char name[32];
+	int err;
+
+	sprintf(name, "%s Playback Volume", pfx);
+	err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, name,
+				   HDA_COMPOSE_AMP_VAL(nid, chs, 0, HDA_OUTPUT));
+	if (err < 0)
+		return err;
+	sprintf(name, "%s Playback Switch", pfx);
+	err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, name,
+				   HDA_COMPOSE_AMP_VAL(nid, chs, 0, HDA_OUTPUT));
+	if (err < 0)
+		return err;
+	return 0;
+}
+
 /* add playback controls from the parsed DAC table */
 static int stac92xx_auto_create_multi_out_ctls(struct sigmatel_spec *spec,
 					       const struct auto_pin_cfg *cfg)
 {
-	char name[32];
 	static const char *chname[4] = {
 		"Front", "Surround", NULL /*CLFE*/, "Side"
 	};
@@ -1030,26 +1048,15 @@ static int stac92xx_auto_create_multi_out_ctls(struct sigmatel_spec *spec,
 
 		if (i == 2) {
 			/* Center/LFE */
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "Center Playback Volume",
-					       HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0)
+			err = create_controls(spec, "Center", nid, 1);
+			if (err < 0)
 				return err;
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "LFE Playback Volume",
-					       HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
-				return err;
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "Center Playback Switch",
-					       HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0)
-				return err;
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "LFE Playback Switch",
-					       HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
+			err = create_controls(spec, "LFE", nid, 2);
+			if (err < 0)
 				return err;
 		} else {
-			sprintf(name, "%s Playback Volume", chname[i]);
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, name,
-					       HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
-				return err;
-			sprintf(name, "%s Playback Switch", chname[i]);
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, name,
-					       HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+			err = create_controls(spec, chname[i], nid, 3);
+			if (err < 0)
 				return err;
 		}
 	}
@@ -1065,39 +1072,85 @@ static int stac92xx_auto_create_multi_out_ctls(struct sigmatel_spec *spec,
 	return 0;
 }
 
-/* add playback controls for HP output */
-static int stac92xx_auto_create_hp_ctls(struct hda_codec *codec, struct auto_pin_cfg *cfg)
+static int check_in_dac_nids(struct sigmatel_spec *spec, hda_nid_t nid)
 {
-	struct sigmatel_spec *spec = codec->spec;
-	hda_nid_t pin = cfg->hp_pin;
-	hda_nid_t nid;
-	int i, err;
-	unsigned int wid_caps;
+	int i;
 
-	if (! pin)
-		return 0;
+	for (i = 0; i < spec->multiout.num_dacs; i++) {
+		if (spec->multiout.dac_nids[i] == nid)
+			return 1;
+	}
+	if (spec->multiout.hp_nid == nid)
+		return 1;
+	return 0;
+}
 
-	wid_caps = get_wcaps(codec, pin);
-	if (wid_caps & AC_WCAP_UNSOL_CAP)
-		spec->hp_detect = 1;
+static int add_spec_dacs(struct sigmatel_spec *spec, hda_nid_t nid)
+{
+	if (!spec->multiout.hp_nid)
+		spec->multiout.hp_nid = nid;
+	else if (spec->multiout.num_dacs > 4) {
+		printk(KERN_WARNING "stac92xx: No space for DAC 0x%x\n", nid);
+		return 1;
+	} else {
+		spec->multiout.dac_nids[spec->multiout.num_dacs] = nid;
+		spec->multiout.num_dacs++;
+	}
+	return 0;
+}
 
-	nid = snd_hda_codec_read(codec, pin, 0, AC_VERB_GET_CONNECT_LIST, 0) & 0xff;
-	for (i = 0; i < cfg->line_outs; i++) {
-		if (! spec->multiout.dac_nids[i])
+/* add playback controls for Speaker and HP outputs */
+static int stac92xx_auto_create_hp_ctls(struct hda_codec *codec,
+					struct auto_pin_cfg *cfg)
+{
+	struct sigmatel_spec *spec = codec->spec;
+	hda_nid_t nid;
+	int i, old_num_dacs, err;
+
+	old_num_dacs = spec->multiout.num_dacs;
+	for (i = 0; i < cfg->hp_outs; i++) {
+		unsigned int wid_caps = get_wcaps(codec, cfg->hp_pins[i]);
+		if (wid_caps & AC_WCAP_UNSOL_CAP)
+			spec->hp_detect = 1;
+		nid = snd_hda_codec_read(codec, cfg->hp_pins[i], 0,
+					 AC_VERB_GET_CONNECT_LIST, 0) & 0xff;
+		if (check_in_dac_nids(spec, nid))
+			nid = 0;
+		if (! nid)
 			continue;
-		if (spec->multiout.dac_nids[i] == nid)
-			return 0;
+		add_spec_dacs(spec, nid);
+	}
+	for (i = 0; i < cfg->speaker_outs; i++) {
+		nid = snd_hda_codec_read(codec, cfg->speaker_pins[0], 0,
+					 AC_VERB_GET_CONNECT_LIST, 0) & 0xff;
+		if (check_in_dac_nids(spec, nid))
+			nid = 0;
+		if (check_in_dac_nids(spec, nid))
+			nid = 0;
+		if (! nid)
+			continue;
+		add_spec_dacs(spec, nid);
 	}
 
-	spec->multiout.hp_nid = nid;
-
-	/* control HP volume/switch on the output mixer amp */
-	if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "Headphone Playback Volume",
-					HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
-		return err;
-	if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "Headphone Playback Switch",
-					HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
-		return err;
+	for (i = old_num_dacs; i < spec->multiout.num_dacs; i++) {
+		static const char *pfxs[] = {
+			"Speaker", "External Speaker", "Speaker2",
+		};
+		err = create_controls(spec, pfxs[i - old_num_dacs],
+				      spec->multiout.dac_nids[i], 3);
+		if (err < 0)
+			return err;
+	}
+	if (spec->multiout.hp_nid) {
+		const char *pfx;
+		if (old_num_dacs == spec->multiout.num_dacs)
+			pfx = "Master";
+		else
+			pfx = "Headphone";
+		err = create_controls(spec, pfx, spec->multiout.hp_nid, 3);
+		if (err < 0)
+			return err;
+	}
 
 	return 0;
 }
@@ -1160,11 +1213,20 @@ static void stac92xx_auto_init_multi_out(struct hda_codec *codec)
 static void stac92xx_auto_init_hp_out(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	hda_nid_t pin;
+	int i;
 
-	pin = spec->autocfg.hp_pin;
-	if (pin) /* connect to front */
-		stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN);
+	for (i = 0; i < spec->autocfg.hp_outs; i++) {
+		hda_nid_t pin;
+		pin = spec->autocfg.hp_pins[i];
+		if (pin) /* connect to front */
+			stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN);
+	}
+	for (i = 0; i < spec->autocfg.speaker_outs; i++) {
+		hda_nid_t pin;
+		pin = spec->autocfg.speaker_pins[i];
+		if (pin) /* connect to front */
+			stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN);
+	}
 }
 
 static int stac92xx_parse_auto_config(struct hda_codec *codec, hda_nid_t dig_out, hda_nid_t dig_in)
@@ -1210,7 +1272,7 @@ static int stac9200_auto_create_hp_ctls(struct hda_codec *codec,
 					struct auto_pin_cfg *cfg)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	hda_nid_t pin = cfg->hp_pin;
+	hda_nid_t pin = cfg->hp_pins[0];
 	unsigned int wid_caps;
 
 	if (! pin)
@@ -1266,16 +1328,7 @@ static int stac9200_auto_create_lfe_ctls(struct hda_codec *codec,
 	}
 
 	if (lfe_pin) {
-		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL,
-					   "LFE Playback Volume",
-					   HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0,
-							       HDA_OUTPUT));
-		if (err < 0)
-			return err;
-		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE,
-					   "LFE Playback Switch",
-					   HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0,
-							       HDA_OUTPUT));
+		err = create_controls(spec, "LFE", lfe_pin, 1);
 		if (err < 0)
 			return err;
 	}
@@ -1363,9 +1416,11 @@ static int stac92xx_init(struct hda_codec *codec)
 	/* set up pins */
 	if (spec->hp_detect) {
 		/* Enable unsolicited responses on the HP widget */
-		snd_hda_codec_write(codec, cfg->hp_pin, 0,
-				AC_VERB_SET_UNSOLICITED_ENABLE,
-				STAC_UNSOL_ENABLE);
+		for (i = 0; i < cfg->hp_outs; i++)
+			if (get_wcaps(codec, cfg->hp_pins[i]) & AC_WCAP_UNSOL_CAP)
+				snd_hda_codec_write(codec, cfg->hp_pins[i], 0,
+						    AC_VERB_SET_UNSOLICITED_ENABLE,
+						    STAC_UNSOL_ENABLE);
 		/* fake event to set up pins */
 		codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26);
 		/* enable the headphones by default.  If/when unsol_event detection works, this will be ignored */
@@ -1447,21 +1502,36 @@ static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res)
 	if ((res >> 26) != STAC_HP_EVENT)
 		return;
 
-	presence = snd_hda_codec_read(codec, cfg->hp_pin, 0,
-			AC_VERB_GET_PIN_SENSE, 0x00) >> 31;
+	presence = 0;
+	for (i = 0; i < cfg->hp_outs; i++) {
+		int p = snd_hda_codec_read(codec, cfg->hp_pins[i], 0,
+					   AC_VERB_GET_PIN_SENSE, 0x00);
+		if (p & (1 << 31))
+			presence++;
+	}
 
 	if (presence) {
 		/* disable lineouts, enable hp */
 		for (i = 0; i < cfg->line_outs; i++)
 			stac92xx_reset_pinctl(codec, cfg->line_out_pins[i],
 						AC_PINCTL_OUT_EN);
-		stac92xx_set_pinctl(codec, cfg->hp_pin, AC_PINCTL_OUT_EN);
+		for (i = 0; i < cfg->speaker_outs; i++)
+			stac92xx_reset_pinctl(codec, cfg->speaker_pins[i],
+						AC_PINCTL_OUT_EN);
+		for (i = 0; i < cfg->hp_outs; i++)
+			stac92xx_set_pinctl(codec, cfg->hp_pins[i],
+					    AC_PINCTL_OUT_EN);
 	} else {
 		/* enable lineouts, disable hp */
 		for (i = 0; i < cfg->line_outs; i++)
 			stac92xx_set_pinctl(codec, cfg->line_out_pins[i],
 						AC_PINCTL_OUT_EN);
-		stac92xx_reset_pinctl(codec, cfg->hp_pin, AC_PINCTL_OUT_EN);
+		for (i = 0; i < cfg->speaker_outs; i++)
+			stac92xx_set_pinctl(codec, cfg->speaker_pins[i],
+						AC_PINCTL_OUT_EN);
+		for (i = 0; i < cfg->hp_outs; i++)
+			stac92xx_reset_pinctl(codec, cfg->hp_pins[i],
+					      AC_PINCTL_OUT_EN);
 	}
 } 
 
-- 
GitLab


From e6f8f108a19638d7c6535ab393a228ed9d4804a6 Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Date: Thu, 21 Sep 2006 11:31:58 +0200
Subject: [PATCH 1016/1063] [ALSA] sound core: Use SEEK_{SET,CUR,END} instead
 of hardcoded values

sound core: Use SEEK_{SET,CUR,END} instead of hardcoded values

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/core/info.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/core/info.c b/sound/core/info.c
index 9663b6be9c3a0..e43662b33f16e 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -175,15 +175,15 @@ static loff_t snd_info_entry_llseek(struct file *file, loff_t offset, int orig)
 	switch (entry->content) {
 	case SNDRV_INFO_CONTENT_TEXT:
 		switch (orig) {
-		case 0:	/* SEEK_SET */
+		case SEEK_SET:
 			file->f_pos = offset;
 			ret = file->f_pos;
 			goto out;
-		case 1:	/* SEEK_CUR */
+		case SEEK_CUR:
 			file->f_pos += offset;
 			ret = file->f_pos;
 			goto out;
-		case 2:	/* SEEK_END */
+		case SEEK_END:
 		default:
 			ret = -EINVAL;
 			goto out;
-- 
GitLab


From dd47a33806bfe93c08b071c4d26a2390cbbc9e65 Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Date: Thu, 21 Sep 2006 11:32:43 +0200
Subject: [PATCH 1017/1063] [ALSA] opl4: Use SEEK_{SET,CUR,END} instead of
 hardcoded values

opl4: Use SEEK_{SET,CUR,END} instead of hardcoded values

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/drivers/opl4/opl4_proc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/drivers/opl4/opl4_proc.c b/sound/drivers/opl4/opl4_proc.c
index 11dd811771a4b..1679300b75835 100644
--- a/sound/drivers/opl4/opl4_proc.c
+++ b/sound/drivers/opl4/opl4_proc.c
@@ -105,13 +105,13 @@ static long long snd_opl4_mem_proc_llseek(struct snd_info_entry *entry, void *fi
 					  struct file *file, long long offset, int orig)
 {
 	switch (orig) {
-	case 0: /* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1: /* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2: /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = entry->size + offset;
 		break;
 	default:
-- 
GitLab


From d158da81ee9a1fa70d980f58b0f143fa873ca9ed Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Date: Thu, 21 Sep 2006 11:33:14 +0200
Subject: [PATCH 1018/1063] [ALSA] gus: Use SEEK_{SET,CUR,END} instead of
 hardcoded values

gus: Use SEEK_{SET,CUR,END} instead of hardcoded values

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/isa/gus/gus_mem_proc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/isa/gus/gus_mem_proc.c b/sound/isa/gus/gus_mem_proc.c
index 4080255007d59..80f0a83818b27 100644
--- a/sound/isa/gus/gus_mem_proc.c
+++ b/sound/isa/gus/gus_mem_proc.c
@@ -61,13 +61,13 @@ static long long snd_gf1_mem_proc_llseek(struct snd_info_entry *entry,
 	struct gus_proc_private *priv = entry->private_data;
 
 	switch (orig) {
-	case 0:	/* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1:	/* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2: /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = priv->size + offset;
 		break;
 	default:
-- 
GitLab


From 7ffffecc7c4df08ad89723ca32d936ff09b5b3ff Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Date: Thu, 21 Sep 2006 11:33:42 +0200
Subject: [PATCH 1019/1063] [ALSA] mixart: Use SEEK_{SET,CUR,END} instead of
 hardcoded values

mixart: Use SEEK_{SET,CUR,END} instead of hardcoded values

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/mixart/mixart.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/pci/mixart/mixart.c b/sound/pci/mixart/mixart.c
index cc43ecd679061..216aee5f93e77 100644
--- a/sound/pci/mixart/mixart.c
+++ b/sound/pci/mixart/mixart.c
@@ -1109,13 +1109,13 @@ static long long snd_mixart_BA0_llseek(struct snd_info_entry *entry,
 	offset = offset & ~3; /* 4 bytes aligned */
 
 	switch(orig) {
-	case 0:  /* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1:  /* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2:  /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = MIXART_BA0_SIZE + offset;
 		break;
 	default:
@@ -1135,13 +1135,13 @@ static long long snd_mixart_BA1_llseek(struct snd_info_entry *entry,
 	offset = offset & ~3; /* 4 bytes aligned */
 
 	switch(orig) {
-	case 0:  /* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1:  /* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2: /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = MIXART_BA1_SIZE + offset;
 		break;
 	default:
-- 
GitLab


From 314634bc81325dcfeb31ed138647d428b1f26cbf Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Sep 2006 11:56:18 +0200
Subject: [PATCH 1020/1063] [ALSA] hda-codec - Fix mic input with STAC92xx
 codecs

Fixed mic input with STAC92xx codecs.  The mic pin was sometimes set to
OUTPUT by the headphone jack detection.
Also, try to assign a secondary mic as front-mic (or vice versa)
in the auto-detection if possible.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_codec.c      | 19 +++++--
 sound/pci/hda/patch_sigmatel.c | 90 ++++++++++++++++++++++------------
 2 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 07360996caaac..9c3d7ac080688 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -2079,12 +2079,21 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c
 			cfg->hp_pins[cfg->hp_outs] = nid;
 			cfg->hp_outs++;
 			break;
-		case AC_JACK_MIC_IN:
-			if (loc == AC_JACK_LOC_FRONT)
-				cfg->input_pins[AUTO_PIN_FRONT_MIC] = nid;
-			else
-				cfg->input_pins[AUTO_PIN_MIC] = nid;
+		case AC_JACK_MIC_IN: {
+			int preferred, alt;
+			if (loc == AC_JACK_LOC_FRONT) {
+				preferred = AUTO_PIN_FRONT_MIC;
+				alt = AUTO_PIN_MIC;
+			} else {
+				preferred = AUTO_PIN_MIC;
+				alt = AUTO_PIN_FRONT_MIC;
+			}
+			if (!cfg->input_pins[preferred])
+				cfg->input_pins[preferred] = nid;
+			else if (!cfg->input_pins[alt])
+				cfg->input_pins[alt] = nid;
 			break;
+		}
 		case AC_JACK_LINE_IN:
 			if (loc == AC_JACK_LOC_FRONT)
 				cfg->input_pins[AUTO_PIN_FRONT_LINE] = nid;
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 7cc064265204c..92f48a7258534 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -36,7 +36,6 @@
 
 #define NUM_CONTROL_ALLOC	32
 #define STAC_HP_EVENT		0x37
-#define STAC_UNSOL_ENABLE 	(AC_USRSP_EN | STAC_HP_EVENT)
 
 #define STAC_REF		0
 #define STAC_D945GTP3		1
@@ -1164,23 +1163,28 @@ static int stac92xx_auto_create_analog_input_ctls(struct hda_codec *codec, const
 	int i, j, k;
 
 	for (i = 0; i < AUTO_PIN_LAST; i++) {
-		int index = -1;
-		if (cfg->input_pins[i]) {
-			imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
-
-			for (j=0; j<spec->num_muxes; j++) {
-				int num_cons = snd_hda_get_connections(codec, spec->mux_nids[j], con_lst, HDA_MAX_NUM_INPUTS);
-				for (k=0; k<num_cons; k++)
-					if (con_lst[k] == cfg->input_pins[i]) {
-						index = k;
-					 	break;
-					}
-				if (index >= 0)
-					break;
-			}
-			imux->items[imux->num_items].index = index;
-			imux->num_items++;
+		int index;
+
+		if (!cfg->input_pins[i])
+			continue;
+		index = -1;
+		for (j = 0; j < spec->num_muxes; j++) {
+			int num_cons;
+			num_cons = snd_hda_get_connections(codec,
+							   spec->mux_nids[j],
+							   con_lst,
+							   HDA_MAX_NUM_INPUTS);
+			for (k = 0; k < num_cons; k++)
+				if (con_lst[k] == cfg->input_pins[i]) {
+					index = k;
+					goto found;
+				}
 		}
+		continue;
+	found:
+		imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+		imux->items[imux->num_items].index = index;
+		imux->num_items++;
 	}
 
 	if (imux->num_items == 1) {
@@ -1405,6 +1409,15 @@ static void stac922x_gpio_mute(struct hda_codec *codec, int pin, int muted)
 			    AC_VERB_SET_GPIO_DATA, gpiostate);
 }
 
+static void enable_pin_detect(struct hda_codec *codec, hda_nid_t nid,
+			      unsigned int event)
+{
+	if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP)
+		snd_hda_codec_write(codec, nid, 0,
+				    AC_VERB_SET_UNSOLICITED_ENABLE,
+				    (AC_USRSP_EN | event));
+}
+
 static int stac92xx_init(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -1417,13 +1430,13 @@ static int stac92xx_init(struct hda_codec *codec)
 	if (spec->hp_detect) {
 		/* Enable unsolicited responses on the HP widget */
 		for (i = 0; i < cfg->hp_outs; i++)
-			if (get_wcaps(codec, cfg->hp_pins[i]) & AC_WCAP_UNSOL_CAP)
-				snd_hda_codec_write(codec, cfg->hp_pins[i], 0,
-						    AC_VERB_SET_UNSOLICITED_ENABLE,
-						    STAC_UNSOL_ENABLE);
+			enable_pin_detect(codec, cfg->hp_pins[i],
+					  STAC_HP_EVENT);
 		/* fake event to set up pins */
 		codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26);
-		/* enable the headphones by default.  If/when unsol_event detection works, this will be ignored */
+		/* enable the headphones by default.
+		 * If/when unsol_event detection works, this will be ignored
+		 */
 		stac92xx_auto_init_hp_out(codec);
 	} else {
 		stac92xx_auto_init_multi_out(codec);
@@ -1478,6 +1491,8 @@ static void stac92xx_set_pinctl(struct hda_codec *codec, hda_nid_t nid,
 {
 	unsigned int pin_ctl = snd_hda_codec_read(codec, nid,
 			0, AC_VERB_GET_PIN_WIDGET_CONTROL, 0x00);
+	if (flag == AC_PINCTL_OUT_EN && (pin_ctl & AC_PINCTL_IN_EN))
+		return;
 	snd_hda_codec_write(codec, nid, 0,
 			AC_VERB_SET_PIN_WIDGET_CONTROL,
 			pin_ctl | flag);
@@ -1493,21 +1508,27 @@ static void stac92xx_reset_pinctl(struct hda_codec *codec, hda_nid_t nid,
 			pin_ctl & ~flag);
 }
 
-static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res)
+static int get_pin_presence(struct hda_codec *codec, hda_nid_t nid)
+{
+	if (!nid)
+		return 0;
+	if (snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_SENSE, 0x00)
+	    & (1 << 31))
+		return 1;
+	return 0;
+}
+
+static void stac92xx_hp_detect(struct hda_codec *codec, unsigned int res)
 {
 	struct sigmatel_spec *spec = codec->spec;
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	int i, presence;
 
-	if ((res >> 26) != STAC_HP_EVENT)
-		return;
-
 	presence = 0;
 	for (i = 0; i < cfg->hp_outs; i++) {
-		int p = snd_hda_codec_read(codec, cfg->hp_pins[i], 0,
-					   AC_VERB_GET_PIN_SENSE, 0x00);
-		if (p & (1 << 31))
-			presence++;
+		presence = get_pin_presence(codec, cfg->hp_pins[i]);
+		if (presence)
+			break;
 	}
 
 	if (presence) {
@@ -1535,6 +1556,15 @@ static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res)
 	}
 } 
 
+static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+	switch (res >> 26) {
+	case STAC_HP_EVENT:
+		stac92xx_hp_detect(codec, res);
+		break;
+	}
+}
+
 #ifdef CONFIG_PM
 static int stac92xx_resume(struct hda_codec *codec)
 {
-- 
GitLab


From 5c79b1f887f8edcd399baa164b66a1c08566c994 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Sep 2006 13:34:13 +0200
Subject: [PATCH 1021/1063] [ALSA] hda-intel - A slight cleanup of timeout
 check in azx_get_response()

A slight cleanup of timeout check in azx_get_response() to check
jiffies for HZ-independent timeout.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/hda_intel.c | 46 +++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 4d2df771112e0..e9d4cb4d07e11 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -520,38 +520,36 @@ static void azx_update_rirb(struct azx *chip)
 static unsigned int azx_rirb_get_response(struct hda_codec *codec)
 {
 	struct azx *chip = codec->bus->private_data;
-	int timeout = 50;
+	unsigned long timeout;
 
-	for (;;) {
+ again:
+	timeout = jiffies + msecs_to_jiffies(1000);
+	do {
 		if (chip->polling_mode) {
 			spin_lock_irq(&chip->reg_lock);
 			azx_update_rirb(chip);
 			spin_unlock_irq(&chip->reg_lock);
 		}
 		if (! chip->rirb.cmds)
-			break;
-		if (! --timeout) {
-			if (! chip->polling_mode) {
-				snd_printk(KERN_WARNING "hda_intel: "
-					   "azx_get_response timeout, "
-					   "switching to polling mode...\n");
-				chip->polling_mode = 1;
-				timeout = 50;
-				continue;
-			}
-			snd_printk(KERN_ERR
-				   "hda_intel: azx_get_response timeout, "
-				   "switching to single_cmd mode...\n");
-			chip->rirb.rp = azx_readb(chip, RIRBWP);
-			chip->rirb.cmds = 0;
-			/* switch to single_cmd mode */
-			chip->single_cmd = 1;
-			azx_free_cmd_io(chip);
-			return -1;
-		}
-		msleep(1);
+			return chip->rirb.res; /* the last value */
+		schedule_timeout_interruptible(1);
+	} while (time_after_eq(timeout, jiffies));
+
+	if (!chip->polling_mode) {
+		snd_printk(KERN_WARNING "hda_intel: azx_get_response timeout, "
+			   "switching to polling mode...\n");
+		chip->polling_mode = 1;
+		goto again;
 	}
-	return chip->rirb.res; /* the last value */
+
+	snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, "
+		   "switching to single_cmd mode...\n");
+	chip->rirb.rp = azx_readb(chip, RIRBWP);
+	chip->rirb.cmds = 0;
+	/* switch to single_cmd mode */
+	chip->single_cmd = 1;
+	azx_free_cmd_io(chip);
+	return -1;
 }
 
 /*
-- 
GitLab


From eb995a8c82dba4a8e027c99ac5001fbc287a115c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Sep 2006 14:28:21 +0200
Subject: [PATCH 1022/1063] [ALSA] hda-codec - Fix headphone auto-toggle on
 sigmatel codec

Fix/optimize the headphone auto-toggle function on sigmatel codecs.
The headphone pins are kept as output.  When headphones are unplugged,
you cannot hear anyway ;)

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/hda/patch_sigmatel.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 92f48a7258534..731b7b97ee711 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1432,12 +1432,9 @@ static int stac92xx_init(struct hda_codec *codec)
 		for (i = 0; i < cfg->hp_outs; i++)
 			enable_pin_detect(codec, cfg->hp_pins[i],
 					  STAC_HP_EVENT);
+		stac92xx_auto_init_hp_out(codec);
 		/* fake event to set up pins */
 		codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26);
-		/* enable the headphones by default.
-		 * If/when unsol_event detection works, this will be ignored
-		 */
-		stac92xx_auto_init_hp_out(codec);
 	} else {
 		stac92xx_auto_init_multi_out(codec);
 		stac92xx_auto_init_hp_out(codec);
@@ -1539,9 +1536,6 @@ static void stac92xx_hp_detect(struct hda_codec *codec, unsigned int res)
 		for (i = 0; i < cfg->speaker_outs; i++)
 			stac92xx_reset_pinctl(codec, cfg->speaker_pins[i],
 						AC_PINCTL_OUT_EN);
-		for (i = 0; i < cfg->hp_outs; i++)
-			stac92xx_set_pinctl(codec, cfg->hp_pins[i],
-					    AC_PINCTL_OUT_EN);
 	} else {
 		/* enable lineouts, disable hp */
 		for (i = 0; i < cfg->line_outs; i++)
@@ -1550,9 +1544,6 @@ static void stac92xx_hp_detect(struct hda_codec *codec, unsigned int res)
 		for (i = 0; i < cfg->speaker_outs; i++)
 			stac92xx_set_pinctl(codec, cfg->speaker_pins[i],
 						AC_PINCTL_OUT_EN);
-		for (i = 0; i < cfg->hp_outs; i++)
-			stac92xx_reset_pinctl(codec, cfg->hp_pins[i],
-					      AC_PINCTL_OUT_EN);
 	}
 } 
 
-- 
GitLab


From 92b9ac78f934616d08c72747607bfb0fa51ee52d Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Fri, 22 Sep 2006 10:57:36 +0200
Subject: [PATCH 1023/1063] [ALSA] usb-audio: increase number of packets per
 URB

To decrease the USB interrupts rate, increase both the default and the
maximum number of packets per URB.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/usb/usbaudio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 664dd4c21e66e..49248fa7aef47 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -68,7 +68,7 @@ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
 static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;	/* Enable this card */
 static int vid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; /* Vendor ID for this card */
 static int pid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; /* Product ID for this card */
-static int nrpacks = 4;		/* max. number of packets per urb */
+static int nrpacks = 8;		/* max. number of packets per urb */
 static int async_unlink = 1;
 static int device_setup[SNDRV_CARDS]; /* device parameter for this card*/
 
@@ -100,7 +100,7 @@ MODULE_PARM_DESC(device_setup, "Specific device setup (if needed).");
  *
  */
 
-#define MAX_PACKS	10
+#define MAX_PACKS	20
 #define MAX_PACKS_HS	(MAX_PACKS * 8)	/* in high speed mode */
 #define MAX_URBS	8
 #define SYNC_URBS	4	/* always four urbs for sync */
-- 
GitLab


From dbf91dd47d90e1d91d5daf37ca30728f4e11c5e3 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Fri, 22 Sep 2006 10:58:40 +0200
Subject: [PATCH 1024/1063] [ALSA] ES1938: remove duplicate field
 initialization

Remove the duplicate and inconsistent initialization of the kcontrol
access field.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/es1938.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c
index 3784088bea84e..3ce5a4e7e31f3 100644
--- a/sound/pci/es1938.c
+++ b/sound/pci/es1938.c
@@ -1359,10 +1359,9 @@ ES1938_DOUBLE("Master Playback Switch", 0, 0x60, 0x62, 6, 6, 1, 1),
 },
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+	.access = (SNDRV_CTL_ELEM_ACCESS_READ |
 		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name = "Hardware Master Playback Switch",
-	.access = SNDRV_CTL_ELEM_ACCESS_READ,
 	.info = snd_es1938_info_hw_switch,
 	.get = snd_es1938_get_hw_switch,
 	.tlv = { .p = db_scale_master },
-- 
GitLab


From fef8a0c03daa1aaf3f83e45da2b14674c073a9f5 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Fri, 22 Sep 2006 11:00:51 +0200
Subject: [PATCH 1025/1063] [ALSA] usb-audio: add mixer control names for the
 Aureon 5.1 MkII

Add a mixer name map for the TerraTec Aureon 5.1 MkII USB.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/usb/usbmixer_maps.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/sound/usb/usbmixer_maps.c b/sound/usb/usbmixer_maps.c
index 37accb68652d0..7c4dcb3f436af 100644
--- a/sound/usb/usbmixer_maps.c
+++ b/sound/usb/usbmixer_maps.c
@@ -234,6 +234,26 @@ static struct usbmix_name_map justlink_map[] = {
 	{ 0 } /* terminator */
 };
 
+/* TerraTec Aureon 5.1 MkII USB */
+static struct usbmix_name_map aureon_51_2_map[] = {
+	/* 1: IT USB */
+	/* 2: IT Mic */
+	/* 3: IT Line */
+	/* 4: IT SPDIF */
+	/* 5: OT SPDIF */
+	/* 6: OT Speaker */
+	/* 7: OT USB */
+	{ 8, "Capture Source" }, /* SU */
+	{ 9, "Master Playback" }, /* FU */
+	{ 10, "Mic Capture" }, /* FU */
+	{ 11, "Line Capture" }, /* FU */
+	{ 12, "IEC958 In Capture" }, /* FU */
+	{ 13, "Mic Playback" }, /* FU */
+	{ 14, "Line Playback" }, /* FU */
+	/* 15: MU */
+	{} /* terminator */
+};
+
 /*
  * Control map entries
  */
@@ -276,6 +296,10 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = {
 		.id = USB_ID(0x0c45, 0x1158),
 		.map = justlink_map,
 	},
+	{
+		.id = USB_ID(0x0ccd, 0x0028),
+		.map = aureon_51_2_map,
+	},
 	{ 0 } /* terminator */
 };
 
-- 
GitLab


From 8b0c4149e82170ebc44b96e9ed96545f8ebd7c81 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 22 Sep 2006 15:27:55 +0200
Subject: [PATCH 1026/1063] [ALSA] Move CONFIG_SND_AC97_POWER_SAVE to
 pci/Kconfig

Moved the entry of CONFIG_SND_AC97_POWER_SAVE from drivers/Kconfig to
a more appropriate place, pci/Kconfig.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/drivers/Kconfig | 13 -------------
 sound/pci/Kconfig     | 13 +++++++++++++
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig
index 952c7f170101b..7971285dfd5b9 100644
--- a/sound/drivers/Kconfig
+++ b/sound/drivers/Kconfig
@@ -113,17 +113,4 @@ config SND_MPU401
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-mpu401.
 
-config SND_AC97_POWER_SAVE
-	bool "AC97 Power-Saving Mode"
-	depends on SND_AC97_CODEC && EXPERIMENTAL
-	default n
-	help
-	  Say Y here to enable the aggressive power-saving support of
-	  AC97 codecs.  In this mode, the power-mode is dynamically
-	  controlled at each open/close.
-
-	  The mode is activated by passing power_save=1 option to
-	  snd-ac97-codec driver.  You can toggle it dynamically over
-	  sysfs, too.
-
 endmenu
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index dffb6be768008..8a6b1803c763e 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -744,4 +744,17 @@ config SND_YMFPCI
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-ymfpci.
 
+config SND_AC97_POWER_SAVE
+	bool "AC97 Power-Saving Mode"
+	depends on SND_AC97_CODEC && EXPERIMENTAL
+	default n
+	help
+	  Say Y here to enable the aggressive power-saving support of
+	  AC97 codecs.  In this mode, the power-mode is dynamically
+	  controlled at each open/close.
+
+	  The mode is activated by passing power_save=1 option to
+	  snd-ac97-codec driver.  You can toggle it dynamically over
+	  sysfs, too.
+
 endmenu
-- 
GitLab


From f0063c4489a00ed5395378ef80a7edea4272f20b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 22 Sep 2006 15:30:42 +0200
Subject: [PATCH 1027/1063] [ALSA] intel8x0m - Free irq in suspend

Free the irq handler in suspend and reacquire it in resume, as the
intel8x0 audio driver does.  Some devices may change the irq line
dynamically during suspend/resume.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jaroslav Kysela <perex@suse.cz>
---
 sound/pci/intel8x0m.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c
index 91850281f89b1..268e2f7241eae 100644
--- a/sound/pci/intel8x0m.c
+++ b/sound/pci/intel8x0m.c
@@ -1045,6 +1045,8 @@ static int intel8x0m_suspend(struct pci_dev *pci, pm_message_t state)
 	for (i = 0; i < chip->pcm_devs; i++)
 		snd_pcm_suspend_all(chip->pcm[i]);
 	snd_ac97_suspend(chip->ac97);
+	if (chip->irq >= 0)
+		free_irq(chip->irq, chip);
 	pci_disable_device(pci);
 	pci_save_state(pci);
 	return 0;
@@ -1058,6 +1060,9 @@ static int intel8x0m_resume(struct pci_dev *pci)
 	pci_restore_state(pci);
 	pci_enable_device(pci);
 	pci_set_master(pci);
+	request_irq(pci->irq, snd_intel8x0_interrupt, IRQF_DISABLED|IRQF_SHARED,
+		    card->shortname, chip);
+	chip->irq = pci->irq;
 	snd_intel8x0_chip_init(chip, 0);
 	snd_ac97_resume(chip->ac97);
 
-- 
GitLab


From 892e4fba1cb5cdc70f3acc65e024e541c0b2d559 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sat, 23 Sep 2006 10:24:36 +0100
Subject: [PATCH 1028/1063] [MTD] Fix dependencies with CONFIG_MTD=m

CMDLINEPARTS shouldn't be selectable, and neither should SSFDC, which
can be a tristate anyway.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 717e90448fc6b..a03e862851db6 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -101,7 +101,7 @@ config MTD_REDBOOT_PARTS_READONLY
 
 config MTD_CMDLINE_PARTS
 	bool "Command line partition table parsing"
-	depends on MTD_PARTITIONS = "y"
+	depends on MTD_PARTITIONS = "y" && MTD = "y"
 	---help---
 	  Allow generic configuration of the MTD partition tables via the kernel
 	  command line. Multiple flash resources are supported for hardware where
@@ -264,7 +264,7 @@ config RFD_FTL
 		http://www.gensw.com/pages/prod/bios/rfd.htm
 
 config SSFDC
-	bool "NAND SSFDC (SmartMedia) read only translation layer"
+	tristate "NAND SSFDC (SmartMedia) read only translation layer"
 	depends on MTD
 	default n
 	help
-- 
GitLab


From 9a05eded5d17a425b9d9ed9dd80f518429dde4e8 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sat, 23 Sep 2006 10:56:24 +0100
Subject: [PATCH 1029/1063] [MTD] SSFDC translation layer minor cleanup

Don't include <linux/config.h>.
Don't say 'MB' where you mean 'MiB'.
Don't allocate 512 bytes on the stack.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/ssfdc.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index ddbf015f4119d..cf60a5e87f19f 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -10,7 +10,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -29,7 +28,7 @@ struct ssfdcr_record {
 	int cis_block;			/* block n. containing CIS/IDI */
 	int erase_size;			/* phys_block_size */
 	unsigned short *logic_block_map; /* all zones (max 8192 phys blocks on
-					    the 128MB) */
+					    the 128MiB) */
 	int map_len;			/* n. phys_blocks on the card */
 };
 
@@ -43,11 +42,11 @@ struct ssfdcr_record {
 #define MAX_LOGIC_BLK_PER_ZONE	1000
 #define MAX_PHYS_BLK_PER_ZONE	1024
 
-#define KB(x)	( (x) * 1024L )
-#define MB(x)	( KB(x) * 1024L )
+#define KiB(x)	( (x) * 1024L )
+#define MiB(x)	( KiB(x) * 1024L )
 
 /** CHS Table
-		1MB	2MB	4MB	8MB	16MB	32MB	64MB	128MB
+		1MiB	2MiB	4MiB	8MiB	16MiB	32MiB	64MiB	128MiB
 NCylinder	125	125	250	250	500	500	500	500
 NHead		4	4	4	4	4	8	8	16
 NSector		4	8	8	16	16	16	32	32
@@ -64,14 +63,14 @@ typedef struct {
 
 /* Must be ordered by size */
 static const chs_entry_t chs_table[] = {
-	{ MB(  1), 125,  4,  4 },
-	{ MB(  2), 125,  4,  8 },
-	{ MB(  4), 250,  4,  8 },
-	{ MB(  8), 250,  4, 16 },
-	{ MB( 16), 500,  4, 16 },
-	{ MB( 32), 500,  8, 16 },
-	{ MB( 64), 500,  8, 32 },
-	{ MB(128), 500, 16, 32 },
+	{ MiB(  1), 125,  4,  4 },
+	{ MiB(  2), 125,  4,  8 },
+	{ MiB(  4), 250,  4,  8 },
+	{ MiB(  8), 250,  4, 16 },
+	{ MiB( 16), 500,  4, 16 },
+	{ MiB( 32), 500,  8, 16 },
+	{ MiB( 64), 500,  8, 32 },
+	{ MiB(128), 500, 16, 32 },
 	{ 0 },
 };
 
@@ -109,14 +108,19 @@ static int get_valid_cis_sector(struct mtd_info *mtd)
 	int ret, k, cis_sector;
 	size_t retlen;
 	loff_t offset;
-	uint8_t sect_buf[SECTOR_SIZE];
+	uint8_t *sect_buf;
+
+	cis_sector = -1;
+
+	sect_buf = kmalloc(SECTOR_SIZE, GFP_KERNEL);
+	if (!sect_buf)
+		goto out;
 
 	/*
 	 * Look for CIS/IDI sector on the first GOOD block (give up after 4 bad
 	 * blocks). If the first good block doesn't contain CIS number the flash
 	 * is not SSFDC formatted
 	 */
-	cis_sector = -1;
 	for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) {
 		if (!mtd->block_isbad(mtd, offset)) {
 			ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen,
@@ -140,6 +144,8 @@ static int get_valid_cis_sector(struct mtd_info *mtd)
 		}
 	}
 
+	kfree(sect_buf);
+ out:
 	return cis_sector;
 }
 
-- 
GitLab


From 08d3ad6a518051bfaefd5d6a8005e20c036996c3 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sat, 23 Sep 2006 16:20:48 +0100
Subject: [PATCH 1030/1063] [MTD] Whitespace cleanup in SSFDC driver.

Says akpm: ' - search for "( " and " )", fix.'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 drivers/mtd/ssfdc.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index cf60a5e87f19f..79d3bb659bfe7 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -127,11 +127,11 @@ static int get_valid_cis_sector(struct mtd_info *mtd)
 				sect_buf);
 
 			/* CIS pattern match on the sector buffer */
-			if ( ret < 0 || retlen != SECTOR_SIZE ) {
+			if (ret < 0 || retlen != SECTOR_SIZE) {
 				printk(KERN_WARNING
 					"SSFDC_RO:can't read CIS/IDI sector\n");
-			} else if ( !memcmp(sect_buf, cis_numbers,
-					sizeof(cis_numbers)) ) {
+			} else if (!memcmp(sect_buf, cis_numbers,
+					sizeof(cis_numbers))) {
 				/* Found */
 				cis_sector = (int)(offset >> SECTOR_SHIFT);
 			} else {
@@ -233,7 +233,7 @@ static int get_logical_address(uint8_t *oob_buf)
 		}
 	}
 
-	if ( !ok )
+	if (!ok)
 		block_address = -2;
 
 	DEBUG(MTD_DEBUG_LEVEL3, "SSFDC_RO: get_logical_address() %d\n",
@@ -251,8 +251,8 @@ static int build_logical_block_map(struct ssfdcr_record *ssfdc)
 	struct mtd_info *mtd = ssfdc->mbd.mtd;
 
 	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: build_block_map() nblks=%d (%luK)\n",
-		ssfdc->map_len, (unsigned long)ssfdc->map_len *
-		ssfdc->erase_size / 1024 );
+	      ssfdc->map_len,
+	      (unsigned long)ssfdc->map_len * ssfdc->erase_size / 1024);
 
 	/* Scan every physical block, skip CIS block */
 	for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len;
@@ -329,21 +329,21 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	/* Set geometry */
 	ssfdc->heads = 16;
 	ssfdc->sectors = 32;
-	get_chs( mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors);
+	get_chs(mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors);
 	ssfdc->cylinders = (unsigned short)((mtd->size >> SECTOR_SHIFT) /
 			((long)ssfdc->sectors * (long)ssfdc->heads));
 
 	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: using C:%d H:%d S:%d == %ld sects\n",
 		ssfdc->cylinders, ssfdc->heads , ssfdc->sectors,
 		(long)ssfdc->cylinders * (long)ssfdc->heads *
-		(long)ssfdc->sectors );
+		(long)ssfdc->sectors);
 
 	ssfdc->mbd.size = (long)ssfdc->heads * (long)ssfdc->cylinders *
 				(long)ssfdc->sectors;
 
 	/* Allocate logical block map */
-	ssfdc->logic_block_map = kmalloc( sizeof(ssfdc->logic_block_map[0]) *
-						ssfdc->map_len, GFP_KERNEL);
+	ssfdc->logic_block_map = kmalloc(sizeof(ssfdc->logic_block_map[0]) *
+					 ssfdc->map_len, GFP_KERNEL);
 	if (!ssfdc->logic_block_map) {
 		printk(KERN_WARNING
 			"SSFDC_RO: out of memory for data structures\n");
@@ -414,7 +414,7 @@ static int ssfdcr_readsect(struct mtd_blktrans_dev *dev,
 			"SSFDC_RO: ssfdcr_readsect() phys_sect_no=%lu\n",
 			sect_no);
 
-		if (read_physical_sector( ssfdc->mbd.mtd, buf, sect_no ) < 0)
+		if (read_physical_sector(ssfdc->mbd.mtd, buf, sect_no) < 0)
 			return -EIO;
 	} else {
 		memset(buf, 0xff, SECTOR_SIZE);
-- 
GitLab


From f2d719c65ad8f10afa7bec11315faa7badf4ecb9 Mon Sep 17 00:00:00 2001
From: Alexis Bruemmer <alexisb@us.ibm.com>
Date: Thu, 7 Sep 2006 14:32:16 -0700
Subject: [PATCH 1031/1063] [SCSI] aic94xx: Removes Reliance on FLASH
 Manufacture IDs

This patch removes the reliance on FLASH Manufacture IDs for validation.

Signed-off-by: Alexis Bruemmer <alexisb@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic94xx/aic94xx_sds.c | 51 ++----------------------------
 1 file changed, 2 insertions(+), 49 deletions(-)

diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c
index eec1e0db0e0f0..83574b5b4e694 100644
--- a/drivers/scsi/aic94xx/aic94xx_sds.c
+++ b/drivers/scsi/aic94xx/aic94xx_sds.c
@@ -376,7 +376,6 @@ int asd_read_ocm(struct asd_ha_struct *asd_ha)
 /* ---------- FLASH stuff ---------- */
 
 #define FLASH_RESET			0xF0
-#define FLASH_MANUF_AMD                 1
 
 #define FLASH_SIZE                      0x200000
 #define FLASH_DIR_COOKIE                "*** ADAPTEC FLASH DIRECTORY *** "
@@ -627,7 +626,7 @@ static int asd_find_flash_dir(struct asd_ha_struct *asd_ha,
 static int asd_flash_getid(struct asd_ha_struct *asd_ha)
 {
 	int err = 0;
-	u32 reg, inc;
+	u32 reg;
 
 	reg = asd_read_reg_dword(asd_ha, EXSICNFGR);
 
@@ -648,53 +647,7 @@ static int asd_flash_getid(struct asd_ha_struct *asd_ha)
 		ASD_DPRINTK("couldn't reset flash(%d)\n", err);
 		return err;
 	}
-	/* Get flash info. This would most likely be AMD Am29LV family flash.
-	 * First try the sequence for word mode.  It is the same as for
-	 * 008B (byte mode only), 160B (word mode) and 800D (word mode).
-	 */
-	reg = asd_ha->hw_prof.flash.bar;
-	inc = asd_ha->hw_prof.flash.wide ? 2 : 1;
-	asd_write_reg_byte(asd_ha, reg + 0x555, 0xAA);
-	asd_write_reg_byte(asd_ha, reg + 0x2AA, 0x55);
-	asd_write_reg_byte(asd_ha, reg + 0x555, 0x90);
-	asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg);
-	asd_ha->hw_prof.flash.dev_id= asd_read_reg_byte(asd_ha,reg+inc);
-	asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha,reg+inc+inc);
-	/* Get out of autoselect mode. */
-	err = asd_reset_flash(asd_ha);
-
-	if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) {
-		ASD_DPRINTK("0Found FLASH(%d) manuf:%d, dev_id:0x%x, "
-			    "sec_prot:%d\n",
-			    asd_ha->hw_prof.flash.wide ? 16 : 8,
-			    asd_ha->hw_prof.flash.manuf,
-			    asd_ha->hw_prof.flash.dev_id,
-			    asd_ha->hw_prof.flash.sec_prot);
-		return 0;
-	}
-
-	/* Ok, try the sequence for byte mode of 160B and 800D.
-	 * We may actually never need this.
-	 */
-	asd_write_reg_byte(asd_ha, reg + 0xAAA, 0xAA);
-	asd_write_reg_byte(asd_ha, reg + 0x555, 0x55);
-	asd_write_reg_byte(asd_ha, reg + 0xAAA, 0x90);
-	asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg);
-	asd_ha->hw_prof.flash.dev_id = asd_read_reg_byte(asd_ha, reg + 2);
-	asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha, reg + 4);
-	err = asd_reset_flash(asd_ha);
-
-	if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) {
-		ASD_DPRINTK("1Found FLASH(%d) manuf:%d, dev_id:0x%x, "
-			    "sec_prot:%d\n",
-			    asd_ha->hw_prof.flash.wide ? 16 : 8,
-			    asd_ha->hw_prof.flash.manuf,
-			    asd_ha->hw_prof.flash.dev_id,
-			    asd_ha->hw_prof.flash.sec_prot);
-		return 0;
-	}
-
-	return -ENOENT;
+	return 0;
 }
 
 static u16 asd_calc_flash_chksum(u16 *p, int size)
-- 
GitLab


From 10d19ae5e1715c27db7009df6d59179774e7b8a1 Mon Sep 17 00:00:00 2001
From: "malahal@us.ibm.com" <malahal@us.ibm.com>
Date: Thu, 7 Sep 2006 15:12:42 -0700
Subject: [PATCH 1032/1063] [SCSI] aic94xx: Fix for a typo in aic94xx_init()

Signed-off-by: Malahal Naineni <malahal@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic94xx/aic94xx_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 302b54fddf3cf..ee2ccad70487c 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -828,7 +828,7 @@ static int __init aic94xx_init(void)
 
 	aic94xx_transport_template =
 		sas_domain_attach_transport(&aic94xx_transport_functions);
-	if (err)
+	if (!aic94xx_transport_template)
 		goto out_destroy_caches;
 
 	err = pci_register_driver(&aic94xx_pci_driver);
-- 
GitLab


From 5fcda4224529c4e550c917668d5e96c1d3e7039b Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Thu, 14 Sep 2006 17:04:58 -0500
Subject: [PATCH 1033/1063] [SCSI] aha152x: remove static host array

Fix this driver not to use a static two-element host array; use a list
instead.  This should fix the panic on repeated eject/reinsert of the
pcmcia version of this device.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aha152x.c | 53 +++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index f974869ea3236..fb6a476eb8738 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -253,6 +253,7 @@
 #include <linux/isapnp.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <linux/list.h>
 #include <asm/semaphore.h>
 #include <scsi/scsicam.h>
 
@@ -262,6 +263,8 @@
 #include <scsi/scsi_transport_spi.h>
 #include "aha152x.h"
 
+static LIST_HEAD(aha152x_host_list);
+
 
 /* DEFINES */
 
@@ -423,8 +426,6 @@ MODULE_DEVICE_TABLE(isapnp, id_table);
 
 #endif /* !PCMCIA */
 
-static int registered_count=0;
-static struct Scsi_Host *aha152x_host[2];
 static struct scsi_host_template aha152x_driver_template;
 
 /*
@@ -541,6 +542,7 @@ struct aha152x_hostdata {
 #ifdef __ISAPNP__
 	struct pnp_dev *pnpdev;
 #endif
+	struct list_head host_list;
 };
 
 
@@ -755,20 +757,9 @@ static inline Scsi_Cmnd *remove_SC(Scsi_Cmnd **SC, Scsi_Cmnd *SCp)
 	return ptr;
 }
 
-static inline struct Scsi_Host *lookup_irq(int irqno)
-{
-	int i;
-
-	for(i=0; i<ARRAY_SIZE(aha152x_host); i++)
-		if(aha152x_host[i] && aha152x_host[i]->irq==irqno)
-			return aha152x_host[i];
-
-	return NULL;
-}
-
 static irqreturn_t swintr(int irqno, void *dev_id, struct pt_regs *regs)
 {
-	struct Scsi_Host *shpnt = lookup_irq(irqno);
+	struct Scsi_Host *shpnt = (struct Scsi_Host *)dev_id;
 
 	if (!shpnt) {
         	printk(KERN_ERR "aha152x: catched software interrupt %d for unknown controller.\n", irqno);
@@ -791,10 +782,11 @@ struct Scsi_Host *aha152x_probe_one(struct aha152x_setup *setup)
 		return NULL;
 	}
 
-	/* need to have host registered before triggering any interrupt */
-	aha152x_host[registered_count] = shpnt;
-
 	memset(HOSTDATA(shpnt), 0, sizeof *HOSTDATA(shpnt));
+	INIT_LIST_HEAD(&HOSTDATA(shpnt)->host_list);
+
+	/* need to have host registered before triggering any interrupt */
+	list_add_tail(&HOSTDATA(shpnt)->host_list, &aha152x_host_list);
 
 	shpnt->io_port   = setup->io_port;
 	shpnt->n_io_port = IO_RANGE;
@@ -907,12 +899,10 @@ struct Scsi_Host *aha152x_probe_one(struct aha152x_setup *setup)
 
 	scsi_scan_host(shpnt);
 
-	registered_count++;
-
 	return shpnt;
 
 out_host_put:
-	aha152x_host[registered_count]=NULL;
+	list_del(&HOSTDATA(shpnt)->host_list);
 	scsi_host_put(shpnt);
 
 	return NULL;
@@ -937,6 +927,7 @@ void aha152x_release(struct Scsi_Host *shpnt)
 #endif
 
 	scsi_remove_host(shpnt);
+	list_del(&HOSTDATA(shpnt)->host_list);
 	scsi_host_put(shpnt);
 }
 
@@ -1459,9 +1450,12 @@ static struct work_struct aha152x_tq;
  */
 static void run(void)
 {
-	int i;
-	for (i = 0; i<ARRAY_SIZE(aha152x_host); i++) {
-		is_complete(aha152x_host[i]);
+	struct aha152x_hostdata *hd;
+
+	list_for_each_entry(hd, &aha152x_host_list, host_list) {
+		struct Scsi_Host *shost = container_of((void *)hd, struct Scsi_Host, hostdata);
+
+		is_complete(shost);
 	}
 }
 
@@ -1471,7 +1465,7 @@ static void run(void)
  */
 static irqreturn_t intr(int irqno, void *dev_id, struct pt_regs *regs)
 {
-	struct Scsi_Host *shpnt = lookup_irq(irqno);
+	struct Scsi_Host *shpnt = (struct Scsi_Host *)dev_id;
 	unsigned long flags;
 	unsigned char rev, dmacntrl0;
 
@@ -3953,16 +3947,17 @@ static int __init aha152x_init(void)
 #endif
 	}
 
-	return registered_count>0;
+	return 1;
 }
 
 static void __exit aha152x_exit(void)
 {
-	int i;
+	struct aha152x_hostdata *hd, *tmp; /* tmp: next entry, saved before release */
+
+	list_for_each_entry_safe(hd, tmp, &aha152x_host_list, host_list) {
+		struct Scsi_Host *shost = container_of((void *)hd, struct Scsi_Host, hostdata);
 
-	for(i=0; i<ARRAY_SIZE(setup); i++) {
-		aha152x_release(aha152x_host[i]);
-		aha152x_host[i]=NULL;
+		aha152x_release(shost);
 	}
 }
 
-- 
GitLab


From 2d2f8d59b14bec6c745e219a350ac51d9e00673f Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Fri, 15 Sep 2006 14:43:11 +0200
Subject: [PATCH 1034/1063] [SCSI] megaraid: Make megadev_ioctl() check
 copy_to_user() return value

Check copy_to_user() return value in drivers/scsi/megaraid.c::megadev_ioctl().
This gets rid of this little warning:
  drivers/scsi/megaraid.c:3661: warning: ignoring return value of 'copy_to_user', declared with attribute warn_unused_result

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Acked-by: "Ju, Seokmann" <Seokmann.Ju@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/megaraid.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index ccb0055ac73ad..b87bef69ba0fc 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -3656,8 +3656,9 @@ megadev_ioctl(struct inode *inode, struct file *filep, unsigned int cmd,
 			 * Send the request sense data also, irrespective of
 			 * whether the user has asked for it or not.
 			 */
-			copy_to_user(upthru->reqsensearea,
-					pthru->reqsensearea, 14);
+			if (copy_to_user(upthru->reqsensearea,
+					pthru->reqsensearea, 14))
+				rval = -EFAULT;
 
 freemem_and_return:
 			if( pthru->dataxferlen ) {
-- 
GitLab


From 4c8bd7eeee4c8f157fb61fb64b57500990b42e0e Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Fri, 22 Sep 2006 22:31:36 -0700
Subject: [PATCH 1035/1063] [KERNEL] Do not truncate to 'int' in ALIGN() macro.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 851aa1bcfc1a2..2b2ae4fdce8bb 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -31,7 +31,7 @@ extern const char linux_banner[];
 #define STACK_MAGIC	0xdeadbeef
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
-- 
GitLab


From 5f77043f0f7851aa6139fb9a8b297497b540b397 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 24 Sep 2006 00:40:41 +1000
Subject: [PATCH 1036/1063] [CRYPTO] hmac: Fix hmac_init update call

The crypto_hash_update call in hmac_init gave the number 1
instead of the length of the sg list in bytes.  This is a
missed conversion from the digest => hash change.

As tcrypt only tests crypto_hash_digest it didn't catch this.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 crypto/hmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/hmac.c b/crypto/hmac.c
index f403b69460471..d52b234835cf1 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -98,7 +98,7 @@ static int hmac_init(struct hash_desc *pdesc)
 	sg_set_buf(&tmp, ipad, bs);
 
 	return unlikely(crypto_hash_init(&desc)) ?:
-	       crypto_hash_update(&desc, &tmp, 1);
+	       crypto_hash_update(&desc, &tmp, bs);
 }
 
 static int hmac_update(struct hash_desc *pdesc,
-- 
GitLab


From d7b2004528a967f2ba0bf31b1eb0da6a876960e6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 16:44:16 +0100
Subject: [PATCH 1037/1063] [PATCH] missing includes from infiniband merge

indirect chains of includes are arch-specific and can't
be relied upon...  (hell, even attempt to build it for
itanic would trigger vmalloc.h ones; err.h triggers
on e.g. alpha).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/infiniband/core/mad_priv.h           | 1 +
 drivers/infiniband/hw/amso1100/c2_provider.c | 1 +
 drivers/infiniband/hw/amso1100/c2_rnic.c     | 1 +
 drivers/infiniband/hw/ipath/ipath_diag.c     | 1 +
 4 files changed, 4 insertions(+)

diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 1da9adbccaecd..d06b59083f6ea 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -38,6 +38,7 @@
 #define __IB_MAD_PRIV_H__
 
 #include <linux/completion.h>
+#include <linux/err.h>
 #include <linux/pci.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 8fddc8cccdf3d..dd6af551108bc 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -49,6 +49,7 @@
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
 #include <linux/if_arp.h>
+#include <linux/vmalloc.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index 1c3c9d65ecea6..f49a32b7a8f64 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -50,6 +50,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
 #include <linux/inet.h>
+#include <linux/vmalloc.h>
 
 #include <linux/route.h>
 
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 28b6b46c106ae..29958b6e0214a 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -43,6 +43,7 @@
 
 #include <linux/io.h>
 #include <linux/pci.h>
+#include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 
 #include "ipath_kernel.h"
-- 
GitLab


From 13b5aeccc4350e5069c723e8f9becd7208ee02f2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 16:44:58 +0100
Subject: [PATCH 1038/1063] [PATCH] more fallout from get_property returning
 pointer to const

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/powerpc/platforms/powermac/feature.c | 4 ++--
 arch/powerpc/platforms/powermac/smp.c     | 2 +-
 drivers/char/briq_panel.c                 | 2 +-
 drivers/video/riva/fbdev.c                | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 13fcaf5b17960..e49621be66400 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1058,8 +1058,8 @@ core99_reset_cpu(struct device_node *node, long param, long value)
 	if (np == NULL)
 		return -ENODEV;
 	for (np = np->child; np != NULL; np = np->sibling) {
-		u32 *num = get_property(np, "reg", NULL);
-		u32 *rst = get_property(np, "soft-reset", NULL);
+		const u32 *num = get_property(np, "reg", NULL);
+		const u32 *rst = get_property(np, "soft-reset", NULL);
 		if (num == NULL || rst == NULL)
 			continue;
 		if (param == *num) {
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 653eeb64d1e28..1949b657b0926 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -702,7 +702,7 @@ static void __init smp_core99_setup(int ncpus)
 	/* GPIO based HW sync on ppc32 Core99 */
 	if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) {
 		struct device_node *cpu;
-		u32 *tbprop = NULL;
+		const u32 *tbprop = NULL;
 
 		core99_tb_gpio = KL_GPIO_TB_ENABLE;	/* default value */
 		cpu = of_find_node_by_type(NULL, "cpu");
diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c
index a0e5eac5f33aa..caae795dd3e79 100644
--- a/drivers/char/briq_panel.c
+++ b/drivers/char/briq_panel.c
@@ -202,7 +202,7 @@ static struct miscdevice briq_panel_miscdev = {
 static int __init briq_panel_init(void)
 {
 	struct device_node *root = find_path_device("/");
-	char *machine;
+	const char *machine;
 	int i;
 
 	machine = get_property(root, "model", NULL);
diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index 67d1e1c8813d3..61a4665fb4863 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -1827,7 +1827,7 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd)
 	struct riva_par *par = info->par;
 	struct device_node *dp;
 	unsigned char *pedid = NULL;
-	unsigned char *disptype = NULL;
+	const unsigned char *disptype = NULL;
 	static char *propnames[] = {
 		"DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL };
 	int i;
-- 
GitLab


From 2efc80cb8ddc341d81de996920e3b2ad8a12b1f7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 16:45:55 +0100
Subject: [PATCH 1039/1063] [PATCH] #elif that should've been #elif defined

 #elif CONFIG_44x
in ibm4xx.h should've been
 #elif defined(CONFIG_44x)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-ppc/ibm4xx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-ppc/ibm4xx.h b/include/asm-ppc/ibm4xx.h
index cf62b69cb69a1..499c14691c71d 100644
--- a/include/asm-ppc/ibm4xx.h
+++ b/include/asm-ppc/ibm4xx.h
@@ -86,7 +86,7 @@ void ppc4xx_init(unsigned long r3, unsigned long r4, unsigned long r5,
 #define PCI_DRAM_OFFSET	0
 #endif
 
-#elif CONFIG_44x
+#elif defined(CONFIG_44x)
 
 #if defined(CONFIG_BAMBOO)
 #include <platforms/4xx/bamboo.h>
-- 
GitLab


From 4ac493b1d5bfd332f3dee64baaa620961bab6cdc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 18:20:56 +0100
Subject: [PATCH 1040/1063] [PATCH] briq_panel: read() and write() get __user
 pointers, damnit

Annotated the read()/write() buffers as __user and fixed a root hole in
->write(): dereferencing a user-supplied pointer directly is a Bad Idea(tm)...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/briq_panel.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c
index caae795dd3e79..b8c22255f6ada 100644
--- a/drivers/char/briq_panel.c
+++ b/drivers/char/briq_panel.c
@@ -87,7 +87,7 @@ static int briq_panel_release(struct inode *ino, struct file *filep)
 	return 0;
 }
 
-static ssize_t briq_panel_read(struct file *file, char *buf, size_t count,
+static ssize_t briq_panel_read(struct file *file, char __user *buf, size_t count,
 			 loff_t *ppos)
 {
 	unsigned short c;
@@ -135,7 +135,7 @@ static void scroll_vfd( void )
 	vfd_cursor = 20;
 }
 
-static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
+static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_t len,
 			  loff_t *ppos)
 {
 	size_t indx = len;
@@ -150,19 +150,22 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
 		return -EBUSY;
 
 	for (;;) {
+		char c;
 		if (!indx)
 			break;
+		if (get_user(c, buf))
+			return -EFAULT;
 		if (esc) {
-			set_led(*buf);
+			set_led(c);
 			esc = 0;
-		} else if (*buf == 27) {
+		} else if (c == 27) {
 			esc = 1;
-		} else if (*buf == 12) {
+		} else if (c == 12) {
 			/* do a form feed */
 			for (i=0; i<40; i++)
 				vfd[i] = ' ';
 			vfd_cursor = 0;
-		} else if (*buf == 10) {
+		} else if (c == 10) {
 			if (vfd_cursor < 20)
 				vfd_cursor = 20;
 			else if (vfd_cursor < 40)
@@ -175,7 +178,7 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
 			/* just a character */
 			if (vfd_cursor > 39)
 				scroll_vfd();
-			vfd[vfd_cursor++] = *buf;
+			vfd[vfd_cursor++] = c;
 		}
 		indx--;
 		buf++;
-- 
GitLab


From 79da342c31ea839277060c1d2086aaf3b5cd85a4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 23 Sep 2006 18:21:35 +0100
Subject: [PATCH 1041/1063] [PATCH] more get_property() fallout

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/video/riva/fbdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index 61a4665fb4863..4acde4f7dbf89 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -1826,7 +1826,7 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd)
 {
 	struct riva_par *par = info->par;
 	struct device_node *dp;
-	unsigned char *pedid = NULL;
+	const unsigned char *pedid = NULL;
 	const unsigned char *disptype = NULL;
 	static char *propnames[] = {
 		"DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL };
-- 
GitLab


From 231839102b54512ced7d3ee7fc9b8bcf5e3b583b Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dougg@torque.net>
Date: Sat, 16 Sep 2006 20:30:47 -0400
Subject: [PATCH 1042/1063] [SCSI] scsi_debug version 1.80

See http://www.torque.net/sg/sdebug26.html for more
information on the scsi_debug driver.

ChangeLog:
  - add 'vpd_use_hostno' parameter to allow simulated hosts
    to see the same set of targets (and luns). For testing
    multipath software.
  - add 'fake_rw' parameter to ignore the data in READ and
    WRITE commands
  - add support for log subpages (new in SPC-4)
  - yield appropriate block descriptor for MODE SENSE
    commands (only for pdt=0 (i.e. disks))
  - REQUEST SENSE response no longer shows the stopped
    power condition (SAT changed to agree with SPC-3)

Signed-off-by: Douglas Gilbert <dougg@torque.net>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_debug.c | 230 +++++++++++++++++++++++++++++++-------
 1 file changed, 189 insertions(+), 41 deletions(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index a80303c6b3fda..9c0f35820e3e3 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1,5 +1,4 @@
 /*
- *  linux/kernel/scsi_debug.c
  * vvvvvvvvvvvvvvvvvvvvvvv Original vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
  *  Copyright (C) 1992  Eric Youngdale
  *  Simulate a host adapter with 2 disks attached.  Do a lot of checking
@@ -8,7 +7,9 @@
  * ^^^^^^^^^^^^^^^^^^^^^^^ Original ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  *
  *  This version is more generic, simulating a variable number of disk
- *  (or disk like devices) sharing a common amount of RAM
+ *  (or disk like devices) sharing a common amount of RAM. To be more
+ *  realistic, the simulated devices have the transport attributes of
+ *  SAS disks.
  *
  *
  *  For documentation see http://www.torque.net/sg/sdebug26.html
@@ -50,8 +51,8 @@
 #include "scsi_logging.h"
 #include "scsi_debug.h"
 
-#define SCSI_DEBUG_VERSION "1.79"
-static const char * scsi_debug_version_date = "20060604";
+#define SCSI_DEBUG_VERSION "1.80"
+static const char * scsi_debug_version_date = "20060914";
 
 /* Additional Sense Code (ASC) used */
 #define NO_ADDITIONAL_SENSE 0x0
@@ -86,6 +87,8 @@ static const char * scsi_debug_version_date = "20060604";
 #define DEF_D_SENSE   0
 #define DEF_NO_LUN_0   0
 #define DEF_VIRTUAL_GB   0
+#define DEF_FAKE_RW	0
+#define DEF_VPD_USE_HOSTNO 1
 
 /* bit mask values for scsi_debug_opts */
 #define SCSI_DEBUG_OPT_NOISE   1
@@ -127,6 +130,8 @@ static int scsi_debug_ptype = DEF_PTYPE; /* SCSI peripheral type (0==disk) */
 static int scsi_debug_dsense = DEF_D_SENSE;
 static int scsi_debug_no_lun_0 = DEF_NO_LUN_0;
 static int scsi_debug_virtual_gb = DEF_VIRTUAL_GB;
+static int scsi_debug_fake_rw = DEF_FAKE_RW;
+static int scsi_debug_vpd_use_hostno = DEF_VPD_USE_HOSTNO;
 
 static int scsi_debug_cmnd_count = 0;
 
@@ -423,6 +428,8 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done)
 	case READ_6:
 		if ((errsts = check_readiness(SCpnt, 0, devip)))
 			break;
+		if (scsi_debug_fake_rw)
+			break;
 		if ((*cmd) == READ_16) {
 			for (lba = 0, j = 0; j < 8; ++j) {
 				if (j > 0)
@@ -465,6 +472,8 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done)
 	case WRITE_6:
 		if ((errsts = check_readiness(SCpnt, 0, devip)))
 			break;
+		if (scsi_debug_fake_rw)
+			break;
 		if ((*cmd) == WRITE_16) {
 			for (lba = 0, j = 0; j < 8; ++j) {
 				if (j > 0)
@@ -941,6 +950,8 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target,
 		char lu_id_str[6];
 		int host_no = devip->sdbg_host->shost->host_no;
 		
+		if (0 == scsi_debug_vpd_use_hostno)
+			host_no = 0;
 		lu_id_num = devip->wlun ? -1 : (((host_no + 1) * 2000) +
 			    (devip->target * 1000) + devip->lun);
 		target_dev_id = ((host_no + 1) * 2000) +
@@ -1059,19 +1070,6 @@ static int resp_requests(struct scsi_cmnd * scp,
 			arr[12] = THRESHOLD_EXCEEDED;
 			arr[13] = 0xff;		/* TEST set and MRIE==6 */
 		}
-	} else if (devip->stopped) {
-		if (want_dsense) {
-			arr[0] = 0x72;
-			arr[1] = 0x0;		/* NO_SENSE in sense_key */
-			arr[2] = LOW_POWER_COND_ON;
-			arr[3] = 0x0;		/* TEST set and MRIE==6 */
-		} else {
-			arr[0] = 0x70;
-			arr[2] = 0x0;		/* NO_SENSE in sense_key */
-			arr[7] = 0xa;   	/* 18 byte sense buffer */
-			arr[12] = LOW_POWER_COND_ON;
-			arr[13] = 0x0;		/* TEST set and MRIE==6 */
-		}
 	} else {
 		memcpy(arr, sbuff, SDEBUG_SENSE_LEN);
 		if ((cmd[1] & 1) && (! scsi_debug_dsense)) {
@@ -1325,21 +1323,26 @@ static int resp_sas_sha_m_spg(unsigned char * p, int pcontrol)
 static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 			   struct sdebug_dev_info * devip)
 {
-	unsigned char dbd;
-	int pcontrol, pcode, subpcode;
+	unsigned char dbd, llbaa;
+	int pcontrol, pcode, subpcode, bd_len;
 	unsigned char dev_spec;
-	int alloc_len, msense_6, offset, len, errsts, target_dev_id;
+	int k, alloc_len, msense_6, offset, len, errsts, target_dev_id;
 	unsigned char * ap;
 	unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
 
 	if ((errsts = check_readiness(scp, 1, devip)))
 		return errsts;
-	dbd = cmd[1] & 0x8;
+	dbd = !!(cmd[1] & 0x8);
 	pcontrol = (cmd[2] & 0xc0) >> 6;
 	pcode = cmd[2] & 0x3f;
 	subpcode = cmd[3];
 	msense_6 = (MODE_SENSE == cmd[0]);
+	llbaa = msense_6 ? 0 : !!(cmd[1] & 0x10);
+	if ((0 == scsi_debug_ptype) && (0 == dbd))
+		bd_len = llbaa ? 16 : 8;
+	else
+		bd_len = 0;
 	alloc_len = msense_6 ? cmd[4] : ((cmd[7] << 8) | cmd[8]);
 	memset(arr, 0, SDEBUG_MAX_MSENSE_SZ);
 	if (0x3 == pcontrol) {  /* Saving values not supported */
@@ -1349,15 +1352,58 @@ static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 	}
 	target_dev_id = ((devip->sdbg_host->shost->host_no + 1) * 2000) +
 			(devip->target * 1000) - 3;
-	dev_spec = DEV_READONLY(target) ? 0x80 : 0x0;
+	/* set DPOFUA bit for disks */
+	if (0 == scsi_debug_ptype)
+		dev_spec = (DEV_READONLY(target) ? 0x80 : 0x0) | 0x10;
+	else
+		dev_spec = 0x0;
 	if (msense_6) {
 		arr[2] = dev_spec;
+		arr[3] = bd_len;
 		offset = 4;
 	} else {
 		arr[3] = dev_spec;
+		if (16 == bd_len)
+			arr[4] = 0x1;	/* set LONGLBA bit */
+		arr[7] = bd_len;	/* assume 255 or less */
 		offset = 8;
 	}
 	ap = arr + offset;
+	if ((bd_len > 0) && (0 == sdebug_capacity)) {
+		if (scsi_debug_virtual_gb > 0) {
+			sdebug_capacity = 2048 * 1024;
+			sdebug_capacity *= scsi_debug_virtual_gb;
+		} else
+			sdebug_capacity = sdebug_store_sectors;
+	}
+	if (8 == bd_len) {
+		if (sdebug_capacity > 0xfffffffe) {
+			ap[0] = 0xff;
+			ap[1] = 0xff;
+			ap[2] = 0xff;
+			ap[3] = 0xff;
+		} else {
+			ap[0] = (sdebug_capacity >> 24) & 0xff;
+			ap[1] = (sdebug_capacity >> 16) & 0xff;
+			ap[2] = (sdebug_capacity >> 8) & 0xff;
+			ap[3] = sdebug_capacity & 0xff;
+		}
+        	ap[6] = (SECT_SIZE_PER(target) >> 8) & 0xff;
+        	ap[7] = SECT_SIZE_PER(target) & 0xff;
+		offset += bd_len;
+		ap = arr + offset;
+	} else if (16 == bd_len) {
+		unsigned long long capac = sdebug_capacity;
+
+        	for (k = 0; k < 8; ++k, capac >>= 8)
+                	ap[7 - k] = capac & 0xff;
+        	ap[12] = (SECT_SIZE_PER(target) >> 24) & 0xff;
+        	ap[13] = (SECT_SIZE_PER(target) >> 16) & 0xff;
+        	ap[14] = (SECT_SIZE_PER(target) >> 8) & 0xff;
+        	ap[15] = SECT_SIZE_PER(target) & 0xff;
+		offset += bd_len;
+		ap = arr + offset;
+	}
 
 	if ((subpcode > 0x0) && (subpcode < 0xff) && (0x19 != pcode)) {
 		/* TODO: Control Extension page */
@@ -1471,7 +1517,7 @@ static int resp_mode_select(struct scsi_cmnd * scp, int mselect6,
                        " IO sent=%d bytes\n", param_len, res);
 	md_len = mselect6 ? (arr[0] + 1) : ((arr[0] << 8) + arr[1] + 2);
 	bd_len = mselect6 ? arr[3] : ((arr[6] << 8) + arr[7]);
-	if ((md_len > 2) || (0 != bd_len)) {
+	if (md_len > 2) {
 		mk_sense_buffer(devip, ILLEGAL_REQUEST,
 				INVALID_FIELD_IN_PARAM_LIST, 0);
 		return check_condition_result;
@@ -1544,7 +1590,7 @@ static int resp_ie_l_pg(unsigned char * arr)
 static int resp_log_sense(struct scsi_cmnd * scp,
                           struct sdebug_dev_info * devip)
 {
-	int ppc, sp, pcontrol, pcode, alloc_len, errsts, len, n;
+	int ppc, sp, pcontrol, pcode, subpcode, alloc_len, errsts, len, n;
 	unsigned char arr[SDEBUG_MAX_LSENSE_SZ];
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
 
@@ -1560,23 +1606,63 @@ static int resp_log_sense(struct scsi_cmnd * scp,
 	}
 	pcontrol = (cmd[2] & 0xc0) >> 6;
 	pcode = cmd[2] & 0x3f;
+	subpcode = cmd[3] & 0xff;
 	alloc_len = (cmd[7] << 8) + cmd[8];
 	arr[0] = pcode;
-	switch (pcode) {
-	case 0x0:	/* Supported log pages log page */
-		n = 4;
-		arr[n++] = 0x0;		/* this page */
-		arr[n++] = 0xd;		/* Temperature */
-		arr[n++] = 0x2f;	/* Informational exceptions */
-		arr[3] = n - 4;
-		break;
-	case 0xd:	/* Temperature log page */
-		arr[3] = resp_temp_l_pg(arr + 4);
-		break;
-	case 0x2f:	/* Informational exceptions log page */
-		arr[3] = resp_ie_l_pg(arr + 4);
-		break;
-	default:
+	if (0 == subpcode) {
+		switch (pcode) {
+		case 0x0:	/* Supported log pages log page */
+			n = 4;
+			arr[n++] = 0x0;		/* this page */
+			arr[n++] = 0xd;		/* Temperature */
+			arr[n++] = 0x2f;	/* Informational exceptions */
+			arr[3] = n - 4;
+			break;
+		case 0xd:	/* Temperature log page */
+			arr[3] = resp_temp_l_pg(arr + 4);
+			break;
+		case 0x2f:	/* Informational exceptions log page */
+			arr[3] = resp_ie_l_pg(arr + 4);
+			break;
+		default:
+			mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_FIELD_IN_CDB, 0);
+			return check_condition_result;
+		}
+	} else if (0xff == subpcode) {
+		arr[0] |= 0x40;
+		arr[1] = subpcode;
+		switch (pcode) {
+		case 0x0:	/* Supported log pages and subpages log page */
+			n = 4;
+			arr[n++] = 0x0;
+			arr[n++] = 0x0;		/* 0,0 page */
+			arr[n++] = 0x0;
+			arr[n++] = 0xff;	/* this page */
+			arr[n++] = 0xd;
+			arr[n++] = 0x0;		/* Temperature */
+			arr[n++] = 0x2f;
+			arr[n++] = 0x0;	/* Informational exceptions */
+			arr[3] = n - 4;
+			break;
+		case 0xd:	/* Temperature subpages */
+			n = 4;
+			arr[n++] = 0xd;
+			arr[n++] = 0x0;		/* Temperature */
+			arr[3] = n - 4;
+			break;
+		case 0x2f:	/* Informational exceptions subpages */
+			n = 4;
+			arr[n++] = 0x2f;
+			arr[n++] = 0x0;		/* Informational exceptions */
+			arr[3] = n - 4;
+			break;
+		default:
+			mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_FIELD_IN_CDB, 0);
+			return check_condition_result;
+		}
+	} else {
 		mk_sense_buffer(devip, ILLEGAL_REQUEST,
 				INVALID_FIELD_IN_CDB, 0);
 		return check_condition_result;
@@ -2151,11 +2237,18 @@ static int schedule_resp(struct scsi_cmnd * cmnd,
 	}
 }
 
+/* Note: The following macros create attribute files in the
+   /sys/module/scsi_debug/parameters directory. Unfortunately this
+   driver is unaware of a change and cannot trigger auxiliary actions
+   as it can when the corresponding attribute in the
+   /sys/bus/pseudo/drivers/scsi_debug directory is changed.
+ */
 module_param_named(add_host, scsi_debug_add_host, int, S_IRUGO | S_IWUSR);
 module_param_named(delay, scsi_debug_delay, int, S_IRUGO | S_IWUSR);
 module_param_named(dev_size_mb, scsi_debug_dev_size_mb, int, S_IRUGO);
 module_param_named(dsense, scsi_debug_dsense, int, S_IRUGO | S_IWUSR);
 module_param_named(every_nth, scsi_debug_every_nth, int, S_IRUGO | S_IWUSR);
+module_param_named(fake_rw, scsi_debug_fake_rw, int, S_IRUGO | S_IWUSR);
 module_param_named(max_luns, scsi_debug_max_luns, int, S_IRUGO | S_IWUSR);
 module_param_named(no_lun_0, scsi_debug_no_lun_0, int, S_IRUGO | S_IWUSR);
 module_param_named(num_parts, scsi_debug_num_parts, int, S_IRUGO);
@@ -2164,6 +2257,8 @@ module_param_named(opts, scsi_debug_opts, int, S_IRUGO | S_IWUSR);
 module_param_named(ptype, scsi_debug_ptype, int, S_IRUGO | S_IWUSR);
 module_param_named(scsi_level, scsi_debug_scsi_level, int, S_IRUGO);
 module_param_named(virtual_gb, scsi_debug_virtual_gb, int, S_IRUGO | S_IWUSR);
+module_param_named(vpd_use_hostno, scsi_debug_vpd_use_hostno, int,
+		   S_IRUGO | S_IWUSR);
 
 MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert");
 MODULE_DESCRIPTION("SCSI debug adapter driver");
@@ -2175,6 +2270,7 @@ MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)");
 MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)");
 MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)");
 MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)");
+MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)");
 MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
 MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)");
 MODULE_PARM_DESC(num_parts, "number of partitions(def=0)");
@@ -2183,6 +2279,7 @@ MODULE_PARM_DESC(opts, "1->noise, 2->medium_error, 4->... (def=0)");
 MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
 MODULE_PARM_DESC(scsi_level, "SCSI level to simulate(def=5[SPC-3])");
 MODULE_PARM_DESC(virtual_gb, "virtual gigabyte size (def=0 -> use dev_size_mb)");
+MODULE_PARM_DESC(vpd_use_hostno, "0 -> dev ids ignore hostno (def=1 -> unique dev ids)");
 
 
 static char sdebug_info[256];
@@ -2334,6 +2431,24 @@ static ssize_t sdebug_dsense_store(struct device_driver * ddp,
 DRIVER_ATTR(dsense, S_IRUGO | S_IWUSR, sdebug_dsense_show,
 	    sdebug_dsense_store);
 
+static ssize_t sdebug_fake_rw_show(struct device_driver * ddp, char * buf)
+{
+        return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_fake_rw);
+}
+static ssize_t sdebug_fake_rw_store(struct device_driver * ddp,
+				    const char * buf, size_t count)
+{
+        int n;
+
+	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) {
+		scsi_debug_fake_rw = n;
+		return count;
+	}
+	return -EINVAL;
+}
+DRIVER_ATTR(fake_rw, S_IRUGO | S_IWUSR, sdebug_fake_rw_show,
+	    sdebug_fake_rw_store);
+
 static ssize_t sdebug_no_lun_0_show(struct device_driver * ddp, char * buf)
 {
         return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_no_lun_0);
@@ -2487,6 +2602,31 @@ static ssize_t sdebug_add_host_store(struct device_driver * ddp,
 DRIVER_ATTR(add_host, S_IRUGO | S_IWUSR, sdebug_add_host_show, 
 	    sdebug_add_host_store);
 
+static ssize_t sdebug_vpd_use_hostno_show(struct device_driver * ddp,
+					  char * buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_vpd_use_hostno);
+}
+static ssize_t sdebug_vpd_use_hostno_store(struct device_driver * ddp,
+					   const char * buf, size_t count)
+{
+	int n;
+
+	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) {
+		scsi_debug_vpd_use_hostno = n;
+		return count;
+	}
+	return -EINVAL;
+}
+DRIVER_ATTR(vpd_use_hostno, S_IRUGO | S_IWUSR, sdebug_vpd_use_hostno_show,
+	    sdebug_vpd_use_hostno_store);
+
+/* Note: The following function creates attribute files in the
+   /sys/bus/pseudo/drivers/scsi_debug directory. The advantage of these
+   files (over those found in the /sys/module/scsi_debug/parameters
+   directory) is that auxiliary actions can be triggered when an attribute
+   is changed. For example see: sdebug_add_host_store() above.
+ */
 static int do_create_driverfs_files(void)
 {
 	int ret;
@@ -2496,23 +2636,31 @@ static int do_create_driverfs_files(void)
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_dev_size_mb);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_dsense);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_every_nth);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_fake_rw);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_max_luns);
-	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_tgts);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_no_lun_0);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_parts);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_tgts);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_ptype);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_opts);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_scsi_level);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_virtual_gb);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_vpd_use_hostno);
 	return ret;
 }
 
 static void do_remove_driverfs_files(void)
 {
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_vpd_use_hostno);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_virtual_gb);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_scsi_level);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_opts);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_ptype);
-	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_parts);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_tgts);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_parts);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_no_lun_0);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_max_luns);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_fake_rw);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_every_nth);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_dsense);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_dev_size_mb);
-- 
GitLab


From 3e3c60e3a8c7013d55768aa7256bb5a7f66b0bb4 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 6 Sep 2006 09:04:40 -0500
Subject: [PATCH 1043/1063] [SCSI] aic7xxx: avoid checking SBLKCTL register for
 certain cards

For cards that don't support LVD, checking the SBLKCTL register to
determine the bus singalling doesn't work.  So, check that the card
supports LVD first (AHC_ULTRA2) before checking the register.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic7xxx/aic7xxx_osm.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index e5bb4d87b3073..0b3c01ac42596 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -2539,15 +2539,23 @@ static void ahc_linux_set_iu(struct scsi_target *starget, int iu)
 static void ahc_linux_get_signalling(struct Scsi_Host *shost)
 {
 	struct ahc_softc *ahc = *(struct ahc_softc **)shost->hostdata;
-	u8 mode = ahc_inb(ahc, SBLKCTL);
+	u8 mode;
 
-	if (mode & ENAB40)
-		spi_signalling(shost) = SPI_SIGNAL_LVD;
-	else if (mode & ENAB20)
+	if (!(ahc->features & AHC_ULTRA2)) {
+		/* non-LVD chipset, may not have SBLKCTL reg */
 		spi_signalling(shost) = 
 			ahc->features & AHC_HVD ?
 			SPI_SIGNAL_HVD :
 			SPI_SIGNAL_SE;
+		return;
+	}
+
+	mode = ahc_inb(ahc, SBLKCTL);
+
+	if (mode & ENAB40)
+		spi_signalling(shost) = SPI_SIGNAL_LVD;
+	else if (mode & ENAB20)
+		spi_signalling(shost) = SPI_SIGNAL_SE;
 	else
 		spi_signalling(shost) = SPI_SIGNAL_UNKNOWN;
 }
-- 
GitLab


From cf2b5d3fcab77a9390293920ec5b49e67eced200 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Sun, 17 Sep 2006 07:38:15 +0200
Subject: [PATCH 1044/1063] [SCSI] aic7xxx: pause sequencer before touching
 SBLKCTL

Some cards need to pause the sequencer before the SBLKCTL register is
touched.  This fixes a PCI related oops seen on powerpc macs with this
card caused by trying to ascertain the bus signalling before beginning
domain validation.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aic7xxx/aic7xxx_osm.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index 0b3c01ac42596..64c8b88a429fa 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -2539,6 +2539,7 @@ static void ahc_linux_set_iu(struct scsi_target *starget, int iu)
 static void ahc_linux_get_signalling(struct Scsi_Host *shost)
 {
 	struct ahc_softc *ahc = *(struct ahc_softc **)shost->hostdata;
+	unsigned long flags;
 	u8 mode;
 
 	if (!(ahc->features & AHC_ULTRA2)) {
@@ -2550,7 +2551,11 @@ static void ahc_linux_get_signalling(struct Scsi_Host *shost)
 		return;
 	}
 
+	ahc_lock(ahc, &flags);
+	ahc_pause(ahc);
 	mode = ahc_inb(ahc, SBLKCTL);
+	ahc_unpause(ahc);
+	ahc_unlock(ahc, &flags);
 
 	if (mode & ENAB40)
 		spi_signalling(shost) = SPI_SIGNAL_LVD;
-- 
GitLab


From d136205182b1ea4897da31e325a296f8831a6796 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Sep 2006 22:28:04 +0200
Subject: [PATCH 1045/1063] [SCSI] zfcp: remove zfcp_ccw_unregister function

Remove unused zfcp_ccw_unregister function (leftover from zfcp's
module_exit era).

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/s390/scsi/zfcp_ccw.c | 13 -------------
 drivers/s390/scsi/zfcp_ext.h |  1 -
 2 files changed, 14 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index fdabadeaa9ee1..81680efa17213 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -274,19 +274,6 @@ zfcp_ccw_register(void)
 	return retval;
 }
 
-/**
- * zfcp_ccw_unregister - ccw unregister function
- *
- * Unregisters the driver from common i/o layer. Function will be called at
- * module unload/system shutdown.
- */
-void __exit
-zfcp_ccw_unregister(void)
-{
-	zfcp_sysfs_driver_remove_files(&zfcp_ccw_driver.driver);
-	ccw_driver_unregister(&zfcp_ccw_driver);
-}
-
 /**
  * zfcp_ccw_shutdown - gets called on reboot/shutdown
  *
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 146d7a2b4c4a0..b45d1bf297a83 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -55,7 +55,6 @@ extern void   zfcp_unit_dequeue(struct zfcp_unit *);
 
 /******************************* S/390 IO ************************************/
 extern int  zfcp_ccw_register(void);
-extern void zfcp_ccw_unregister(void);
 
 extern void zfcp_qdio_zero_sbals(struct qdio_buffer **, int, int);
 extern int  zfcp_qdio_allocate(struct zfcp_adapter *);
-- 
GitLab


From dd52e0eaf891cd85bf2ca057c15ed6bfd76db4e6 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Sep 2006 22:28:49 +0200
Subject: [PATCH 1046/1063] [SCSI] zfcp: create private slab caches to
 guarantee proper data alignment

Create private slab caches in order to guarantee proper alignment of
data structures that get passed to hardware.

Sidenote: with this patch slab cache debugging will finally work on s390
(at least no known problems left).

Furthermore this patch does some minor cleanups:
- store ptr for transport template in struct zfcp_data

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Compile fix ups and
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/s390/scsi/zfcp_aux.c  | 76 ++++++++++++++++++++++++++---------
 drivers/s390/scsi/zfcp_def.h  | 16 +++++---
 drivers/s390/scsi/zfcp_ext.h  |  1 -
 drivers/s390/scsi/zfcp_fsf.c  | 33 ++++++++++-----
 drivers/s390/scsi/zfcp_scsi.c |  5 +--
 5 files changed, 92 insertions(+), 39 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index adc9d8f2c28fa..d2b094d9c34f8 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -299,11 +299,45 @@ zfcp_init_device_configure(void)
 	return;
 }
 
+static int calc_alignment(int size)
+{
+	int align = 1;
+
+	if (!size)
+		return 0;
+
+	while ((size - align) > 0)
+		align <<= 1;
+
+	return align;
+}
+
 static int __init
 zfcp_module_init(void)
 {
+	int retval = -ENOMEM;
+	int size, align;
+
+	size = sizeof(struct zfcp_fsf_req_qtcb);
+	align = calc_alignment(size);
+	zfcp_data.fsf_req_qtcb_cache =
+		kmem_cache_create("zfcp_fsf", size, align, 0, NULL, NULL);
+	if (!zfcp_data.fsf_req_qtcb_cache)
+		goto out;
 
-	int retval = 0;
+	size = sizeof(struct fsf_status_read_buffer);
+	align = calc_alignment(size);
+	zfcp_data.sr_buffer_cache =
+		kmem_cache_create("zfcp_sr", size, align, 0, NULL, NULL);
+	if (!zfcp_data.sr_buffer_cache)
+		goto out_sr_cache;
+
+	size = sizeof(struct zfcp_gid_pn_data);
+	align = calc_alignment(size);
+	zfcp_data.gid_pn_cache =
+		kmem_cache_create("zfcp_gid", size, align, 0, NULL, NULL);
+	if (!zfcp_data.gid_pn_cache)
+		goto out_gid_cache;
 
 	atomic_set(&zfcp_data.loglevel, loglevel);
 
@@ -313,15 +347,16 @@ zfcp_module_init(void)
 	/* initialize adapters to be removed list head */
 	INIT_LIST_HEAD(&zfcp_data.adapter_remove_lh);
 
-	zfcp_transport_template = fc_attach_transport(&zfcp_transport_functions);
-	if (!zfcp_transport_template)
-		return -ENODEV;
+	zfcp_data.scsi_transport_template =
+		fc_attach_transport(&zfcp_transport_functions);
+	if (!zfcp_data.scsi_transport_template)
+		goto out_transport;
 
 	retval = misc_register(&zfcp_cfdc_misc);
 	if (retval != 0) {
 		ZFCP_LOG_INFO("registration of misc device "
 			      "zfcp_cfdc failed\n");
-		goto out;
+		goto out_misc;
 	}
 
 	ZFCP_LOG_TRACE("major/minor for zfcp_cfdc: %d/%d\n",
@@ -333,9 +368,6 @@ zfcp_module_init(void)
 	/* initialise configuration rw lock */
 	rwlock_init(&zfcp_data.config_lock);
 
-	/* save address of data structure managing the driver module */
-	zfcp_data.scsi_host_template.module = THIS_MODULE;
-
 	/* setup dynamic I/O */
 	retval = zfcp_ccw_register();
 	if (retval) {
@@ -350,6 +382,14 @@ zfcp_module_init(void)
 
  out_ccw_register:
 	misc_deregister(&zfcp_cfdc_misc);
+ out_misc:
+	fc_release_transport(zfcp_data.scsi_transport_template);
+ out_transport:
+	kmem_cache_destroy(zfcp_data.gid_pn_cache);
+ out_gid_cache:
+	kmem_cache_destroy(zfcp_data.sr_buffer_cache);
+ out_sr_cache:
+	kmem_cache_destroy(zfcp_data.fsf_req_qtcb_cache);
  out:
 	return retval;
 }
@@ -935,20 +975,20 @@ static int
 zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter)
 {
 	adapter->pool.fsf_req_erp =
-		mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ERP_NR,
-				sizeof(struct zfcp_fsf_req_pool_element));
+		mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_ERP_NR,
+					 zfcp_data.fsf_req_qtcb_cache);
 	if (!adapter->pool.fsf_req_erp)
 		return -ENOMEM;
 
 	adapter->pool.fsf_req_scsi =
-		mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_SCSI_NR,
-				sizeof(struct zfcp_fsf_req_pool_element));
+		mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_SCSI_NR,
+					 zfcp_data.fsf_req_qtcb_cache);
 	if (!adapter->pool.fsf_req_scsi)
 		return -ENOMEM;
 
 	adapter->pool.fsf_req_abort =
-		mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ABORT_NR,
-				sizeof(struct zfcp_fsf_req_pool_element));
+		mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_ABORT_NR,
+					 zfcp_data.fsf_req_qtcb_cache);
 	if (!adapter->pool.fsf_req_abort)
 		return -ENOMEM;
 
@@ -959,14 +999,14 @@ zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter)
 		return -ENOMEM;
 
 	adapter->pool.data_status_read =
-		mempool_create_kmalloc_pool(ZFCP_POOL_STATUS_READ_NR,
-					sizeof(struct fsf_status_read_buffer));
+		mempool_create_slab_pool(ZFCP_POOL_STATUS_READ_NR,
+					 zfcp_data.sr_buffer_cache);
 	if (!adapter->pool.data_status_read)
 		return -ENOMEM;
 
 	adapter->pool.data_gid_pn =
-		mempool_create_kmalloc_pool(ZFCP_POOL_DATA_GID_PN_NR,
-					    sizeof(struct zfcp_gid_pn_data));
+		mempool_create_slab_pool(ZFCP_POOL_DATA_GID_PN_NR,
+					 zfcp_data.gid_pn_cache);
 	if (!adapter->pool.data_gid_pn)
 		return -ENOMEM;
 
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 7c84b3d4bd94f..ef1cd49184e8b 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -19,7 +19,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
  */ 
 
-
 #ifndef ZFCP_DEF_H
 #define ZFCP_DEF_H
 
@@ -32,6 +31,10 @@
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/syscalls.h>
+#include <linux/ioctl.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_cmnd.h>
@@ -39,14 +42,11 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_fc.h>
-#include "zfcp_fsf.h"
 #include <asm/ccwdev.h>
 #include <asm/qdio.h>
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
-#include <linux/mempool.h>
-#include <linux/syscalls.h>
-#include <linux/ioctl.h>
+#include "zfcp_fsf.h"
 
 
 /********************* GENERAL DEFINES *********************************/
@@ -1016,6 +1016,7 @@ typedef void zfcp_fsf_req_handler_t(struct zfcp_fsf_req*);
 /* driver data */
 struct zfcp_data {
 	struct scsi_host_template scsi_host_template;
+	struct scsi_transport_template *scsi_transport_template;
         atomic_t                status;             /* Module status flags */
 	struct list_head	adapter_list_head;  /* head of adapter list */
 	struct list_head	adapter_remove_lh;  /* head of adapters to be
@@ -1031,6 +1032,9 @@ struct zfcp_data {
 	wwn_t                   init_wwpn;
 	fcp_lun_t               init_fcp_lun;
 	char 			*driver_version;
+	kmem_cache_t		*fsf_req_qtcb_cache;
+	kmem_cache_t		*sr_buffer_cache;
+	kmem_cache_t		*gid_pn_cache;
 };
 
 /**
@@ -1051,7 +1055,7 @@ struct zfcp_sg_list {
 #define ZFCP_POOL_DATA_GID_PN_NR	1
 
 /* struct used by memory pools for fsf_requests */
-struct zfcp_fsf_req_pool_element {
+struct zfcp_fsf_req_qtcb {
 	struct zfcp_fsf_req fsf_req;
 	struct fsf_qtcb qtcb;
 };
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index b45d1bf297a83..4f4ef0c4ca7b2 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -130,7 +130,6 @@ extern int zfcp_scsi_command_async(struct zfcp_adapter *,struct zfcp_unit *,
 				   struct scsi_cmnd *, struct timer_list *);
 extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *,
 				  struct timer_list *);
-extern struct scsi_transport_template *zfcp_transport_template;
 extern struct fc_function_template zfcp_transport_functions;
 
 /******************************** ERP ****************************************/
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index ff2eacf5ec8c8..4913ffbb2fc8a 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -100,14 +100,19 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags)
 	if (req_flags & ZFCP_REQ_NO_QTCB)
 		size = sizeof(struct zfcp_fsf_req);
 	else
-		size = sizeof(struct zfcp_fsf_req_pool_element);
+		size = sizeof(struct zfcp_fsf_req_qtcb);
 
-	if (likely(pool != NULL))
+	if (likely(pool))
 		ptr = mempool_alloc(pool, GFP_ATOMIC);
-	else
-		ptr = kmalloc(size, GFP_ATOMIC);
+	else {
+		if (req_flags & ZFCP_REQ_NO_QTCB)
+			ptr = kmalloc(size, GFP_ATOMIC);
+		else
+			ptr = kmem_cache_alloc(zfcp_data.fsf_req_qtcb_cache,
+					       SLAB_ATOMIC);
+	}
 
-	if (unlikely(NULL == ptr))
+	if (unlikely(!ptr))
 		goto out;
 
 	memset(ptr, 0, size);
@@ -115,9 +120,8 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags)
 	if (req_flags & ZFCP_REQ_NO_QTCB) {
 		fsf_req = (struct zfcp_fsf_req *) ptr;
 	} else {
-		fsf_req = &((struct zfcp_fsf_req_pool_element *) ptr)->fsf_req;
-		fsf_req->qtcb =
-			&((struct zfcp_fsf_req_pool_element *) ptr)->qtcb;
+		fsf_req = &((struct zfcp_fsf_req_qtcb *) ptr)->fsf_req;
+		fsf_req->qtcb =	&((struct zfcp_fsf_req_qtcb *) ptr)->qtcb;
 	}
 
 	fsf_req->pool = pool;
@@ -139,10 +143,17 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags)
 void
 zfcp_fsf_req_free(struct zfcp_fsf_req *fsf_req)
 {
-	if (likely(fsf_req->pool != NULL))
+	if (likely(fsf_req->pool)) {
 		mempool_free(fsf_req, fsf_req->pool);
-	else
-		kfree(fsf_req);
+		return;
+	}
+
+	if (fsf_req->qtcb) {
+		kmem_cache_free(zfcp_data.fsf_req_qtcb_cache, fsf_req);
+		return;
+	}
+
+	kfree(fsf_req);
 }
 
 /**
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 1bb55086db9f4..4857cccb1d5bf 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -39,11 +39,10 @@ static struct zfcp_unit *zfcp_unit_lookup(struct zfcp_adapter *, int,
 
 static struct device_attribute *zfcp_sysfs_sdev_attrs[];
 
-struct scsi_transport_template *zfcp_transport_template;
-
 struct zfcp_data zfcp_data = {
 	.scsi_host_template = {
 		.name			= ZFCP_NAME,
+		.module			= THIS_MODULE,
 		.proc_name		= "zfcp",
 		.slave_alloc		= zfcp_scsi_slave_alloc,
 		.slave_configure	= zfcp_scsi_slave_configure,
@@ -607,7 +606,7 @@ zfcp_adapter_scsi_register(struct zfcp_adapter *adapter)
 	adapter->scsi_host->max_channel = 0;
 	adapter->scsi_host->unique_id = unique_id++;	/* FIXME */
 	adapter->scsi_host->max_cmd_len = ZFCP_MAX_SCSI_CMND_LENGTH;
-	adapter->scsi_host->transportt = zfcp_transport_template;
+	adapter->scsi_host->transportt = zfcp_data.scsi_transport_template;
 
 	/*
 	 * save a pointer to our own adapter data structure within
-- 
GitLab


From 4eff4a36516d72e4f6ede901141214a7e05607e7 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <aherrman@de.ibm.com>
Date: Mon, 18 Sep 2006 22:29:20 +0200
Subject: [PATCH 1047/1063] [SCSI] zfcp: fix: use correct req_id in
 eh_abort_handler

zfcp's eh_abort_handler used the wrong request ID to
identify the request to be aborted. The bug was introduced
with commit fea9d6c7bcd8ff1d60ff74f27ba483b3820b18a3
for improved management of request IDs. The bug is
fixed with this patch.

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/s390/scsi/zfcp_aux.c  |  4 +++
 drivers/s390/scsi/zfcp_dbf.c  | 13 ++++----
 drivers/s390/scsi/zfcp_ext.h  |  2 +-
 drivers/s390/scsi/zfcp_fsf.c  |  9 +++--
 drivers/s390/scsi/zfcp_scsi.c | 63 +++++++++++++++--------------------
 5 files changed, 42 insertions(+), 49 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index d2b094d9c34f8..504c9219961c6 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -189,6 +189,10 @@ struct zfcp_fsf_req *zfcp_reqlist_ismember(struct zfcp_adapter *adapter,
 	struct zfcp_fsf_req *request, *tmp;
 	unsigned int i;
 
+	/* 0 is reserved as an invalid req_id */
+	if (req_id == 0)
+		return NULL;
+
 	i = req_id % REQUEST_LIST_SIZE;
 
 	list_for_each_entry_safe(request, tmp, &adapter->req_list[i], list)
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index c033145d0f19f..0aa3b1ac76af4 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -707,7 +707,7 @@ _zfcp_scsi_dbf_event_common(const char *tag, const char *tag2, int level,
 			    struct zfcp_adapter *adapter,
 			    struct scsi_cmnd *scsi_cmnd,
 			    struct zfcp_fsf_req *fsf_req,
-			    struct zfcp_fsf_req *old_fsf_req)
+			    unsigned long old_req_id)
 {
 	struct zfcp_scsi_dbf_record *rec = &adapter->scsi_dbf_buf;
 	struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)rec;
@@ -768,8 +768,7 @@ _zfcp_scsi_dbf_event_common(const char *tag, const char *tag2, int level,
 				rec->fsf_seqno = fsf_req->seq_no;
 				rec->fsf_issued = fsf_req->issued;
 			}
-			rec->type.old_fsf_reqid =
-				    (unsigned long) old_fsf_req;
+			rec->type.old_fsf_reqid = old_req_id;
 		} else {
 			strncpy(dump->tag, "dump", ZFCP_DBF_TAG_SIZE);
 			dump->total_size = buflen;
@@ -794,17 +793,17 @@ zfcp_scsi_dbf_event_result(const char *tag, int level,
 			   struct zfcp_fsf_req *fsf_req)
 {
 	_zfcp_scsi_dbf_event_common("rslt", tag, level,
-			adapter, scsi_cmnd, fsf_req, NULL);
+			adapter, scsi_cmnd, fsf_req, 0);
 }
 
 inline void
 zfcp_scsi_dbf_event_abort(const char *tag, struct zfcp_adapter *adapter,
 			  struct scsi_cmnd *scsi_cmnd,
 			  struct zfcp_fsf_req *new_fsf_req,
-			  struct zfcp_fsf_req *old_fsf_req)
+			  unsigned long old_req_id)
 {
 	_zfcp_scsi_dbf_event_common("abrt", tag, 1,
-			adapter, scsi_cmnd, new_fsf_req, old_fsf_req);
+			adapter, scsi_cmnd, new_fsf_req, old_req_id);
 }
 
 inline void
@@ -814,7 +813,7 @@ zfcp_scsi_dbf_event_devreset(const char *tag, u8 flag, struct zfcp_unit *unit,
 	struct zfcp_adapter *adapter = unit->port->adapter;
 
 	_zfcp_scsi_dbf_event_common(flag == FCP_TARGET_RESET ? "trst" : "lrst",
-			tag, 1, adapter, scsi_cmnd, NULL, NULL);
+			tag, 1, adapter, scsi_cmnd, NULL, 0);
 }
 
 static int
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 4f4ef0c4ca7b2..710ebbf899295 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -185,7 +185,7 @@ extern void zfcp_scsi_dbf_event_result(const char *, int, struct zfcp_adapter *,
 				       struct zfcp_fsf_req *);
 extern void zfcp_scsi_dbf_event_abort(const char *, struct zfcp_adapter *,
 				      struct scsi_cmnd *, struct zfcp_fsf_req *,
-				      struct zfcp_fsf_req *);
+				      unsigned long);
 extern void zfcp_scsi_dbf_event_devreset(const char *, u8, struct zfcp_unit *,
 					 struct scsi_cmnd *);
 extern void zfcp_reqlist_add(struct zfcp_adapter *, struct zfcp_fsf_req *);
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 4913ffbb2fc8a..a66b5193b70e1 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -3527,7 +3527,7 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter,
 	fsf_req->unit = unit;
 
 	/* associate FSF request with SCSI request (for look up on abort) */
-	scsi_cmnd->host_scribble = (char *) fsf_req;
+	scsi_cmnd->host_scribble = (unsigned char *) fsf_req->req_id;
 
 	/* associate SCSI command with FSF request */
 	fsf_req->data = (unsigned long) scsi_cmnd;
@@ -4667,7 +4667,6 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 {
 	volatile struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *fsf_req = NULL;
-	unsigned long flags;
 	int ret = 0;
 	struct zfcp_qdio_queue *req_queue = &adapter->request_queue;
 
@@ -4684,10 +4683,10 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 	fsf_req->fsf_command = fsf_cmd;
 	INIT_LIST_HEAD(&fsf_req->list);
 	
-	/* unique request id */
-	spin_lock_irqsave(&adapter->req_list_lock, flags);
+	/* this is serialized (we are holding req_queue-lock of adapter */
+	if (adapter->req_no == 0)
+		adapter->req_no++;
 	fsf_req->req_id = adapter->req_no++;
-	spin_unlock_irqrestore(&adapter->req_list_lock, flags);
 
         zfcp_fsf_req_qtcb_init(fsf_req);
 
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 4857cccb1d5bf..043ed7c0a7ed5 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -378,16 +378,15 @@ zfcp_unit_lookup(struct zfcp_adapter *adapter, int channel, unsigned int id,
  * will handle late commands.  (Usually, the normal completion of late
  * commands is ignored with respect to the running abort operation.)
  */
-int
-zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
+int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
 {
  	struct Scsi_Host *scsi_host;
  	struct zfcp_adapter *adapter;
 	struct zfcp_unit *unit;
-	int retval = SUCCESS;
-	struct zfcp_fsf_req *new_fsf_req = NULL;
-	struct zfcp_fsf_req *old_fsf_req;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long flags;
+	unsigned long old_req_id;
+	int retval = SUCCESS;
 
 	scsi_host = scpnt->device->host;
 	adapter = (struct zfcp_adapter *) scsi_host->hostdata[0];
@@ -399,55 +398,47 @@ zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
 	/* avoid race condition between late normal completion and abort */
 	write_lock_irqsave(&adapter->abort_lock, flags);
 
-	/*
-	 * Check whether command has just completed and can not be aborted.
-	 * Even if the command has just been completed late, we can access
-	 * scpnt since the SCSI stack does not release it at least until
-	 * this routine returns. (scpnt is parameter passed to this routine
-	 * and must not disappear during abort even on late completion.)
-	 */
-	old_fsf_req = (struct zfcp_fsf_req *) scpnt->host_scribble;
-	if (!old_fsf_req) {
+	/* Check whether corresponding fsf_req is still pending */
+	spin_lock(&adapter->req_list_lock);
+	fsf_req = zfcp_reqlist_ismember(adapter, (unsigned long)
+					scpnt->host_scribble);
+	spin_unlock(&adapter->req_list_lock);
+	if (!fsf_req) {
 		write_unlock_irqrestore(&adapter->abort_lock, flags);
-		zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, NULL);
+		zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, 0);
 		retval = SUCCESS;
 		goto out;
 	}
-	old_fsf_req->data = 0;
-	old_fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING;
+	fsf_req->data = 0;
+	fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING;
+	old_req_id = fsf_req->req_id;
 
-	/* don't access old_fsf_req after releasing the abort_lock */
+	/* don't access old fsf_req after releasing the abort_lock */
 	write_unlock_irqrestore(&adapter->abort_lock, flags);
-	/* call FSF routine which does the abort */
-	new_fsf_req = zfcp_fsf_abort_fcp_command((unsigned long) old_fsf_req,
-						 adapter, unit, 0);
-	if (!new_fsf_req) {
+
+	fsf_req = zfcp_fsf_abort_fcp_command(old_req_id, adapter, unit, 0);
+	if (!fsf_req) {
 		ZFCP_LOG_INFO("error: initiation of Abort FCP Cmnd failed\n");
 		zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL,
-					  old_fsf_req);
+					  old_req_id);
 		retval = FAILED;
 		goto out;
 	}
 
-	/* wait for completion of abort */
-	__wait_event(new_fsf_req->completion_wq,
-		     new_fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
+	__wait_event(fsf_req->completion_wq,
+		     fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
 
-	/* status should be valid since signals were not permitted */
-	if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
-		zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, new_fsf_req,
-					  NULL);
+	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
+		zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, fsf_req, 0);
 		retval = SUCCESS;
-	} else if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
-		zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, new_fsf_req,
-					  NULL);
+	} else if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
+		zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, fsf_req, 0);
 		retval = SUCCESS;
 	} else {
-		zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, new_fsf_req,
-					  NULL);
+		zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, fsf_req, 0);
 		retval = FAILED;
 	}
-	zfcp_fsf_req_free(new_fsf_req);
+	zfcp_fsf_req_free(fsf_req);
  out:
 	return retval;
 }
-- 
GitLab


From 2abbe866c8eb0296e3f5343bcf73e5371522a738 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <aherrman@de.ibm.com>
Date: Mon, 18 Sep 2006 22:29:56 +0200
Subject: [PATCH 1048/1063] [SCSI] zfcp: introduce struct timer_list in struct
 zfcp_fsf_req

This instance will be used whenever a timer is needed for
a request by zfcp.

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/s390/scsi/zfcp_aux.c  |   4 -
 drivers/s390/scsi/zfcp_def.h  |   8 +-
 drivers/s390/scsi/zfcp_erp.c  | 228 ++++++++++--------------------
 drivers/s390/scsi/zfcp_ext.h  |  13 +-
 drivers/s390/scsi/zfcp_fsf.c  | 257 ++++++++++++++--------------------
 drivers/s390/scsi/zfcp_scsi.c |  25 +---
 6 files changed, 191 insertions(+), 344 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 504c9219961c6..5d39b2df0cc43 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -1135,9 +1135,6 @@ zfcp_adapter_enqueue(struct ccw_device *ccw_device)
 	/* initialize lock of associated request queue */
 	rwlock_init(&adapter->request_queue.queue_lock);
 
-	/* intitialise SCSI ER timer */
-	init_timer(&adapter->scsi_er_timer);
-
 	/* mark adapter unusable as long as sysfs registration is not complete */
 	atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status);
 
@@ -1653,7 +1650,6 @@ zfcp_ns_gid_pn_request(struct zfcp_erp_action *erp_action)
 	gid_pn->ct.handler = zfcp_ns_gid_pn_handler;
 	gid_pn->ct.handler_data = (unsigned long) gid_pn;
         gid_pn->ct.timeout = ZFCP_NS_GID_PN_TIMEOUT;
-        gid_pn->ct.timer = &erp_action->timer;
 	gid_pn->port = erp_action->port;
 
 	ret = zfcp_fsf_send_ct(&gid_pn->ct, adapter->pool.fsf_req_erp,
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index ef1cd49184e8b..8f882690994d9 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -137,7 +137,7 @@ zfcp_address_to_sg(void *address, struct scatterlist *list)
 #define ZFCP_EXCHANGE_CONFIG_DATA_RETRIES	7
 
 /* timeout value for "default timer" for fsf requests */
-#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ);
+#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ)
 
 /*************** FIBRE CHANNEL PROTOCOL SPECIFIC DEFINES ********************/
 
@@ -779,7 +779,6 @@ typedef void (*zfcp_send_ct_handler_t)(unsigned long);
  * @handler_data: data passed to handler function
  * @pool: pointer to memory pool for ct request structure
  * @timeout: FSF timeout for this request
- * @timer: timer (e.g. for request initiated by erp)
  * @completion: completion for synchronization purposes
  * @status: used to pass error status to calling function
  */
@@ -793,7 +792,6 @@ struct zfcp_send_ct {
 	unsigned long handler_data;
 	mempool_t *pool;
 	int timeout;
-	struct timer_list *timer;
 	struct completion *completion;
 	int status;
 };
@@ -821,7 +819,6 @@ typedef void (*zfcp_send_els_handler_t)(unsigned long);
  * @resp_count: number of elements in response scatter-gather list
  * @handler: handler function (called for response to the request)
  * @handler_data: data passed to handler function
- * @timer: timer (e.g. for request initiated by erp)
  * @completion: completion for synchronization purposes
  * @ls_code: hex code of ELS command
  * @status: used to pass error status to calling function
@@ -836,7 +833,6 @@ struct zfcp_send_els {
 	unsigned int resp_count;
 	zfcp_send_els_handler_t handler;
 	unsigned long handler_data;
-	struct timer_list *timer;
 	struct completion *completion;
 	int ls_code;
 	int status;
@@ -886,7 +882,6 @@ struct zfcp_adapter {
 	struct list_head        port_remove_lh;    /* head of ports to be
 						      removed */
 	u32			ports;	           /* number of remote ports */
-	struct timer_list	scsi_er_timer;     /* SCSI err recovery watch */
 	atomic_t		reqs_active;	   /* # active FSF reqs */
 	unsigned long		req_no;		   /* unique FSF req number */
 	struct list_head	*req_list;	   /* list of pending reqs */
@@ -1003,6 +998,7 @@ struct zfcp_fsf_req {
 	struct fsf_qtcb	       *qtcb;	       /* address of associated QTCB */
 	u32		       seq_no;         /* Sequence number of request */
         unsigned long          data;           /* private data of request */ 
+	struct timer_list      timer;	       /* used for erp or scsi er */
 	struct zfcp_erp_action *erp_action;    /* used if this request is
 						  issued on behalf of erp */
 	mempool_t	       *pool;	       /* used if request was alloacted
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 7f60b6fdf7248..af42a0eadf03b 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -64,8 +64,6 @@ static int zfcp_erp_strategy_check_action(struct zfcp_erp_action *, int);
 static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *, int);
 static int zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *);
-static void zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *);
-static void zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *);
@@ -111,64 +109,86 @@ static inline void zfcp_erp_action_to_ready(struct zfcp_erp_action *);
 static inline void zfcp_erp_action_to_running(struct zfcp_erp_action *);
 
 static void zfcp_erp_memwait_handler(unsigned long);
-static void zfcp_erp_timeout_handler(unsigned long);
-static inline void zfcp_erp_timeout_init(struct zfcp_erp_action *);
 
 /**
- * zfcp_fsf_request_timeout_handler - called if a request timed out
- * @data: pointer to adapter for handler function
- *
- * This function needs to be called if requests (ELS, Generic Service,
- * or SCSI commands) exceed a certain time limit. The assumption is
- * that after the time limit the adapter get stuck. So we trigger a reopen of
- * the adapter. This should not be used for error recovery, SCSI abort
- * commands and SCSI requests from SCSI mid-layer.
+ * zfcp_close_qdio - close qdio queues for an adapter
  */
-void
-zfcp_fsf_request_timeout_handler(unsigned long data)
+static void zfcp_close_qdio(struct zfcp_adapter *adapter)
 {
-	struct zfcp_adapter *adapter;
+	struct zfcp_qdio_queue *req_queue;
+	int first, count;
 
-	adapter = (struct zfcp_adapter *) data;
+	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status))
+		return;
 
-	zfcp_erp_adapter_reopen(adapter, 0);
+	/* clear QDIOUP flag, thus do_QDIO is not called during qdio_shutdown */
+	req_queue = &adapter->request_queue;
+	write_lock_irq(&req_queue->queue_lock);
+	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
+	write_unlock_irq(&req_queue->queue_lock);
+
+	debug_text_event(adapter->erp_dbf, 3, "qdio_down2a");
+	while (qdio_shutdown(adapter->ccw_device,
+			     QDIO_FLAG_CLEANUP_USING_CLEAR) == -EINPROGRESS)
+		msleep(1000);
+	debug_text_event(adapter->erp_dbf, 3, "qdio_down2b");
+
+	/* cleanup used outbound sbals */
+	count = atomic_read(&req_queue->free_count);
+	if (count < QDIO_MAX_BUFFERS_PER_Q) {
+		first = (req_queue->free_index+count) % QDIO_MAX_BUFFERS_PER_Q;
+		count = QDIO_MAX_BUFFERS_PER_Q - count;
+		zfcp_qdio_zero_sbals(req_queue->buffer, first, count);
+	}
+	req_queue->free_index = 0;
+	atomic_set(&req_queue->free_count, 0);
+	req_queue->distance_from_int = 0;
+	adapter->response_queue.free_index = 0;
+	atomic_set(&adapter->response_queue.free_count, 0);
 }
 
 /**
- * zfcp_fsf_scsi_er_timeout_handler - timeout handler for scsi eh tasks
+ * zfcp_close_fsf - stop FSF operations for an adapter
  *
- * This function needs to be called whenever a SCSI error recovery
- * action (abort/reset) does not return.  Re-opening the adapter means
- * that the abort/reset command can be returned by zfcp. It won't complete
- * via the adapter anymore (because qdio queues are closed). If ERP is
- * already running on this adapter it will be stopped.
+ * Dismiss and cleanup all pending fsf_reqs (this wakes up all initiators of
+ * requests waiting for completion; especially this returns SCSI commands
+ * with error state).
  */
-void zfcp_fsf_scsi_er_timeout_handler(unsigned long data)
+static void zfcp_close_fsf(struct zfcp_adapter *adapter)
 {
-	struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
-	unsigned long flags;
-
-	ZFCP_LOG_NORMAL("warning: SCSI error recovery timed out. "
-			"Restarting all operations on the adapter %s\n",
-			zfcp_get_busid_by_adapter(adapter));
-	debug_text_event(adapter->erp_dbf, 1, "eh_lmem_tout");
+	/* close queues to ensure that buffers are not accessed by adapter */
+	zfcp_close_qdio(adapter);
+	zfcp_fsf_req_dismiss_all(adapter);
+	/* reset FSF request sequence number */
+	adapter->fsf_req_seq_no = 0;
+	/* all ports and units are closed */
+	zfcp_erp_modify_adapter_status(adapter,
+				       ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
+}
 
-	write_lock_irqsave(&adapter->erp_lock, flags);
-	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
-			     &adapter->status)) {
-		zfcp_erp_modify_adapter_status(adapter,
-		       ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
-		       ZFCP_CLEAR);
-		zfcp_erp_action_dismiss_adapter(adapter);
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
-		/* dismiss all pending requests including requests for ERP */
-		zfcp_fsf_req_dismiss_all(adapter);
-		adapter->fsf_req_seq_no = 0;
-	} else
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
+/**
+ * zfcp_fsf_request_timeout_handler - called if a request timed out
+ * @data: pointer to adapter for handler function
+ *
+ * This function needs to be called if requests (ELS, Generic Service,
+ * or SCSI commands) exceed a certain time limit. The assumption is
+ * that after the time limit the adapter is stuck, so we trigger a reopen of
+ * the adapter.
+ */
+static void zfcp_fsf_request_timeout_handler(unsigned long data)
+{
+	struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
 	zfcp_erp_adapter_reopen(adapter, 0);
 }
 
+void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, unsigned long timeout)
+{
+	fsf_req->timer.function = zfcp_fsf_request_timeout_handler;
+	fsf_req->timer.data = (unsigned long) fsf_req->adapter;
+	fsf_req->timer.expires = jiffies + timeout; /* callers pass a duration */
+	add_timer(&fsf_req->timer);
+}
+
 /*
  * function:	
  *
@@ -282,7 +302,6 @@ zfcp_erp_adisc(struct zfcp_port *port)
 	struct zfcp_ls_adisc *adisc;
 	void *address = NULL;
 	int retval = 0;
-	struct timer_list *timer;
 
 	send_els = kzalloc(sizeof(struct zfcp_send_els), GFP_ATOMIC);
 	if (send_els == NULL)
@@ -329,22 +348,11 @@ zfcp_erp_adisc(struct zfcp_port *port)
 		      (wwn_t) adisc->wwnn, adisc->hard_nport_id,
 		      adisc->nport_id);
 
-	timer = kmalloc(sizeof(struct timer_list), GFP_ATOMIC);
-	if (!timer)
-		goto nomem;
-
-	init_timer(timer);
-	timer->function = zfcp_fsf_request_timeout_handler;
-	timer->data = (unsigned long) adapter;
-	timer->expires = ZFCP_FSF_REQUEST_TIMEOUT;
-	send_els->timer = timer;
-
 	retval = zfcp_fsf_send_els(send_els);
 	if (retval != 0) {
 		ZFCP_LOG_NORMAL("error: initiation of Send ELS failed for port "
 				"0x%08x on adapter %s\n", send_els->d_id,
 				zfcp_get_busid_by_adapter(adapter));
-		del_timer(send_els->timer);
 		goto freemem;
 	}
 
@@ -356,7 +364,6 @@ zfcp_erp_adisc(struct zfcp_port *port)
 	if (address != NULL)
 		__free_pages(send_els->req->page, 0);
 	if (send_els != NULL) {
-		kfree(send_els->timer);
 		kfree(send_els->req);
 		kfree(send_els->resp);
 		kfree(send_els);
@@ -382,9 +389,6 @@ zfcp_erp_adisc_handler(unsigned long data)
 	struct zfcp_ls_adisc_acc *adisc;
 
 	send_els = (struct zfcp_send_els *) data;
-
-	del_timer(send_els->timer);
-
 	adapter = send_els->adapter;
 	port = send_els->port;
 	d_id = send_els->d_id;
@@ -433,7 +437,6 @@ zfcp_erp_adisc_handler(unsigned long data)
  out:
 	zfcp_port_put(port);
 	__free_pages(send_els->req->page, 0);
-	kfree(send_els->timer);
 	kfree(send_els->req);
 	kfree(send_els->resp);
 	kfree(send_els);
@@ -909,8 +912,6 @@ static void zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action,
 		debug_text_event(adapter->erp_dbf, 2, "a_asyh_ex");
 		debug_event(adapter->erp_dbf, 2, &erp_action->action,
 			    sizeof (int));
-		if (!(set_mask & ZFCP_STATUS_ERP_TIMEDOUT))
-			del_timer(&erp_action->timer);
 		erp_action->status |= set_mask;
 		zfcp_erp_action_ready(erp_action);
 	} else {
@@ -957,8 +958,7 @@ zfcp_erp_memwait_handler(unsigned long data)
  *		action gets an appropriate flag and will be processed
  *		accordingly
  */
-static void
-zfcp_erp_timeout_handler(unsigned long data)
+void zfcp_erp_timeout_handler(unsigned long data)
 {
 	struct zfcp_erp_action *erp_action = (struct zfcp_erp_action *) data;
 	struct zfcp_adapter *adapter = erp_action->adapter;
@@ -1934,8 +1934,7 @@ zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *erp_action, int close)
 			  &erp_action->adapter->status);
 
  failed_openfcp:
-	zfcp_erp_adapter_strategy_close_qdio(erp_action);
-	zfcp_erp_adapter_strategy_close_fsf(erp_action);
+	zfcp_close_fsf(erp_action->adapter);
  failed_qdio:
  out:
 	return retval;
@@ -2040,59 +2039,6 @@ zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *erp_action)
 	return retval;
 }
 
-/**
- * zfcp_erp_adapter_strategy_close_qdio - close qdio queues for an adapter
- */
-static void
-zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action)
-{
-	int first_used;
-	int used_count;
-	struct zfcp_adapter *adapter = erp_action->adapter;
-
-	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status)) {
-		ZFCP_LOG_DEBUG("error: attempt to shut down inactive QDIO "
-			       "queues on adapter %s\n",
-			       zfcp_get_busid_by_adapter(adapter));
-		return;
-	}
-
-	/*
-	 * Get queue_lock and clear QDIOUP flag. Thus it's guaranteed that
-	 * do_QDIO won't be called while qdio_shutdown is in progress.
-	 */
-	write_lock_irq(&adapter->request_queue.queue_lock);
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
-	write_unlock_irq(&adapter->request_queue.queue_lock);
-
-	debug_text_event(adapter->erp_dbf, 3, "qdio_down2a");
-	while (qdio_shutdown(adapter->ccw_device,
-			     QDIO_FLAG_CLEANUP_USING_CLEAR) == -EINPROGRESS)
-		msleep(1000);
-	debug_text_event(adapter->erp_dbf, 3, "qdio_down2b");
-
-	/*
-	 * First we had to stop QDIO operation.
-	 * Now it is safe to take the following actions.
-	 */
-
-	/* Cleanup only necessary when there are unacknowledged buffers */
-	if (atomic_read(&adapter->request_queue.free_count)
-	    < QDIO_MAX_BUFFERS_PER_Q) {
-		first_used = (adapter->request_queue.free_index +
-			      atomic_read(&adapter->request_queue.free_count))
-			% QDIO_MAX_BUFFERS_PER_Q;
-		used_count = QDIO_MAX_BUFFERS_PER_Q -
-			atomic_read(&adapter->request_queue.free_count);
-		zfcp_qdio_zero_sbals(adapter->request_queue.buffer,
-				     first_used, used_count);
-	}
-	adapter->response_queue.free_index = 0;
-	atomic_set(&adapter->response_queue.free_count, 0);
-	adapter->request_queue.free_index = 0;
-	atomic_set(&adapter->request_queue.free_count, 0);
-	adapter->request_queue.distance_from_int = 0;
-}
 
 static int
 zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *erp_action)
@@ -2127,7 +2073,6 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action)
 		write_lock_irq(&adapter->erp_lock);
 		zfcp_erp_action_to_running(erp_action);
 		write_unlock_irq(&adapter->erp_lock);
-		zfcp_erp_timeout_init(erp_action);
 		if (zfcp_fsf_exchange_config_data(erp_action)) {
 			retval = ZFCP_ERP_FAILED;
 			debug_text_event(adapter->erp_dbf, 5, "a_fstx_xf");
@@ -2196,7 +2141,6 @@ zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action)
 	zfcp_erp_action_to_running(erp_action);
 	write_unlock_irq(&adapter->erp_lock);
 
-	zfcp_erp_timeout_init(erp_action);
 	ret = zfcp_fsf_exchange_port_data(erp_action, adapter, NULL);
 	if (ret == -EOPNOTSUPP) {
 		debug_text_event(adapter->erp_dbf, 3, "a_xport_notsupp");
@@ -2248,27 +2192,6 @@ zfcp_erp_adapter_strategy_open_fsf_statusread(struct zfcp_erp_action
 	return retval;
 }
 
-/**
- * zfcp_erp_adapter_strategy_close_fsf - stop FSF operations for an adapter
- */
-static void
-zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *erp_action)
-{
-	struct zfcp_adapter *adapter = erp_action->adapter;
-
-	/*
-	 * wake waiting initiators of requests,
-	 * return SCSI commands (with error status),
-	 * clean up all requests (synchronously)
-	 */
-	zfcp_fsf_req_dismiss_all(adapter);
-	/* reset FSF request sequence number */
-	adapter->fsf_req_seq_no = 0;
-	/* all ports and units are closed */
-	zfcp_erp_modify_adapter_status(adapter,
-				       ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
-}
-
 /*
  * function:	
  *
@@ -2605,7 +2528,6 @@ zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *erp_action)
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_close_physical_port(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "o_pfstc_nomem");
@@ -2662,7 +2584,6 @@ zfcp_erp_port_strategy_close(struct zfcp_erp_action *erp_action)
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_close_port(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "p_pstc_nomem");
@@ -2700,7 +2621,6 @@ zfcp_erp_port_strategy_open_port(struct zfcp_erp_action *erp_action)
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_open_port(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "p_psto_nomem");
@@ -2738,7 +2658,6 @@ zfcp_erp_port_strategy_open_common_lookup(struct zfcp_erp_action *erp_action)
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_ns_gid_pn_request(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "p_pstn_nomem");
@@ -2864,7 +2783,6 @@ zfcp_erp_unit_strategy_close(struct zfcp_erp_action *erp_action)
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_unit *unit = erp_action->unit;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_close_unit(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "u_ustc_nomem");
@@ -2905,7 +2823,6 @@ zfcp_erp_unit_strategy_open(struct zfcp_erp_action *erp_action)
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_unit *unit = erp_action->unit;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_open_unit(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "u_usto_nomem");
@@ -2930,14 +2847,13 @@ zfcp_erp_unit_strategy_open(struct zfcp_erp_action *erp_action)
 	return retval;
 }
 
-static inline void
-zfcp_erp_timeout_init(struct zfcp_erp_action *erp_action)
+void zfcp_erp_start_timer(struct zfcp_fsf_req *fsf_req)
 {
-	init_timer(&erp_action->timer);
-	erp_action->timer.function = zfcp_erp_timeout_handler;
-	erp_action->timer.data = (unsigned long) erp_action;
-	/* jiffies will be added in zfcp_fsf_req_send */
-	erp_action->timer.expires = ZFCP_ERP_FSFREQ_TIMEOUT;
+	BUG_ON(!fsf_req->erp_action);
+	fsf_req->timer.function = zfcp_erp_timeout_handler;
+	fsf_req->timer.data = (unsigned long) fsf_req->erp_action;
+	fsf_req->timer.expires = jiffies + ZFCP_ERP_FSFREQ_TIMEOUT;
+	add_timer(&fsf_req->timer);
 }
 
 /*
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 710ebbf899295..3125a42a63433 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -87,8 +87,8 @@ extern int  zfcp_fsf_exchange_port_data(struct zfcp_erp_action *,
 					struct fsf_qtcb_bottom_port *);
 extern int  zfcp_fsf_control_file(struct zfcp_adapter *, struct zfcp_fsf_req **,
 				  u32, u32, struct zfcp_sg_list *);
-extern void zfcp_fsf_request_timeout_handler(unsigned long);
-extern void zfcp_fsf_scsi_er_timeout_handler(unsigned long);
+extern void zfcp_fsf_start_timer(struct zfcp_fsf_req *, unsigned long);
+extern void zfcp_erp_start_timer(struct zfcp_fsf_req *);
 extern int  zfcp_fsf_req_dismiss_all(struct zfcp_adapter *);
 extern int  zfcp_fsf_status_read(struct zfcp_adapter *, int);
 extern int zfcp_fsf_req_create(struct zfcp_adapter *, u32, int, mempool_t *,
@@ -98,8 +98,7 @@ extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *,
 extern int zfcp_fsf_send_els(struct zfcp_send_els *);
 extern int  zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *,
 					   struct zfcp_unit *,
-					   struct scsi_cmnd *,
-					   struct timer_list*, int);
+					   struct scsi_cmnd *, int, int);
 extern int  zfcp_fsf_req_complete(struct zfcp_fsf_req *);
 extern void zfcp_fsf_incoming_els(struct zfcp_fsf_req *);
 extern void zfcp_fsf_req_free(struct zfcp_fsf_req *);
@@ -123,13 +122,11 @@ extern char *zfcp_get_fcp_rsp_info_ptr(struct fcp_rsp_iu *);
 extern void set_host_byte(u32 *, char);
 extern void set_driver_byte(u32 *, char);
 extern char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *);
-extern void zfcp_fsf_start_scsi_er_timer(struct zfcp_adapter *);
 extern fcp_dl_t zfcp_get_fcp_dl(struct fcp_cmnd_iu *);
 
 extern int zfcp_scsi_command_async(struct zfcp_adapter *,struct zfcp_unit *,
-				   struct scsi_cmnd *, struct timer_list *);
-extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *,
-				  struct timer_list *);
+				   struct scsi_cmnd *, int);
+extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *, int);
 extern struct fc_function_template zfcp_transport_functions;
 
 /******************************** ERP ****************************************/
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index a66b5193b70e1..277826cdd0c84 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -42,7 +42,7 @@ static inline int zfcp_fsf_req_sbal_check(
 static inline int zfcp_use_one_sbal(
 	struct scatterlist *, int, struct scatterlist *, int);
 static struct zfcp_fsf_req *zfcp_fsf_req_alloc(mempool_t *, int);
-static int zfcp_fsf_req_send(struct zfcp_fsf_req *, struct timer_list *);
+static int zfcp_fsf_req_send(struct zfcp_fsf_req *);
 static int zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *);
 static int zfcp_fsf_fsfstatus_eval(struct zfcp_fsf_req *);
 static int zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *);
@@ -225,8 +225,10 @@ zfcp_fsf_req_complete(struct zfcp_fsf_req *fsf_req)
 		 */
 		zfcp_fsf_status_read_handler(fsf_req);
 		goto out;
-	} else
+	} else {
+		del_timer(&fsf_req->timer);
 		zfcp_fsf_protstatus_eval(fsf_req);
+	}
 
 	/*
 	 * fsf_req may be deleted due to waking up functions, so 
@@ -785,8 +787,7 @@ zfcp_fsf_status_read(struct zfcp_adapter *adapter, int req_flags)
 	sbale->addr = (void *) status_buffer;
 	sbale->length = sizeof(struct fsf_status_read_buffer);
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(fsf_req, NULL);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_DEBUG("error: Could not set-up unsolicited status "
 			       "environment.\n");
@@ -1112,8 +1113,8 @@ zfcp_fsf_abort_fcp_command(unsigned long old_req_id,
 			   struct zfcp_unit *unit, int req_flags)
 {
 	volatile struct qdio_buffer_element *sbale;
-	unsigned long lock_flags;
 	struct zfcp_fsf_req *fsf_req = NULL;
+	unsigned long lock_flags;
 	int retval = 0;
 
 	/* setup new FSF request */
@@ -1143,12 +1144,9 @@ zfcp_fsf_abort_fcp_command(unsigned long old_req_id,
 	/* set handle of request which should be aborted */
 	fsf_req->qtcb->bottom.support.req_handle = (u64) old_req_id;
 
-	/* start QDIO request for this FSF request */
-
-	zfcp_fsf_start_scsi_er_timer(adapter);
-	retval = zfcp_fsf_req_send(fsf_req, NULL);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_SCSI_ER_TIMEOUT);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
-		del_timer(&adapter->scsi_er_timer);
 		ZFCP_LOG_INFO("error: Failed to send abort command request "
 			      "on adapter %s, port 0x%016Lx, unit 0x%016Lx\n",
 			      zfcp_get_busid_by_adapter(adapter),
@@ -1184,8 +1182,6 @@ zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *new_fsf_req)
 	unsigned char status_qual =
 	    new_fsf_req->qtcb->header.fsf_status_qual.word[0];
 
-	del_timer(&new_fsf_req->adapter->scsi_er_timer);
-
 	if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
 		/* do not set ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED */
 		goto skip_fsfstatus;
@@ -1391,11 +1387,6 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool,
 		goto failed_req;
 	}
 
-        if (erp_action != NULL) {
-                erp_action->fsf_req = fsf_req;
-                fsf_req->erp_action = erp_action;
-        }
-
 	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         if (zfcp_use_one_sbal(ct->req, ct->req_count,
                               ct->resp, ct->resp_count)){
@@ -1462,8 +1453,14 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool,
 
 	zfcp_san_dbf_event_ct_request(fsf_req);
 
-	/* start QDIO request for this FSF request */
-	ret = zfcp_fsf_req_send(fsf_req, ct->timer);
+	if (erp_action) {
+		erp_action->fsf_req = fsf_req;
+		fsf_req->erp_action = erp_action;
+		zfcp_erp_start_timer(fsf_req);
+	} else
+		zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+
+	ret = zfcp_fsf_req_send(fsf_req);
 	if (ret) {
 		ZFCP_LOG_DEBUG("error: initiation of CT request failed "
 			       "(adapter %s, port 0x%016Lx)\n",
@@ -1760,8 +1757,8 @@ zfcp_fsf_send_els(struct zfcp_send_els *els)
 
 	zfcp_san_dbf_event_els_request(fsf_req);
 
-	/* start QDIO request for this FSF request */
-	ret = zfcp_fsf_req_send(fsf_req, els->timer);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+	ret = zfcp_fsf_req_send(fsf_req);
 	if (ret) {
 		ZFCP_LOG_DEBUG("error: initiation of ELS request failed "
 			       "(adapter %s, port d_id: 0x%08x)\n",
@@ -1958,6 +1955,7 @@ int
 zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -1966,7 +1964,7 @@ zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
 				     FSF_QTCB_EXCHANGE_CONFIG_DATA,
 				     ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create exchange configuration "
 			      "data request for adapter %s.\n",
@@ -1974,26 +1972,26 @@ zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->erp_action = erp_action;
-	erp_action->fsf_req->qtcb->bottom.config.feature_selection =
+	fsf_req->qtcb->bottom.config.feature_selection =
 			FSF_FEATURE_CFDC |
 			FSF_FEATURE_LUN_SHARING |
 			FSF_FEATURE_NOTIFICATION_LOST |
 			FSF_FEATURE_UPDATE_ALERT;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO
 		    ("error: Could not send exchange configuration data "
 		     "command on the adapter %s\n",
 		     zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2223,10 +2221,9 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action,
 			    struct fsf_qtcb_bottom_port *data)
 {
 	volatile struct qdio_buffer_element *sbale;
-	int retval = 0;
-	unsigned long lock_flags;
         struct zfcp_fsf_req *fsf_req;
-	struct timer_list *timer;
+	unsigned long lock_flags;
+	int retval = 0;
 
 	if (!(adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT)) {
 		ZFCP_LOG_INFO("error: exchange port data "
@@ -2259,22 +2256,11 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action,
 	if (erp_action) {
 		erp_action->fsf_req = fsf_req;
 		fsf_req->erp_action = erp_action;
-		timer = &erp_action->timer;
-	} else {
-		timer = kmalloc(sizeof(struct timer_list), GFP_ATOMIC);
-		if (!timer) {
-			write_unlock_irqrestore(&adapter->request_queue.queue_lock,
-						lock_flags);
-			zfcp_fsf_req_free(fsf_req);
-			return -ENOMEM;
-		}
-		init_timer(timer);
-		timer->function = zfcp_fsf_request_timeout_handler;
-		timer->data = (unsigned long) adapter;
-		timer->expires = ZFCP_FSF_REQUEST_TIMEOUT;
-	}
+		zfcp_erp_start_timer(fsf_req);
+	} else
+		zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
 
-	retval = zfcp_fsf_req_send(fsf_req, timer);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send an exchange port data "
                               "command on the adapter %s\n",
@@ -2282,8 +2268,6 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action,
 		zfcp_fsf_req_free(fsf_req);
 		if (erp_action)
 			erp_action->fsf_req = NULL;
-		else
-			kfree(timer);
 		write_unlock_irqrestore(&adapter->request_queue.queue_lock,
 					lock_flags);
 		return retval;
@@ -2294,9 +2278,7 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action,
 	if (!erp_action) {
 		wait_event(fsf_req->completion_wq,
 			   fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
-		del_timer_sync(timer);
 		zfcp_fsf_req_free(fsf_req);
-		kfree(timer);
 	}
 	return retval;
 }
@@ -2378,6 +2360,7 @@ int
 zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -2386,7 +2369,7 @@ zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 				     FSF_QTCB_OPEN_PORT_WITH_DID,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create open port request "
 			      "for port 0x%016Lx on adapter %s.\n",
@@ -2395,24 +2378,24 @@ zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id;
+	fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id;
 	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->port->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->port;
-	erp_action->fsf_req->erp_action = erp_action;
+	fsf_req->data = (unsigned long) erp_action->port;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send open port request for "
 			      "port 0x%016Lx on adapter %s.\n",
 			      erp_action->port->wwpn,
 			      zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2634,6 +2617,7 @@ int
 zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -2642,7 +2626,7 @@ zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
 				     FSF_QTCB_CLOSE_PORT,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create a close port request "
 			      "for port 0x%016Lx on adapter %s.\n",
@@ -2651,25 +2635,25 @@ zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
 	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->port->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->port;
-	erp_action->fsf_req->erp_action = erp_action;
-	erp_action->fsf_req->qtcb->header.port_handle =
-	    erp_action->port->handle;
-
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	fsf_req->data = (unsigned long) erp_action->port;
+	fsf_req->erp_action = erp_action;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
+
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send a close port request for "
 			      "port 0x%016Lx on adapter %s.\n",
 			      erp_action->port->wwpn,
 			      zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2766,16 +2750,17 @@ zfcp_fsf_close_port_handler(struct zfcp_fsf_req *fsf_req)
 int
 zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
 {
-	int retval = 0;
-	unsigned long lock_flags;
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
+	unsigned long lock_flags;
+	int retval = 0;
 
 	/* setup new FSF request */
 	retval = zfcp_fsf_req_create(erp_action->adapter,
 				     FSF_QTCB_CLOSE_PHYSICAL_PORT,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &erp_action->fsf_req);
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create close physical port "
 			      "request (adapter %s, port 0x%016Lx)\n",
@@ -2785,8 +2770,7 @@ zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-				    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
 	sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
 	sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
@@ -2794,20 +2778,19 @@ zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
 	atomic_set_mask(ZFCP_STATUS_PORT_PHYS_CLOSING,
 			&erp_action->port->status);
 	/* save a pointer to this port */
-	erp_action->fsf_req->data = (unsigned long) erp_action->port;
-	/* port to be closed */
-	erp_action->fsf_req->qtcb->header.port_handle =
-	    erp_action->port->handle;
-	erp_action->fsf_req->erp_action = erp_action;
-
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	fsf_req->data = (unsigned long) erp_action->port;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
+
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send close physical port "
 			      "request (adapter %s, port 0x%016Lx)\n",
 			      zfcp_get_busid_by_adapter(erp_action->adapter),
 			      erp_action->port->wwpn);
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2972,6 +2955,7 @@ int
 zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -2980,7 +2964,7 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action)
 				     FSF_QTCB_OPEN_LUN,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create open unit request for "
 			      "unit 0x%016Lx on port 0x%016Lx on adapter %s.\n",
@@ -2990,24 +2974,22 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action)
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->qtcb->header.port_handle =
-		erp_action->port->handle;
-	erp_action->fsf_req->qtcb->bottom.support.fcp_lun =
-		erp_action->unit->fcp_lun;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->qtcb->bottom.support.fcp_lun =	erp_action->unit->fcp_lun;
 	if (!(erp_action->adapter->connection_features & FSF_FEATURE_NPIV_MODE))
-		erp_action->fsf_req->qtcb->bottom.support.option =
+		fsf_req->qtcb->bottom.support.option =
 			FSF_OPEN_LUN_SUPPRESS_BOXING;
 	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->unit->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->unit;
-	erp_action->fsf_req->erp_action = erp_action;
+	fsf_req->data = (unsigned long) erp_action->unit;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(erp_action->fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send an open unit request "
 			      "on the adapter %s, port 0x%016Lx for "
@@ -3015,7 +2997,7 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action)
 			      zfcp_get_busid_by_adapter(erp_action->adapter),
 			      erp_action->port->wwpn,
 			      erp_action->unit->fcp_lun);
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -3308,6 +3290,7 @@ int
 zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -3316,7 +3299,7 @@ zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action)
 				     FSF_QTCB_CLOSE_LUN,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create close unit request for "
 			      "unit 0x%016Lx on port 0x%016Lx on adapter %s.\n",
@@ -3326,27 +3309,26 @@ zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action)
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->qtcb->header.port_handle =
-	    erp_action->port->handle;
-	erp_action->fsf_req->qtcb->header.lun_handle = erp_action->unit->handle;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->qtcb->header.lun_handle = erp_action->unit->handle;
 	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->unit->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->unit;
-	erp_action->fsf_req->erp_action = erp_action;
+	fsf_req->data = (unsigned long) erp_action->unit;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(erp_action->fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send a close unit request for "
 			      "unit 0x%016Lx on port 0x%016Lx onadapter %s.\n",
 			      erp_action->unit->fcp_lun,
 			      erp_action->port->wwpn,
 			      zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -3499,7 +3481,7 @@ int
 zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter,
 			       struct zfcp_unit *unit,
 			       struct scsi_cmnd * scsi_cmnd,
-			       struct timer_list *timer, int req_flags)
+			       int use_timer, int req_flags)
 {
 	struct zfcp_fsf_req *fsf_req = NULL;
 	struct fcp_cmnd_iu *fcp_cmnd_iu;
@@ -3640,11 +3622,10 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter,
 	ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG,
 		      (char *) scsi_cmnd->cmnd, scsi_cmnd->cmd_len);
 
-	/*
-	 * start QDIO request for this FSF request
-	 *  covered by an SBALE)
-	 */
-	retval = zfcp_fsf_req_send(fsf_req, timer);
+	if (use_timer)
+		zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (unlikely(retval < 0)) {
 		ZFCP_LOG_INFO("error: Could not send FCP command request "
 			      "on adapter %s, port 0x%016Lx, unit 0x%016Lx\n",
@@ -3729,11 +3710,9 @@ zfcp_fsf_send_fcp_command_task_management(struct zfcp_adapter *adapter,
 	fcp_cmnd_iu->fcp_lun = unit->fcp_lun;
 	fcp_cmnd_iu->task_management_flags = tm_flags;
 
-	/* start QDIO request for this FSF request */
-	zfcp_fsf_start_scsi_er_timer(adapter);
-	retval = zfcp_fsf_req_send(fsf_req, NULL);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_SCSI_ER_TIMEOUT);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
-		del_timer(&adapter->scsi_er_timer);
 		ZFCP_LOG_INFO("error: Could not send an FCP-command (task "
 			      "management) on adapter %s, port 0x%016Lx for "
 			      "unit LUN 0x%016Lx\n",
@@ -4237,7 +4216,6 @@ zfcp_fsf_send_fcp_command_task_management_handler(struct zfcp_fsf_req *fsf_req)
 	char *fcp_rsp_info = zfcp_get_fcp_rsp_info_ptr(fcp_rsp_iu);
 	struct zfcp_unit *unit = (struct zfcp_unit *) fsf_req->data;
 
-	del_timer(&fsf_req->adapter->scsi_er_timer);
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_TMFUNCFAILED;
 		goto skip_fsfstatus;
@@ -4306,7 +4284,6 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter,
 	struct zfcp_fsf_req *fsf_req;
 	struct fsf_qtcb_bottom_support *bottom;
 	volatile struct qdio_buffer_element *sbale;
-	struct timer_list *timer;
 	unsigned long lock_flags;
 	int req_flags = 0;
 	int direction;
@@ -4338,12 +4315,6 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter,
 		goto out;
 	}
 
-	timer = kmalloc(sizeof(struct timer_list), GFP_KERNEL);
-	if (!timer) {
-		retval = -ENOMEM;
-		goto out;
- 	}
-
 	retval = zfcp_fsf_req_create(adapter, fsf_command, req_flags,
 				     NULL, &lock_flags, &fsf_req);
 	if (retval < 0) {
@@ -4378,12 +4349,8 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter,
 	} else
 		sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	init_timer(timer);
-	timer->function = zfcp_fsf_request_timeout_handler;
-	timer->data = (unsigned long) adapter;
-	timer->expires = ZFCP_FSF_REQUEST_TIMEOUT;
-
-	retval = zfcp_fsf_req_send(fsf_req, timer);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("initiation of cfdc up/download failed"
 			      "(adapter %s)\n",
@@ -4403,15 +4370,12 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter,
 	           fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
 
 	*fsf_req_ptr = fsf_req;
-	del_timer_sync(timer);
-	goto free_timer;
+	goto out;
 
  free_fsf_req:
 	zfcp_fsf_req_free(fsf_req);
  unlock_queue_lock:
 	write_unlock_irqrestore(&adapter->request_queue.queue_lock, lock_flags);
- free_timer:
-	kfree(timer);
  out:
 	return retval;
 }
@@ -4688,7 +4652,8 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 		adapter->req_no++;
 	fsf_req->req_id = adapter->req_no++;
 
-        zfcp_fsf_req_qtcb_init(fsf_req);
+	init_timer(&fsf_req->timer);
+	zfcp_fsf_req_qtcb_init(fsf_req);
 
 	/* initialize waitqueue which may be used to wait on 
 	   this request completion */
@@ -4758,8 +4723,7 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
  * returns:	0 - request transfer succesfully started
  *		!0 - start of request transfer failed
  */
-static int
-zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
+static int zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req)
 {
 	struct zfcp_adapter *adapter;
 	struct zfcp_qdio_queue *req_queue;
@@ -4787,12 +4751,6 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 
 	inc_seq_no = (fsf_req->qtcb != NULL);
 
-	/* figure out expiration time of timeout and start timeout */
-	if (unlikely(timer)) {
-		timer->expires += jiffies;
-		add_timer(timer);
-	}
-
 	ZFCP_LOG_TRACE("request queue of adapter %s: "
 		       "next free SBAL is %i, %i free SBALs\n",
 		       zfcp_get_busid_by_adapter(adapter),
@@ -4829,12 +4787,7 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 	if (unlikely(retval)) {
 		/* Queues are down..... */
 		retval = -EIO;
-		/*
-		 * FIXME(potential race):
-		 * timer might be expired (absolutely unlikely)
-		 */
-		if (timer)
-			del_timer(timer);
+		del_timer(&fsf_req->timer);
 		spin_lock(&adapter->req_list_lock);
 		zfcp_reqlist_remove(adapter, fsf_req->req_id);
 		spin_unlock(&adapter->req_list_lock);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 043ed7c0a7ed5..753bb9b2fe74a 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -231,7 +231,7 @@ zfcp_scsi_command_fail(struct scsi_cmnd *scpnt, int result)
  */
 int
 zfcp_scsi_command_async(struct zfcp_adapter *adapter, struct zfcp_unit *unit,
-			struct scsi_cmnd *scpnt, struct timer_list *timer)
+			struct scsi_cmnd *scpnt, int use_timer)
 {
 	int tmp;
 	int retval;
@@ -267,7 +267,7 @@ zfcp_scsi_command_async(struct zfcp_adapter *adapter, struct zfcp_unit *unit,
 		goto out;
 	}
 
-	tmp = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, timer,
+	tmp = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, use_timer,
 					     ZFCP_REQ_AUTO_CLEANUP);
 
 	if (unlikely(tmp < 0)) {
@@ -291,21 +291,22 @@ zfcp_scsi_command_sync_handler(struct scsi_cmnd *scpnt)
  * zfcp_scsi_command_sync - send a SCSI command and wait for completion
  * @unit: unit where command is sent to
  * @scpnt: scsi command to be sent
- * @timer: timer to be started if request is successfully initiated
+ * @use_timer: indicates whether timer should be setup or not
  * Return: 0
  *
  * Errors are indicated in scpnt->result
  */
 int
 zfcp_scsi_command_sync(struct zfcp_unit *unit, struct scsi_cmnd *scpnt,
-		       struct timer_list *timer)
+		       int use_timer)
 {
 	int ret;
 	DECLARE_COMPLETION(wait);
 
 	scpnt->SCp.ptr = (void *) &wait;  /* silent re-use */
 	scpnt->scsi_done = zfcp_scsi_command_sync_handler;
-	ret = zfcp_scsi_command_async(unit->port->adapter, unit, scpnt, timer);
+	ret = zfcp_scsi_command_async(unit->port->adapter, unit, scpnt,
+				      use_timer);
 	if (ret == 0)
 		wait_for_completion(&wait);
 
@@ -341,7 +342,7 @@ zfcp_scsi_queuecommand(struct scsi_cmnd *scpnt,
 	adapter = (struct zfcp_adapter *) scpnt->device->host->hostdata[0];
 	unit = (struct zfcp_unit *) scpnt->device->hostdata;
 
-	return zfcp_scsi_command_async(adapter, unit, scpnt, NULL);
+	return zfcp_scsi_command_async(adapter, unit, scpnt, 0);
 }
 
 static struct zfcp_unit *
@@ -538,8 +539,6 @@ zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags,
 
 /**
  * zfcp_scsi_eh_host_reset_handler - handler for host and bus reset
- *
- * If ERP is already running it will be stopped.
  */
 int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
 {
@@ -638,16 +637,6 @@ zfcp_adapter_scsi_unregister(struct zfcp_adapter *adapter)
 	return;
 }
 
-
-void
-zfcp_fsf_start_scsi_er_timer(struct zfcp_adapter *adapter)
-{
-	adapter->scsi_er_timer.function = zfcp_fsf_scsi_er_timeout_handler;
-	adapter->scsi_er_timer.data = (unsigned long) adapter;
-	adapter->scsi_er_timer.expires = jiffies + ZFCP_SCSI_ER_TIMEOUT;
-	add_timer(&adapter->scsi_er_timer);
-}
-
 /*
  * Support functions for FC transport class
  */
-- 
GitLab


From 8165428610446ea9e6aa9dfa5485ab78e58cc9fc Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <aherrman@de.ibm.com>
Date: Mon, 18 Sep 2006 22:30:36 +0200
Subject: [PATCH 1049/1063] [SCSI] zfcp: fix: avoid removal of fsf reqs before
 qdio queues are down

Fix the fix ... One of my previous fixes introduced removal of all fsf
requests in zfcp's eh_host_reset_handler. But this must not happen
before qdio queues are shut down. So, I revert the changes of
zfcp_scsi_eh_host_reset_handler.

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/s390/scsi/zfcp_erp.c  |  3 ++-
 drivers/s390/scsi/zfcp_ext.h  |  1 -
 drivers/s390/scsi/zfcp_scsi.c | 19 ++-----------------
 3 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index af42a0eadf03b..862a411a4aa01 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -91,6 +91,7 @@ static int zfcp_erp_unit_strategy_clearstati(struct zfcp_unit *);
 static int zfcp_erp_unit_strategy_close(struct zfcp_erp_action *);
 static int zfcp_erp_unit_strategy_open(struct zfcp_erp_action *);
 
+static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
 static void zfcp_erp_action_dismiss_port(struct zfcp_port *);
 static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *);
 static void zfcp_erp_action_dismiss(struct zfcp_erp_action *);
@@ -3157,7 +3158,7 @@ zfcp_erp_action_cleanup(int action, struct zfcp_adapter *adapter,
 }
 
 
-void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
+static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
 {
 	struct zfcp_port *port;
 
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 3125a42a63433..b8794d77285dd 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -134,7 +134,6 @@ extern void zfcp_erp_modify_adapter_status(struct zfcp_adapter *, u32, int);
 extern int  zfcp_erp_adapter_reopen(struct zfcp_adapter *, int);
 extern int  zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int);
 extern void zfcp_erp_adapter_failed(struct zfcp_adapter *);
-extern void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
 
 extern void zfcp_erp_modify_port_status(struct zfcp_port *, u32, int);
 extern int  zfcp_erp_port_reopen(struct zfcp_port *, int);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 753bb9b2fe74a..7cafa34e4c7f5 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -544,7 +544,6 @@ int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
 {
 	struct zfcp_unit *unit;
 	struct zfcp_adapter *adapter;
-	unsigned long flags;
 
 	unit = (struct zfcp_unit*) scpnt->device->hostdata;
 	adapter = unit->port->adapter;
@@ -552,22 +551,8 @@ int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
 	ZFCP_LOG_NORMAL("host/bus reset because of problems with "
 			"unit 0x%016Lx\n", unit->fcp_lun);
 
-	write_lock_irqsave(&adapter->erp_lock, flags);
-	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
-			     &adapter->status)) {
-		zfcp_erp_modify_adapter_status(adapter,
-		       ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
-		       ZFCP_CLEAR);
-		zfcp_erp_action_dismiss_adapter(adapter);
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
-		zfcp_fsf_req_dismiss_all(adapter);
-		adapter->fsf_req_seq_no = 0;
-		zfcp_erp_adapter_reopen(adapter, 0);
-	} else {
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
-		zfcp_erp_adapter_reopen(adapter, 0);
-		zfcp_erp_wait(adapter);
-	}
+	zfcp_erp_adapter_reopen(adapter, 0);
+	zfcp_erp_wait(adapter);
 
 	return SUCCESS;
 }
-- 
GitLab


From 99005e91eb2289c0ff0875257b5d18d9b7cc4eb7 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <aherrman@de.ibm.com>
Date: Mon, 18 Sep 2006 22:32:15 +0200
Subject: [PATCH 1050/1063] [SCSI] zfcp: update maintainers file

Removed myself as maintainer of the s390 zfcp driver --
I will not maintain it any longer.

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b08c537018de2..c6bd9b90757cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2484,8 +2484,6 @@ W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 
 S390 ZFCP DRIVER
-P:	Andreas Herrmann
-M:	aherrman@de.ibm.com
 M:	linux390@de.ibm.com
 L:	linux-390@vm.marist.edu
 W:	http://www.ibm.com/developerworks/linux/linux390/
-- 
GitLab


From 9cbb889786548c1212fb77a9df8d09ed883a3480 Mon Sep 17 00:00:00 2001
From: Swen Schillig <swen@vnet.ibm.com>
Date: Thu, 21 Sep 2006 16:29:31 +0200
Subject: [PATCH 1051/1063] [SCSI] zfcp: update maintainers file

As Andreas stated he will not maintain the zfcp driver anymore.
Instead I will take over the responsibility.

Signed-off-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c6bd9b90757cf..ed9757ee4a67e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2484,6 +2484,8 @@ W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 
 S390 ZFCP DRIVER
+P:	Swen Schillig
+M:	swen@vnet.ibm.com
 M:	linux390@de.ibm.com
 L:	linux-390@vm.marist.edu
 W:	http://www.ibm.com/developerworks/linux/linux390/
-- 
GitLab


From 73af07de3e32b9ac328c3d1417258bb98a9b0a9b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 24 Sep 2006 09:30:19 +1000
Subject: [PATCH 1052/1063] [CRYPTO] hmac: Fix error truncation by unlikely()

The error return values are truncated by unlikely() so we need to
save them first.  Thanks to Kyle Moffett for spotting this.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 crypto/hmac.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/crypto/hmac.c b/crypto/hmac.c
index d52b234835cf1..b521bcd2b2c60 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -92,13 +92,17 @@ static int hmac_init(struct hash_desc *pdesc)
 	struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *));
 	struct hash_desc desc;
 	struct scatterlist tmp;
+	int err;
 
 	desc.tfm = ctx->child;
 	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
 	sg_set_buf(&tmp, ipad, bs);
 
-	return unlikely(crypto_hash_init(&desc)) ?:
-	       crypto_hash_update(&desc, &tmp, bs);
+	err = crypto_hash_init(&desc);
+	if (unlikely(err))
+		return err;
+
+	return crypto_hash_update(&desc, &tmp, bs);
 }
 
 static int hmac_update(struct hash_desc *pdesc,
@@ -123,13 +127,17 @@ static int hmac_final(struct hash_desc *pdesc, u8 *out)
 	struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *));
 	struct hash_desc desc;
 	struct scatterlist tmp;
+	int err;
 
 	desc.tfm = ctx->child;
 	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
 	sg_set_buf(&tmp, opad, bs + ds);
 
-	return unlikely(crypto_hash_final(&desc, digest)) ?:
-	       crypto_hash_digest(&desc, &tmp, bs + ds, out);
+	err = crypto_hash_final(&desc, digest);
+	if (unlikely(err))
+		return err;
+
+	return crypto_hash_digest(&desc, &tmp, bs + ds, out);
 }
 
 static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
@@ -145,6 +153,7 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
 	struct hash_desc desc;
 	struct scatterlist sg1[2];
 	struct scatterlist sg2[1];
+	int err;
 
 	desc.tfm = ctx->child;
 	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -154,8 +163,11 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
 	sg1[1].length = 0;
 	sg_set_buf(sg2, opad, bs + ds);
 
-	return unlikely(crypto_hash_digest(&desc, sg1, nbytes + bs, digest)) ?:
-	       crypto_hash_digest(&desc, sg2, bs + ds, out);
+	err = crypto_hash_digest(&desc, sg1, nbytes + bs, digest);
+	if (unlikely(err))
+		return err;
+
+	return crypto_hash_digest(&desc, sg2, bs + ds, out);
 }
 
 static int hmac_init_tfm(struct crypto_tfm *tfm)
-- 
GitLab


From 65101355450df2d935f8d56ac3abef279f28a0e2 Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Tue, 19 Sep 2006 08:59:23 -0700
Subject: [PATCH 1053/1063] [SCSI] aacraid: misc cleanup

Received from Mark Salyzyn:

Basically cleanup, nothing here will have an effect. Adjusting some
error codes, removing superfluous definitions and code fragments.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/aachba.c  | 8 ++++----
 drivers/scsi/aacraid/aacraid.h | 1 -
 drivers/scsi/aacraid/commsup.c | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 37c55ddce2143..b14f7cac30e98 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -1803,7 +1803,7 @@ static int query_disk(struct aac_dev *dev, void __user *arg)
 
 	fsa_dev_ptr = dev->fsa_dev;
 	if (!fsa_dev_ptr)
-		return -ENODEV;
+		return -EBUSY;
 	if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
 		return -EFAULT;
 	if (qd.cnum == -1)
@@ -1842,6 +1842,8 @@ static int force_delete_disk(struct aac_dev *dev, void __user *arg)
 	struct fsa_dev_info *fsa_dev_ptr;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -EBUSY;
 
 	if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
 		return -EFAULT;
@@ -1866,9 +1868,7 @@ static int delete_disk(struct aac_dev *dev, void __user *arg)
 
 	fsa_dev_ptr = dev->fsa_dev;
 	if (!fsa_dev_ptr)
-		return -ENODEV;
-	if (!fsa_dev_ptr)
-		return -ENODEV;
+		return -EBUSY;
 
 	if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
 		return -EFAULT;
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 8924c183d9c3f..e5f7be6f3cd68 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1525,7 +1525,6 @@ struct aac_get_name {
 	__le32		count;	/* sizeof(((struct aac_get_name_resp *)NULL)->data) */
 };
 
-#define CT_OK        218
 struct aac_get_name_resp {
 	__le32		dummy0;
 	__le32		dummy1;
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 53add53be0bde..907161d6e92cc 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -467,7 +467,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
 	dprintk((KERN_DEBUG "  fib being sent=%p\n",fibptr));
 
 	if (!dev->queues)
-		return -ENODEV;
+		return -EBUSY;
 	q = &dev->queues->queue[AdapNormCmdQueue];
 
 	if(wait)
-- 
GitLab


From 653ba58d55feb708c6f97e6f3e84901b3a03c9c0 Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Tue, 19 Sep 2006 08:59:43 -0700
Subject: [PATCH 1054/1063] [SCSI] aacraid: expose physical devices

Received from Mark Salyzyn:

I am placing this functionality into an insmod parameter. Normally the physical
components are exported to sg, and are blocked from showing up in sd.

Note that the pass-through I/O path via the driver through the Firmware to the
physical disks is not an optimized path, the card is designed for Hardware
RAID, elevator sorting and caching. This should not be used as a means for
utilizing the aacraid based controllers as a generic scsi/SATA/SAS controller,
performance should suck by a few percentage points, any RAID meta-data on the
drives will confuse the controller about who owns the drives and there is a
high risk of destroying content in both directions. Unreliable and for
experimentation or strange controlled circumstances only.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/aachba.c | 6 +++++-
 drivers/scsi/aacraid/linit.c  | 5 ++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index b14f7cac30e98..ac108f9e26741 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -169,6 +169,10 @@ MODULE_PARM_DESC(numacb, "Request a limit to the number of adapter control block
 int acbsize = -1;
 module_param(acbsize, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware.");
+
+int expose_physicals = 0;
+module_param(expose_physicals, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. 0=off, 1=on");
 /**
  *	aac_get_config_status	-	check the adapter configuration
  *	@common: adapter to query
@@ -1535,7 +1539,7 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
 				return 0;
 			}
 		} else {  /* check for physical non-dasd devices */
-			if(dev->nondasd_support == 1){
+			if ((dev->nondasd_support == 1) || expose_physicals) {
 				if (dev->in_reset)
 					return -1;
 				return aac_send_srb_fib(scsicmd);
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index d67058f80816b..6e4eafa4eceee 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -82,6 +82,8 @@ static LIST_HEAD(aac_devices);
 static int aac_cfg_major = -1;
 char aac_driver_version[] = AAC_DRIVER_FULL_VERSION;
 
+extern int expose_physicals;
+
 /*
  * Because of the way Linux names scsi devices, the order in this table has
  * become important.  Check for on-board Raid first, add-in cards second.
@@ -394,6 +396,7 @@ static int aac_slave_configure(struct scsi_device *sdev)
 		sdev->skip_ms_page_3f = 1;
 	}
 	if ((sdev->type == TYPE_DISK) &&
+			!expose_physicals &&
 			(sdev_channel(sdev) != CONTAINER_CHANNEL)) {
 		struct aac_dev *aac = (struct aac_dev *)sdev->host->hostdata;
 		if (!aac->raid_scsi_mode || (sdev_channel(sdev) != 2))
@@ -928,7 +931,7 @@ static int __devinit aac_probe_one(struct pci_dev *pdev,
 	 * all containers are on the virtual channel 0 (CONTAINER_CHANNEL)
 	 * physical channels are address by their actual physical number+1
 	 */
-	if (aac->nondasd_support == 1)
+	if ((aac->nondasd_support == 1) || expose_physicals)
 		shost->max_channel = aac->maximum_num_channels;
 	else
 		shost->max_channel = 0;
-- 
GitLab


From 76a7f8fdc0c2381ae1ba55ef71837712223ecb3c Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Tue, 19 Sep 2006 09:00:02 -0700
Subject: [PATCH 1055/1063] [SCSI] aacraid: merge rx and rkt code

Received from Mark Salyzyn:

The only real difference between the rkt and rx platform modules is the
offset of the message registers. This patch recognizes this similarity
and simplifies the driver to reduce its code footprint and to improve
maintainability by reducing the code duplication.

Visibly, the 'rkt.c' portion of this patch looks more complicated than
it really is. View it as retaining the rkt-only specifics of the
interface.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/aacraid.h  |  14 +-
 drivers/scsi/aacraid/comminit.c |  11 +-
 drivers/scsi/aacraid/linit.c    |  12 +-
 drivers/scsi/aacraid/rkt.c      | 461 ++------------------------------
 drivers/scsi/aacraid/rx.c       |  90 +++++--
 drivers/scsi/aacraid/sa.c       |  21 ++
 6 files changed, 112 insertions(+), 497 deletions(-)

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index e5f7be6f3cd68..eb3ed91bac796 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -494,6 +494,7 @@ struct adapter_ops
 	int  (*adapter_sync_cmd)(struct aac_dev *dev, u32 command, u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4);
 	int  (*adapter_check_health)(struct aac_dev *dev);
 	int  (*adapter_send)(struct fib * fib);
+	int  (*adapter_ioremap)(struct aac_dev * dev, u32 size);
 };
 
 /*
@@ -682,14 +683,6 @@ struct rx_inbound {
 	__le32	Mailbox[8];
 };
 
-#define	InboundMailbox0		IndexRegs.Mailbox[0]
-#define	InboundMailbox1		IndexRegs.Mailbox[1]
-#define	InboundMailbox2		IndexRegs.Mailbox[2]
-#define	InboundMailbox3		IndexRegs.Mailbox[3]
-#define	InboundMailbox4		IndexRegs.Mailbox[4]
-#define	InboundMailbox5		IndexRegs.Mailbox[5]
-#define	InboundMailbox6		IndexRegs.Mailbox[6]
-
 #define	INBOUNDDOORBELL_0	0x00000001
 #define INBOUNDDOORBELL_1	0x00000002
 #define INBOUNDDOORBELL_2	0x00000004
@@ -1010,6 +1003,8 @@ struct aac_dev
 		struct rx_registers __iomem *rx;
 		struct rkt_registers __iomem *rkt;
 	} regs;
+	volatile void __iomem *base;
+	volatile struct rx_inbound __iomem *IndexRegs;
 	u32			OIMR; /* Mask Register Cache */
 	/*
 	 *	AIF thread states
@@ -1050,6 +1045,9 @@ struct aac_dev
 #define aac_adapter_send(fib) \
 	((fib)->dev)->a_ops.adapter_send(fib)
 
+#define aac_adapter_ioremap(dev, size) \
+	(dev)->a_ops.adapter_ioremap(dev, size)
+
 #define FIB_CONTEXT_FLAG_TIMED_OUT		(0x00000001)
 
 /*
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 87a9550967615..d5cf8b91a0e7a 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -307,17 +307,12 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
 		if (status[1] & AAC_OPT_NEW_COMM)
 			dev->new_comm_interface = dev->a_ops.adapter_send != 0;
 		if (dev->new_comm_interface && (status[2] > dev->base_size)) {
-			iounmap(dev->regs.sa);
+			aac_adapter_ioremap(dev, 0);
 			dev->base_size = status[2];
-			dprintk((KERN_DEBUG "ioremap(%lx,%d)\n",
-			  host->base, status[2]));
-			dev->regs.sa = ioremap(host->base, status[2]);
-			if (dev->regs.sa == NULL) {
+			if (aac_adapter_ioremap(dev, status[2])) {
 				/* remap failed, go back ... */
 				dev->new_comm_interface = 0;
-				dev->regs.sa = ioremap(host->base, 
-						AAC_MIN_FOOTPRINT_SIZE);
-				if (dev->regs.sa == NULL) {	
+				if (aac_adapter_ioremap(dev, AAC_MIN_FOOTPRINT_SIZE)) {
 					printk(KERN_WARNING
 					  "aacraid: unable to map adapter.\n");
 					return NULL;
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 6e4eafa4eceee..359e7ddfdb471 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -867,13 +867,6 @@ static int __devinit aac_probe_one(struct pci_dev *pdev,
 	 *	Map in the registers from the adapter.
 	 */
 	aac->base_size = AAC_MIN_FOOTPRINT_SIZE;
-	if ((aac->regs.sa = ioremap(
-	  (unsigned long)aac->scsi_host_ptr->base, AAC_MIN_FOOTPRINT_SIZE))
-	  == NULL) {	
-		printk(KERN_WARNING "%s: unable to map adapter.\n",
-		  AAC_DRIVERNAME);
-		goto out_free_fibs;
-	}
 	if ((*aac_drivers[index].init)(aac))
 		goto out_unmap;
 
@@ -972,8 +965,7 @@ static int __devinit aac_probe_one(struct pci_dev *pdev,
 	aac_fib_map_free(aac);
 	pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys);
 	kfree(aac->queues);
-	iounmap(aac->regs.sa);
- out_free_fibs:
+	aac_adapter_ioremap(aac, 0);
 	kfree(aac->fibs);
 	kfree(aac->fsa_dev);
  out_free_host:
@@ -1008,7 +1000,7 @@ static void __devexit aac_remove_one(struct pci_dev *pdev)
 	kfree(aac->queues);
 
 	free_irq(pdev->irq, aac);
-	iounmap(aac->regs.sa);
+	aac_adapter_ioremap(aac, 0);
 	
 	kfree(aac->fibs);
 	kfree(aac->fsa_dev);
diff --git a/drivers/scsi/aacraid/rkt.c b/drivers/scsi/aacraid/rkt.c
index f850c3a7cce97..643f23b5ded88 100644
--- a/drivers/scsi/aacraid/rkt.c
+++ b/drivers/scsi/aacraid/rkt.c
@@ -28,389 +28,27 @@
  *
  */
 
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/completion.h>
-#include <linux/time.h>
-#include <linux/interrupt.h>
-#include <asm/semaphore.h>
 
 #include <scsi/scsi_host.h>
 
 #include "aacraid.h"
 
-static irqreturn_t aac_rkt_intr(int irq, void *dev_id, struct pt_regs *regs)
-{
-	struct aac_dev *dev = dev_id;
-
-	if (dev->new_comm_interface) {
-		u32 Index = rkt_readl(dev, MUnit.OutboundQueue);
-		if (Index == 0xFFFFFFFFL)
-			Index = rkt_readl(dev, MUnit.OutboundQueue);
-		if (Index != 0xFFFFFFFFL) {
-			do {
-				if (aac_intr_normal(dev, Index)) {
-					rkt_writel(dev, MUnit.OutboundQueue, Index);
-					rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespReady);
-				}
-				Index = rkt_readl(dev, MUnit.OutboundQueue);
-			} while (Index != 0xFFFFFFFFL);
-			return IRQ_HANDLED;
-		}
-	} else {
-		unsigned long bellbits;
-		u8 intstat;
-		intstat = rkt_readb(dev, MUnit.OISR);
-		/*
-		 *	Read mask and invert because drawbridge is reversed.
-		 *	This allows us to only service interrupts that have 
-		 *	been enabled.
-		 *	Check to see if this is our interrupt.  If it isn't just return
-		 */
-		if (intstat & ~(dev->OIMR))
-		{
-			bellbits = rkt_readl(dev, OutboundDoorbellReg);
-			if (bellbits & DoorBellPrintfReady) {
-				aac_printf(dev, rkt_readl (dev, IndexRegs.Mailbox[5]));
-				rkt_writel(dev, MUnit.ODR,DoorBellPrintfReady);
-				rkt_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
-			}
-			else if (bellbits & DoorBellAdapterNormCmdReady) {
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
-				aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
-//				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
-			}
-			else if (bellbits & DoorBellAdapterNormRespReady) {
-				rkt_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
-				aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
-			}
-			else if (bellbits & DoorBellAdapterNormCmdNotFull) {
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
-			}
-			else if (bellbits & DoorBellAdapterNormRespNotFull) {
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
-			}
-			return IRQ_HANDLED;
-		}
-	}
-	return IRQ_NONE;
-}
-
-/**
- *	aac_rkt_disable_interrupt	-	Disable interrupts
- *	@dev: Adapter
- */
-
-static void aac_rkt_disable_interrupt(struct aac_dev *dev)
-{
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
-}
-
-/**
- *	rkt_sync_cmd	-	send a command and wait
- *	@dev: Adapter
- *	@command: Command to execute
- *	@p1: first parameter
- *	@ret: adapter status
- *
- *	This routine will send a synchronous command to the adapter and wait 
- *	for its	completion.
- */
-
-static int rkt_sync_cmd(struct aac_dev *dev, u32 command,
-	u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6,
-	u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4)
-{
-	unsigned long start;
-	int ok;
-	/*
-	 *	Write the command into Mailbox 0
-	 */
-	rkt_writel(dev, InboundMailbox0, command);
-	/*
-	 *	Write the parameters into Mailboxes 1 - 6
-	 */
-	rkt_writel(dev, InboundMailbox1, p1);
-	rkt_writel(dev, InboundMailbox2, p2);
-	rkt_writel(dev, InboundMailbox3, p3);
-	rkt_writel(dev, InboundMailbox4, p4);
-	/*
-	 *	Clear the synch command doorbell to start on a clean slate.
-	 */
-	rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
-	/*
-	 *	Disable doorbell interrupts
-	 */
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
-	/*
-	 *	Force the completion of the mask register write before issuing
-	 *	the interrupt.
-	 */
-	rkt_readb (dev, MUnit.OIMR);
-	/*
-	 *	Signal that there is a new synch command
-	 */
-	rkt_writel(dev, InboundDoorbellReg, INBOUNDDOORBELL_0);
-
-	ok = 0;
-	start = jiffies;
-
-	/*
-	 *	Wait up to 30 seconds
-	 */
-	while (time_before(jiffies, start+30*HZ)) 
-	{
-		udelay(5);	/* Delay 5 microseconds to let Mon960 get info. */
-		/*
-		 *	Mon960 will set doorbell0 bit when it has completed the command.
-		 */
-		if (rkt_readl(dev, OutboundDoorbellReg) & OUTBOUNDDOORBELL_0) {
-			/*
-			 *	Clear the doorbell.
-			 */
-			rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
-			ok = 1;
-			break;
-		}
-		/*
-		 *	Yield the processor in case we are slow 
-		 */
-		msleep(1);
-	}
-	if (ok != 1) {
-		/*
-		 *	Restore interrupt mask even though we timed out
-		 */
-		if (dev->new_comm_interface)
-			rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
-		else
-			rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
-		return -ETIMEDOUT;
-	}
-	/*
-	 *	Pull the synch status from Mailbox 0.
-	 */
-	if (status)
-		*status = rkt_readl(dev, IndexRegs.Mailbox[0]);
-	if (r1)
-		*r1 = rkt_readl(dev, IndexRegs.Mailbox[1]);
-	if (r2)
-		*r2 = rkt_readl(dev, IndexRegs.Mailbox[2]);
-	if (r3)
-		*r3 = rkt_readl(dev, IndexRegs.Mailbox[3]);
-	if (r4)
-		*r4 = rkt_readl(dev, IndexRegs.Mailbox[4]);
-	/*
-	 *	Clear the synch command doorbell.
-	 */
-	rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
-	/*
-	 *	Restore interrupt mask
-	 */
-	if (dev->new_comm_interface)
-		rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
-	else
-		rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
-	return 0;
-
-}
-
-/**
- *	aac_rkt_interrupt_adapter	-	interrupt adapter
- *	@dev: Adapter
- *
- *	Send an interrupt to the i960 and breakpoint it.
- */
-
-static void aac_rkt_interrupt_adapter(struct aac_dev *dev)
-{
-	rkt_sync_cmd(dev, BREAKPOINT_REQUEST, 0, 0, 0, 0, 0, 0,
-	  NULL, NULL, NULL, NULL, NULL);
-}
-
 /**
- *	aac_rkt_notify_adapter		-	send an event to the adapter
- *	@dev: Adapter
- *	@event: Event to send
+ *	aac_rkt_ioremap
+ *	@size: mapping resize request
  *
- *	Notify the i960 that something it probably cares about has
- *	happened.
  */
-
-static void aac_rkt_notify_adapter(struct aac_dev *dev, u32 event)
+static int aac_rkt_ioremap(struct aac_dev * dev, u32 size)
 {
-	switch (event) {
-
-	case AdapNormCmdQue:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_1);
-		break;
-	case HostNormRespNotFull:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_4);
-		break;
-	case AdapNormRespQue:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_2);
-		break;
-	case HostNormCmdNotFull:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
-		break;
-	case HostShutdown:
-//		rkt_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0,
-//		  NULL, NULL, NULL, NULL, NULL);
-		break;
-	case FastIo:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
-		break;
-	case AdapPrintfDone:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_5);
-		break;
-	default:
-		BUG();
-		break;
+	if (!size) {
+		iounmap(dev->regs.rkt);
+		return 0;
 	}
-}
-
-/**
- *	aac_rkt_start_adapter		-	activate adapter
- *	@dev:	Adapter
- *
- *	Start up processing on an i960 based AAC adapter
- */
-
-static void aac_rkt_start_adapter(struct aac_dev *dev)
-{
-	struct aac_init *init;
-
-	init = dev->init;
-	init->HostElapsedSeconds = cpu_to_le32(get_seconds());
-	// We can only use a 32 bit address here
-	rkt_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
-	  0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
-}
-
-/**
- *	aac_rkt_check_health
- *	@dev: device to check if healthy
- *
- *	Will attempt to determine if the specified adapter is alive and
- *	capable of handling requests, returning 0 if alive.
- */
-static int aac_rkt_check_health(struct aac_dev *dev)
-{
-	u32 status = rkt_readl(dev, MUnit.OMRx[0]);
-
-	/*
-	 *	Check to see if the board failed any self tests.
-	 */
-	if (status & SELF_TEST_FAILED)
+	dev->base = dev->regs.rkt = ioremap(dev->scsi_host_ptr->base, size);
+	if (dev->base == NULL)
 		return -1;
-	/*
-	 *	Check to see if the board panic'd.
-	 */
-	if (status & KERNEL_PANIC) {
-		char * buffer;
-		struct POSTSTATUS {
-			__le32 Post_Command;
-			__le32 Post_Address;
-		} * post;
-		dma_addr_t paddr, baddr;
-		int ret;
-
-		if ((status & 0xFF000000L) == 0xBC000000L)
-			return (status >> 16) & 0xFF;
-		buffer = pci_alloc_consistent(dev->pdev, 512, &baddr);
-		ret = -2;
-		if (buffer == NULL)
-			return ret;
-		post = pci_alloc_consistent(dev->pdev,
-		  sizeof(struct POSTSTATUS), &paddr);
-		if (post == NULL) {
-			pci_free_consistent(dev->pdev, 512, buffer, baddr);
-			return ret;
-		}
-                memset(buffer, 0, 512);
-		post->Post_Command = cpu_to_le32(COMMAND_POST_RESULTS);
-                post->Post_Address = cpu_to_le32(baddr);
-                rkt_writel(dev, MUnit.IMRx[0], paddr);
-                rkt_sync_cmd(dev, COMMAND_POST_RESULTS, baddr, 0, 0, 0, 0, 0,
-		  NULL, NULL, NULL, NULL, NULL);
-		pci_free_consistent(dev->pdev, sizeof(struct POSTSTATUS),
-		  post, paddr);
-                if ((buffer[0] == '0') && ((buffer[1] == 'x') || (buffer[1] == 'X'))) {
-                        ret = (buffer[2] <= '9') ? (buffer[2] - '0') : (buffer[2] - 'A' + 10);
-                        ret <<= 4;
-                        ret += (buffer[3] <= '9') ? (buffer[3] - '0') : (buffer[3] - 'A' + 10);
-                }
-		pci_free_consistent(dev->pdev, 512, buffer, baddr);
-                return ret;
-        }
-	/*
-	 *	Wait for the adapter to be up and running.
-	 */
-	if (!(status & KERNEL_UP_AND_RUNNING))
-		return -3;
-	/*
-	 *	Everything is OK
-	 */
-	return 0;
-}
-
-/**
- *	aac_rkt_send
- *	@fib: fib to issue
- *
- *	Will send a fib, returning 0 if successful.
- */
-static int aac_rkt_send(struct fib * fib)
-{
-	u64 addr = fib->hw_fib_pa;
-	struct aac_dev *dev = fib->dev;
-	volatile void __iomem *device = dev->regs.rkt;
-	u32 Index;
-
-	dprintk((KERN_DEBUG "%p->aac_rkt_send(%p->%llx)\n", dev, fib, addr));
-	Index = rkt_readl(dev, MUnit.InboundQueue);
-	if (Index == 0xFFFFFFFFL)
-		Index = rkt_readl(dev, MUnit.InboundQueue);
-	dprintk((KERN_DEBUG "Index = 0x%x\n", Index));
-	if (Index == 0xFFFFFFFFL)
-		return Index;
-	device += Index;
-	dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff),
-	  (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size)));
-	writel((u32)(addr & 0xffffffff), device);
-	device += sizeof(u32);
-	writel((u32)(addr >> 32), device);
-	device += sizeof(u32);
-	writel(le16_to_cpu(fib->hw_fib->header.Size), device);
-	rkt_writel(dev, MUnit.InboundQueue, Index);
-	dprintk((KERN_DEBUG "aac_rkt_send - return 0\n"));
-	return 0;
-}
-
-static int aac_rkt_restart_adapter(struct aac_dev *dev)
-{
-	u32 var;
-
-	printk(KERN_ERR "%s%d: adapter kernel panic'd.\n",
-			dev->name, dev->id);
-
-	if (aac_rkt_check_health(dev) <= 0)
-		return 1;
-	if (rkt_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0,
-			&var, NULL, NULL, NULL, NULL))
-		return 1;
-	if (var != 0x00000001)
-		 return 1;
-	if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)
-		return 1;
+	dev->IndexRegs = &dev->regs.rkt->IndexRegs;
 	return 0;
 }
 
@@ -425,74 +63,18 @@ static int aac_rkt_restart_adapter(struct aac_dev *dev)
 
 int aac_rkt_init(struct aac_dev *dev)
 {
-	unsigned long start;
-	unsigned long status;
-	int instance;
-	const char * name;
-
-	instance = dev->id;
-	name     = dev->name;
+	int retval;
+	extern int _aac_rx_init(struct aac_dev *dev);
+	extern void aac_rx_start_adapter(struct aac_dev *dev);
 
-	/*
-	 *	Check to see if the board panic'd while booting.
-	 */
-	if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)
-		if (aac_rkt_restart_adapter(dev))
-			goto error_iounmap;
-	/*
-	 *	Check to see if the board failed any self tests.
-	 */
-	if (rkt_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) {
-		printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance);
-		goto error_iounmap;
-	}
-	/*
-	 *	Check to see if the monitor panic'd while booting.
-	 */
-	if (rkt_readl(dev, MUnit.OMRx[0]) & MONITOR_PANIC) {
-		printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance);
-		goto error_iounmap;
-	}
-	start = jiffies;
-	/*
-	 *	Wait for the adapter to be up and running. Wait up to 3 minutes
-	 */
-	while (!(rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING))
-	{
-		if(time_after(jiffies, start+startup_timeout*HZ))
-		{
-			status = rkt_readl(dev, MUnit.OMRx[0]);
-			printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", 
-					dev->name, instance, status);
-			goto error_iounmap;
-		}
-		msleep(1);
-	}
-	if (request_irq(dev->scsi_host_ptr->irq, aac_rkt_intr, IRQF_SHARED|IRQF_DISABLED, "aacraid", (void *)dev)<0)
-	{
-		printk(KERN_ERR "%s%d: Interrupt unavailable.\n", name, instance);
-		goto error_iounmap;
-	}
 	/*
 	 *	Fill in the function dispatch table.
 	 */
-	dev->a_ops.adapter_interrupt = aac_rkt_interrupt_adapter;
-	dev->a_ops.adapter_disable_int = aac_rkt_disable_interrupt;
-	dev->a_ops.adapter_notify = aac_rkt_notify_adapter;
-	dev->a_ops.adapter_sync_cmd = rkt_sync_cmd;
-	dev->a_ops.adapter_check_health = aac_rkt_check_health;
-	dev->a_ops.adapter_send = aac_rkt_send;
-
-	/*
-	 *	First clear out all interrupts.  Then enable the one's that we
-	 *	can handle.
-	 */
-	rkt_writeb(dev, MUnit.OIMR, 0xff);
-	rkt_writel(dev, MUnit.ODR, 0xffffffff);
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
+	dev->a_ops.adapter_ioremap = aac_rkt_ioremap;
 
-	if (aac_init_adapter(dev) == NULL)
-		goto error_irq;
+	retval = _aac_rx_init(dev);
+	if (retval)
+		return retval;
 	if (dev->new_comm_interface) {
 		/*
 		 * FIB Setup has already been done, but we can minimize the
@@ -509,20 +91,11 @@ int aac_rkt_init(struct aac_dev *dev)
 			dev->init->MaxIoCommands = cpu_to_le32(246);
 			dev->scsi_host_ptr->can_queue = 246 - AAC_NUM_MGT_FIB;
 		}
-		rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
 	}
 	/*
 	 *	Tell the adapter that all is configured, and it can start
 	 *	accepting requests
 	 */
-	aac_rkt_start_adapter(dev);
+	aac_rx_start_adapter(dev);
 	return 0;
-
-error_irq:
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
-	free_irq(dev->scsi_host_ptr->irq, (void *)dev);
-
-error_iounmap:
-
-	return -1;
 }
diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index c715c4b2442db..a1d214d770eb6 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c
@@ -79,7 +79,7 @@ static irqreturn_t aac_rx_intr(int irq, void *dev_id, struct pt_regs *regs)
 		{
 			bellbits = rx_readl(dev, OutboundDoorbellReg);
 			if (bellbits & DoorBellPrintfReady) {
-				aac_printf(dev, rx_readl (dev, IndexRegs.Mailbox[5]));
+				aac_printf(dev, readl (&dev->IndexRegs->Mailbox[5]));
 				rx_writel(dev, MUnit.ODR,DoorBellPrintfReady);
 				rx_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
 			}
@@ -134,14 +134,14 @@ static int rx_sync_cmd(struct aac_dev *dev, u32 command,
 	/*
 	 *	Write the command into Mailbox 0
 	 */
-	rx_writel(dev, InboundMailbox0, command);
+	writel(command, &dev->IndexRegs->Mailbox[0]);
 	/*
 	 *	Write the parameters into Mailboxes 1 - 6
 	 */
-	rx_writel(dev, InboundMailbox1, p1);
-	rx_writel(dev, InboundMailbox2, p2);
-	rx_writel(dev, InboundMailbox3, p3);
-	rx_writel(dev, InboundMailbox4, p4);
+	writel(p1, &dev->IndexRegs->Mailbox[1]);
+	writel(p2, &dev->IndexRegs->Mailbox[2]);
+	writel(p3, &dev->IndexRegs->Mailbox[3]);
+	writel(p4, &dev->IndexRegs->Mailbox[4]);
 	/*
 	 *	Clear the synch command doorbell to start on a clean slate.
 	 */
@@ -199,15 +199,15 @@ static int rx_sync_cmd(struct aac_dev *dev, u32 command,
 	 *	Pull the synch status from Mailbox 0.
 	 */
 	if (status)
-		*status = rx_readl(dev, IndexRegs.Mailbox[0]);
+		*status = readl(&dev->IndexRegs->Mailbox[0]);
 	if (r1)
-		*r1 = rx_readl(dev, IndexRegs.Mailbox[1]);
+		*r1 = readl(&dev->IndexRegs->Mailbox[1]);
 	if (r2)
-		*r2 = rx_readl(dev, IndexRegs.Mailbox[2]);
+		*r2 = readl(&dev->IndexRegs->Mailbox[2]);
 	if (r3)
-		*r3 = rx_readl(dev, IndexRegs.Mailbox[3]);
+		*r3 = readl(&dev->IndexRegs->Mailbox[3]);
 	if (r4)
-		*r4 = rx_readl(dev, IndexRegs.Mailbox[4]);
+		*r4 = readl(&dev->IndexRegs->Mailbox[4]);
 	/*
 	 *	Clear the synch command doorbell.
 	 */
@@ -261,8 +261,6 @@ static void aac_rx_notify_adapter(struct aac_dev *dev, u32 event)
 		rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
 		break;
 	case HostShutdown:
-//		rx_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0,
-//		  NULL, NULL, NULL, NULL, NULL);
 		break;
 	case FastIo:
 		rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
@@ -283,7 +281,7 @@ static void aac_rx_notify_adapter(struct aac_dev *dev, u32 event)
  *	Start up processing on an i960 based AAC adapter
  */
 
-static void aac_rx_start_adapter(struct aac_dev *dev)
+void aac_rx_start_adapter(struct aac_dev *dev)
 {
 	struct aac_init *init;
 
@@ -381,7 +379,7 @@ static int aac_rx_send(struct fib * fib)
 	dprintk((KERN_DEBUG "Index = 0x%x\n", Index));
 	if (Index == 0xFFFFFFFFL)
 		return Index;
-	device += Index;
+	device = dev->base + Index;
 	dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff),
 	  (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size)));
 	writel((u32)(addr & 0xffffffff), device);
@@ -394,6 +392,24 @@ static int aac_rx_send(struct fib * fib)
 	return 0;
 }
 
+/**
+ *	aac_rx_ioremap
+ *	@size: mapping resize request
+ *
+ */
+static int aac_rx_ioremap(struct aac_dev * dev, u32 size)
+{
+	if (!size) {
+		iounmap(dev->regs.rx);
+		return 0;
+	}
+	dev->base = dev->regs.rx = ioremap(dev->scsi_host_ptr->base, size);
+	if (dev->base == NULL)
+		return -1;
+	dev->IndexRegs = &dev->regs.rx->IndexRegs;
+	return 0;
+}
+
 static int aac_rx_restart_adapter(struct aac_dev *dev)
 {
 	u32 var;
@@ -422,7 +438,7 @@ static int aac_rx_restart_adapter(struct aac_dev *dev)
  *	to the comm region.
  */
 
-int aac_rx_init(struct aac_dev *dev)
+int _aac_rx_init(struct aac_dev *dev)
 {
 	unsigned long start;
 	unsigned long status;
@@ -432,23 +448,30 @@ int aac_rx_init(struct aac_dev *dev)
 	instance = dev->id;
 	name     = dev->name;
 
+	if (aac_adapter_ioremap(dev, dev->base_size)) {
+		printk(KERN_WARNING "%s: unable to map adapter.\n", name);
+		goto error_iounmap;
+	}
+
 	/*
 	 *	Check to see if the board panic'd while booting.
 	 */
-	if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)
+	status = rx_readl(dev, MUnit.OMRx[0]);
+	if (status & KERNEL_PANIC)
 		if (aac_rx_restart_adapter(dev))
 			goto error_iounmap;
 	/*
 	 *	Check to see if the board failed any self tests.
 	 */
-	if (rx_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) {
+	status = rx_readl(dev, MUnit.OMRx[0]);
+	if (status & SELF_TEST_FAILED) {
 		printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance);
 		goto error_iounmap;
 	}
 	/*
 	 *	Check to see if the monitor panic'd while booting.
 	 */
-	if (rx_readl(dev, MUnit.OMRx[0]) & MONITOR_PANIC) {
+	if (status & MONITOR_PANIC) {
 		printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance);
 		goto error_iounmap;
 	}
@@ -456,12 +479,10 @@ int aac_rx_init(struct aac_dev *dev)
 	/*
 	 *	Wait for the adapter to be up and running. Wait up to 3 minutes
 	 */
-	while ((!(rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_UP_AND_RUNNING))
-		|| (!(rx_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING)))
+	while (!((status = rx_readl(dev, MUnit.OMRx[0])) & KERNEL_UP_AND_RUNNING))
 	{
 		if(time_after(jiffies, start+startup_timeout*HZ))
 		{
-			status = rx_readl(dev, IndexRegs.Mailbox[7]);
 			printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", 
 					dev->name, instance, status);
 			goto error_iounmap;
@@ -496,11 +517,6 @@ int aac_rx_init(struct aac_dev *dev)
 	if (dev->new_comm_interface)
 		rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
 
-	/*
-	 *	Tell the adapter that all is configured, and it can start
-	 *	accepting requests
-	 */
-	aac_rx_start_adapter(dev);
 	return 0;
 
 error_irq:
@@ -511,3 +527,23 @@ int aac_rx_init(struct aac_dev *dev)
 
 	return -1;
 }
+
+int aac_rx_init(struct aac_dev *dev)
+{
+	int retval;
+
+	/*
+	 *	Fill in the function dispatch table.
+	 */
+	dev->a_ops.adapter_ioremap = aac_rx_ioremap;
+
+	retval = _aac_rx_init(dev);
+	if (!retval) {
+		/*
+		 *	Tell the adapter that all is configured, and it can
+		 * start accepting requests
+		 */
+		aac_rx_start_adapter(dev);
+	}
+	return retval;
+}
diff --git a/drivers/scsi/aacraid/sa.c b/drivers/scsi/aacraid/sa.c
index cd586cc8f9bef..f906ead239dd4 100644
--- a/drivers/scsi/aacraid/sa.c
+++ b/drivers/scsi/aacraid/sa.c
@@ -280,6 +280,21 @@ static int aac_sa_check_health(struct aac_dev *dev)
 	return 0;
 }
 
+/**
+ *	aac_sa_ioremap
+ *	@size: mapping resize request
+ *
+ */
+static int aac_sa_ioremap(struct aac_dev * dev, u32 size)
+{
+	if (!size) {
+		iounmap(dev->regs.sa);
+		return 0;
+	}
+	dev->base = dev->regs.sa = ioremap(dev->scsi_host_ptr->base, size);
+	return (dev->base == NULL) ? -1 : 0;
+}
+
 /**
  *	aac_sa_init	-	initialize an ARM based AAC card
  *	@dev: device to configure
@@ -299,6 +314,11 @@ int aac_sa_init(struct aac_dev *dev)
 	instance = dev->id;
 	name     = dev->name;
 
+	if (aac_sa_ioremap(dev, dev->base_size)) {
+		printk(KERN_WARNING "%s: unable to map adapter.\n", name);
+		goto error_iounmap;
+	}
+
 	/*
 	 *	Check to see if the board failed any self tests.
 	 */
@@ -341,6 +361,7 @@ int aac_sa_init(struct aac_dev *dev)
 	dev->a_ops.adapter_notify = aac_sa_notify_adapter;
 	dev->a_ops.adapter_sync_cmd = sa_sync_cmd;
 	dev->a_ops.adapter_check_health = aac_sa_check_health;
+	dev->a_ops.adapter_ioremap = aac_sa_ioremap;
 
 	/*
 	 *	First clear out all interrupts.  Then enable the one's that 
-- 
GitLab


From ac5826ca91243272f97b3f01e80d71e3618f105f Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Tue, 19 Sep 2006 09:00:18 -0700
Subject: [PATCH 1056/1063] [SCSI] aacraid: remove scsi_remove_device

Received from Mark Salyzyn:

Until the system is stabilized, I am suggesting the enclosed
modification to prevent the driver from tickling the panic. Once sysfs
and friends are stabilized, the patch may be backed out. We have yet to
evaluate if we really want to relinquish existing Scsi Devices in any
case, holding on to them as configuration of arrays comes and goes makes
some sense as well. As a result, we have opted to pull the lines rather
than comment them in legacy.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/aacraid/commsup.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 907161d6e92cc..8734a045558e1 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1033,13 +1033,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
 	if (device) {
 		switch (device_config_needed) {
 		case DELETE:
-			scsi_remove_device(device);
-			break;
 		case CHANGE:
-			if (!dev->fsa_dev[container].valid) {
-				scsi_remove_device(device);
-				break;
-			}
 			scsi_rescan_device(&device->sdev_gendev);
 
 		default:
-- 
GitLab


From 2538363eb51a70948ed36bf3971a728268d10766 Mon Sep 17 00:00:00 2001
From: Mark Haverkamp <markh@osdl.org>
Date: Tue, 19 Sep 2006 09:00:39 -0700
Subject: [PATCH 1057/1063] [SCSI] aacraid: README update

Received from Mark Salyzyn:

This patch to the driver's documentation adds a few new product entries,
sorts the entries on OEM lines first for easy searching, followed by
product id order to make it easier to compare against the open source
pci list. The driver has 'family match' so is somewhat future proof, no
code changes are required to recognize the new products.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 Documentation/scsi/aacraid.txt | 53 +++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
index be55670851a43..ee03678c80292 100644
--- a/Documentation/scsi/aacraid.txt
+++ b/Documentation/scsi/aacraid.txt
@@ -11,38 +11,43 @@ the original).
 Supported Cards/Chipsets
 -------------------------
 	PCI ID (pci.ids)	OEM	Product
-	9005:0285:9005:028a	Adaptec	2020ZCR (Skyhawk)
-	9005:0285:9005:028e	Adaptec	2020SA (Skyhawk)
-	9005:0285:9005:028b	Adaptec 2025ZCR (Terminator)
-	9005:0285:9005:028f	Adaptec 2025SA (Terminator)
-	9005:0285:9005:0286	Adaptec	2120S (Crusader)
-	9005:0286:9005:028d	Adaptec	2130S (Lancer)
+	9005:0283:9005:0283	Adaptec	Catapult (3210S with arc firmware)
+	9005:0284:9005:0284	Adaptec	Tomcat (3410S with arc firmware)
 	9005:0285:9005:0285	Adaptec	2200S (Vulcan)
+	9005:0285:9005:0286	Adaptec	2120S (Crusader)
 	9005:0285:9005:0287	Adaptec	2200S (Vulcan-2m)
+	9005:0285:9005:0288	Adaptec	3230S (Harrier)
+	9005:0285:9005:0289	Adaptec	3240S (Tornado)
+	9005:0285:9005:028a	Adaptec	2020ZCR (Skyhawk)
+	9005:0285:9005:028b	Adaptec 2025ZCR (Terminator)
 	9005:0286:9005:028c	Adaptec	2230S (Lancer)
 	9005:0286:9005:028c	Adaptec	2230SLP (Lancer)
-	9005:0285:9005:0296	Adaptec	2240S (SabreExpress)
+	9005:0286:9005:028d	Adaptec	2130S (Lancer)
+	9005:0285:9005:028e	Adaptec	2020SA (Skyhawk)
+	9005:0285:9005:028f	Adaptec 2025SA (Terminator)
 	9005:0285:9005:0290	Adaptec	2410SA (Jaguar)
-	9005:0285:9005:0293	Adaptec 21610SA (Corsair-16)
 	9005:0285:103c:3227	Adaptec 2610SA (Bearcat HP release)
+	9005:0285:9005:0293	Adaptec 21610SA (Corsair-16)
+	9005:0285:9005:0296	Adaptec	2240S (SabreExpress)
 	9005:0285:9005:0292	Adaptec	2810SA (Corsair-8)
 	9005:0285:9005:0294	Adaptec	Prowler
-	9005:0286:9005:029d	Adaptec	2420SA (Intruder HP release)
-	9005:0286:9005:029c	Adaptec	2620SA (Intruder)
-	9005:0286:9005:029b	Adaptec	2820SA (Intruder)
-	9005:0286:9005:02a7	Adaptec	2830SA (Skyray)
-	9005:0286:9005:02a8	Adaptec	2430SA (Skyray)
-	9005:0285:9005:0288	Adaptec	3230S (Harrier)
-	9005:0285:9005:0289	Adaptec	3240S (Tornado)
-	9005:0285:9005:0298	Adaptec	4000SAS (BlackBird)
 	9005:0285:9005:0297	Adaptec	4005SAS (AvonPark)
+	9005:0285:9005:0298	Adaptec	4000SAS (BlackBird)
 	9005:0285:9005:0299	Adaptec	4800SAS (Marauder-X)
 	9005:0285:9005:029a	Adaptec	4805SAS (Marauder-E)
+	9005:0286:9005:029b	Adaptec	2820SA (Intruder)
+	9005:0286:9005:029c	Adaptec	2620SA (Intruder)
+	9005:0286:9005:029d	Adaptec	2420SA (Intruder HP release)
 	9005:0286:9005:02a2	Adaptec	3800SAS (Hurricane44)
+	9005:0286:9005:02a7	Adaptec	3805SAS (Hurricane80)
+	9005:0286:9005:02a8	Adaptec	3400SAS (Hurricane40)
+	9005:0286:9005:02ac	Adaptec	1800SAS (Typhoon44)
+	9005:0286:9005:02b3	Adaptec	2400SAS (Hurricane40lm)
+	9005:0285:9005:02b5	Adaptec ASR5800 (Voodoo44)
+	9005:0285:9005:02b6	Adaptec ASR5805 (Voodoo80)
+	9005:0285:9005:02b7	Adaptec ASR5808 (Voodoo08)
 	1011:0046:9005:0364	Adaptec	5400S (Mustang)
 	1011:0046:9005:0365	Adaptec	5400S (Mustang)
-	9005:0283:9005:0283	Adaptec	Catapult (3210S with arc firmware)
-	9005:0284:9005:0284	Adaptec	Tomcat (3410S with arc firmware)
 	9005:0287:9005:0800	Adaptec	Themisto (Jupiter)
 	9005:0200:9005:0200	Adaptec	Themisto (Jupiter)
 	9005:0286:9005:0800	Adaptec	Callisto (Jupiter)
@@ -64,18 +69,20 @@ Supported Cards/Chipsets
 	9005:0285:9005:0290	IBM	ServeRAID 7t (Jaguar)
 	9005:0285:1014:02F2	IBM	ServeRAID 8i (AvonPark)
 	9005:0285:1014:0312	IBM	ServeRAID 8i (AvonParkLite)
-	9005:0286:1014:9580	IBM	ServeRAID 8k/8k-l8 (Aurora)
 	9005:0286:1014:9540	IBM	ServeRAID 8k/8k-l4 (AuroraLite)
-	9005:0286:9005:029f	ICP	ICP9014R0 (Lancer)
+	9005:0286:1014:9580	IBM	ServeRAID 8k/8k-l8 (Aurora)
+	9005:0286:1014:034d	IBM	ServeRAID 8s (Hurricane)
 	9005:0286:9005:029e	ICP	ICP9024R0 (Lancer)
+	9005:0286:9005:029f	ICP	ICP9014R0 (Lancer)
 	9005:0286:9005:02a0	ICP	ICP9047MA (Lancer)
 	9005:0286:9005:02a1	ICP	ICP9087MA (Lancer)
+	9005:0286:9005:02a3	ICP	ICP5445AU (Hurricane44)
 	9005:0286:9005:02a4	ICP	ICP9085LI (Marauder-X)
 	9005:0286:9005:02a5	ICP	ICP5085BR (Marauder-E)
-	9005:0286:9005:02a3	ICP	ICP5445AU (Hurricane44)
 	9005:0286:9005:02a6	ICP	ICP9067MA (Intruder-6)
-	9005:0286:9005:02a9	ICP	ICP5087AU (Skyray)
-	9005:0286:9005:02aa	ICP	ICP5047AU (Skyray)
+	9005:0286:9005:02a9	ICP	ICP5085AU (Hurricane80)
+	9005:0286:9005:02aa	ICP	ICP5045AU (Hurricane40)
+	9005:0286:9005:02b4	ICP	ICP5045AL (Hurricane40lm)
 
 People
 -------------------------
-- 
GitLab


From 1a68d41a334a406d4bd35999f0be4d47f193e477 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 18 Sep 2006 19:18:34 +0100
Subject: [PATCH 1058/1063] [SCSI] eata_pio cleanup and PCI fix

This started as a PCI reference fixup but to do that I need to build it,
to build it I need to fix it and it's full of 32bitisms and uglies.

It has been resurrected, I'm not sure if this is a thank you for the
work on the license stuff or punishment for some unknown misdeed however
8). I've also fixed a memory scribble in the init code.

One oddity - the changes from HZ * to constants are deliberate. Whoever
originally wrote the code (or cleaned it up) used HZ for a cycle timing
loop even though it is not HZ related. I've put it back to the counts used
in the old days when the driver was most used.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/eata_generic.h |   1 +
 drivers/scsi/eata_pio.c     | 127 +++++++++++++++++++-----------------
 2 files changed, 69 insertions(+), 59 deletions(-)

diff --git a/drivers/scsi/eata_generic.h b/drivers/scsi/eata_generic.h
index 34bce2c9e92e5..635c14861f86a 100644
--- a/drivers/scsi/eata_generic.h
+++ b/drivers/scsi/eata_generic.h
@@ -364,6 +364,7 @@ typedef struct hstd {
     __u8   moresupport;		 /* HBA supports MORE flag     */
     struct Scsi_Host *next;	    
     struct Scsi_Host *prev;
+    struct pci_dev *pdev;	/* PCI device or NULL for non PCI */
     struct eata_sp sp;		 /* status packet	       */ 
     struct eata_ccb ccb[0];	 /* ccb array begins here      */
 }hostdata;
diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c
index 771b01984cbc9..d312633db92be 100644
--- a/drivers/scsi/eata_pio.c
+++ b/drivers/scsi/eata_pio.c
@@ -71,11 +71,11 @@
 #include "eata_pio.h"
 
 
-static uint ISAbases[MAXISA] =	{
+static unsigned int ISAbases[MAXISA] =	{
 	 0x1F0, 0x170, 0x330, 0x230
 };
 
-static uint ISAirqs[MAXISA] = {
+static unsigned int ISAirqs[MAXISA] = {
 	14, 12, 15, 11
 };
 
@@ -84,7 +84,7 @@ static unsigned char EISAbases[] = {
 	1, 1, 1, 1, 1, 1, 1, 1 
 };
 
-static uint registered_HBAs;
+static unsigned int registered_HBAs;
 static struct Scsi_Host *last_HBA;
 static struct Scsi_Host *first_HBA;
 static unsigned char reg_IRQ[16];
@@ -165,6 +165,7 @@ static int eata_pio_proc_info(struct Scsi_Host *shost, char *buffer, char **star
 
 static int eata_pio_release(struct Scsi_Host *sh)
 {
+	hostdata *hd = SD(sh);
 	if (sh->irq && reg_IRQ[sh->irq] == 1)
 		free_irq(sh->irq, NULL);
 	else
@@ -173,10 +174,13 @@ static int eata_pio_release(struct Scsi_Host *sh)
 		if (sh->io_port && sh->n_io_port)
 			release_region(sh->io_port, sh->n_io_port);
 	}
+	/* At this point the PCI reference can go */
+	if (hd->pdev)
+		pci_dev_put(hd->pdev);
 	return 1;
 }
 
-static void IncStat(struct scsi_pointer *SCp, uint Increment)
+static void IncStat(struct scsi_pointer *SCp, unsigned int Increment)
 {
 	SCp->ptr += Increment;
 	if ((SCp->this_residual -= Increment) == 0) {
@@ -190,46 +194,49 @@ static void IncStat(struct scsi_pointer *SCp, uint Increment)
 	}
 }
 
-static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs);
+static irqreturn_t eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs);
 
 static irqreturn_t do_eata_pio_int_handler(int irq, void *dev_id,
 						struct pt_regs *regs)
 {
 	unsigned long flags;
 	struct Scsi_Host *dev = dev_id;
+	irqreturn_t ret;
 
 	spin_lock_irqsave(dev->host_lock, flags);
-	eata_pio_int_handler(irq, dev_id, regs);
+	ret = eata_pio_int_handler(irq, dev_id, regs);
 	spin_unlock_irqrestore(dev->host_lock, flags);
-	return IRQ_HANDLED;
+	return ret;
 }
 
-static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs)
 {
-	uint eata_stat = 0xfffff;
+	unsigned int eata_stat = 0xfffff;
 	struct scsi_cmnd *cmd;
 	hostdata *hd;
 	struct eata_ccb *cp;
-	uint base;
-	uint x, z;
+	unsigned long base;
+	unsigned int x, z;
 	struct Scsi_Host *sh;
 	unsigned short zwickel = 0;
 	unsigned char stat, odd;
+	irqreturn_t ret = IRQ_NONE;
 
 	for (x = 1, sh = first_HBA; x <= registered_HBAs; x++, sh = SD(sh)->prev) 
 	{
 		if (sh->irq != irq)
 			continue;
-		if (inb((uint) sh->base + HA_RSTATUS) & HA_SBUSY)
+		if (inb(sh->base + HA_RSTATUS) & HA_SBUSY)
 			continue;
 
 		int_counter++;
+		ret = IRQ_HANDLED;
 
 		hd = SD(sh);
 
 		cp = &hd->ccb[0];
 		cmd = cp->cmd;
-		base = (uint) cmd->device->host->base;
+		base = cmd->device->host->base;
 
 		do {
 			stat = inb(base + HA_RSTATUS);
@@ -304,7 +311,7 @@ static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs)
 		if (!(inb(base + HA_RSTATUS) & HA_SERROR)) {
 			cmd->result = (DID_OK << 16);
 			hd->devflags |= (1 << cp->cp_id);
-		} else if (hd->devflags & 1 << cp->cp_id)
+		} else if (hd->devflags & (1 << cp->cp_id))
 			cmd->result = (DID_OK << 16) + 0x02;
 		else
 			cmd->result = (DID_NO_CONNECT << 16);
@@ -313,7 +320,7 @@ static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs)
 			cp->status = FREE;
 			eata_stat = inb(base + HA_RSTATUS);
 			printk(KERN_CRIT "eata_pio: int_handler, freeing locked " "queueslot\n");
-			return;
+			return ret;
 		}
 #if DBG_INTR2
 		if (stat != 0x50)
@@ -325,12 +332,12 @@ static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs)
 		cmd->scsi_done(cmd);
 	}
 
-	return;
+	return ret;
 }
 
-static inline uint eata_pio_send_command(uint base, unsigned char command)
+static inline unsigned int eata_pio_send_command(unsigned long base, unsigned char command)
 {
-	uint loop = HZ / 2;
+	unsigned int loop = 50;
 
 	while (inb(base + HA_RSTATUS) & HA_SBUSY)
 		if (--loop == 0)
@@ -349,8 +356,8 @@ static inline uint eata_pio_send_command(uint base, unsigned char command)
 static int eata_pio_queue(struct scsi_cmnd *cmd,
 		void (*done)(struct scsi_cmnd *))
 {
-	uint x, y;
-	uint base;
+	unsigned int x, y;
+	unsigned long base;
 
 	hostdata *hd;
 	struct Scsi_Host *sh;
@@ -360,7 +367,7 @@ static int eata_pio_queue(struct scsi_cmnd *cmd,
 
 	hd = HD(cmd);
 	sh = cmd->device->host;
-	base = (uint) sh->base;
+	base = sh->base;
 
 	/* use only slot 0, as 2001 can handle only one cmd at a time */
 
@@ -395,9 +402,9 @@ static int eata_pio_queue(struct scsi_cmnd *cmd,
 		cp->DataIn = 0;	/* Input mode  */
 
 	cp->Interpret = (cmd->device->id == hd->hostid);
-	cp->cp_datalen = htonl((unsigned long) cmd->request_bufflen);
+	cp->cp_datalen = cpu_to_be32(cmd->request_bufflen);
 	cp->Auto_Req_Sen = 0;
-	cp->cp_reqDMA = htonl(0);
+	cp->cp_reqDMA = 0;
 	cp->reqlen = 0;
 
 	cp->cp_id = cmd->device->id;
@@ -406,7 +413,7 @@ static int eata_pio_queue(struct scsi_cmnd *cmd,
 	cp->cp_identify = 1;
 	memcpy(cp->cp_cdb, cmd->cmnd, COMMAND_SIZE(*cmd->cmnd));
 
-	cp->cp_statDMA = htonl(0);
+	cp->cp_statDMA = 0;
 
 	cp->cp_viraddr = cp;
 	cp->cmd = cmd;
@@ -445,14 +452,14 @@ static int eata_pio_queue(struct scsi_cmnd *cmd,
 
 	DBG(DBG_QUEUE, scmd_printk(KERN_DEBUG, cmd,
 		"Queued base %#.4lx pid: %ld "
-		"slot %d irq %d\n", (long) sh->base, cmd->pid, y, sh->irq));
+		"slot %d irq %d\n", sh->base, cmd->pid, y, sh->irq));
 
 	return (0);
 }
 
 static int eata_pio_abort(struct scsi_cmnd *cmd)
 {
-	uint loop = HZ;
+	unsigned int loop = 100;
 
 	DBG(DBG_ABNORM, scmd_printk(KERN_WARNING, cmd,
 		"eata_pio_abort called pid: %ld\n",
@@ -485,7 +492,7 @@ static int eata_pio_abort(struct scsi_cmnd *cmd)
 
 static int eata_pio_host_reset(struct scsi_cmnd *cmd)
 {
-	uint x, limit = 0;
+	unsigned int x, limit = 0;
 	unsigned char success = 0;
 	struct scsi_cmnd *sp;
 	struct Scsi_Host *host = cmd->device->host;
@@ -518,7 +525,7 @@ static int eata_pio_host_reset(struct scsi_cmnd *cmd)
 	}
 
 	/* hard reset the HBA  */
-	outb(EATA_CMD_RESET, (uint) cmd->device->host->base + HA_WCOMMAND);
+	outb(EATA_CMD_RESET, cmd->device->host->base + HA_WCOMMAND);
 
 	DBG(DBG_ABNORM, printk(KERN_WARNING "eata_pio_reset: board reset done.\n"));
 	HD(cmd)->state = RESET;
@@ -558,7 +565,7 @@ static int eata_pio_host_reset(struct scsi_cmnd *cmd)
 	}
 }
 
-static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned long cplen, unsigned short cppadlen)
+static char *get_pio_board_data(unsigned long base, unsigned int irq, unsigned int id, unsigned long cplen, unsigned short cppadlen)
 {
 	struct eata_ccb cp;
 	static char buff[256];
@@ -570,8 +577,8 @@ static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned
 	cp.DataIn = 1;
 	cp.Interpret = 1;	/* Interpret command */
 
-	cp.cp_datalen = htonl(254);
-	cp.cp_dataDMA = htonl(0);
+	cp.cp_datalen = cpu_to_be32(254);
+	cp.cp_dataDMA = cpu_to_be32(0);
 
 	cp.cp_id = id;
 	cp.cp_lun = 0;
@@ -583,7 +590,7 @@ static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned
 	cp.cp_cdb[4] = 254;
 	cp.cp_cdb[5] = 0;
 
-	if (eata_pio_send_command((uint) base, EATA_CMD_PIO_SEND_CP))
+	if (eata_pio_send_command(base, EATA_CMD_PIO_SEND_CP))
 		return (NULL);
 	while (!(inb(base + HA_RSTATUS) & HA_SDRQ));
 	outsw(base + HA_RDATA, &cp, cplen);
@@ -604,7 +611,7 @@ static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned
 	}
 }
 
-static int get_pio_conf_PIO(u32 base, struct get_conf *buf)
+static int get_pio_conf_PIO(unsigned long base, struct get_conf *buf)
 {
 	unsigned long loop = HZ / 2;
 	int z;
@@ -619,30 +626,30 @@ static int get_pio_conf_PIO(u32 base, struct get_conf *buf)
 		if (--loop == 0)
 			goto fail;
 
-	DBG(DBG_PIO && DBG_PROBE, printk(KERN_DEBUG "Issuing PIO READ CONFIG to HBA at %#x\n", base));
+	DBG(DBG_PIO && DBG_PROBE, printk(KERN_DEBUG "Issuing PIO READ CONFIG to HBA at %#lx\n", base));
 	eata_pio_send_command(base, EATA_CMD_PIO_READ_CONFIG);
 
-	loop = HZ / 2;
+	loop = 50;
 	for (p = (unsigned short *) buf; (long) p <= ((long) buf + (sizeof(struct get_conf) / 2)); p++) {
 		while (!(inb(base + HA_RSTATUS) & HA_SDRQ))
 			if (--loop == 0)
 				goto fail;
 
-		loop = HZ / 2;
+		loop = 50;
 		*p = inw(base + HA_RDATA);
 	}
 	if (inb(base + HA_RSTATUS) & HA_SERROR) {
 		DBG(DBG_PROBE, printk("eata_dma: get_conf_PIO, error during "
-					"transfer for HBA at %x\n", base));
+					"transfer for HBA at %lx\n", base));
 		goto fail;
 	}
 
-	if (htonl(EATA_SIGNATURE) != buf->signature)
+	if (cpu_to_be32(EATA_SIGNATURE) != buf->signature)
 		goto fail;
 
 	DBG(DBG_PIO && DBG_PROBE, printk(KERN_NOTICE "EATA Controller found "
-				"at %#4x EATA Level: %x\n",
-				base, (uint) (buf->version)));
+				"at %#4lx EATA Level: %x\n",
+				base, (unsigned int) (buf->version)));
 
 	while (inb(base + HA_RSTATUS) & HA_SDRQ)
 		inw(base + HA_RDATA);
@@ -665,12 +672,12 @@ static int get_pio_conf_PIO(u32 base, struct get_conf *buf)
 static void print_pio_config(struct get_conf *gc)
 {
 	printk("Please check values: (read config data)\n");
-	printk("LEN: %d ver:%d OCS:%d TAR:%d TRNXFR:%d MORES:%d\n", (uint) ntohl(gc->len), gc->version, gc->OCS_enabled, gc->TAR_support, gc->TRNXFR, gc->MORE_support);
-	printk("HAAV:%d SCSIID0:%d ID1:%d ID2:%d QUEUE:%d SG:%d SEC:%d\n", gc->HAA_valid, gc->scsi_id[3], gc->scsi_id[2], gc->scsi_id[1], ntohs(gc->queuesiz), ntohs(gc->SGsiz), gc->SECOND);
+	printk("LEN: %d ver:%d OCS:%d TAR:%d TRNXFR:%d MORES:%d\n", be32_to_cpu(gc->len), gc->version, gc->OCS_enabled, gc->TAR_support, gc->TRNXFR, gc->MORE_support);
+	printk("HAAV:%d SCSIID0:%d ID1:%d ID2:%d QUEUE:%d SG:%d SEC:%d\n", gc->HAA_valid, gc->scsi_id[3], gc->scsi_id[2], gc->scsi_id[1], be16_to_cpu(gc->queuesiz), be16_to_cpu(gc->SGsiz), gc->SECOND);
 	printk("IRQ:%d IRQT:%d FORCADR:%d MCH:%d RIDQ:%d\n", gc->IRQ, gc->IRQ_TR, gc->FORCADR, gc->MAX_CHAN, gc->ID_qest);
 }
 
-static uint print_selftest(uint base)
+static unsigned int print_selftest(unsigned int base)
 {
 	unsigned char buffer[512];
 #ifdef VERBOSE_SETUP
@@ -697,7 +704,7 @@ static uint print_selftest(uint base)
 	return (!(inb(base + HA_RSTATUS) & HA_SERROR));
 }
 
-static int register_pio_HBA(long base, struct get_conf *gc)
+static int register_pio_HBA(long base, struct get_conf *gc, struct pci_dev *pdev)
 {
 	unsigned long size = 0;
 	char *buff;
@@ -714,17 +721,17 @@ static int register_pio_HBA(long base, struct get_conf *gc)
 			return 0;
 	}
 
-	if ((buff = get_pio_board_data((uint) base, gc->IRQ, gc->scsi_id[3], cplen = (htonl(gc->cplen) + 1) / 2, cppadlen = (htons(gc->cppadlen) + 1) / 2)) == NULL) {
-		printk("HBA at %#lx didn't react on INQUIRY. Sorry.\n", (unsigned long) base);
+	if ((buff = get_pio_board_data(base, gc->IRQ, gc->scsi_id[3], cplen = (cpu_to_be32(gc->cplen) + 1) / 2, cppadlen = (cpu_to_be16(gc->cppadlen) + 1) / 2)) == NULL) {
+		printk("HBA at %#lx didn't react on INQUIRY. Sorry.\n", base);
 		return 0;
 	}
 
 	if (!print_selftest(base) && !ALLOW_DMA_BOARDS) {
-		printk("HBA at %#lx failed while performing self test & setup.\n", (unsigned long) base);
+		printk("HBA at %#lx failed while performing self test & setup.\n", base);
 		return 0;
 	}
 
-	size = sizeof(hostdata) + (sizeof(struct eata_ccb) * ntohs(gc->queuesiz));
+	size = sizeof(hostdata) + (sizeof(struct eata_ccb) * be16_to_cpu(gc->queuesiz));
 
 	sh = scsi_register(&driver_template, size);
 	if (sh == NULL)
@@ -749,8 +756,8 @@ static int register_pio_HBA(long base, struct get_conf *gc)
 
 	hd = SD(sh);
 
-	memset(hd->ccb, 0, (sizeof(struct eata_ccb) * ntohs(gc->queuesiz)));
-	memset(hd->reads, 0, sizeof(unsigned long) * 26);
+	memset(hd->ccb, 0, (sizeof(struct eata_ccb) * be16_to_cpu(gc->queuesiz)));
+	memset(hd->reads, 0, sizeof(hd->reads));
 
 	strlcpy(SD(sh)->vendor, &buff[8], sizeof(SD(sh)->vendor));
 	strlcpy(SD(sh)->name, &buff[16], sizeof(SD(sh)->name));
@@ -761,7 +768,7 @@ static int register_pio_HBA(long base, struct get_conf *gc)
 	SD(sh)->revision[4] = buff[35];
 	SD(sh)->revision[5] = 0;
 
-	switch (ntohl(gc->len)) {
+	switch (be32_to_cpu(gc->len)) {
 	case 0x1c:
 		SD(sh)->EATA_revision = 'a';
 		break;
@@ -777,7 +784,7 @@ static int register_pio_HBA(long base, struct get_conf *gc)
 		SD(sh)->EATA_revision = '?';
 	}
 
-	if (ntohl(gc->len) >= 0x22) {
+	if (be32_to_cpu(gc->len) >= 0x22) {
 		if (gc->is_PCI)
 			hd->bustype = IS_PCI;
 		else if (gc->is_EISA)
@@ -811,6 +818,8 @@ static int register_pio_HBA(long base, struct get_conf *gc)
 
 	hd->channel = 0;
 
+	hd->pdev = pci_dev_get(pdev);	/* Keep a PCI reference */
+
 	sh->max_id = 8;
 	sh->max_lun = 8;
 
@@ -841,7 +850,7 @@ static void find_pio_ISA(struct get_conf *buf)
 			continue;
 		if (!get_pio_conf_PIO(ISAbases[i], buf))
 			continue;
-		if (!register_pio_HBA(ISAbases[i], buf))
+		if (!register_pio_HBA(ISAbases[i], buf, NULL))
 			release_region(ISAbases[i], 9);
 		else
 			ISAbases[i] = 0;
@@ -873,7 +882,7 @@ static void find_pio_EISA(struct get_conf *buf)
 				if (get_pio_conf_PIO(base, buf)) {
 					DBG(DBG_PROBE && DBG_EISA, print_pio_config(buf));
 					if (buf->IRQ) {
-						if (!register_pio_HBA(base, buf))
+						if (!register_pio_HBA(base, buf, NULL))
 							release_region(base, 9);
 					} else {
 						printk(KERN_NOTICE "eata_dma: No valid IRQ. HBA " "removed from list\n");
@@ -896,9 +905,9 @@ static void find_pio_PCI(struct get_conf *buf)
 	printk("eata_dma: kernel PCI support not enabled. Skipping scan for PCI HBAs.\n");
 #else
 	struct pci_dev *dev = NULL;
-	u32 base, x;
+	unsigned long base, x;
 
-	while ((dev = pci_find_device(PCI_VENDOR_ID_DPT, PCI_DEVICE_ID_DPT, dev)) != NULL) {
+	while ((dev = pci_get_device(PCI_VENDOR_ID_DPT, PCI_DEVICE_ID_DPT, dev)) != NULL) {
 		DBG(DBG_PROBE && DBG_PCI, printk("eata_pio: find_PCI, HBA at %s\n", pci_name(dev)));
 		if (pci_enable_device(dev))
 			continue;
@@ -926,7 +935,7 @@ static void find_pio_PCI(struct get_conf *buf)
 				 * eventually remove it from the EISA and ISA list 
 				 */
 
-				if (!register_pio_HBA(base, buf)) {
+				if (!register_pio_HBA(base, buf, dev)) {
 					release_region(base, 9);
 					continue;
 				}
@@ -976,12 +985,12 @@ static int eata_pio_detect(struct scsi_host_template *tpnt)
 		printk("Registered HBAs:\n");
 		printk("HBA no. Boardtype: Revis: EATA: Bus: BaseIO: IRQ: Ch: ID: Pr:" " QS: SG: CPL:\n");
 		for (i = 1; i <= registered_HBAs; i++) {
-			printk("scsi%-2d: %.10s v%s 2.0%c  %s %#.4x   %2d   %d   %d   %c"
+			printk("scsi%-2d: %.10s v%s 2.0%c  %s %#.4lx   %2d   %d   %d   %c"
 			       "  %2d  %2d  %2d\n",
 			       HBA_ptr->host_no, SD(HBA_ptr)->name, SD(HBA_ptr)->revision,
 			       SD(HBA_ptr)->EATA_revision, (SD(HBA_ptr)->bustype == 'P') ?
 			       "PCI " : (SD(HBA_ptr)->bustype == 'E') ? "EISA" : "ISA ",
-			       (uint) HBA_ptr->base, HBA_ptr->irq, SD(HBA_ptr)->channel, HBA_ptr->this_id,
+			       HBA_ptr->base, HBA_ptr->irq, SD(HBA_ptr)->channel, HBA_ptr->this_id,
 			       SD(HBA_ptr)->primary ? 'Y' : 'N', HBA_ptr->can_queue,
 			       HBA_ptr->sg_tablesize, HBA_ptr->cmd_per_lun);
 			HBA_ptr = SD(HBA_ptr)->next;
-- 
GitLab


From a07f353701acae77e023f6270e8af353b37af7c4 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 15 Sep 2006 15:34:32 +0100
Subject: [PATCH 1059/1063] [SCSI] Switch some more scsi drivers to
 pci_get_device and refcounted pci structures

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/BusLogic.c    | 15 ++++++++-------
 drivers/scsi/aic7xxx_old.c |  9 +++++++--
 drivers/scsi/dpt_i2o.c     |  6 +++++-
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 59d1adaed73ea..4ea49fd7965e5 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -662,7 +662,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd
 	   particular standard ISA I/O Address need not be probed.
 	 */
 	PrimaryProbeInfo->IO_Address = 0;
-	while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, PCI_Device)) != NULL) {
+	while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, PCI_Device)) != NULL) {
 		struct BusLogic_HostAdapter *HostAdapter = PrototypeHostAdapter;
 		struct BusLogic_PCIHostAdapterInformation PCIHostAdapterInformation;
 		enum BusLogic_ISACompatibleIOPort ModifyIOAddressRequest;
@@ -762,7 +762,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd
 			PrimaryProbeInfo->Bus = Bus;
 			PrimaryProbeInfo->Device = Device;
 			PrimaryProbeInfo->IRQ_Channel = IRQ_Channel;
-			PrimaryProbeInfo->PCI_Device = PCI_Device;
+			PrimaryProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 			PCIMultiMasterCount++;
 		} else if (BusLogic_ProbeInfoCount < BusLogic_MaxHostAdapters) {
 			struct BusLogic_ProbeInfo *ProbeInfo = &BusLogic_ProbeInfoList[BusLogic_ProbeInfoCount++];
@@ -773,7 +773,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd
 			ProbeInfo->Bus = Bus;
 			ProbeInfo->Device = Device;
 			ProbeInfo->IRQ_Channel = IRQ_Channel;
-			ProbeInfo->PCI_Device = PCI_Device;
+			ProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 			NonPrimaryPCIMultiMasterCount++;
 			PCIMultiMasterCount++;
 		} else
@@ -823,7 +823,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd
 	   noting the PCI bus location and assigned IRQ Channel.
 	 */
 	PCI_Device = NULL;
-	while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC, PCI_Device)) != NULL) {
+	while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC, PCI_Device)) != NULL) {
 		unsigned char Bus;
 		unsigned char Device;
 		unsigned int IRQ_Channel;
@@ -850,7 +850,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd
 				ProbeInfo->Bus = Bus;
 				ProbeInfo->Device = Device;
 				ProbeInfo->IRQ_Channel = IRQ_Channel;
-				ProbeInfo->PCI_Device = PCI_Device;
+				ProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 				break;
 			}
 		}
@@ -874,7 +874,7 @@ static int __init BusLogic_InitializeFlashPointProbeInfo(struct BusLogic_HostAda
 	/*
 	   Interrogate PCI Configuration Space for any FlashPoint Host Adapters.
 	 */
-	while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT, PCI_Device)) != NULL) {
+	while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT, PCI_Device)) != NULL) {
 		unsigned char Bus;
 		unsigned char Device;
 		unsigned int IRQ_Channel;
@@ -923,7 +923,7 @@ static int __init BusLogic_InitializeFlashPointProbeInfo(struct BusLogic_HostAda
 			ProbeInfo->Bus = Bus;
 			ProbeInfo->Device = Device;
 			ProbeInfo->IRQ_Channel = IRQ_Channel;
-			ProbeInfo->PCI_Device = PCI_Device;
+			ProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 			FlashPointCount++;
 		} else
 			BusLogic_Warning("BusLogic: Too many Host Adapters " "detected\n", NULL);
@@ -1890,6 +1890,7 @@ static void BusLogic_ReleaseResources(struct BusLogic_HostAdapter *HostAdapter)
 	 */
 	if (HostAdapter->MailboxSpace)
 		pci_free_consistent(HostAdapter->PCI_Device, HostAdapter->MailboxSize, HostAdapter->MailboxSpace, HostAdapter->MailboxSpaceHandle);
+	pci_dev_put(HostAdapter->PCI_Device);
 	HostAdapter->MailboxSpace = NULL;
 	HostAdapter->MailboxSpaceHandle = 0;
 	HostAdapter->MailboxSize = 0;
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index ba3bccafe1137..5dcef48d414fa 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -9194,7 +9194,7 @@ aic7xxx_detect(struct scsi_host_template *template)
     for (i = 0; i < ARRAY_SIZE(aic_pdevs); i++)
     {
       pdev = NULL;
-      while ((pdev = pci_find_device(aic_pdevs[i].vendor_id,
+      while ((pdev = pci_get_device(aic_pdevs[i].vendor_id,
                                      aic_pdevs[i].device_id,
                                      pdev))) {
 	if (pci_enable_device(pdev))
@@ -9651,6 +9651,9 @@ aic7xxx_detect(struct scsi_host_template *template)
            */
           aic7xxx_configure_bugs(temp_p);
 
+          /* Hold a pci device reference */
+          pci_dev_get(temp_p->pdev);
+
           if ( list_p == NULL )
           {
             list_p = current_p = temp_p;
@@ -10987,8 +10990,10 @@ aic7xxx_release(struct Scsi_Host *host)
   if(!p->pdev)
     release_region(p->base, MAXREG - MINREG);
 #ifdef CONFIG_PCI
-  else
+  else {
     pci_release_regions(p->pdev);
+    pci_dev_put(p->pdev);
+  }
 #endif
   prev = NULL;
   next = first_aic7xxx;
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index 45806336ce02e..7b3bd34faf47e 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -184,7 +184,7 @@ static int adpt_detect(struct scsi_host_template* sht)
 	PINFO("Detecting Adaptec I2O RAID controllers...\n");
 
         /* search for all Adatpec I2O RAID cards */
-	while ((pDev = pci_find_device( PCI_DPT_VENDOR_ID, PCI_ANY_ID, pDev))) {
+	while ((pDev = pci_get_device( PCI_DPT_VENDOR_ID, PCI_ANY_ID, pDev))) {
 		if(pDev->device == PCI_DPT_DEVICE_ID ||
 		   pDev->device == PCI_DPT_RAPTOR_DEVICE_ID){
 			if(adpt_install_hba(sht, pDev) ){
@@ -192,8 +192,11 @@ static int adpt_detect(struct scsi_host_template* sht)
 				PERROR("Will not try to detect others.\n");
 				return hba_count-1;
 			}
+			pci_dev_get(pDev);
 		}
 	}
+	if (pDev)
+		pci_dev_put(pDev);
 
 	/* In INIT state, Activate IOPs */
 	for (pHba = hba_chain; pHba; pHba = pHba->next) {
@@ -1075,6 +1078,7 @@ static void adpt_i2o_delete_hba(adpt_hba* pHba)
 			}
 		}
 	}
+	pci_dev_put(pHba->pDev);
 	kfree(pHba);
 
 	if(hba_count <= 0){
-- 
GitLab


From e17ba8f51f69782abc4575dd30848ba5eafaa797 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 23 Sep 2006 18:31:42 -0700
Subject: [PATCH 1060/1063] [SPARC64]: Fix sched_clock() wrapping every ~17
 seconds.

Unfortunately, sparc64 doesn't have an easy way to do a "64 X 64 -->
128" bit multiply like PowerPC and IA64 do.  We were doing a
"64 X 64 --> 64" bit multiply which causes overflow very quickly with
a 30-bit quotient shift.

So use a quotient shift count of 10 instead of 30, just like x86 and
ARM do.

This also fixes the wrapping of printk timestamp values every ~17
seconds.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/time.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 094d3e35be183..b0b4feeec0982 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -983,7 +983,7 @@ static struct time_interpolator sparc64_cpu_interpolator = {
 };
 
 /* The quotient formula is taken from the IA64 port. */
-#define SPARC64_NSEC_PER_CYC_SHIFT	30UL
+#define SPARC64_NSEC_PER_CYC_SHIFT	10UL
 void __init time_init(void)
 {
 	unsigned long clock = sparc64_init_timers();
-- 
GitLab


From 6973dddee264723720e18ad2be5a0a454c0f52d9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sat, 23 Sep 2006 18:32:38 -0700
Subject: [PATCH 1061/1063] [SPARC64]: Update defconfig.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/defconfig | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index 43d9229fca07b..51cf6027b7014 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.18-rc2
-# Fri Jul 21 14:19:24 2006
+# Linux kernel version: 2.6.18
+# Sat Sep 23 18:32:19 2006
 #
 CONFIG_SPARC=y
 CONFIG_SPARC64=y
@@ -9,6 +9,7 @@ CONFIG_64BIT=y
 CONFIG_MMU=y
 CONFIG_TIME_INTERPOLATION=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_AUDIT_ARCH=y
 CONFIG_SPARC64_PAGE_SIZE_8KB=y
 # CONFIG_SPARC64_PAGE_SIZE_64KB is not set
 # CONFIG_SPARC64_PAGE_SIZE_512KB is not set
@@ -37,14 +38,14 @@ CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_RELAY=y
 CONFIG_INITRAMFS_SOURCE=""
-CONFIG_UID16=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_EMBEDDED is not set
+CONFIG_UID16=y
+CONFIG_SYSCTL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,12 +54,12 @@ CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
-CONFIG_RT_MUTEXES=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
 CONFIG_SLAB=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
@@ -169,6 +170,7 @@ CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
 CONFIG_XFRM=y
 CONFIG_XFRM_USER=m
+# CONFIG_XFRM_SUB_POLICY is not set
 CONFIG_NET_KEY=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
@@ -214,11 +216,15 @@ CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+# CONFIG_IPV6_MIP6 is not set
 CONFIG_INET6_XFRM_TUNNEL=m
 CONFIG_INET6_TUNNEL=m
 CONFIG_INET6_XFRM_MODE_TRANSPORT=m
 CONFIG_INET6_XFRM_MODE_TUNNEL=m
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
 CONFIG_IPV6_TUNNEL=m
+# CONFIG_IPV6_SUBTREES is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
 # CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
 
@@ -233,6 +239,7 @@ CONFIG_IP_DCCP_ACKVEC=y
 # DCCP CCIDs Configuration (EXPERIMENTAL)
 #
 CONFIG_IP_DCCP_CCID2=m
+# CONFIG_IP_DCCP_CCID2_DEBUG is not set
 CONFIG_IP_DCCP_CCID3=m
 CONFIG_IP_DCCP_TFRC_LIB=m
 
@@ -259,7 +266,6 @@ CONFIG_VLAN_8021Q=m
 # CONFIG_ATALK is not set
 # CONFIG_X25 is not set
 # CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 
@@ -1386,6 +1392,10 @@ CONFIG_KEYS=y
 # Cryptographic options
 #
 CONFIG_CRYPTO=y
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_MANAGER=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_MD4=y
@@ -1395,9 +1405,12 @@ CONFIG_CRYPTO_SHA256=m
 CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_DES=y
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_TWOFISH_COMMON=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_CAST5=m
-- 
GitLab


From dfdc58ba354adb80d67c99f7be84f95a8e02e466 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 20 Sep 2006 12:00:18 -0400
Subject: [PATCH 1062/1063] [SCSI] SPI transport class: misc DV fixes

Key more of the domain validation settings off the inquiry data from
the disk (in particular, don't try IU or DT unless the disk claims to
support them).

Also add a new dv_in_progress flag to prevent recursive DV.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_spi.c | 30 +++++++++++++++++++++++-------
 include/scsi/scsi_transport_spi.h |  3 ++-
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index 29a9a53cdd1a1..9f070f0d0f2bf 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -47,6 +47,7 @@
 
 /* Private data accessors (keep these out of the header file) */
 #define spi_dv_pending(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_pending)
+#define spi_dv_in_progress(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_in_progress)
 #define spi_dv_mutex(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_mutex)
 
 struct spi_internal {
@@ -240,6 +241,7 @@ static int spi_setup_transport_attrs(struct transport_container *tc,
 	spi_pcomp_en(starget) = 0;
 	spi_hold_mcs(starget) = 0;
 	spi_dv_pending(starget) = 0;
+	spi_dv_in_progress(starget) = 0;
 	spi_initial_dv(starget) = 0;
 	mutex_init(&spi_dv_mutex(starget));
 
@@ -830,28 +832,37 @@ spi_dv_device_internal(struct scsi_device *sdev, u8 *buffer)
 	DV_SET(period, spi_min_period(starget));
 	/* try QAS requests; this should be harmless to set if the
 	 * target supports it */
-	if (scsi_device_qas(sdev))
+	if (scsi_device_qas(sdev)) {
 		DV_SET(qas, 1);
-	/* Also try IU transfers */
-	if (scsi_device_ius(sdev))
+	} else {
+		DV_SET(qas, 0);
+	}
+
+	if (scsi_device_ius(sdev) && spi_min_period(starget) < 9) {
+		/* This u320 (or u640). Set IU transfers */
 		DV_SET(iu, 1);
-	if (spi_min_period(starget) < 9) {
-		/* This u320 (or u640). Ignore the coupled parameters
-		 * like DT and IU, but set the optional ones */
+		/* Then set the optional parameters */
 		DV_SET(rd_strm, 1);
 		DV_SET(wr_flow, 1);
 		DV_SET(rti, 1);
 		if (spi_min_period(starget) == 8)
 			DV_SET(pcomp_en, 1);
+	} else {
+		DV_SET(iu, 0);
 	}
+
 	/* now that we've done all this, actually check the bus
 	 * signal type (if known).  Some devices are stupid on
 	 * a SE bus and still claim they can try LVD only settings */
 	if (i->f->get_signalling)
 		i->f->get_signalling(shost);
 	if (spi_signalling(shost) == SPI_SIGNAL_SE ||
-	    spi_signalling(shost) == SPI_SIGNAL_HVD)
+	    spi_signalling(shost) == SPI_SIGNAL_HVD ||
+	    !scsi_device_dt(sdev)) {
 		DV_SET(dt, 0);
+	} else {
+		DV_SET(dt, 1);
+	}
 	/* Do the read only INQUIRY tests */
 	spi_dv_retrain(sdev, buffer, buffer + sdev->inquiry_len,
 		       spi_dv_device_compare_inquiry);
@@ -907,6 +918,10 @@ spi_dv_device(struct scsi_device *sdev)
 	if (unlikely(scsi_device_get(sdev)))
 		return;
 
+	if (unlikely(spi_dv_in_progress(starget)))
+		return;
+	spi_dv_in_progress(starget) = 1;
+
 	buffer = kzalloc(len, GFP_KERNEL);
 
 	if (unlikely(!buffer))
@@ -938,6 +953,7 @@ spi_dv_device(struct scsi_device *sdev)
  out_free:
 	kfree(buffer);
  out_put:
+	spi_dv_in_progress(starget) = 0;
 	scsi_device_put(sdev);
 }
 EXPORT_SYMBOL(spi_dv_device);
diff --git a/include/scsi/scsi_transport_spi.h b/include/scsi/scsi_transport_spi.h
index 302680c0c0dea..da180f738477e 100644
--- a/include/scsi/scsi_transport_spi.h
+++ b/include/scsi/scsi_transport_spi.h
@@ -53,7 +53,8 @@ struct spi_transport_attrs {
 	unsigned int support_ius; /* support Information Units */
 	unsigned int support_qas; /* supports quick arbitration and selection */
 	/* Private Fields */
-	unsigned int dv_pending:1; /* Internal flag */
+	unsigned int dv_pending:1; /* Internal flag: DV Requested */
+	unsigned int dv_in_progress:1;	/* Internal: DV started */
 	struct mutex dv_mutex; /* semaphore to serialise dv */
 };
 
-- 
GitLab


From 1b73c4bb063c4aa0cdc25425809bb87f65ee75af Mon Sep 17 00:00:00 2001
From: James Bottomley <jejb@sparkweed.localdomain>
Date: Sat, 23 Sep 2006 22:07:20 -0500
Subject: [PATCH 1063/1063] [SCSI] scsi_transport_fc: fixup netlink arguments

nlmsg_multicast now takes an extra allocation flag, so add it to
the use in the fibre channel transport class.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_transport_fc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 4ab176ed480d1..38c215a78f69f 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -480,7 +480,8 @@ fc_host_post_event(struct Scsi_Host *shost, u32 event_number,
 	event->event_code = event_code;
 	event->event_data = event_data;
 
-	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS);
+	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS,
+			      GFP_KERNEL);
 	if (err && (err != -ESRCH))	/* filter no recipient errors */
 		/* nlmsg_multicast already kfree_skb'd */
 		goto send_fail;
@@ -554,7 +555,8 @@ fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number,
 	event->event_code = FCH_EVT_VENDOR_UNIQUE;
 	memcpy(&event->event_data, data_buf, data_len);
 
-	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS);
+	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS,
+			      GFP_KERNEL);
 	if (err && (err != -ESRCH))	/* filter no recipient errors */
 		/* nlmsg_multicast already kfree_skb'd */
 		goto send_vendor_fail;
-- 
GitLab